File Coverage

blib/lib/Bio/DB/Query/HIVQuery.pm
Criterion Covered Total %
statement 267 487 54.8
branch 82 196 41.8
condition 17 54 31.4
subroutine 35 49 71.4
pod 15 19 78.9
total 416 805 51.6


line stmt bran cond sub pod time code
1             # to do: support for comment, reference annotations
2              
3             # $Id: HIVQuery.pm 232 2008-12-11 14:51:51Z maj $
4             #
5             # BioPerl module for Bio::DB::Query::LANLQuery
6             #
7             # Please direct questions and support issues to
8             #
9             # Cared for by Mark A. Jensen
10             #
11             # Copyright Mark A. Jensen
12             #
13             # You may distribute this module under the same terms as perl itself
14              
15             # POD documentation - main docs before the code
16              
17             =head1 NAME
18              
19             Bio::DB::Query::HIVQuery - Query interface to the Los Alamos HIV Sequence Database
20              
21             =head1 SYNOPSIS
22              
23             $q = new Bio::DB::Query::HIVQuery(" C[subtype] ZA[country] CXCR4[coreceptor] ");
24             $q = new Bio::DB::Query::HIVQuery(
25             -query=>{'subtype'=>'C',
26             'country'=>'ZA',
27             'coreceptor'=>'CXCR4'});
28              
29             $ac = $q->get_annotations_by_id(($q->ids)[0]);
30             $ac->get_value('Geo', 'country') # returns 'SOUTH AFRICA'
31              
32             $db = new Bio::DB::HIV();
33             $seqio = $db->get_Stream_by_query($q); # returns annotated Bio::Seqs
34              
35             # get subtype C sequences from South Africa and Brazil,
36             # with associated info on patient health, coreceptor use, and
37             # infection period:
38              
39             $q = new Bio::DB::Query::HIVQuery(
40             -query => {
41             'query' => {'subtype'=>'C',
42             'country'=>['ZA', 'BR']},
43             'annot' => ['patient_health',
44             'coreceptor',
45             'days_post_infection']
46             });
47            
48              
49             =head1 DESCRIPTION
50              
51             Bio::DB::Query::HIVQuery provides a query-like interface to the
52             cgi-based Los Alamos National Laboratory (LANL) HIV Sequence
53             Database. It uses Bioperl facilities to capture both sequences and
54             annotations in batch in an automated and computable way. Use with
55             L to create C objects and annotated C
56             streams.
57              
58             =head2 Query format
59              
60             The interface implements a simple query language emulation that understands AND,
61             OR, and parenthetical nesting. The basic query unit is
62              
63             (match1 match2 ...)[fieldname]
64              
65             Sequences are returned for which C equals C.
66             These units can be combined with AND, OR and parentheses. For example:
67              
68             (B, C)[subtype] AND (2000, 2001, 2002, 2003)[year] AND ((CN)[country] OR (ZA)[country])
69              
70             which can be shortened to
71              
72             (B C)[subtype] (2000 2001 2002 2003)[year] (CN ZA)[country]
73              
74             The user can specify annotation fields, that do not restrict the query, but
75             arrange for the return of the associated field data for each sequence returned.
76             Specify annotation fields between curly braces, as in:
77              
78             (B C)[subtype] 2000[year] {country cd4_count cd8_count}
79              
80             Annotations can be accessed off the query using methods described in APPENDIX.
81              
82             =head2 Hash specifications for query construction
83              
84             Single query specifications can be made as hash references provided to the
85             C<-query> argument of the constructor. There are two forms:
86              
87             -query => { 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' }
88              
89             equivalent to
90              
91             -query => [ 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' ]
92              
93             or
94              
95             -query => { 'query' => {'country'=>'BR', 'phenotype'=>'NSI'},
96             'annot' => ['cd4_count'] }
97              
98             In both cases, the CD4 count is included in the annotations returned, but does
99             not restrict the rest of the query.
100              
101             To 'OR' multiple values of a field, use an anonymous array ref:
102              
103             -query => { 'country'=>['ZA','BR','NL'], 'subtype'=>['A', 'C', 'D'] }
104              
105             =head2 Valid query field names
106              
107             An attempt was made to make the query field names natural and easy to
108             remember. Aliases are specified in an XML file (C) that is part
109             of the distribution. Custom field aliases can be set up by modifying this file.
110              
111             An HTML cheatsheet with valid field names, aliases, and match data can be
112             generated from the XML by using Chelp('help.html')>. A query
113             can also be validated locally before it is unleashed on the server; see below.
114              
115             =head2 Annotations
116              
117             LANL DB annotations have been organized into a number of natural
118             groupings, tagged C, C, C, and C. After a
119             successful query, each id is associated with a tree of
120             L objects. These can be accessed with
121             methods C and C described in APPENDIX.
122              
123             =head2 Delayed/partial query runs
124              
125             Accessing the LANL DB involves multiple HTTP requests. The query can
126             be instructed to proceed through all (the default) or only some of
127             them, using the named parameter C.
128              
129             To validate a query locally, use
130              
131             $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>0 )
132              
133             which will throw an exception if a field name or option is invalid.
134              
135             To get a query count only, you can save a server hit by using
136              
137             $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>1 )
138              
139             and asking for C<$q-Ecount>. To finish the query, do
140              
141             $q->_do_query(2)
142              
143             which picks up where you left off.
144              
145             C<-RUN_OPTION=E2>, the default, runs the full query, returning ids and
146             annotations.
147              
148             =head2 Query re-use
149              
150             You can clear the query results, retaining the same LANL session and query spec,
151             by doing C<$q-E_reset>. Change the query, and rerun with
152             C<$q-E_do_query($YOUR_RUN_OPTION)>.
153              
154             =head1 FEEDBACK
155              
156             =head2 Mailing Lists
157              
158             User feedback is an integral part of the evolution of this and other
159             Bioperl modules. Send your comments and suggestions preferably to
160             the Bioperl mailing list. Your participation is much appreciated.
161              
162             bioperl-l@bioperl.org - General discussion
163             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
164              
165             =head2 Support
166              
167             Please direct usage questions or support issues to the mailing list:
168              
169             I
170              
171             rather than to the module maintainer directly. Many experienced and
172             reponsive experts will be able look at the problem and quickly
173             address it. Please include a thorough description of the problem
174             with code and data examples if at all possible.
175              
176             =head2 Reporting Bugs
177              
178             Report bugs to the Bioperl bug tracking system to help us keep track
179             of the bugs and their resolution. Bug reports can be submitted via
180             the web:
181              
182             https://github.com/bioperl/bioperl-live/issues
183              
184             =head1 AUTHOR - Mark A. Jensen
185              
186             Email maj@fortinbras.us
187              
188             =head1 CONTRIBUTORS
189              
190             Mark A. Jensen
191              
192             =head1 APPENDIX
193              
194             The rest of the documentation details each of the object methods.
195             Internal methods are usually preceded with a _
196              
197             =cut
198              
199             # Let the code begin...
200              
201             package Bio::DB::Query::HIVQuery;
202 1     1   756 use strict;
  1         1  
  1         27  
203 1     1   4 use vars qw( $LANL_BASE $LANL_MAP_DB $LANL_MAKE_SEARCH_IF $LANL_SEARCH $SCHEMA_FILE $RUN_OPTION );
  1         1  
  1         67  
204              
205             # Object preamble - inherits from Bio::DB::QueryI
206 1     1   345 use Bio::Root::Root;
  1         1  
  1         25  
207 1     1   383 use Bio::Annotation::Collection;
  1         2  
  1         20  
208 1     1   339 use Bio::Annotation::Comment;
  1         1  
  1         20  
209 1     1   365 use Bio::Annotation::Reference;
  1         1  
  1         21  
210 1     1   319 use Bio::WebAgent;
  1         1  
  1         20  
211 1     1   4 use XML::Simple;
  1         1  
  1         6  
212 1     1   62 use CGI;
  1         1  
  1         5  
213              
214 1     1   597 use Bio::DB::HIV::HIVQueryHelper;
  1         1  
  1         28  
215              
216 1     1   4 use base qw(Bio::Root::Root Bio::DB::QueryI);
  1         1  
  1         368  
217              
218             # globals
219             BEGIN {
220             # change base to new search page 01/14/09 /maj
221 1     1   1 $LANL_BASE = "http://www.hiv.lanl.gov/components/sequence/HIV/asearch";
222 1         1 $LANL_MAP_DB = "map_db.comp";
223 1         1 $LANL_MAKE_SEARCH_IF = "make_search_if.comp";
224 1         1 $LANL_SEARCH = "search.comp";
225 1         5 $SCHEMA_FILE = Bio::Root::IO->catfile(qw(Bio DB HIV lanl-schema.xml));
226 1         3 $RUN_OPTION = 2; # execute query
227             # exceptions
228 1         10 @Bio::SchemaNotInit::Exception::ISA = qw( Bio::Root::Exception );
229 1         6 @Bio::WebError::Exception::ISA = qw( Bio::Root::Exception );
230 1         6 @Bio::QueryNotMade::Exception::ISA = qw( Bio::Root::Exception );
231 1         12 @Bio::QueryStringException::Exception::ISA = qw( Bio::Root::Exception );
232 1         3682 @Bio::HIVSorry::Exception::ISA = qw ( Bio::Root::Exception );
233              
234             }
235              
236             =head1 Constructor
237              
238             =head2 new
239              
240             Title : new
241             Usage : my $hiv_query = new Bio::DB::Query::HIVQuery();
242             Function: Builds a new Bio::DB::Query::HIVQuery object,
243             running a sequence query against the Los Alamos
244             HIV sequence database
245             Returns : an instance of Bio::DB::Query::HIVQuery
246             Args :
247              
248             =cut
249              
250             sub new {
251 1     1 1 132 my($class,@args) = @_;
252 1         11 my $self = $class->SUPER::new(@args);
253             # constructor option for web agent parameter spec: added 01/14/09 /maj
254 1         10 my ($query, $ids, $lanl_base, $lanl_map_db, $lanl_make_search_if, $lanl_search, $schema_file,$run_option, $uahash) =
255             $self->_rearrange([ qw(QUERY
256             IDS
257             LANL_BASE
258             LANL_MAP_DB
259             LANL_MAKE_SEARCH_IF
260             LANL_SEARCH
261             SCHEMA_FILE
262             RUN_OPTION
263             USER_AGENT_HASH
264             )], @args);
265              
266             # default globals
267 1   33     6 $lanl_base||= $LANL_BASE;
268 1   33     4 $lanl_map_db||=$LANL_MAP_DB;
269 1   33     10 $lanl_make_search_if||=$LANL_MAKE_SEARCH_IF;
270 1   33     4 $lanl_search||=$LANL_SEARCH;
271 1   33     3 $schema_file||=$SCHEMA_FILE;
272 1   50     5 $uahash ||= {timeout => 90};
273 1 50       2 defined $run_option || ($run_option = $RUN_OPTION);
274              
275 1         4 $self->lanl_base($lanl_base);
276 1         3 $self->map_db($lanl_map_db);
277 1         4 $self->make_search_if($lanl_make_search_if);
278 1         3 $self->search_($lanl_search);
279 1         4 $self->_run_option($run_option);
280 1         3 $self->_ua_hash($uahash);
281            
282             # catch this at the top
283 1 50       21 if (-e $schema_file) {
284 1         3 $self->_schema_file($schema_file);
285             }
286             else { # look around
287 0         0 my ($p) = $self->_schema_file( [grep {$_} map {
288 0         0 my $p = Bio::Root::IO->catfile($_, $schema_file);
  0         0  
289 0 0       0 $p if -e $p
290             } (@INC,"")]->[0]);
291 0 0       0 $self->throw(-class=>"Bio::Root::NoSuchThing",
292             -text=>"Schema file \"".$self->_schema_file."\" cannot be found",
293             -value=>$self->_schema_file) unless -e $self->_schema_file;
294             }
295              
296 1         4 $self->count(0);
297 1         2 $self->{_schema} = HIVSchema->new($self->_schema_file);
298              
299             # internal storage and flags
300 1         5 $self->{'_lanl_query'} = [];
301 1         2 $self->{'_lanl_response'} = [];
302 1         2 $self->{'_annotations'} = {}; # container for annotation collections assoc. with ids
303 1         2 $self->{'_RUN_LEVEL'} = undef; # set in _do_query()
304            
305             # work
306 1 50       4 defined $query && $self->query($query);
307 1 50       3 defined $ids && $self->ids($ids);
308            
309             # exec query
310              
311 1 50       5 $self->_do_query($self->_run_option) if $self->query;
312              
313 1         6 return $self;
314             }
315              
316             =head1 QueryI compliance
317              
318             =head2 count
319              
320             Title : count
321             Usage : $hiv_query->count($newval)
322             Function: return number of sequences found
323             Example :
324             Returns : value of count (a scalar)
325             Args : on set, new value (a scalar or undef, optional)
326             Note : count warns if it is accessed for reading before query
327             has been executed to at least level 1
328              
329             =cut
330              
331             sub count{
332 11     11 1 27 my $self = shift;
333 11 100       27 return $self->{'count'} = shift if @_;
334 1 50 33     9 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 1)) {
335 1         8 $self->warn('Query not yet run at > level 1');
336             }
337 0         0 return $self->{'count'};
338             }
339              
340             =head2 ids
341              
342             Title : ids
343             Usage : $hiv_query->ids($newval)
344             Function: LANL ids of returned sequences
345             Example :
346             Returns : value of ids (an arrayref of sequence accessions/ids)
347             Args : on set, new value (an arrayref or undef, optional)
348              
349             =cut
350              
351             sub ids{
352 10     10 1 27 my $self = shift;
353 10 100       25 if (@_) {
354 9         9 my $a = shift;
355 9 50       26 $self->throw(-class=>'Bio::Root::BadParameter',
356             -text=>'Arrayref required',
357             -value=> ref $a) unless ref($a) eq 'ARRAY';
358 9 50       16 if (@$a) {
359 0         0 @{$self->{'ids'}}{@$a} = (1) x @$a;
  0         0  
360 0         0 return $a;
361             }
362             else { #with empty arrayref, clear the hash
363 9         19 $self->{'ids'} = {};
364             }
365             }
366 10 50       27 return keys %{$self->{'ids'}} if $self->{'ids'};
  10         21  
367             }
368              
369             =head2 query
370              
371             Title : query
372             Usage : $hiv_query->query
373             Function: Get/set the submitted query hash or string
374             Example :
375             Returns : hashref or string
376             Args : query in hash or string form (see DESCRIPTION)
377              
378             =cut
379              
380             sub query {
381 18     18 1 20 my $self = shift;
382 18 100       40 return $self->{'query'} = shift if @_;
383 10         25 return $self->{'query'};
384             }
385              
386             =head1 Bio::DB::Query::HIVQuery specific methods
387              
388             =head2 help
389              
390             Title : help
391             Usage : $hiv_query->help("help.html")
392             Function: get html-formatted listing of valid fields/aliases/options
393             based on current schema xml
394             Example : perl -MBio::DB::Query::HIVQuery -e "new Bio::DB::Query::HIVQuery()->help" | lynx -stdin
395             Returns : HTML
396             Args : optional filename; otherwise prints to stdout
397              
398             =cut
399              
400             sub help{
401 1     1 1 64 my ($self, $fname) = @_;
402 1         2 my (@ret, @tok);
403 1         4 my $schema = $self->_schema;
404 1         9 my $h = CGI->new();
405              
406 1         233 my (@tbls, @flds, @als, @opts, $fh);
407 1 50       4 if ($fname) {
408 1 50       46 open $fh, '>', $fname or $self->throw(-class => 'Bio::Root::IOException',
409             -text => "Error opening help html file $fname for writing",
410             -value => $!);
411             }
412             else {
413 0         0 open $fh, ">&1";
414             }
415 1         4 @tbls = $schema->tables;
416 1         10 @tbls = ('COMMAND', grep !/COMMAND/,@tbls);
417 1         7 print $fh (
418             $h->start_html(-title=>"HIVQuery Help")
419             );
420 1         345 print $fh $h->a({-id=>'TOP'}, $h->h2("Valid HIVQuery query fields and match data"));
421 1         85 print $fh "Fields are organized below according to their Los Alamos HIV database tables. Use aliases in place of full field names in queries; for example:
";
422 1         1 print $fh "
(CCR5 CXCR4)[coreceptor]
";
423 1         2 print $fh "rather than";
424 1         2 print $fh "
(CCR5 CXCR4)[seq_sample.ssam_second_receptor]
";
425 1         2 print $fh "(which does work, however). Click hyperlinks to see valid search options within the field. The token Any is the wildcard for all fields.

";
426 1         6 print $fh $h->start_table({-style=>"font-family:sans-serif;"}) ;
427 1         38 foreach my $tbl (@tbls) {
428 14         3486 @flds = grep /^$tbl/, $schema->fields;
429 14         104 @flds = grep !/_id/, @flds;
430 14         49 print $fh (
431             $h->start_Tr({-style=>"background-color: lightblue;"}),
432             $h->td([$h->a({-id=>$tbl},$tbl), $h->span({-style=>"font-style:italic"},"fields"), $h->span({-style=>"font-style:italic"}, "aliases")]),
433             $h->end_Tr
434             );
435 14         2154 foreach my $fld (@flds) {
436 74         16267 @als = reverse $schema->aliases($fld);
437 74   100     333 print $fh (
438             # note that aliases can sometimes be empty
439             $h->Tr( $h->td( ["", $h->a({-href=>"#opt$fld"}, shift @als || '???'), $h->code(join(',',@als))] ))
440             );
441 74         5785 my @tmp = grep {$_} $schema->options($fld);
  1158         949  
442 74 100 100     158 @tmp = sort {(($a =~ /^[0-9]+$/) && $b =~ /^[0-9]+$/) ? $a<=>$b : $a cmp $b} @tmp;
  5860         9513  
443 74 100       263 if (grep /Any/,@tmp) {
444 31         324 @tmp = grep !/Any/, @tmp;
445 31         93 unshift @tmp, 'Any';
446             }
447             #print STDERR join(', ',@tmp)."\n";
448 74 100       200 push @opts, $h->div(
    100          
449             {-style=>"font-family:sans-serif;font-size:small"},
450             $h->hr,
451             $h->a(
452             {-id=>"opt$fld"},
453             "Valid options for $fld: "
454             ),
455             $h->blockquote(
456             @tmp ? $h->code(join(", ", @tmp)) : $h->i("free text")
457             ),
458             $h->span(
459             "Other aliases: "
460             ),
461             $h->blockquote(
462             @als ? $h->code(join(",",@als)) : "none"
463             ),
464             " ",
465             $h->table(
466             $h->Tr(
467             $h->td([
468             $h->a({-href=>"#$tbl"}, $h->small('BACK')),
469             $h->a({-href=>"#TOP"}, $h->small('TOP'))
470             ])
471             )
472             )
473             );
474            
475             }
476             }
477 1         266 print $fh $h->end_table;
478 1         283 print $fh @opts;
479 1         6 print $fh $h->end_html;
480 1         37 close($fh);
481 1         34 return 1;
482             }
483              
484             =head1 Annotation manipulation methods
485              
486             =head2 get_annotations_by_ids
487              
488             Title : get_annotations_by_ids (or ..._by_id)
489             Usage : $ac = $hiv_query->get_annotations_by_ids(@ids)
490             Function: Get the Bio::Annotation::Collection for these sequence ids
491             Example :
492             Returns : A Bio::Annotation::Collection object
493             Args : an array of sequence ids
494              
495             =cut
496              
497             sub get_annotations_by_ids{
498 1     1 1 2 my $self = shift;
499 1         2 my @ids = @_;
500 1         1 my @ret;
501 1 50 33     5 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
502 1         3 $self->warn('Requires query run at level 2');
503 0         0 return ();
504             }
505 0 0       0 @ret = map {$self->{'_annotations'}->{$_}} @ids if exists($self->{'_annotations'});
  0         0  
506              
507 0 0       0 return (wantarray ? @ret : $ret[0]) if @ret;
    0          
508 0         0 return {};
509             }
510              
511             # singular alias
512             sub get_annotations_by_id {
513 1     1 0 4 shift->get_annotations_by_ids(@_);
514             }
515              
516             =head2 add_annotations_for_id
517              
518             Title : add_annotations_for_id
519             Usage : $hiv_query->add_annotations_for_id( $id ) to create a new
520             empty collection for $id
521             $hiv_query->add_annotations_for_id( $id, $ac ) to associate
522             $ac with $id
523             Function: Associate a Bio::Annotation::Collection with this sequence id
524             Example :
525             Returns : a Bio::Annotation::Collection object
526             Args : sequence id [, Bio::Annotation::Collection object]
527              
528             =cut
529              
530             sub add_annotations_for_id{
531 0     0 1 0 my $self = shift;
532 0         0 my ($id, $ac) = @_;
533 0 0       0 $id = "" unless defined $id; # avoid warnings
534 0 0       0 $ac = Bio::Annotation::Collection->new() unless defined $ac;
535 0 0       0 $self->throw(-class=>'Bio::Root::BadParameter'
536             -text=>'Bio::Annotation::Collection required at arg 2',
537             -value=>"") unless ref($ac) eq 'Bio::Annotation::Collection';
538            
539 0 0       0 $self->{'_annotations'}->{$id} = $ac unless exists($self->{'_annotations'}->{$id});
540 0         0 return $ac;
541             }
542              
543             =head2 remove_annotations_for_ids
544              
545             Title : remove_annotations_for_ids (or ..._for_id)
546             Usage : $hiv_query->remove_annotations_for_ids( @ids)
547             Function: Remove annotation collection for this sequence id
548             Example :
549             Returns : An array of the previous annotation collections for these ids
550             Args : an array of sequence ids
551              
552             =cut
553              
554             sub remove_annotations_for_ids {
555 0     0 1 0 my $self = shift;
556 0         0 my @ids = @_;
557 0         0 my @ac;
558 0         0 foreach (@ids) {
559 0         0 push @ac, delete $self->{'_annotations'}->{$_};
560             }
561 0         0 return @ac;
562             }
563              
564             # singular alias
565             sub remove_annotations_for_id {
566 0     0 0 0 shift->remove_annotations_for_ids(@_);
567             }
568              
569             =head2 remove_annotations
570              
571             Title : remove_annotations
572             Usage : $hiv_query->remove_annotations()
573             Function: Remove all annotation collections for this object
574             Example :
575             Returns : The previous annotation collection hash for this object
576             Args : none
577              
578             =cut
579              
580             sub remove_annotations {
581 0     0 1 0 my $self = shift;
582              
583 0         0 my $ach = $self->{'_annotations'};
584 0         0 $self->{'_annotations'} = {};
585 0         0 return $ach;
586             }
587              
588             =head2 get_value
589              
590             Title : get_value
591             Usage : $ac->get_value($tagname) -or-
592             $ac->get_value( $tag_level1, $tag_level2,... )
593             Function: access the annotation value assocated with the given tags
594             Example :
595             Returns : a scalar
596             Args : an array of tagnames that descend into the annotation tree
597             Note : this is a L method added in
598             L
599              
600             =cut
601              
602             =head2 put_value
603              
604             Title : put_value
605             Usage : $ac->put_value($tagname, $value) -or-
606             $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
607             $ac->put_value( [$tag_level1, $tag_level2, ...] )
608             Function: create a node in an annotation tree, and assign a scalar value to it
609             if a value is specified
610             Example :
611             Returns : scalar or a Bio::AnnotationCollection object
612             Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
613             -VALUE=>$value) -or-
614             \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
615             Notes : This is a L method added in
616             L.
617             If intervening nodes do not exist, put_value creates them, replacing
618             existing nodes. So if $ac->put_value('x', 10) was done, then later,
619             $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed,
620             and $ac->get_value('x') will now return the annotation collection
621             with tagname 'y'.
622              
623             =cut
624              
625             =head2 get_keys
626              
627             Title : get_keys
628             Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...)
629             Function: Get an array of tagnames underneath the named tag nodes
630             Example : # prints the values of the members of Category 1...
631             print map { $ac->get_value($_) } $ac->get_keys('Category 1') ;
632             Returns : array of tagnames or empty list if the arguments represent a leaf
633             Args : [array of] tagname[s]
634              
635             =cut
636              
637             =head1 GenBank accession manipulation methods
638              
639             =head2 get_accessions
640              
641             Title : get_accessions
642             Usage : $hiv_query->get_accessions()
643             Function: Return an array of GenBank accessions associated with these
644             sequences (available only after a query is subjected to a
645             full run (i.e., when $RUN_OPTION == 2)
646             Example :
647             Returns : array of gb accession numbers, or () if none found for this query
648             Args : none
649              
650             =cut
651              
652             sub get_accessions{
653 0     0 1 0 my $self = shift;
654 0         0 my @ret;
655 0 0 0     0 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
656 0         0 $self->warn('Requires query run at level 2');
657 0         0 return ();
658             }
659 0         0 my @ac = $self->get_annotations_by_ids($self->ids);
660 0         0 foreach (@ac) {
661 0         0 push @ret, $_->get_value('Special','accession');
662             };
663 0         0 return @ret;
664             }
665              
666             =head2 get_accessions_by_ids
667              
668             Title : get_accessions_by_ids (or ..._by_id)
669             Usage : $hiv_query->get_accessions_by_ids(@ids)
670             Function: Return an array of GenBank accessions associated with these
671             LANL ids (available only after a query is subjected to a
672             full run (i.e., when $RUN_OPTION == 2)
673             Example :
674             Returns : array of gb accession numbers, or () if none found for this query
675             Args : none
676              
677             =cut
678              
679             sub get_accessions_by_ids {
680 0     0 1 0 my $self = shift;
681 0         0 my @ids = @_;
682 0         0 my @ret;
683 0 0 0     0 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
684 0         0 $self->warn('Requires query run at level 2');
685 0         0 return ();
686             }
687 0         0 my @ac = $self->get_annotations_by_ids(@ids);
688 0         0 foreach (@ac) {
689 0         0 push @ret, $_->get_value('Special', 'accession');
690             };
691 0 0       0 return wantarray ? @ret : $ret[0];
692             }
693              
694             # singular alias
695             sub get_accessions_by_id {
696 0     0 0 0 shift->get_accessions_by_ids(@_);
697             }
698              
699             ##########
700              
701             =head1 Query control methods
702              
703             =head2 _do_query
704              
705             Title : _do_query
706             Usage : $hiv_query->_do_query or $hiv_query->_do_query($run_level)
707             Function: Execute the query according to argument or $RUN_OPTION
708             and set _RUN_LEVEL
709             extent of query reflects the value of argument
710             0 : validate only (no HTTP action)
711             1 : return sequence count only
712             2 : return sequence ids (full query, returns with annotations)
713             noop if current _RUN_LEVEL of query is >= argument or $RUN_OPTION,
714             Example :
715             Returns : actual _RUN_LEVEL (0, 1, or 2) achieved
716             Args : desired run level (optional, global $RUN_OPTION is default)
717              
718             =cut
719              
720             sub _do_query{
721 9     9   105 my ($self,$rl) = @_;
722 9 100       22 $rl = $RUN_OPTION unless defined $rl;
723 9 50       86 $self->throw(-class=>"Bio::Root::BadParameter",
724             -text=>"Invalid run option \"$RUN_OPTION\"",
725             -value=>$RUN_OPTION) unless grep /^$RUN_OPTION$/, (0, 1, 2);
726 9 50       20 (!defined($self->{'_RUN_LEVEL'})) && do {
727 9         18 $self->_create_lanl_query();
728 5         13 $self->{'_RUN_LEVEL'} = 0;
729             };
730 5 0 0     11 ($rl > 0) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 0)) && do {
      33        
731 0         0 $self->_do_lanl_request();
732 0         0 $self->{'_RUN_LEVEL'} = 1;
733             };
734 5 0 0     12 ($rl > 1) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 1)) && do {
      33        
735 0         0 $self->_parse_lanl_response();
736 0         0 $self->{'_RUN_LEVEL'} = 2;
737             };
738 5         68 return $self->{'_RUN_LEVEL'};
739             }
740              
741             =head2 _reset
742              
743             Title : _reset
744             Usage : $hiv_query->_reset
745             Function: Resets query storage, count, and ids, while retaining session id,
746             original query string, and db schema
747             Example :
748             Returns : void
749             Args : none
750              
751             =cut
752              
753             sub _reset{
754 9     9   13 my $self = shift;
755 9         26 $self->ids([]);
756 9         22 $self->count(0);
757 9         13 $self->{'_annotations'} = {};
758 9         19 $self->{'_lanl_response'} = [];
759 9         11 $self->{'_lanl_query'} = [];
760 9         21 $self->{'_RUN_LEVEL'} = undef;
761 9         11 return;
762             }
763              
764             =head2 _session_id
765              
766             Title : _session_id
767             Usage : $hiv_query->_session_id($newval)
768             Function: Get/set HIV db session id (initialized in _do_lanl_request)
769             Example :
770             Returns : value of _session_id (a scalar)
771             Args : on set, new value (a scalar or undef, optional)
772              
773             =cut
774              
775             sub _session_id{
776 0     0   0 my $self = shift;
777              
778 0 0       0 return $self->{'_session_id'} = shift if @_;
779 0         0 return $self->{'_session_id'};
780             }
781             =head2 _run_level
782              
783             Title : _run_level
784             Usage : $obj->_run_level($newval)
785             Function: returns the level at which the query has so far been run
786             Example :
787             Returns : value of _run_level (a scalar)
788             Args : on set, new value (a scalar or undef, optional)
789              
790             =cut
791              
792             sub _run_level{
793 0     0   0 my $self = shift;
794              
795 0 0       0 return $self->{'_RUN_LEVEL'} = shift if @_;
796 0         0 return $self->{'_RUN_LEVEL'};
797             }
798              
799             =head2 _run_option
800              
801             Title : _run_option
802             Usage : $hiv_query->_run_option($newval)
803             Function: Get/set HIV db query run option (see _do_query for values)
804             Example :
805             Returns : value of _run_option (a scalar)
806             Args : on set, new value (a scalar or undef, optional)
807              
808             =cut
809              
810             sub _run_option{
811 2     2   4 my $self = shift;
812              
813 2 100       6 return $self->{'_run_option'} = shift if @_;
814 1         3 return $self->{'_run_option'};
815             }
816              
817             =head2 _ua_hash
818              
819             Title : _ua_hash
820             Usage : $obj->_ua_hash($newval)
821             Function:
822             Example :
823             Returns : value of _ua_hash (a scalar)
824             Args : on set, new value (a scalar or undef, optional)
825              
826             =cut
827              
828             sub _ua_hash{
829 1     1   1 my $self = shift;
830 1 50       3 if (@_) {
831 1         4 for (ref $_[0]) {
832 1 50       2 $_ eq 'HASH' && do {
833 1         2 $self->{'_ua_hash'} = $_[0];
834 1         3 last;
835             };
836 0 0       0 !$_ && do {
837 0         0 $self->{'_ua_hash'} = {@_};
838 0         0 last;
839             };
840 0         0 do {
841 0         0 $self->throw("Type ".ref($_)." unsupported as arg in _ua_hash");
842             };
843            
844             }
845             }
846 1         1 return %{$self->{'_ua_hash'}};
  1         1  
847             }
848              
849              
850             #######
851              
852             =head1 Internals
853              
854             =head2 add_id
855              
856             Title : add_id
857             Usage : $hiv_query->add_id($id)
858             Function: Add new id to ids
859             Example :
860             Returns : the new id
861             Args : a sequence id
862              
863             =cut
864              
865             sub add_id {
866 0     0 1 0 my $self = shift;
867 0         0 my $id = shift;
868 0 0       0 $id = "" unless defined $id; # avoid warnings
869 0         0 ${$self->{'ids'}}{$id}++;
  0         0  
870 0         0 return $id;
871             }
872              
873              
874             sub lanl_base{
875 4     4 0 6 my $self = shift;
876 4 100       11 return $self->{'lanl_base'} = shift if @_;
877 3         13 return $self->{'lanl_base'};
878             }
879              
880             =head2 map_db
881              
882             Title : map_db
883             Usage : $obj->map_db($newval)
884             Function:
885             Example :
886             Returns : value of map_db (a scalar)
887             Args : on set, new value (a scalar or undef, optional)
888              
889             =cut
890              
891             sub map_db{
892 2     2 1 3 my $self = shift;
893 2 100       5 return $self->{'map_db'} = shift if @_;
894 1         6 return $self->{'map_db'};
895             }
896              
897             =head2 make_search_if
898              
899             Title : make_search_if
900             Usage : $obj->make_search_if($newval)
901             Function:
902             Example :
903             Returns : value of make_search_if (a scalar)
904             Args : on set, new value (a scalar or undef, optional)
905              
906             =cut
907              
908             sub make_search_if{
909 2     2 1 4 my $self = shift;
910 2 100       5 return $self->{'make_search_if'} = shift if @_;
911 1         3 return $self->{'make_search_if'};
912             }
913              
914             =head2 search_
915              
916             Title : search_
917             Usage : $obj->search_($newval)
918             Function:
919             Example :
920             Returns : value of search_ (a scalar)
921             Args : on set, new value (a scalar or undef, optional)
922              
923             =cut
924              
925             sub search_{
926 2     2 1 4 my $self = shift;
927 2 100       6 return $self->{'search_'} = shift if @_;
928 1         3 return $self->{'search_'};
929             }
930              
931             =head2 _map_db_uri
932              
933             Title : _map_db_uri
934             Usage :
935             Function: return the full map_db uri ("Database Map")
936             Example :
937             Returns : scalar string
938             Args : none
939              
940             =cut
941              
942             sub _map_db_uri{
943 1     1   1554 my $self = shift;
944 1         4 return $self->lanl_base."/".$self->map_db;
945             }
946            
947              
948             =head2 _make_search_if_uri
949              
950             Title : _make_search_if_uri
951             Usage :
952             Function: return the full make_search_if uri ("Make Search Interface")
953             Example :
954             Returns : scalar string
955             Args : none
956              
957             =cut
958              
959             sub _make_search_if_uri{
960 1     1   2 my $self = shift;
961 1         2 return $self->lanl_base."/".$self->make_search_if;
962             }
963              
964             =head2 _search_uri
965              
966             Title : _search_uri
967             Usage :
968             Function: return the full search cgi uri ("Search Database")
969             Example :
970             Returns : scalar string
971             Args : none
972              
973             =cut
974              
975             sub _search_uri{
976 1     1   2 my $self = shift;
977 1         2 return $self->lanl_base."/".$self->search_;
978             }
979              
980             =head2 _schema_file
981              
982             Title : _schema_file
983             Usage : $hiv_query->_schema_file($newval)
984             Function:
985             Example :
986             Returns : value of _schema_file (an XML string or filename)
987             Args : on set, new value (an XML string or filename, or undef, optional)
988              
989             =cut
990              
991             sub _schema_file {
992 3     3   4 my $self = shift;
993              
994 3 100       10 return $self->{'_schema_file'} = shift if @_;
995 2         12 return $self->{'_schema_file'};
996             }
997              
998             =head2 _schema
999              
1000             Title : _schema
1001             Usage : $hiv_query->_schema($newVal)
1002             Function:
1003             Example :
1004             Returns : value of _schema (an HIVSchema object in package
1005             L)
1006             Args : none (field set directly in new())
1007              
1008             =cut
1009              
1010             sub _schema{
1011 10     10   11 my $self = shift;
1012            
1013             $self->{'_schema'} ?
1014 10 100       32 return $self->{'_schema'} :
1015             $self->throw(-class=>'Bio::SchemaNotInit::Exception',
1016             -text=>"DB schema not initialized",
1017             -value=>"");
1018            
1019             }
1020              
1021             =head2 _lanl_query
1022              
1023             Title : _lanl_query
1024             Usage : $hiv_query->_lanl_query(\@query_parms)
1025             Function: pushes \@query_parms onto @{$self->{'_lanl_query'}
1026             Example :
1027             Returns : value of _lanl_query (an arrayref)
1028             Args : on set, new value (an arrayref or undef, optional)
1029              
1030             =cut
1031              
1032             sub _lanl_query{
1033 5     5   6 my $self = shift;
1034 5         7 my $a = shift;
1035 5 50       8 return $self->{'_lanl_query'} unless $a;
1036 5 50       13 if (ref $a eq 'ARRAY') {
1037 5         7 push @{$self->{'_lanl_query'}}, $a;
  5         8  
1038 5         21 return $a;
1039             }
1040             else {
1041 0         0 $self->throw(-class=>'Bio::Root::BadParameter',
1042             -text=>'Array ref required for argument.',
1043             -value=>$a);
1044             }
1045              
1046             }
1047              
1048             =head2 _lanl_response
1049              
1050             Title : _lanl_response
1051             Usage : $hiv_query->_lanl_response($response)
1052             Function: pushes $response onto @{$hiv_query->{'_lanl_response'}}
1053             Example :
1054             Returns : value of _lanl_response (an arrayref of HTTP::Response objects)
1055             Args : on set, new value (an HTTP::Response object or undef, optional)
1056              
1057             =cut
1058              
1059             sub _lanl_response{
1060 0     0   0 my $self = shift;
1061 0 0       0 if (@_) {
1062 0         0 my $r = shift;
1063 0 0       0 $self->throw(-class=>'Bio::Root::BadParameter',
1064             -text=>'Requires an HTTP::Response object',
1065             -value=> ref $r) unless ref($r) eq 'HTTP::Response';
1066 0         0 push @{$self->{'_lanl_response'}}, $r;
  0         0  
1067 0         0 return $r;
1068             }
1069 0         0 return $self->{'_lanl_response'};
1070             }
1071              
1072             =head2 _create_lanl_query
1073              
1074             Title : _create_lanl_query
1075             Usage : $hiv_query->_create_lanl_query()
1076             Function: validate query hash or string, prepare for _do_lanl_request
1077             Example :
1078             Returns : 1 if successful; throws exception on invalid query
1079             Args :
1080              
1081             =cut
1082              
1083             sub _create_lanl_query {
1084 9     9   14 my $self = shift;
1085 9         12 my (%inhash, @query, @qhashes);
1086 0         0 my ($schema, @validFields, @validAliases);
1087            
1088 9         17 for ($self->query) {
1089 9 50       19 !defined && do {
1090 0         0 $self->throw(-class=>'Bio::Root::NoSuchThing',
1091             -text=>'Query not specified',
1092             -value=>'');
1093 0         0 last;
1094             };
1095 9 100       20 ref eq 'HASH' && do {
1096 2         6 %inhash = %$_;
1097 2 100       4 if ( grep /HASH/, map {ref} values %inhash ) {
  5         14  
1098             # check for {query=>{},annot=>[]} style
1099             $self->throw(-class=>'Bio::Root::BadParameter',
1100             -text=>'Query style unrecognized',
1101 1 50       4 -value=>"") unless defined $inhash{query};
1102 1         3 push @qhashes, $_;
1103             }
1104 2         3 last;
1105             };
1106 7 100       15 ref eq 'ARRAY' && do {
1107 3         10 $inhash{'query'} = {@$_};
1108 3         5 push @qhashes, \%inhash;
1109 3         5 last;
1110             };
1111             #else
1112 4         4 do {
1113 4         11 @qhashes = $self->_parse_query_string($_);
1114             };
1115             }
1116 9         22 $schema = $self->_schema;
1117 8         23 @validFields = $schema->fields;
1118 8         35 @validAliases = $schema->aliases;
1119              
1120             # validate args based on the xml specification file
1121             # only checks blanks and fields with explicitly specified options
1122             # text fields can put anything, and the query will be run before
1123             # an error is caught in these
1124 8         46 foreach my $qh (@qhashes) {
1125 8         12 @query=();
1126 8         6 foreach my $k (keys %{$$qh{'query'}}) {
  8         29  
1127 17         8 my $fld;
1128             # validate field
1129 17 50       1930 if (grep /^$k$/, @validFields) {
    100          
1130 0         0 $fld = $k;
1131             }
1132             elsif (grep /^$k$/, @validAliases) {
1133 15         32 foreach (@validFields) {
1134 1018 100       1302 if (grep (/^$k$/, $schema->aliases($_))) {
1135 15         15 $fld = $_;
1136 15         26 last;
1137             }
1138             # $fld contains the field corresp. to the alias
1139             }
1140             }
1141             else {
1142 2         21 $self->throw(-class=>'Bio::Root::BadParameter',
1143             -text=>"Invalid field or alias \"$k\"",
1144             -value=>$qh);
1145             }
1146             # validate matchdata
1147 15         34 my $vf = $schema->_sfieldh($fld);
1148 15 100       46 my @md = (ref($qh->{'query'}{$k}) eq 'ARRAY') ? @{$qh->{'query'}{$k}} : $qh->{'query'}{$k};
  6         12  
1149 15 50       46 if ($$vf{type} eq 'text') {
    50          
1150 0         0 foreach (@md) {
1151             $self->throw(-class=>'Bio::Root::BadParameter',
1152             -text=>'Value for field \"$k\" cannot be empty',
1153             -value=>$qh)
1154 0 0 0     0 if ($_ eq "") && ($$vf{blank_ok} eq 'false');
1155             }
1156             }
1157             elsif ($$vf{type} eq 'option') {
1158 15         20 foreach my $md (@md) {
1159             $self->throw(-class=>'Bio::Root::BadParameter',
1160             -text=>"Invalid value \"".$md."\" for field \"$fld\"",
1161             -value=>$md)
1162 21 50 66     45 unless $$vf{option} && grep {defined $_ && /^$md$/} @{$$vf{option}};
  3086 100       7864  
  21         33  
1163             }
1164             }
1165             # validated; add to query
1166 14         21 foreach (@md) {
1167 20         36 push @query, ($fld => $_);
1168             }
1169             }
1170 5 100       16 if ($qh->{'annot'}) {
1171             # validate the column names to be included in the query
1172             # to obtain annotations
1173 2         3 my @annot_cols = @{$qh->{'annot'}};
  2         6  
1174 2         3 foreach my $k (@annot_cols) {
1175 2         3 my $fld;
1176             # validate field
1177 2 50       206 if (grep /^$k$/, @validFields) {
    50          
1178 0         0 $fld = $k;
1179             }
1180             elsif (grep /^$k$/, @validAliases) {
1181 2         3 foreach (@validFields) {
1182 130 100       173 if (grep (/^$k$/, $schema->aliases($_))) {
1183 2         2 $fld = $_;
1184 2         3 last;
1185             }
1186             # $fld should contain the field corresp. to the alias
1187             }
1188             }
1189             else {
1190 0         0 $self->throw(-class=>'Bio::Root::NoSuchThing',
1191             -text=>"Invalid field or alias \"$k\"",
1192             -value=>$k);
1193             }
1194             # lazy: 'Any' may not be the right default (but appears to
1195             # be, based on the lanl html)
1196 2         4 push @query, ($fld => 'Any');
1197             }
1198             }
1199              
1200             # insure that LANL and GenBank ids are retrieved
1201 5 50       30 push @query, ('sequenceentry.se_id' => 'Any') unless grep /SequenceEntry\.SE_id/, @query;
1202 5 50       17 push @query, ('sequenceaccessions.sa_genbankaccession' => 'Any')
1203             unless grep /SequenceAccessions\.SA_GenBankAccession/, @query;
1204              
1205             # an "order" field is required by the LANL CGI
1206             # if not specified, default to SE_id
1207              
1208 5 50       23 push @query, ('order'=>'sequenceentry.se_id') unless grep /order/, @query;
1209              
1210             # @query now contains sfield=>matchdata pairs, as specified by user
1211             # include appropriate indexes to create correct automatic joins
1212             # established by the LANL CGI
1213 5         6 my (@qtbl, @qpk, @qfk);
1214              
1215             # the tables represented in query:
1216 5         28 my %q = @query; # squish the tables in the current query into hash keys
1217 5         25 @qtbl = $schema->tbl('-s', keys %q);
1218              
1219 5 50       13 if (@qtbl > 1) {
1220             # more than one table, see if they can be connected
1221             # get primary keys of query tables
1222 5         12 @qpk = $schema->pk(@qtbl);
1223              
1224             # we need to get each query table to join to
1225             # SequenceEntry.
1226             #
1227             # The schema is a graph with tables as nodes and
1228             # foreign keys<->primary keys as branches. To get a
1229             # join that works, need to include in the query
1230             # all branches along a path from SequenceEntry
1231             # to each query table.
1232             #
1233             # find_join does it...
1234             my @joink = map {
1235 5         6 my @k = $schema->find_join($_,'sequenceentry');
  15         34  
1236 15 100       18 map {$_ || ()} @k
  15         47  
1237             } @qtbl;
1238             # squish the keys in @joink
1239 5         8 my %j;
1240 5         17 @j{@joink} = (1) x @joink;
1241 5         11 @joink = keys %j;
1242             # add the fields not currently in the query
1243 5         11 foreach (@qpk, @joink) {
1244 15         12 my $fld = $_;
1245 15 100       243 if (!grep(/^$fld$/,keys %q)) {
1246             # lazy: 'Any' may not be the right default (but appears to
1247             # be, based on the lanl html)
1248 10         27 push @query, ($_ => 'Any');
1249             }
1250             }
1251              
1252             }
1253            
1254             # set object property
1255 5         30 $self->_lanl_query([@query]);
1256             }
1257 5         129 return 1;
1258             }
1259              
1260             # _do_lanl_request : post the queries created by _create_lanl_query
1261             #
1262             # @args (or {@args}) should be unaliased Table.Column=>Matchdata
1263             # pairs (these will be used directly in the POSTs)
1264              
1265             =head2 _do_lanl_request
1266              
1267             Title : _do_lanl_request
1268             Usage : $hiv_query->_do_lanl_request()
1269             Function: Perform search request on _create_lanl_query-validated query
1270             Example :
1271             Returns : 1 if successful
1272             Args :
1273              
1274             =cut
1275              
1276             sub _do_lanl_request {
1277 0     0   0 my $self = shift;
1278 0         0 my (@queries, @query, @interface,$interfGet,$searchGet,$response);
1279 0         0 my ($numseqs, $count);
1280              
1281             # handle args
1282 0 0       0 if (!$self->_lanl_query) {
1283 0         0 $self->throw(-class=>"Bio::Root::BadParameter",
1284             -text=>"_lanl_query empty, run _create_lanl_request first",
1285             -value=>"");
1286             }
1287             else {
1288 0         0 @queries = @{$self->_lanl_query};
  0         0  
1289             }
1290            
1291             ## utility vars
1292             ## search site specific CGI parms
1293 0         0 my @search_pms = ('action'=>'Search');
1294 0         0 my @searchif_pms = ('action'=>'Search Interface');
1295             # don't get the actual sequence data here (i.e., the cgi parm
1296             # 'incl_seq' remains undefined...
1297 0         0 my @download_pms = ('action Download.x'=>1, 'action Download.y'=>1);
1298              
1299             ## HTML-testing regexps
1300 0         0 my $tags_re = qr{(?:\s*<[^>]+>\s*)};
1301 0         0 my $session_id_re = qr{
1302 0         0 my $search_form_re = qr{]*action=".*/search.comp"};
1303 0         0 my $seqs_found_re = qr{Displaying$tags_re*(?:\s*[0-9-]*\s*)*$tags_re*of$tags_re*\s*([0-9]+)$tags_re*sequences found};
1304 0         0 my $no_seqs_found_re = qr{Sorry.*no sequences found};
1305 0         0 my $too_many_re = qr{too many records: $tags_re*([0-9]+)};
1306 0         0 my $sys_error_re = qr{[Ss]ystem error};
1307 0         0 my $sys_error_extract_re = qr{${tags_re}error:.*?]+>${tags_re}(.*?)
};
1308             # find something like:
1309             # tables without join:
SequenceAccessions
1310 0         0 my $tbl_no_join_re = qr{tables without join}i;
1311             # my $sorry_bud_re = qr{};
1312              
1313              
1314 0         0 foreach my $q (@queries) {
1315 0         0 @query = @$q;
1316             # default query control parameters
1317 0         0 my %qctrl = (
1318             max_rec=>100,
1319             sort_dir=>'ASC',
1320             translate=>'FALSE' # nucleotides
1321             );
1322            
1323             # do work...
1324              
1325             # pull out commands, designated by the COMMAND pseudo-table...
1326 0 0       0 my @commands = map { $query[$_] =~ s/^COMMAND\.// ? @query[$_..$_+1] : () } (0..$#query-1);
  0         0  
1327 0 0       0 @query = map { $query[$_] =~ /^COMMAND/ ? () : @query[2*$_..2*$_+1] } (0..($#query-1)/2);
  0         0  
1328              
1329            
1330             # set control parameters explicitly made in query
1331 0         0 foreach my $cp (keys %qctrl) {
1332 0 0       0 if (!grep( /^$cp$/, @query)) {
1333 0         0 push @query, ($cp, $qctrl{$cp});
1334             }
1335             }
1336              
1337             # note that @interface must be an array, since a single 'key' (the table)
1338             # can be associated with multiple 'values' (the columns) in the POST
1339              
1340             # squish fieldnames into hash keys
1341 0         0 my %q = @query;
1342 0         0 @interface = grep {defined} map {my ($tbl,$col) = /^(.*)\.(.*)$/} keys %q;
  0         0  
  0         0  
1343 0         0 my $err_val = ""; # to contain informative (ha!) value if error is parsed
1344              
1345 0         0 eval { # encapsulate communication errors here, defer biothrows...
1346            
1347             #mark the useragent should be setable from outside (so we can modify timeouts, etc)
1348 0         0 my $ua = Bio::WebAgent->new($self->_ua_hash);
1349 0         0 my $idPing = $ua->get($self->_map_db_uri);
1350 0 0       0 $idPing->is_success || do {
1351 0         0 $response=$idPing;
1352 0         0 die "Connect failed";
1353             };
1354             # get the session id
1355 0 0       0 if (!$self->_session_id) {
1356 0         0 ($self->{'_session_id'}) = ($idPing->content =~ /$session_id_re/);
1357 0 0       0 $self->_session_id || do {
1358 0         0 $response=$idPing;
1359 0         0 die "Session not established";
1360             };
1361             }
1362             # 10/07/08:
1363             # strange bug: if action=>'Search+Interface' below (note "+"),
1364             # the response to the search (in $searchGet) shows the correct
1365             # >number< of sequences found, but also an error "No sequences
1366             # match" and an SQL barf. Changing the "+" to a " " sets up the
1367             # interface to lead to the actual sequences being delivered as
1368             # expected. maj
1369 0         0 $interfGet = $ua->post($self->_make_search_if_uri, [@interface, @searchif_pms, id=>$self->_session_id]);
1370 0 0       0 $interfGet->is_success || do {
1371 0         0 $response=$interfGet;
1372 0         0 die "Interface request failed";
1373             };
1374             # see if a search form was returned...
1375            
1376 0 0       0 $interfGet->content =~ /$search_form_re/ || do {
1377 0         0 $response=$interfGet;
1378 0         0 die "Interface request failed";
1379             };
1380            
1381 0         0 $searchGet = $ua->post($self->_search_uri, [@query, @commands, @search_pms, id=>$self->_session_id]);
1382 0 0       0 $searchGet->is_success || do {
1383 0         0 $response = $searchGet;
1384 0         0 die "Search failed";
1385             };
1386 0         0 $response = $searchGet;
1387 0         0 for ($searchGet->content) {
1388 0 0       0 /$no_seqs_found_re/ && do {
1389 0         0 $err_val = 0;
1390 0         0 die "No sequences found";
1391 0         0 last;
1392             };
1393 0 0       0 /$too_many_re/ && do {
1394 0         0 $err_val = $1;
1395 0         0 die "Too many records ($1): must be <10000";
1396 0         0 last;
1397             };
1398 0 0       0 /$tbl_no_join_re/ && do {
1399 0         0 die "Some required tables went unjoined to query";
1400 0         0 last;
1401             };
1402 0 0       0 /$sys_error_re/ && do {
1403 0         0 /$sys_error_extract_re/;
1404 0         0 $err_val = $1;
1405 0         0 die "LANL system error";
1406             };
1407 0 0       0 /$seqs_found_re/ && do {
1408 0         0 $numseqs = $1;
1409 0         0 $count += $numseqs;
1410 0         0 last;
1411             };
1412             # else...
1413 0         0 do {
1414 0         0 die "Search failed (response not parsed)";
1415             };
1416             }
1417 0         0 $response = $ua->post($self->_search_uri, [@download_pms, id=>$self->_session_id]);
1418 0 0       0 $response->is_success || die "Query failed";
1419             # $response->content is a tab-separated value table of sequences
1420             # and metadata, first line starts with \# and contains fieldnames
1421             };
1422 0         0 $self->_lanl_response($response);
1423             # throw, if necessary
1424 0 0       0 if ($@) {
1425 0 0       0 ($@ !~ "No sequences found") && do {
1426 0         0 $self->throw(-class=>'Bio::WebError::Exception',
1427             -text=>$@,
1428             -value=>$err_val);
1429             };
1430             }
1431             }
1432              
1433 0 0       0 $self->warn("No sequences found for this query") unless $count;
1434 0         0 $self->count($count);
1435 0         0 return 1; # made it.
1436              
1437             }
1438              
1439             =head2 _parse_lanl_response
1440              
1441             Title : _parse_lanl_response
1442             Usage : $hiv_query->_parse_lanl_response()
1443             Function: Parse the tab-separated-value response obtained by _do_lanl_request
1444             for sequence ids, accessions, and annotations
1445             Example :
1446             Returns : 1 if successful
1447             Args :
1448              
1449             =cut
1450              
1451             sub _parse_lanl_response {
1452              
1453             ### handle parsing and merging multiple responses into the query object
1454             ### (ids and annotations)
1455 0     0   0 my $self = shift;
1456            
1457 0         0 my ($seqGet) = (@_);
1458 0         0 my (@data, @cols, %antbl, %antype);
1459 0         0 my $numseq = 0;
1460 0         0 my ($schema, @retseqs, %rec, $ac);
1461 0         0 $schema = $self->_schema;
1462            
1463 0 0       0 $self->_lanl_response ||
1464             $self->throw(-class=>"Bio::QueryNotMade::Exception",
1465             -text=>"Query not yet performed; call _do_lanl_request()",
1466             -value=>"");
1467 0         0 foreach my $rsp (@{$self->_lanl_response}) {
  0         0  
1468 0         0 @data = split(/\r|\n/, $rsp->content);
1469 0         0 my $l;
1470 0         0 do {
1471 0         0 $l = shift @data;
1472             } while ($l !~ /Number/);
1473 0         0 $numseq += ( $l =~ /Number.*:\s([0-9]+)/ )[0];
1474 0         0 @cols = split(/\t/, shift(@data));
1475             # mappings from column headings to annotation keys
1476             # squish into hash keys
1477 0         0 my %q = @{ shift @{$self->_lanl_query} };
  0         0  
  0         0  
1478 0         0 %antbl = $schema->ankh(keys %q);
1479             # get the category for each annotation
1480 0         0 map { $antype{ $_->{ankey} } = $_->{antype} } values %antbl;
  0         0  
1481             # normalize column headers
1482 0         0 map { tr/ /_/; $_ = lc; } @cols;
  0         0  
  0         0  
1483 0         0 foreach (@data) {
1484 0         0 @rec{@cols} = split /\t/;
1485 0         0 my $id = $rec{'se_id'};
1486 0         0 $self->add_id($id);
1487 0         0 $ac = Bio::Annotation::Collection->new();
1488             #create annotations
1489 0         0 foreach (@cols) {
1490 0 0       0 next if $_ eq '#';
1491 0   0     0 my $t = $antype{$_} || "Unclassified";
1492 0         0 my $d = $rec{$_}; # the data
1493 0         0 $ac->put_value(-KEYS=>[$t, $_], -VALUE=>$d);
1494             }
1495 0         0 $self->add_annotations_for_id($id, $ac);
1496             }
1497 0         0 1;
1498             }
1499 0         0 return 1; # made it.
1500             }
1501            
1502             =head2 _parse_query_string
1503              
1504             Title : _parse_query_string
1505             Usage : $hiv_query->_parse_query_string($str)
1506             Function: Parses a query string using query language emulator QRY
1507             : in L
1508             Example :
1509             Returns : arrayref of hash structures suitable for passing to _create_lanl_query
1510             Args : a string scalar
1511              
1512             =cut
1513              
1514             sub _parse_query_string {
1515 4     4   5 my $self = shift;
1516 4         5 my $qstring = shift;
1517 4         4 my ($ptree, @ret);
1518             #syntax errors thrown in QRY (in HIVQueryHelper module)
1519 4         13 $ptree = QRY::_parse_q( $qstring );
1520 4         12 @ret = QRY::_make_q($ptree);
1521 4         62 return @ret;
1522             }
1523              
1524             =head1 Dude, sorry-
1525              
1526             =head2 _sorry
1527              
1528             Title : _sorry
1529             Usage : $hiv_query->_sorry("-president=>Powell")
1530             Function: Throws an exception for unsupported option or parameter
1531             Example :
1532             Returns :
1533             Args : scalar string
1534              
1535             =cut
1536              
1537             sub _sorry{
1538 0     0     my $self = shift;
1539 0           my $parm = shift;
1540 0           $self->throw(-class=>"Bio::HIVSorry::Exception",
1541             -text=>"Sorry, option/parameter \"$parm\" not (yet) supported. See manpage to complain.",
1542             -value=>$parm);
1543 0           return;
1544             }
1545              
1546             1;