| blib/lib/Bio/DB/Query/HIVQuery.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 267 | 487 | 54.8 |
| branch | 82 | 196 | 41.8 |
| condition | 17 | 54 | 31.4 |
| subroutine | 35 | 49 | 71.4 |
| pod | 15 | 19 | 78.9 |
| total | 416 | 805 | 51.6 |
| line | stmt | bran | cond | sub | pod | time | code | |
|---|---|---|---|---|---|---|---|---|
| 1 | # to do: support for comment, reference annotations | |||||||
| 2 | ||||||||
| 3 | # $Id: HIVQuery.pm 232 2008-12-11 14:51:51Z maj $ | |||||||
| 4 | # | |||||||
| 5 | # BioPerl module for Bio::DB::Query::LANLQuery | |||||||
| 6 | # | |||||||
| 7 | # Please direct questions and support issues to |
|||||||
| 8 | # | |||||||
| 9 | # Cared for by Mark A. Jensen |
|||||||
| 10 | # | |||||||
| 11 | # Copyright Mark A. Jensen | |||||||
| 12 | # | |||||||
| 13 | # You may distribute this module under the same terms as perl itself | |||||||
| 14 | ||||||||
| 15 | # POD documentation - main docs before the code | |||||||
| 16 | ||||||||
| 17 | =head1 NAME | |||||||
| 18 | ||||||||
| 19 | Bio::DB::Query::HIVQuery - Query interface to the Los Alamos HIV Sequence Database | |||||||
| 20 | ||||||||
| 21 | =head1 SYNOPSIS | |||||||
| 22 | ||||||||
| 23 | $q = new Bio::DB::Query::HIVQuery(" C[subtype] ZA[country] CXCR4[coreceptor] "); | |||||||
| 24 | $q = new Bio::DB::Query::HIVQuery( | |||||||
| 25 | -query=>{'subtype'=>'C', | |||||||
| 26 | 'country'=>'ZA', | |||||||
| 27 | 'coreceptor'=>'CXCR4'}); | |||||||
| 28 | ||||||||
| 29 | $ac = $q->get_annotations_by_id(($q->ids)[0]); | |||||||
| 30 | $ac->get_value('Geo', 'country') # returns 'SOUTH AFRICA' | |||||||
| 31 | ||||||||
| 32 | $db = new Bio::DB::HIV(); | |||||||
| 33 | $seqio = $db->get_Stream_by_query($q); # returns annotated Bio::Seqs | |||||||
| 34 | ||||||||
| 35 | # get subtype C sequences from South Africa and Brazil, | |||||||
| 36 | # with associated info on patient health, coreceptor use, and | |||||||
| 37 | # infection period: | |||||||
| 38 | ||||||||
| 39 | $q = new Bio::DB::Query::HIVQuery( | |||||||
| 40 | -query => { | |||||||
| 41 | 'query' => {'subtype'=>'C', | |||||||
| 42 | 'country'=>['ZA', 'BR']}, | |||||||
| 43 | 'annot' => ['patient_health', | |||||||
| 44 | 'coreceptor', | |||||||
| 45 | 'days_post_infection'] | |||||||
| 46 | }); | |||||||
| 47 | ||||||||
| 48 | ||||||||
| 49 | =head1 DESCRIPTION | |||||||
| 50 | ||||||||
| 51 | Bio::DB::Query::HIVQuery provides a query-like interface to the | |||||||
| 52 | cgi-based Los Alamos National Laboratory (LANL) HIV Sequence | |||||||
| 53 | Database. It uses Bioperl facilities to capture both sequences and | |||||||
| 54 | annotations in batch in an automated and computable way. Use with | |||||||
| 55 | L |
|||||||
| 56 | streams. | |||||||
| 57 | ||||||||
| 58 | =head2 Query format | |||||||
| 59 | ||||||||
| 60 | The interface implements a simple query language emulation that understands AND, | |||||||
| 61 | OR, and parenthetical nesting. The basic query unit is | |||||||
| 62 | ||||||||
| 63 | (match1 match2 ...)[fieldname] | |||||||
| 64 | ||||||||
| 65 | Sequences are returned for which C |
|||||||
| 66 | These units can be combined with AND, OR and parentheses. For example: | |||||||
| 67 | ||||||||
| 68 | (B, C)[subtype] AND (2000, 2001, 2002, 2003)[year] AND ((CN)[country] OR (ZA)[country]) | |||||||
| 69 | ||||||||
| 70 | which can be shortened to | |||||||
| 71 | ||||||||
| 72 | (B C)[subtype] (2000 2001 2002 2003)[year] (CN ZA)[country] | |||||||
| 73 | ||||||||
| 74 | The user can specify annotation fields, that do not restrict the query, but | |||||||
| 75 | arrange for the return of the associated field data for each sequence returned. | |||||||
| 76 | Specify annotation fields between curly braces, as in: | |||||||
| 77 | ||||||||
| 78 | (B C)[subtype] 2000[year] {country cd4_count cd8_count} | |||||||
| 79 | ||||||||
| 80 | Annotations can be accessed off the query using methods described in APPENDIX. | |||||||
| 81 | ||||||||
| 82 | =head2 Hash specifications for query construction | |||||||
| 83 | ||||||||
| 84 | Single query specifications can be made as hash references provided to the | |||||||
| 85 | C<-query> argument of the constructor. There are two forms: | |||||||
| 86 | ||||||||
| 87 | -query => { 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' } | |||||||
| 88 | ||||||||
| 89 | equivalent to | |||||||
| 90 | ||||||||
| 91 | -query => [ 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' ] | |||||||
| 92 | ||||||||
| 93 | or | |||||||
| 94 | ||||||||
| 95 | -query => { 'query' => {'country'=>'BR', 'phenotype'=>'NSI'}, | |||||||
| 96 | 'annot' => ['cd4_count'] } | |||||||
| 97 | ||||||||
| 98 | In both cases, the CD4 count is included in the annotations returned, but does | |||||||
| 99 | not restrict the rest of the query. | |||||||
| 100 | ||||||||
| 101 | To 'OR' multiple values of a field, use an anonymous array ref: | |||||||
| 102 | ||||||||
| 103 | -query => { 'country'=>['ZA','BR','NL'], 'subtype'=>['A', 'C', 'D'] } | |||||||
| 104 | ||||||||
| 105 | =head2 Valid query field names | |||||||
| 106 | ||||||||
| 107 | An attempt was made to make the query field names natural and easy to | |||||||
| 108 | remember. Aliases are specified in an XML file (C |
|||||||
| 109 | of the distribution. Custom field aliases can be set up by modifying this file. | |||||||
| 110 | ||||||||
| 111 | An HTML cheatsheet with valid field names, aliases, and match data can be | |||||||
| 112 | generated from the XML by using C |
|||||||
| 113 | can also be validated locally before it is unleashed on the server; see below. | |||||||
| 114 | ||||||||
| 115 | =head2 Annotations | |||||||
| 116 | ||||||||
| 117 | LANL DB annotations have been organized into a number of natural | |||||||
| 118 | groupings, tagged C |
|||||||
| 119 | successful query, each id is associated with a tree of | |||||||
| 120 | L |
|||||||
| 121 | methods C |
|||||||
| 122 | ||||||||
| 123 | =head2 Delayed/partial query runs | |||||||
| 124 | ||||||||
| 125 | Accessing the LANL DB involves multiple HTTP requests. The query can | |||||||
| 126 | be instructed to proceed through all (the default) or only some of | |||||||
| 127 | them, using the named parameter C |
|||||||
| 128 | ||||||||
| 129 | To validate a query locally, use | |||||||
| 130 | ||||||||
| 131 | $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>0 ) | |||||||
| 132 | ||||||||
| 133 | which will throw an exception if a field name or option is invalid. | |||||||
| 134 | ||||||||
| 135 | To get a query count only, you can save a server hit by using | |||||||
| 136 | ||||||||
| 137 | $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>1 ) | |||||||
| 138 | ||||||||
| 139 | and asking for C<$q-E |
|||||||
| 140 | ||||||||
| 141 | $q->_do_query(2) | |||||||
| 142 | ||||||||
| 143 | which picks up where you left off. | |||||||
| 144 | ||||||||
| 145 | C<-RUN_OPTION=E |
|||||||
| 146 | annotations. | |||||||
| 147 | ||||||||
| 148 | =head2 Query re-use | |||||||
| 149 | ||||||||
| 150 | You can clear the query results, retaining the same LANL session and query spec, | |||||||
| 151 | by doing C<$q-E |
|||||||
| 152 | C<$q-E |
|||||||
| 153 | ||||||||
| 154 | =head1 FEEDBACK | |||||||
| 155 | ||||||||
| 156 | =head2 Mailing Lists | |||||||
| 157 | ||||||||
| 158 | User feedback is an integral part of the evolution of this and other | |||||||
| 159 | Bioperl modules. Send your comments and suggestions preferably to | |||||||
| 160 | the Bioperl mailing list. Your participation is much appreciated. | |||||||
| 161 | ||||||||
| 162 | bioperl-l@bioperl.org - General discussion | |||||||
| 163 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | |||||||
| 164 | ||||||||
| 165 | =head2 Support | |||||||
| 166 | ||||||||
| 167 | Please direct usage questions or support issues to the mailing list: | |||||||
| 168 | ||||||||
| 169 | I |
|||||||
| 170 | ||||||||
| 171 | rather than to the module maintainer directly. Many experienced and | |||||||
| 172 | reponsive experts will be able look at the problem and quickly | |||||||
| 173 | address it. Please include a thorough description of the problem | |||||||
| 174 | with code and data examples if at all possible. | |||||||
| 175 | ||||||||
| 176 | =head2 Reporting Bugs | |||||||
| 177 | ||||||||
| 178 | Report bugs to the Bioperl bug tracking system to help us keep track | |||||||
| 179 | of the bugs and their resolution. Bug reports can be submitted via | |||||||
| 180 | the web: | |||||||
| 181 | ||||||||
| 182 | https://github.com/bioperl/bioperl-live/issues | |||||||
| 183 | ||||||||
| 184 | =head1 AUTHOR - Mark A. Jensen | |||||||
| 185 | ||||||||
| 186 | Email maj@fortinbras.us | |||||||
| 187 | ||||||||
| 188 | =head1 CONTRIBUTORS | |||||||
| 189 | ||||||||
| 190 | Mark A. Jensen | |||||||
| 191 | ||||||||
| 192 | =head1 APPENDIX | |||||||
| 193 | ||||||||
| 194 | The rest of the documentation details each of the object methods. | |||||||
| 195 | Internal methods are usually preceded with a _ | |||||||
| 196 | ||||||||
| 197 | =cut | |||||||
| 198 | ||||||||
| 199 | # Let the code begin... | |||||||
| 200 | ||||||||
| 201 | package Bio::DB::Query::HIVQuery; | |||||||
| 202 | 1 | 1 | 756 | use strict; | ||||
| 1 | 1 | |||||||
| 1 | 27 | |||||||
| 203 | 1 | 1 | 4 | use vars qw( $LANL_BASE $LANL_MAP_DB $LANL_MAKE_SEARCH_IF $LANL_SEARCH $SCHEMA_FILE $RUN_OPTION ); | ||||
| 1 | 1 | |||||||
| 1 | 67 | |||||||
| 204 | ||||||||
| 205 | # Object preamble - inherits from Bio::DB::QueryI | |||||||
| 206 | 1 | 1 | 345 | use Bio::Root::Root; | ||||
| 1 | 1 | |||||||
| 1 | 25 | |||||||
| 207 | 1 | 1 | 383 | use Bio::Annotation::Collection; | ||||
| 1 | 2 | |||||||
| 1 | 20 | |||||||
| 208 | 1 | 1 | 339 | use Bio::Annotation::Comment; | ||||
| 1 | 1 | |||||||
| 1 | 20 | |||||||
| 209 | 1 | 1 | 365 | use Bio::Annotation::Reference; | ||||
| 1 | 1 | |||||||
| 1 | 21 | |||||||
| 210 | 1 | 1 | 319 | use Bio::WebAgent; | ||||
| 1 | 1 | |||||||
| 1 | 20 | |||||||
| 211 | 1 | 1 | 4 | use XML::Simple; | ||||
| 1 | 1 | |||||||
| 1 | 6 | |||||||
| 212 | 1 | 1 | 62 | use CGI; | ||||
| 1 | 1 | |||||||
| 1 | 5 | |||||||
| 213 | ||||||||
| 214 | 1 | 1 | 597 | use Bio::DB::HIV::HIVQueryHelper; | ||||
| 1 | 1 | |||||||
| 1 | 28 | |||||||
| 215 | ||||||||
| 216 | 1 | 1 | 4 | use base qw(Bio::Root::Root Bio::DB::QueryI); | ||||
| 1 | 1 | |||||||
| 1 | 368 | |||||||
| 217 | ||||||||
| 218 | # globals | |||||||
| 219 | BEGIN { | |||||||
| 220 | # change base to new search page 01/14/09 /maj | |||||||
| 221 | 1 | 1 | 1 | $LANL_BASE = "http://www.hiv.lanl.gov/components/sequence/HIV/asearch"; | ||||
| 222 | 1 | 1 | $LANL_MAP_DB = "map_db.comp"; | |||||
| 223 | 1 | 1 | $LANL_MAKE_SEARCH_IF = "make_search_if.comp"; | |||||
| 224 | 1 | 1 | $LANL_SEARCH = "search.comp"; | |||||
| 225 | 1 | 5 | $SCHEMA_FILE = Bio::Root::IO->catfile(qw(Bio DB HIV lanl-schema.xml)); | |||||
| 226 | 1 | 3 | $RUN_OPTION = 2; # execute query | |||||
| 227 | # exceptions | |||||||
| 228 | 1 | 10 | @Bio::SchemaNotInit::Exception::ISA = qw( Bio::Root::Exception ); | |||||
| 229 | 1 | 6 | @Bio::WebError::Exception::ISA = qw( Bio::Root::Exception ); | |||||
| 230 | 1 | 6 | @Bio::QueryNotMade::Exception::ISA = qw( Bio::Root::Exception ); | |||||
| 231 | 1 | 12 | @Bio::QueryStringException::Exception::ISA = qw( Bio::Root::Exception ); | |||||
| 232 | 1 | 3682 | @Bio::HIVSorry::Exception::ISA = qw ( Bio::Root::Exception ); | |||||
| 233 | ||||||||
| 234 | } | |||||||
| 235 | ||||||||
| 236 | =head1 Constructor | |||||||
| 237 | ||||||||
| 238 | =head2 new | |||||||
| 239 | ||||||||
| 240 | Title : new | |||||||
| 241 | Usage : my $hiv_query = new Bio::DB::Query::HIVQuery(); | |||||||
| 242 | Function: Builds a new Bio::DB::Query::HIVQuery object, | |||||||
| 243 | running a sequence query against the Los Alamos | |||||||
| 244 | HIV sequence database | |||||||
| 245 | Returns : an instance of Bio::DB::Query::HIVQuery | |||||||
| 246 | Args : | |||||||
| 247 | ||||||||
| 248 | =cut | |||||||
| 249 | ||||||||
| 250 | sub new { | |||||||
| 251 | 1 | 1 | 1 | 132 | my($class,@args) = @_; | |||
| 252 | 1 | 11 | my $self = $class->SUPER::new(@args); | |||||
| 253 | # constructor option for web agent parameter spec: added 01/14/09 /maj | |||||||
| 254 | 1 | 10 | my ($query, $ids, $lanl_base, $lanl_map_db, $lanl_make_search_if, $lanl_search, $schema_file,$run_option, $uahash) = | |||||
| 255 | $self->_rearrange([ qw(QUERY | |||||||
| 256 | IDS | |||||||
| 257 | LANL_BASE | |||||||
| 258 | LANL_MAP_DB | |||||||
| 259 | LANL_MAKE_SEARCH_IF | |||||||
| 260 | LANL_SEARCH | |||||||
| 261 | SCHEMA_FILE | |||||||
| 262 | RUN_OPTION | |||||||
| 263 | USER_AGENT_HASH | |||||||
| 264 | )], @args); | |||||||
| 265 | ||||||||
| 266 | # default globals | |||||||
| 267 | 1 | 33 | 6 | $lanl_base||= $LANL_BASE; | ||||
| 268 | 1 | 33 | 4 | $lanl_map_db||=$LANL_MAP_DB; | ||||
| 269 | 1 | 33 | 10 | $lanl_make_search_if||=$LANL_MAKE_SEARCH_IF; | ||||
| 270 | 1 | 33 | 4 | $lanl_search||=$LANL_SEARCH; | ||||
| 271 | 1 | 33 | 3 | $schema_file||=$SCHEMA_FILE; | ||||
| 272 | 1 | 50 | 5 | $uahash ||= {timeout => 90}; | ||||
| 273 | 1 | 50 | 2 | defined $run_option || ($run_option = $RUN_OPTION); | ||||
| 274 | ||||||||
| 275 | 1 | 4 | $self->lanl_base($lanl_base); | |||||
| 276 | 1 | 3 | $self->map_db($lanl_map_db); | |||||
| 277 | 1 | 4 | $self->make_search_if($lanl_make_search_if); | |||||
| 278 | 1 | 3 | $self->search_($lanl_search); | |||||
| 279 | 1 | 4 | $self->_run_option($run_option); | |||||
| 280 | 1 | 3 | $self->_ua_hash($uahash); | |||||
| 281 | ||||||||
| 282 | # catch this at the top | |||||||
| 283 | 1 | 50 | 21 | if (-e $schema_file) { | ||||
| 284 | 1 | 3 | $self->_schema_file($schema_file); | |||||
| 285 | } | |||||||
| 286 | else { # look around | |||||||
| 287 | 0 | 0 | my ($p) = $self->_schema_file( [grep {$_} map { | |||||
| 288 | 0 | 0 | my $p = Bio::Root::IO->catfile($_, $schema_file); | |||||
| 0 | 0 | |||||||
| 289 | 0 | 0 | 0 | $p if -e $p | ||||
| 290 | } (@INC,"")]->[0]); | |||||||
| 291 | 0 | 0 | 0 | $self->throw(-class=>"Bio::Root::NoSuchThing", | ||||
| 292 | -text=>"Schema file \"".$self->_schema_file."\" cannot be found", | |||||||
| 293 | -value=>$self->_schema_file) unless -e $self->_schema_file; | |||||||
| 294 | } | |||||||
| 295 | ||||||||
| 296 | 1 | 4 | $self->count(0); | |||||
| 297 | 1 | 2 | $self->{_schema} = HIVSchema->new($self->_schema_file); | |||||
| 298 | ||||||||
| 299 | # internal storage and flags | |||||||
| 300 | 1 | 5 | $self->{'_lanl_query'} = []; | |||||
| 301 | 1 | 2 | $self->{'_lanl_response'} = []; | |||||
| 302 | 1 | 2 | $self->{'_annotations'} = {}; # container for annotation collections assoc. with ids | |||||
| 303 | 1 | 2 | $self->{'_RUN_LEVEL'} = undef; # set in _do_query() | |||||
| 304 | ||||||||
| 305 | # work | |||||||
| 306 | 1 | 50 | 4 | defined $query && $self->query($query); | ||||
| 307 | 1 | 50 | 3 | defined $ids && $self->ids($ids); | ||||
| 308 | ||||||||
| 309 | # exec query | |||||||
| 310 | ||||||||
| 311 | 1 | 50 | 5 | $self->_do_query($self->_run_option) if $self->query; | ||||
| 312 | ||||||||
| 313 | 1 | 6 | return $self; | |||||
| 314 | } | |||||||
| 315 | ||||||||
| 316 | =head1 QueryI compliance | |||||||
| 317 | ||||||||
| 318 | =head2 count | |||||||
| 319 | ||||||||
| 320 | Title : count | |||||||
| 321 | Usage : $hiv_query->count($newval) | |||||||
| 322 | Function: return number of sequences found | |||||||
| 323 | Example : | |||||||
| 324 | Returns : value of count (a scalar) | |||||||
| 325 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 326 | Note : count warns if it is accessed for reading before query | |||||||
| 327 | has been executed to at least level 1 | |||||||
| 328 | ||||||||
| 329 | =cut | |||||||
| 330 | ||||||||
| 331 | sub count{ | |||||||
| 332 | 11 | 11 | 1 | 27 | my $self = shift; | |||
| 333 | 11 | 100 | 27 | return $self->{'count'} = shift if @_; | ||||
| 334 | 1 | 50 | 33 | 9 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 1)) { | |||
| 335 | 1 | 8 | $self->warn('Query not yet run at > level 1'); | |||||
| 336 | } | |||||||
| 337 | 0 | 0 | return $self->{'count'}; | |||||
| 338 | } | |||||||
| 339 | ||||||||
| 340 | =head2 ids | |||||||
| 341 | ||||||||
| 342 | Title : ids | |||||||
| 343 | Usage : $hiv_query->ids($newval) | |||||||
| 344 | Function: LANL ids of returned sequences | |||||||
| 345 | Example : | |||||||
| 346 | Returns : value of ids (an arrayref of sequence accessions/ids) | |||||||
| 347 | Args : on set, new value (an arrayref or undef, optional) | |||||||
| 348 | ||||||||
| 349 | =cut | |||||||
| 350 | ||||||||
| 351 | sub ids{ | |||||||
| 352 | 10 | 10 | 1 | 27 | my $self = shift; | |||
| 353 | 10 | 100 | 25 | if (@_) { | ||||
| 354 | 9 | 9 | my $a = shift; | |||||
| 355 | 9 | 50 | 26 | $self->throw(-class=>'Bio::Root::BadParameter', | ||||
| 356 | -text=>'Arrayref required', | |||||||
| 357 | -value=> ref $a) unless ref($a) eq 'ARRAY'; | |||||||
| 358 | 9 | 50 | 16 | if (@$a) { | ||||
| 359 | 0 | 0 | @{$self->{'ids'}}{@$a} = (1) x @$a; | |||||
| 0 | 0 | |||||||
| 360 | 0 | 0 | return $a; | |||||
| 361 | } | |||||||
| 362 | else { #with empty arrayref, clear the hash | |||||||
| 363 | 9 | 19 | $self->{'ids'} = {}; | |||||
| 364 | } | |||||||
| 365 | } | |||||||
| 366 | 10 | 50 | 27 | return keys %{$self->{'ids'}} if $self->{'ids'}; | ||||
| 10 | 21 | |||||||
| 367 | } | |||||||
| 368 | ||||||||
| 369 | =head2 query | |||||||
| 370 | ||||||||
| 371 | Title : query | |||||||
| 372 | Usage : $hiv_query->query | |||||||
| 373 | Function: Get/set the submitted query hash or string | |||||||
| 374 | Example : | |||||||
| 375 | Returns : hashref or string | |||||||
| 376 | Args : query in hash or string form (see DESCRIPTION) | |||||||
| 377 | ||||||||
| 378 | =cut | |||||||
| 379 | ||||||||
| 380 | sub query { | |||||||
| 381 | 18 | 18 | 1 | 20 | my $self = shift; | |||
| 382 | 18 | 100 | 40 | return $self->{'query'} = shift if @_; | ||||
| 383 | 10 | 25 | return $self->{'query'}; | |||||
| 384 | } | |||||||
| 385 | ||||||||
| 386 | =head1 Bio::DB::Query::HIVQuery specific methods | |||||||
| 387 | ||||||||
| 388 | =head2 help | |||||||
| 389 | ||||||||
| 390 | Title : help | |||||||
| 391 | Usage : $hiv_query->help("help.html") | |||||||
| 392 | Function: get html-formatted listing of valid fields/aliases/options | |||||||
| 393 | based on current schema xml | |||||||
| 394 | Example : perl -MBio::DB::Query::HIVQuery -e "new Bio::DB::Query::HIVQuery()->help" | lynx -stdin | |||||||
| 395 | Returns : HTML | |||||||
| 396 | Args : optional filename; otherwise prints to stdout | |||||||
| 397 | ||||||||
| 398 | =cut | |||||||
| 399 | ||||||||
| 400 | sub help{ | |||||||
| 401 | 1 | 1 | 1 | 64 | my ($self, $fname) = @_; | |||
| 402 | 1 | 2 | my (@ret, @tok); | |||||
| 403 | 1 | 4 | my $schema = $self->_schema; | |||||
| 404 | 1 | 9 | my $h = CGI->new(); | |||||
| 405 | ||||||||
| 406 | 1 | 233 | my (@tbls, @flds, @als, @opts, $fh); | |||||
| 407 | 1 | 50 | 4 | if ($fname) { | ||||
| 408 | 1 | 50 | 46 | open $fh, '>', $fname or $self->throw(-class => 'Bio::Root::IOException', | ||||
| 409 | -text => "Error opening help html file $fname for writing", | |||||||
| 410 | -value => $!); | |||||||
| 411 | } | |||||||
| 412 | else { | |||||||
| 413 | 0 | 0 | open $fh, ">&1"; | |||||
| 414 | } | |||||||
| 415 | 1 | 4 | @tbls = $schema->tables; | |||||
| 416 | 1 | 10 | @tbls = ('COMMAND', grep !/COMMAND/,@tbls); | |||||
| 417 | 1 | 7 | print $fh ( | |||||
| 418 | $h->start_html(-title=>"HIVQuery Help") | |||||||
| 419 | ); | |||||||
| 420 | 1 | 345 | print $fh $h->a({-id=>'TOP'}, $h->h2("Valid HIVQuery query fields and match data")); | |||||
| 421 | 1 | 85 | print $fh "Fields are organized below according to their Los Alamos HIV database tables. Use aliases in place of full field names in queries; for example: "; |
|||||
| 422 | 1 | 1 | print $fh ""; |
|||||
| 423 | 1 | 2 | print $fh "rather than"; | |||||
| 424 | 1 | 2 | print $fh ""; |
|||||
| 425 | 1 | 2 | print $fh "(which does work, however). Click hyperlinks to see valid search options within the field. The token Any is the wildcard for all fields."; |
|||||
| 426 | 1 | 6 | print $fh $h->start_table({-style=>"font-family:sans-serif;"}) ; | |||||
| 427 | 1 | 38 | foreach my $tbl (@tbls) { | |||||
| 428 | 14 | 3486 | @flds = grep /^$tbl/, $schema->fields; | |||||
| 429 | 14 | 104 | @flds = grep !/_id/, @flds; | |||||
| 430 | 14 | 49 | print $fh ( | |||||
| 431 | $h->start_Tr({-style=>"background-color: lightblue;"}), | |||||||
| 432 | $h->td([$h->a({-id=>$tbl},$tbl), $h->span({-style=>"font-style:italic"},"fields"), $h->span({-style=>"font-style:italic"}, "aliases")]), | |||||||
| 433 | $h->end_Tr | |||||||
| 434 | ); | |||||||
| 435 | 14 | 2154 | foreach my $fld (@flds) { | |||||
| 436 | 74 | 16267 | @als = reverse $schema->aliases($fld); | |||||
| 437 | 74 | 100 | 333 | print $fh ( | ||||
| 438 | # note that aliases can sometimes be empty | |||||||
| 439 | $h->Tr( $h->td( ["", $h->a({-href=>"#opt$fld"}, shift @als || '???'), $h->code(join(',',@als))] )) | |||||||
| 440 | ); | |||||||
| 441 | 74 | 5785 | my @tmp = grep {$_} $schema->options($fld); | |||||
| 1158 | 949 | |||||||
| 442 | 74 | 100 | 100 | 158 | @tmp = sort {(($a =~ /^[0-9]+$/) && $b =~ /^[0-9]+$/) ? $a<=>$b : $a cmp $b} @tmp; | |||
| 5860 | 9513 | |||||||
| 443 | 74 | 100 | 263 | if (grep /Any/,@tmp) { | ||||
| 444 | 31 | 324 | @tmp = grep !/Any/, @tmp; | |||||
| 445 | 31 | 93 | unshift @tmp, 'Any'; | |||||
| 446 | } | |||||||
| 447 | #print STDERR join(', ',@tmp)."\n"; | |||||||
| 448 | 74 | 100 | 200 | push @opts, $h->div( | ||||
| 100 | ||||||||
| 449 | {-style=>"font-family:sans-serif;font-size:small"}, | |||||||
| 450 | $h->hr, | |||||||
| 451 | $h->a( | |||||||
| 452 | {-id=>"opt$fld"}, | |||||||
| 453 | "Valid options for $fld: " | |||||||
| 454 | ), | |||||||
| 455 | $h->blockquote( | |||||||
| 456 | @tmp ? $h->code(join(", ", @tmp)) : $h->i("free text") | |||||||
| 457 | ), | |||||||
| 458 | $h->span( | |||||||
| 459 | "Other aliases: " | |||||||
| 460 | ), | |||||||
| 461 | $h->blockquote( | |||||||
| 462 | @als ? $h->code(join(",",@als)) : "none" | |||||||
| 463 | ), | |||||||
| 464 | " ", | |||||||
| 465 | $h->table( | |||||||
| 466 | $h->Tr( | |||||||
| 467 | $h->td([ | |||||||
| 468 | $h->a({-href=>"#$tbl"}, $h->small('BACK')), | |||||||
| 469 | $h->a({-href=>"#TOP"}, $h->small('TOP')) | |||||||
| 470 | ]) | |||||||
| 471 | ) | |||||||
| 472 | ) | |||||||
| 473 | ); | |||||||
| 474 | ||||||||
| 475 | } | |||||||
| 476 | } | |||||||
| 477 | 1 | 266 | print $fh $h->end_table; | |||||
| 478 | 1 | 283 | print $fh @opts; | |||||
| 479 | 1 | 6 | print $fh $h->end_html; | |||||
| 480 | 1 | 37 | close($fh); | |||||
| 481 | 1 | 34 | return 1; | |||||
| 482 | } | |||||||
| 483 | ||||||||
| 484 | =head1 Annotation manipulation methods | |||||||
| 485 | ||||||||
| 486 | =head2 get_annotations_by_ids | |||||||
| 487 | ||||||||
| 488 | Title : get_annotations_by_ids (or ..._by_id) | |||||||
| 489 | Usage : $ac = $hiv_query->get_annotations_by_ids(@ids) | |||||||
| 490 | Function: Get the Bio::Annotation::Collection for these sequence ids | |||||||
| 491 | Example : | |||||||
| 492 | Returns : A Bio::Annotation::Collection object | |||||||
| 493 | Args : an array of sequence ids | |||||||
| 494 | ||||||||
| 495 | =cut | |||||||
| 496 | ||||||||
| 497 | sub get_annotations_by_ids{ | |||||||
| 498 | 1 | 1 | 1 | 2 | my $self = shift; | |||
| 499 | 1 | 2 | my @ids = @_; | |||||
| 500 | 1 | 1 | my @ret; | |||||
| 501 | 1 | 50 | 33 | 5 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
| 502 | 1 | 3 | $self->warn('Requires query run at level 2'); | |||||
| 503 | 0 | 0 | return (); | |||||
| 504 | } | |||||||
| 505 | 0 | 0 | 0 | @ret = map {$self->{'_annotations'}->{$_}} @ids if exists($self->{'_annotations'}); | ||||
| 0 | 0 | |||||||
| 506 | ||||||||
| 507 | 0 | 0 | 0 | return (wantarray ? @ret : $ret[0]) if @ret; | ||||
| 0 | ||||||||
| 508 | 0 | 0 | return {}; | |||||
| 509 | } | |||||||
| 510 | ||||||||
| 511 | # singular alias | |||||||
| 512 | sub get_annotations_by_id { | |||||||
| 513 | 1 | 1 | 0 | 4 | shift->get_annotations_by_ids(@_); | |||
| 514 | } | |||||||
| 515 | ||||||||
| 516 | =head2 add_annotations_for_id | |||||||
| 517 | ||||||||
| 518 | Title : add_annotations_for_id | |||||||
| 519 | Usage : $hiv_query->add_annotations_for_id( $id ) to create a new | |||||||
| 520 | empty collection for $id | |||||||
| 521 | $hiv_query->add_annotations_for_id( $id, $ac ) to associate | |||||||
| 522 | $ac with $id | |||||||
| 523 | Function: Associate a Bio::Annotation::Collection with this sequence id | |||||||
| 524 | Example : | |||||||
| 525 | Returns : a Bio::Annotation::Collection object | |||||||
| 526 | Args : sequence id [, Bio::Annotation::Collection object] | |||||||
| 527 | ||||||||
| 528 | =cut | |||||||
| 529 | ||||||||
| 530 | sub add_annotations_for_id{ | |||||||
| 531 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 532 | 0 | 0 | my ($id, $ac) = @_; | |||||
| 533 | 0 | 0 | 0 | $id = "" unless defined $id; # avoid warnings | ||||
| 534 | 0 | 0 | 0 | $ac = Bio::Annotation::Collection->new() unless defined $ac; | ||||
| 535 | 0 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter' | ||||
| 536 | -text=>'Bio::Annotation::Collection required at arg 2', | |||||||
| 537 | -value=>"") unless ref($ac) eq 'Bio::Annotation::Collection'; | |||||||
| 538 | ||||||||
| 539 | 0 | 0 | 0 | $self->{'_annotations'}->{$id} = $ac unless exists($self->{'_annotations'}->{$id}); | ||||
| 540 | 0 | 0 | return $ac; | |||||
| 541 | } | |||||||
| 542 | ||||||||
| 543 | =head2 remove_annotations_for_ids | |||||||
| 544 | ||||||||
| 545 | Title : remove_annotations_for_ids (or ..._for_id) | |||||||
| 546 | Usage : $hiv_query->remove_annotations_for_ids( @ids) | |||||||
| 547 | Function: Remove annotation collection for this sequence id | |||||||
| 548 | Example : | |||||||
| 549 | Returns : An array of the previous annotation collections for these ids | |||||||
| 550 | Args : an array of sequence ids | |||||||
| 551 | ||||||||
| 552 | =cut | |||||||
| 553 | ||||||||
| 554 | sub remove_annotations_for_ids { | |||||||
| 555 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 556 | 0 | 0 | my @ids = @_; | |||||
| 557 | 0 | 0 | my @ac; | |||||
| 558 | 0 | 0 | foreach (@ids) { | |||||
| 559 | 0 | 0 | push @ac, delete $self->{'_annotations'}->{$_}; | |||||
| 560 | } | |||||||
| 561 | 0 | 0 | return @ac; | |||||
| 562 | } | |||||||
| 563 | ||||||||
| 564 | # singular alias | |||||||
| 565 | sub remove_annotations_for_id { | |||||||
| 566 | 0 | 0 | 0 | 0 | shift->remove_annotations_for_ids(@_); | |||
| 567 | } | |||||||
| 568 | ||||||||
| 569 | =head2 remove_annotations | |||||||
| 570 | ||||||||
| 571 | Title : remove_annotations | |||||||
| 572 | Usage : $hiv_query->remove_annotations() | |||||||
| 573 | Function: Remove all annotation collections for this object | |||||||
| 574 | Example : | |||||||
| 575 | Returns : The previous annotation collection hash for this object | |||||||
| 576 | Args : none | |||||||
| 577 | ||||||||
| 578 | =cut | |||||||
| 579 | ||||||||
| 580 | sub remove_annotations { | |||||||
| 581 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 582 | ||||||||
| 583 | 0 | 0 | my $ach = $self->{'_annotations'}; | |||||
| 584 | 0 | 0 | $self->{'_annotations'} = {}; | |||||
| 585 | 0 | 0 | return $ach; | |||||
| 586 | } | |||||||
| 587 | ||||||||
| 588 | =head2 get_value | |||||||
| 589 | ||||||||
| 590 | Title : get_value | |||||||
| 591 | Usage : $ac->get_value($tagname) -or- | |||||||
| 592 | $ac->get_value( $tag_level1, $tag_level2,... ) | |||||||
| 593 | Function: access the annotation value assocated with the given tags | |||||||
| 594 | Example : | |||||||
| 595 | Returns : a scalar | |||||||
| 596 | Args : an array of tagnames that descend into the annotation tree | |||||||
| 597 | Note : this is a L |
|||||||
| 598 | L |
|||||||
| 599 | ||||||||
| 600 | =cut | |||||||
| 601 | ||||||||
| 602 | =head2 put_value | |||||||
| 603 | ||||||||
| 604 | Title : put_value | |||||||
| 605 | Usage : $ac->put_value($tagname, $value) -or- | |||||||
| 606 | $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or- | |||||||
| 607 | $ac->put_value( [$tag_level1, $tag_level2, ...] ) | |||||||
| 608 | Function: create a node in an annotation tree, and assign a scalar value to it | |||||||
| 609 | if a value is specified | |||||||
| 610 | Example : | |||||||
| 611 | Returns : scalar or a Bio::AnnotationCollection object | |||||||
| 612 | Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname, | |||||||
| 613 | -VALUE=>$value) -or- | |||||||
| 614 | \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value ) | |||||||
| 615 | Notes : This is a L |
|||||||
| 616 | L |
|||||||
| 617 | If intervening nodes do not exist, put_value creates them, replacing | |||||||
| 618 | existing nodes. So if $ac->put_value('x', 10) was done, then later, | |||||||
| 619 | $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed, | |||||||
| 620 | and $ac->get_value('x') will now return the annotation collection | |||||||
| 621 | with tagname 'y'. | |||||||
| 622 | ||||||||
| 623 | =cut | |||||||
| 624 | ||||||||
| 625 | =head2 get_keys | |||||||
| 626 | ||||||||
| 627 | Title : get_keys | |||||||
| 628 | Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...) | |||||||
| 629 | Function: Get an array of tagnames underneath the named tag nodes | |||||||
| 630 | Example : # prints the values of the members of Category 1... | |||||||
| 631 | print map { $ac->get_value($_) } $ac->get_keys('Category 1') ; | |||||||
| 632 | Returns : array of tagnames or empty list if the arguments represent a leaf | |||||||
| 633 | Args : [array of] tagname[s] | |||||||
| 634 | ||||||||
| 635 | =cut | |||||||
| 636 | ||||||||
| 637 | =head1 GenBank accession manipulation methods | |||||||
| 638 | ||||||||
| 639 | =head2 get_accessions | |||||||
| 640 | ||||||||
| 641 | Title : get_accessions | |||||||
| 642 | Usage : $hiv_query->get_accessions() | |||||||
| 643 | Function: Return an array of GenBank accessions associated with these | |||||||
| 644 | sequences (available only after a query is subjected to a | |||||||
| 645 | full run (i.e., when $RUN_OPTION == 2) | |||||||
| 646 | Example : | |||||||
| 647 | Returns : array of gb accession numbers, or () if none found for this query | |||||||
| 648 | Args : none | |||||||
| 649 | ||||||||
| 650 | =cut | |||||||
| 651 | ||||||||
| 652 | sub get_accessions{ | |||||||
| 653 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 654 | 0 | 0 | my @ret; | |||||
| 655 | 0 | 0 | 0 | 0 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
| 656 | 0 | 0 | $self->warn('Requires query run at level 2'); | |||||
| 657 | 0 | 0 | return (); | |||||
| 658 | } | |||||||
| 659 | 0 | 0 | my @ac = $self->get_annotations_by_ids($self->ids); | |||||
| 660 | 0 | 0 | foreach (@ac) { | |||||
| 661 | 0 | 0 | push @ret, $_->get_value('Special','accession'); | |||||
| 662 | }; | |||||||
| 663 | 0 | 0 | return @ret; | |||||
| 664 | } | |||||||
| 665 | ||||||||
| 666 | =head2 get_accessions_by_ids | |||||||
| 667 | ||||||||
| 668 | Title : get_accessions_by_ids (or ..._by_id) | |||||||
| 669 | Usage : $hiv_query->get_accessions_by_ids(@ids) | |||||||
| 670 | Function: Return an array of GenBank accessions associated with these | |||||||
| 671 | LANL ids (available only after a query is subjected to a | |||||||
| 672 | full run (i.e., when $RUN_OPTION == 2) | |||||||
| 673 | Example : | |||||||
| 674 | Returns : array of gb accession numbers, or () if none found for this query | |||||||
| 675 | Args : none | |||||||
| 676 | ||||||||
| 677 | =cut | |||||||
| 678 | ||||||||
| 679 | sub get_accessions_by_ids { | |||||||
| 680 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 681 | 0 | 0 | my @ids = @_; | |||||
| 682 | 0 | 0 | my @ret; | |||||
| 683 | 0 | 0 | 0 | 0 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
| 684 | 0 | 0 | $self->warn('Requires query run at level 2'); | |||||
| 685 | 0 | 0 | return (); | |||||
| 686 | } | |||||||
| 687 | 0 | 0 | my @ac = $self->get_annotations_by_ids(@ids); | |||||
| 688 | 0 | 0 | foreach (@ac) { | |||||
| 689 | 0 | 0 | push @ret, $_->get_value('Special', 'accession'); | |||||
| 690 | }; | |||||||
| 691 | 0 | 0 | 0 | return wantarray ? @ret : $ret[0]; | ||||
| 692 | } | |||||||
| 693 | ||||||||
| 694 | # singular alias | |||||||
| 695 | sub get_accessions_by_id { | |||||||
| 696 | 0 | 0 | 0 | 0 | shift->get_accessions_by_ids(@_); | |||
| 697 | } | |||||||
| 698 | ||||||||
| 699 | ########## | |||||||
| 700 | ||||||||
| 701 | =head1 Query control methods | |||||||
| 702 | ||||||||
| 703 | =head2 _do_query | |||||||
| 704 | ||||||||
| 705 | Title : _do_query | |||||||
| 706 | Usage : $hiv_query->_do_query or $hiv_query->_do_query($run_level) | |||||||
| 707 | Function: Execute the query according to argument or $RUN_OPTION | |||||||
| 708 | and set _RUN_LEVEL | |||||||
| 709 | extent of query reflects the value of argument | |||||||
| 710 | 0 : validate only (no HTTP action) | |||||||
| 711 | 1 : return sequence count only | |||||||
| 712 | 2 : return sequence ids (full query, returns with annotations) | |||||||
| 713 | noop if current _RUN_LEVEL of query is >= argument or $RUN_OPTION, | |||||||
| 714 | Example : | |||||||
| 715 | Returns : actual _RUN_LEVEL (0, 1, or 2) achieved | |||||||
| 716 | Args : desired run level (optional, global $RUN_OPTION is default) | |||||||
| 717 | ||||||||
| 718 | =cut | |||||||
| 719 | ||||||||
| 720 | sub _do_query{ | |||||||
| 721 | 9 | 9 | 105 | my ($self,$rl) = @_; | ||||
| 722 | 9 | 100 | 22 | $rl = $RUN_OPTION unless defined $rl; | ||||
| 723 | 9 | 50 | 86 | $self->throw(-class=>"Bio::Root::BadParameter", | ||||
| 724 | -text=>"Invalid run option \"$RUN_OPTION\"", | |||||||
| 725 | -value=>$RUN_OPTION) unless grep /^$RUN_OPTION$/, (0, 1, 2); | |||||||
| 726 | 9 | 50 | 20 | (!defined($self->{'_RUN_LEVEL'})) && do { | ||||
| 727 | 9 | 18 | $self->_create_lanl_query(); | |||||
| 728 | 5 | 13 | $self->{'_RUN_LEVEL'} = 0; | |||||
| 729 | }; | |||||||
| 730 | 5 | 0 | 0 | 11 | ($rl > 0) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 0)) && do { | |||
| 33 | ||||||||
| 731 | 0 | 0 | $self->_do_lanl_request(); | |||||
| 732 | 0 | 0 | $self->{'_RUN_LEVEL'} = 1; | |||||
| 733 | }; | |||||||
| 734 | 5 | 0 | 0 | 12 | ($rl > 1) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 1)) && do { | |||
| 33 | ||||||||
| 735 | 0 | 0 | $self->_parse_lanl_response(); | |||||
| 736 | 0 | 0 | $self->{'_RUN_LEVEL'} = 2; | |||||
| 737 | }; | |||||||
| 738 | 5 | 68 | return $self->{'_RUN_LEVEL'}; | |||||
| 739 | } | |||||||
| 740 | ||||||||
| 741 | =head2 _reset | |||||||
| 742 | ||||||||
| 743 | Title : _reset | |||||||
| 744 | Usage : $hiv_query->_reset | |||||||
| 745 | Function: Resets query storage, count, and ids, while retaining session id, | |||||||
| 746 | original query string, and db schema | |||||||
| 747 | Example : | |||||||
| 748 | Returns : void | |||||||
| 749 | Args : none | |||||||
| 750 | ||||||||
| 751 | =cut | |||||||
| 752 | ||||||||
| 753 | sub _reset{ | |||||||
| 754 | 9 | 9 | 13 | my $self = shift; | ||||
| 755 | 9 | 26 | $self->ids([]); | |||||
| 756 | 9 | 22 | $self->count(0); | |||||
| 757 | 9 | 13 | $self->{'_annotations'} = {}; | |||||
| 758 | 9 | 19 | $self->{'_lanl_response'} = []; | |||||
| 759 | 9 | 11 | $self->{'_lanl_query'} = []; | |||||
| 760 | 9 | 21 | $self->{'_RUN_LEVEL'} = undef; | |||||
| 761 | 9 | 11 | return; | |||||
| 762 | } | |||||||
| 763 | ||||||||
| 764 | =head2 _session_id | |||||||
| 765 | ||||||||
| 766 | Title : _session_id | |||||||
| 767 | Usage : $hiv_query->_session_id($newval) | |||||||
| 768 | Function: Get/set HIV db session id (initialized in _do_lanl_request) | |||||||
| 769 | Example : | |||||||
| 770 | Returns : value of _session_id (a scalar) | |||||||
| 771 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 772 | ||||||||
| 773 | =cut | |||||||
| 774 | ||||||||
| 775 | sub _session_id{ | |||||||
| 776 | 0 | 0 | 0 | my $self = shift; | ||||
| 777 | ||||||||
| 778 | 0 | 0 | 0 | return $self->{'_session_id'} = shift if @_; | ||||
| 779 | 0 | 0 | return $self->{'_session_id'}; | |||||
| 780 | } | |||||||
| 781 | =head2 _run_level | |||||||
| 782 | ||||||||
| 783 | Title : _run_level | |||||||
| 784 | Usage : $obj->_run_level($newval) | |||||||
| 785 | Function: returns the level at which the query has so far been run | |||||||
| 786 | Example : | |||||||
| 787 | Returns : value of _run_level (a scalar) | |||||||
| 788 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 789 | ||||||||
| 790 | =cut | |||||||
| 791 | ||||||||
| 792 | sub _run_level{ | |||||||
| 793 | 0 | 0 | 0 | my $self = shift; | ||||
| 794 | ||||||||
| 795 | 0 | 0 | 0 | return $self->{'_RUN_LEVEL'} = shift if @_; | ||||
| 796 | 0 | 0 | return $self->{'_RUN_LEVEL'}; | |||||
| 797 | } | |||||||
| 798 | ||||||||
| 799 | =head2 _run_option | |||||||
| 800 | ||||||||
| 801 | Title : _run_option | |||||||
| 802 | Usage : $hiv_query->_run_option($newval) | |||||||
| 803 | Function: Get/set HIV db query run option (see _do_query for values) | |||||||
| 804 | Example : | |||||||
| 805 | Returns : value of _run_option (a scalar) | |||||||
| 806 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 807 | ||||||||
| 808 | =cut | |||||||
| 809 | ||||||||
| 810 | sub _run_option{ | |||||||
| 811 | 2 | 2 | 4 | my $self = shift; | ||||
| 812 | ||||||||
| 813 | 2 | 100 | 6 | return $self->{'_run_option'} = shift if @_; | ||||
| 814 | 1 | 3 | return $self->{'_run_option'}; | |||||
| 815 | } | |||||||
| 816 | ||||||||
| 817 | =head2 _ua_hash | |||||||
| 818 | ||||||||
| 819 | Title : _ua_hash | |||||||
| 820 | Usage : $obj->_ua_hash($newval) | |||||||
| 821 | Function: | |||||||
| 822 | Example : | |||||||
| 823 | Returns : value of _ua_hash (a scalar) | |||||||
| 824 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 825 | ||||||||
| 826 | =cut | |||||||
| 827 | ||||||||
| 828 | sub _ua_hash{ | |||||||
| 829 | 1 | 1 | 1 | my $self = shift; | ||||
| 830 | 1 | 50 | 3 | if (@_) { | ||||
| 831 | 1 | 4 | for (ref $_[0]) { | |||||
| 832 | 1 | 50 | 2 | $_ eq 'HASH' && do { | ||||
| 833 | 1 | 2 | $self->{'_ua_hash'} = $_[0]; | |||||
| 834 | 1 | 3 | last; | |||||
| 835 | }; | |||||||
| 836 | 0 | 0 | 0 | !$_ && do { | ||||
| 837 | 0 | 0 | $self->{'_ua_hash'} = {@_}; | |||||
| 838 | 0 | 0 | last; | |||||
| 839 | }; | |||||||
| 840 | 0 | 0 | do { | |||||
| 841 | 0 | 0 | $self->throw("Type ".ref($_)." unsupported as arg in _ua_hash"); | |||||
| 842 | }; | |||||||
| 843 | ||||||||
| 844 | } | |||||||
| 845 | } | |||||||
| 846 | 1 | 1 | return %{$self->{'_ua_hash'}}; | |||||
| 1 | 1 | |||||||
| 847 | } | |||||||
| 848 | ||||||||
| 849 | ||||||||
| 850 | ####### | |||||||
| 851 | ||||||||
| 852 | =head1 Internals | |||||||
| 853 | ||||||||
| 854 | =head2 add_id | |||||||
| 855 | ||||||||
| 856 | Title : add_id | |||||||
| 857 | Usage : $hiv_query->add_id($id) | |||||||
| 858 | Function: Add new id to ids | |||||||
| 859 | Example : | |||||||
| 860 | Returns : the new id | |||||||
| 861 | Args : a sequence id | |||||||
| 862 | ||||||||
| 863 | =cut | |||||||
| 864 | ||||||||
| 865 | sub add_id { | |||||||
| 866 | 0 | 0 | 1 | 0 | my $self = shift; | |||
| 867 | 0 | 0 | my $id = shift; | |||||
| 868 | 0 | 0 | 0 | $id = "" unless defined $id; # avoid warnings | ||||
| 869 | 0 | 0 | ${$self->{'ids'}}{$id}++; | |||||
| 0 | 0 | |||||||
| 870 | 0 | 0 | return $id; | |||||
| 871 | } | |||||||
| 872 | ||||||||
| 873 | ||||||||
| 874 | sub lanl_base{ | |||||||
| 875 | 4 | 4 | 0 | 6 | my $self = shift; | |||
| 876 | 4 | 100 | 11 | return $self->{'lanl_base'} = shift if @_; | ||||
| 877 | 3 | 13 | return $self->{'lanl_base'}; | |||||
| 878 | } | |||||||
| 879 | ||||||||
| 880 | =head2 map_db | |||||||
| 881 | ||||||||
| 882 | Title : map_db | |||||||
| 883 | Usage : $obj->map_db($newval) | |||||||
| 884 | Function: | |||||||
| 885 | Example : | |||||||
| 886 | Returns : value of map_db (a scalar) | |||||||
| 887 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 888 | ||||||||
| 889 | =cut | |||||||
| 890 | ||||||||
| 891 | sub map_db{ | |||||||
| 892 | 2 | 2 | 1 | 3 | my $self = shift; | |||
| 893 | 2 | 100 | 5 | return $self->{'map_db'} = shift if @_; | ||||
| 894 | 1 | 6 | return $self->{'map_db'}; | |||||
| 895 | } | |||||||
| 896 | ||||||||
| 897 | =head2 make_search_if | |||||||
| 898 | ||||||||
| 899 | Title : make_search_if | |||||||
| 900 | Usage : $obj->make_search_if($newval) | |||||||
| 901 | Function: | |||||||
| 902 | Example : | |||||||
| 903 | Returns : value of make_search_if (a scalar) | |||||||
| 904 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 905 | ||||||||
| 906 | =cut | |||||||
| 907 | ||||||||
| 908 | sub make_search_if{ | |||||||
| 909 | 2 | 2 | 1 | 4 | my $self = shift; | |||
| 910 | 2 | 100 | 5 | return $self->{'make_search_if'} = shift if @_; | ||||
| 911 | 1 | 3 | return $self->{'make_search_if'}; | |||||
| 912 | } | |||||||
| 913 | ||||||||
| 914 | =head2 search_ | |||||||
| 915 | ||||||||
| 916 | Title : search_ | |||||||
| 917 | Usage : $obj->search_($newval) | |||||||
| 918 | Function: | |||||||
| 919 | Example : | |||||||
| 920 | Returns : value of search_ (a scalar) | |||||||
| 921 | Args : on set, new value (a scalar or undef, optional) | |||||||
| 922 | ||||||||
| 923 | =cut | |||||||
| 924 | ||||||||
| 925 | sub search_{ | |||||||
| 926 | 2 | 2 | 1 | 4 | my $self = shift; | |||
| 927 | 2 | 100 | 6 | return $self->{'search_'} = shift if @_; | ||||
| 928 | 1 | 3 | return $self->{'search_'}; | |||||
| 929 | } | |||||||
| 930 | ||||||||
| 931 | =head2 _map_db_uri | |||||||
| 932 | ||||||||
| 933 | Title : _map_db_uri | |||||||
| 934 | Usage : | |||||||
| 935 | Function: return the full map_db uri ("Database Map") | |||||||
| 936 | Example : | |||||||
| 937 | Returns : scalar string | |||||||
| 938 | Args : none | |||||||
| 939 | ||||||||
| 940 | =cut | |||||||
| 941 | ||||||||
| 942 | sub _map_db_uri{ | |||||||
| 943 | 1 | 1 | 1554 | my $self = shift; | ||||
| 944 | 1 | 4 | return $self->lanl_base."/".$self->map_db; | |||||
| 945 | } | |||||||
| 946 | ||||||||
| 947 | ||||||||
| 948 | =head2 _make_search_if_uri | |||||||
| 949 | ||||||||
| 950 | Title : _make_search_if_uri | |||||||
| 951 | Usage : | |||||||
| 952 | Function: return the full make_search_if uri ("Make Search Interface") | |||||||
| 953 | Example : | |||||||
| 954 | Returns : scalar string | |||||||
| 955 | Args : none | |||||||
| 956 | ||||||||
| 957 | =cut | |||||||
| 958 | ||||||||
| 959 | sub _make_search_if_uri{ | |||||||
| 960 | 1 | 1 | 2 | my $self = shift; | ||||
| 961 | 1 | 2 | return $self->lanl_base."/".$self->make_search_if; | |||||
| 962 | } | |||||||
| 963 | ||||||||
| 964 | =head2 _search_uri | |||||||
| 965 | ||||||||
| 966 | Title : _search_uri | |||||||
| 967 | Usage : | |||||||
| 968 | Function: return the full search cgi uri ("Search Database") | |||||||
| 969 | Example : | |||||||
| 970 | Returns : scalar string | |||||||
| 971 | Args : none | |||||||
| 972 | ||||||||
| 973 | =cut | |||||||
| 974 | ||||||||
| 975 | sub _search_uri{ | |||||||
| 976 | 1 | 1 | 2 | my $self = shift; | ||||
| 977 | 1 | 2 | return $self->lanl_base."/".$self->search_; | |||||
| 978 | } | |||||||
| 979 | ||||||||
| 980 | =head2 _schema_file | |||||||
| 981 | ||||||||
| 982 | Title : _schema_file | |||||||
| 983 | Usage : $hiv_query->_schema_file($newval) | |||||||
| 984 | Function: | |||||||
| 985 | Example : | |||||||
| 986 | Returns : value of _schema_file (an XML string or filename) | |||||||
| 987 | Args : on set, new value (an XML string or filename, or undef, optional) | |||||||
| 988 | ||||||||
| 989 | =cut | |||||||
| 990 | ||||||||
| 991 | sub _schema_file { | |||||||
| 992 | 3 | 3 | 4 | my $self = shift; | ||||
| 993 | ||||||||
| 994 | 3 | 100 | 10 | return $self->{'_schema_file'} = shift if @_; | ||||
| 995 | 2 | 12 | return $self->{'_schema_file'}; | |||||
| 996 | } | |||||||
| 997 | ||||||||
| 998 | =head2 _schema | |||||||
| 999 | ||||||||
| 1000 | Title : _schema | |||||||
| 1001 | Usage : $hiv_query->_schema($newVal) | |||||||
| 1002 | Function: | |||||||
| 1003 | Example : | |||||||
| 1004 | Returns : value of _schema (an HIVSchema object in package | |||||||
| 1005 | L |
|||||||
| 1006 | Args : none (field set directly in new()) | |||||||
| 1007 | ||||||||
| 1008 | =cut | |||||||
| 1009 | ||||||||
| 1010 | sub _schema{ | |||||||
| 1011 | 10 | 10 | 11 | my $self = shift; | ||||
| 1012 | ||||||||
| 1013 | $self->{'_schema'} ? | |||||||
| 1014 | 10 | 100 | 32 | return $self->{'_schema'} : | ||||
| 1015 | $self->throw(-class=>'Bio::SchemaNotInit::Exception', | |||||||
| 1016 | -text=>"DB schema not initialized", | |||||||
| 1017 | -value=>""); | |||||||
| 1018 | ||||||||
| 1019 | } | |||||||
| 1020 | ||||||||
| 1021 | =head2 _lanl_query | |||||||
| 1022 | ||||||||
| 1023 | Title : _lanl_query | |||||||
| 1024 | Usage : $hiv_query->_lanl_query(\@query_parms) | |||||||
| 1025 | Function: pushes \@query_parms onto @{$self->{'_lanl_query'} | |||||||
| 1026 | Example : | |||||||
| 1027 | Returns : value of _lanl_query (an arrayref) | |||||||
| 1028 | Args : on set, new value (an arrayref or undef, optional) | |||||||
| 1029 | ||||||||
| 1030 | =cut | |||||||
| 1031 | ||||||||
| 1032 | sub _lanl_query{ | |||||||
| 1033 | 5 | 5 | 6 | my $self = shift; | ||||
| 1034 | 5 | 7 | my $a = shift; | |||||
| 1035 | 5 | 50 | 8 | return $self->{'_lanl_query'} unless $a; | ||||
| 1036 | 5 | 50 | 13 | if (ref $a eq 'ARRAY') { | ||||
| 1037 | 5 | 7 | push @{$self->{'_lanl_query'}}, $a; | |||||
| 5 | 8 | |||||||
| 1038 | 5 | 21 | return $a; | |||||
| 1039 | } | |||||||
| 1040 | else { | |||||||
| 1041 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||
| 1042 | -text=>'Array ref required for argument.', | |||||||
| 1043 | -value=>$a); | |||||||
| 1044 | } | |||||||
| 1045 | ||||||||
| 1046 | } | |||||||
| 1047 | ||||||||
| 1048 | =head2 _lanl_response | |||||||
| 1049 | ||||||||
| 1050 | Title : _lanl_response | |||||||
| 1051 | Usage : $hiv_query->_lanl_response($response) | |||||||
| 1052 | Function: pushes $response onto @{$hiv_query->{'_lanl_response'}} | |||||||
| 1053 | Example : | |||||||
| 1054 | Returns : value of _lanl_response (an arrayref of HTTP::Response objects) | |||||||
| 1055 | Args : on set, new value (an HTTP::Response object or undef, optional) | |||||||
| 1056 | ||||||||
| 1057 | =cut | |||||||
| 1058 | ||||||||
| 1059 | sub _lanl_response{ | |||||||
| 1060 | 0 | 0 | 0 | my $self = shift; | ||||
| 1061 | 0 | 0 | 0 | if (@_) { | ||||
| 1062 | 0 | 0 | my $r = shift; | |||||
| 1063 | 0 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter', | ||||
| 1064 | -text=>'Requires an HTTP::Response object', | |||||||
| 1065 | -value=> ref $r) unless ref($r) eq 'HTTP::Response'; | |||||||
| 1066 | 0 | 0 | push @{$self->{'_lanl_response'}}, $r; | |||||
| 0 | 0 | |||||||
| 1067 | 0 | 0 | return $r; | |||||
| 1068 | } | |||||||
| 1069 | 0 | 0 | return $self->{'_lanl_response'}; | |||||
| 1070 | } | |||||||
| 1071 | ||||||||
| 1072 | =head2 _create_lanl_query | |||||||
| 1073 | ||||||||
| 1074 | Title : _create_lanl_query | |||||||
| 1075 | Usage : $hiv_query->_create_lanl_query() | |||||||
| 1076 | Function: validate query hash or string, prepare for _do_lanl_request | |||||||
| 1077 | Example : | |||||||
| 1078 | Returns : 1 if successful; throws exception on invalid query | |||||||
| 1079 | Args : | |||||||
| 1080 | ||||||||
| 1081 | =cut | |||||||
| 1082 | ||||||||
| 1083 | sub _create_lanl_query { | |||||||
| 1084 | 9 | 9 | 14 | my $self = shift; | ||||
| 1085 | 9 | 12 | my (%inhash, @query, @qhashes); | |||||
| 1086 | 0 | 0 | my ($schema, @validFields, @validAliases); | |||||
| 1087 | ||||||||
| 1088 | 9 | 17 | for ($self->query) { | |||||
| 1089 | 9 | 50 | 19 | !defined && do { | ||||
| 1090 | 0 | 0 | $self->throw(-class=>'Bio::Root::NoSuchThing', | |||||
| 1091 | -text=>'Query not specified', | |||||||
| 1092 | -value=>''); | |||||||
| 1093 | 0 | 0 | last; | |||||
| 1094 | }; | |||||||
| 1095 | 9 | 100 | 20 | ref eq 'HASH' && do { | ||||
| 1096 | 2 | 6 | %inhash = %$_; | |||||
| 1097 | 2 | 100 | 4 | if ( grep /HASH/, map {ref} values %inhash ) { | ||||
| 5 | 14 | |||||||
| 1098 | # check for {query=>{},annot=>[]} style | |||||||
| 1099 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
| 1100 | -text=>'Query style unrecognized', | |||||||
| 1101 | 1 | 50 | 4 | -value=>"") unless defined $inhash{query}; | ||||
| 1102 | 1 | 3 | push @qhashes, $_; | |||||
| 1103 | } | |||||||
| 1104 | 2 | 3 | last; | |||||
| 1105 | }; | |||||||
| 1106 | 7 | 100 | 15 | ref eq 'ARRAY' && do { | ||||
| 1107 | 3 | 10 | $inhash{'query'} = {@$_}; | |||||
| 1108 | 3 | 5 | push @qhashes, \%inhash; | |||||
| 1109 | 3 | 5 | last; | |||||
| 1110 | }; | |||||||
| 1111 | #else | |||||||
| 1112 | 4 | 4 | do { | |||||
| 1113 | 4 | 11 | @qhashes = $self->_parse_query_string($_); | |||||
| 1114 | }; | |||||||
| 1115 | } | |||||||
| 1116 | 9 | 22 | $schema = $self->_schema; | |||||
| 1117 | 8 | 23 | @validFields = $schema->fields; | |||||
| 1118 | 8 | 35 | @validAliases = $schema->aliases; | |||||
| 1119 | ||||||||
| 1120 | # validate args based on the xml specification file | |||||||
| 1121 | # only checks blanks and fields with explicitly specified options | |||||||
| 1122 | # text fields can put anything, and the query will be run before | |||||||
| 1123 | # an error is caught in these | |||||||
| 1124 | 8 | 46 | foreach my $qh (@qhashes) { | |||||
| 1125 | 8 | 12 | @query=(); | |||||
| 1126 | 8 | 6 | foreach my $k (keys %{$$qh{'query'}}) { | |||||
| 8 | 29 | |||||||
| 1127 | 17 | 8 | my $fld; | |||||
| 1128 | # validate field | |||||||
| 1129 | 17 | 50 | 1930 | if (grep /^$k$/, @validFields) { | ||||
| 100 | ||||||||
| 1130 | 0 | 0 | $fld = $k; | |||||
| 1131 | } | |||||||
| 1132 | elsif (grep /^$k$/, @validAliases) { | |||||||
| 1133 | 15 | 32 | foreach (@validFields) { | |||||
| 1134 | 1018 | 100 | 1302 | if (grep (/^$k$/, $schema->aliases($_))) { | ||||
| 1135 | 15 | 15 | $fld = $_; | |||||
| 1136 | 15 | 26 | last; | |||||
| 1137 | } | |||||||
| 1138 | # $fld contains the field corresp. to the alias | |||||||
| 1139 | } | |||||||
| 1140 | } | |||||||
| 1141 | else { | |||||||
| 1142 | 2 | 21 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||
| 1143 | -text=>"Invalid field or alias \"$k\"", | |||||||
| 1144 | -value=>$qh); | |||||||
| 1145 | } | |||||||
| 1146 | # validate matchdata | |||||||
| 1147 | 15 | 34 | my $vf = $schema->_sfieldh($fld); | |||||
| 1148 | 15 | 100 | 46 | my @md = (ref($qh->{'query'}{$k}) eq 'ARRAY') ? @{$qh->{'query'}{$k}} : $qh->{'query'}{$k}; | ||||
| 6 | 12 | |||||||
| 1149 | 15 | 50 | 46 | if ($$vf{type} eq 'text') { | ||||
| 50 | ||||||||
| 1150 | 0 | 0 | foreach (@md) { | |||||
| 1151 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
| 1152 | -text=>'Value for field \"$k\" cannot be empty', | |||||||
| 1153 | -value=>$qh) | |||||||
| 1154 | 0 | 0 | 0 | 0 | if ($_ eq "") && ($$vf{blank_ok} eq 'false'); | |||
| 1155 | } | |||||||
| 1156 | } | |||||||
| 1157 | elsif ($$vf{type} eq 'option') { | |||||||
| 1158 | 15 | 20 | foreach my $md (@md) { | |||||
| 1159 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
| 1160 | -text=>"Invalid value \"".$md."\" for field \"$fld\"", | |||||||
| 1161 | -value=>$md) | |||||||
| 1162 | 21 | 50 | 66 | 45 | unless $$vf{option} && grep {defined $_ && /^$md$/} @{$$vf{option}}; | |||
| 3086 | 100 | 7864 | ||||||
| 21 | 33 | |||||||
| 1163 | } | |||||||
| 1164 | } | |||||||
| 1165 | # validated; add to query | |||||||
| 1166 | 14 | 21 | foreach (@md) { | |||||
| 1167 | 20 | 36 | push @query, ($fld => $_); | |||||
| 1168 | } | |||||||
| 1169 | } | |||||||
| 1170 | 5 | 100 | 16 | if ($qh->{'annot'}) { | ||||
| 1171 | # validate the column names to be included in the query | |||||||
| 1172 | # to obtain annotations | |||||||
| 1173 | 2 | 3 | my @annot_cols = @{$qh->{'annot'}}; | |||||
| 2 | 6 | |||||||
| 1174 | 2 | 3 | foreach my $k (@annot_cols) { | |||||
| 1175 | 2 | 3 | my $fld; | |||||
| 1176 | # validate field | |||||||
| 1177 | 2 | 50 | 206 | if (grep /^$k$/, @validFields) { | ||||
| 50 | ||||||||
| 1178 | 0 | 0 | $fld = $k; | |||||
| 1179 | } | |||||||
| 1180 | elsif (grep /^$k$/, @validAliases) { | |||||||
| 1181 | 2 | 3 | foreach (@validFields) { | |||||
| 1182 | 130 | 100 | 173 | if (grep (/^$k$/, $schema->aliases($_))) { | ||||
| 1183 | 2 | 2 | $fld = $_; | |||||
| 1184 | 2 | 3 | last; | |||||
| 1185 | } | |||||||
| 1186 | # $fld should contain the field corresp. to the alias | |||||||
| 1187 | } | |||||||
| 1188 | } | |||||||
| 1189 | else { | |||||||
| 1190 | 0 | 0 | $self->throw(-class=>'Bio::Root::NoSuchThing', | |||||
| 1191 | -text=>"Invalid field or alias \"$k\"", | |||||||
| 1192 | -value=>$k); | |||||||
| 1193 | } | |||||||
| 1194 | # lazy: 'Any' may not be the right default (but appears to | |||||||
| 1195 | # be, based on the lanl html) | |||||||
| 1196 | 2 | 4 | push @query, ($fld => 'Any'); | |||||
| 1197 | } | |||||||
| 1198 | } | |||||||
| 1199 | ||||||||
| 1200 | # insure that LANL and GenBank ids are retrieved | |||||||
| 1201 | 5 | 50 | 30 | push @query, ('sequenceentry.se_id' => 'Any') unless grep /SequenceEntry\.SE_id/, @query; | ||||
| 1202 | 5 | 50 | 17 | push @query, ('sequenceaccessions.sa_genbankaccession' => 'Any') | ||||
| 1203 | unless grep /SequenceAccessions\.SA_GenBankAccession/, @query; | |||||||
| 1204 | ||||||||
| 1205 | # an "order" field is required by the LANL CGI | |||||||
| 1206 | # if not specified, default to SE_id | |||||||
| 1207 | ||||||||
| 1208 | 5 | 50 | 23 | push @query, ('order'=>'sequenceentry.se_id') unless grep /order/, @query; | ||||
| 1209 | ||||||||
| 1210 | # @query now contains sfield=>matchdata pairs, as specified by user | |||||||
| 1211 | # include appropriate indexes to create correct automatic joins | |||||||
| 1212 | # established by the LANL CGI | |||||||
| 1213 | 5 | 6 | my (@qtbl, @qpk, @qfk); | |||||
| 1214 | ||||||||
| 1215 | # the tables represented in query: | |||||||
| 1216 | 5 | 28 | my %q = @query; # squish the tables in the current query into hash keys | |||||
| 1217 | 5 | 25 | @qtbl = $schema->tbl('-s', keys %q); | |||||
| 1218 | ||||||||
| 1219 | 5 | 50 | 13 | if (@qtbl > 1) { | ||||
| 1220 | # more than one table, see if they can be connected | |||||||
| 1221 | # get primary keys of query tables | |||||||
| 1222 | 5 | 12 | @qpk = $schema->pk(@qtbl); | |||||
| 1223 | ||||||||
| 1224 | # we need to get each query table to join to | |||||||
| 1225 | # SequenceEntry. | |||||||
| 1226 | # | |||||||
| 1227 | # The schema is a graph with tables as nodes and | |||||||
| 1228 | # foreign keys<->primary keys as branches. To get a | |||||||
| 1229 | # join that works, need to include in the query | |||||||
| 1230 | # all branches along a path from SequenceEntry | |||||||
| 1231 | # to each query table. | |||||||
| 1232 | # | |||||||
| 1233 | # find_join does it... | |||||||
| 1234 | my @joink = map { | |||||||
| 1235 | 5 | 6 | my @k = $schema->find_join($_,'sequenceentry'); | |||||
| 15 | 34 | |||||||
| 1236 | 15 | 100 | 18 | map {$_ || ()} @k | ||||
| 15 | 47 | |||||||
| 1237 | } @qtbl; | |||||||
| 1238 | # squish the keys in @joink | |||||||
| 1239 | 5 | 8 | my %j; | |||||
| 1240 | 5 | 17 | @j{@joink} = (1) x @joink; | |||||
| 1241 | 5 | 11 | @joink = keys %j; | |||||
| 1242 | # add the fields not currently in the query | |||||||
| 1243 | 5 | 11 | foreach (@qpk, @joink) { | |||||
| 1244 | 15 | 12 | my $fld = $_; | |||||
| 1245 | 15 | 100 | 243 | if (!grep(/^$fld$/,keys %q)) { | ||||
| 1246 | # lazy: 'Any' may not be the right default (but appears to | |||||||
| 1247 | # be, based on the lanl html) | |||||||
| 1248 | 10 | 27 | push @query, ($_ => 'Any'); | |||||
| 1249 | } | |||||||
| 1250 | } | |||||||
| 1251 | ||||||||
| 1252 | } | |||||||
| 1253 | ||||||||
| 1254 | # set object property | |||||||
| 1255 | 5 | 30 | $self->_lanl_query([@query]); | |||||
| 1256 | } | |||||||
| 1257 | 5 | 129 | return 1; | |||||
| 1258 | } | |||||||
| 1259 | ||||||||
| 1260 | # _do_lanl_request : post the queries created by _create_lanl_query | |||||||
| 1261 | # | |||||||
| 1262 | # @args (or {@args}) should be unaliased Table.Column=>Matchdata | |||||||
| 1263 | # pairs (these will be used directly in the POSTs) | |||||||
| 1264 | ||||||||
| 1265 | =head2 _do_lanl_request | |||||||
| 1266 | ||||||||
| 1267 | Title : _do_lanl_request | |||||||
| 1268 | Usage : $hiv_query->_do_lanl_request() | |||||||
| 1269 | Function: Perform search request on _create_lanl_query-validated query | |||||||
| 1270 | Example : | |||||||
| 1271 | Returns : 1 if successful | |||||||
| 1272 | Args : | |||||||
| 1273 | ||||||||
| 1274 | =cut | |||||||
| 1275 | ||||||||
| 1276 | sub _do_lanl_request { | |||||||
| 1277 | 0 | 0 | 0 | my $self = shift; | ||||
| 1278 | 0 | 0 | my (@queries, @query, @interface,$interfGet,$searchGet,$response); | |||||
| 1279 | 0 | 0 | my ($numseqs, $count); | |||||
| 1280 | ||||||||
| 1281 | # handle args | |||||||
| 1282 | 0 | 0 | 0 | if (!$self->_lanl_query) { | ||||
| 1283 | 0 | 0 | $self->throw(-class=>"Bio::Root::BadParameter", | |||||
| 1284 | -text=>"_lanl_query empty, run _create_lanl_request first", | |||||||
| 1285 | -value=>""); | |||||||
| 1286 | } | |||||||
| 1287 | else { | |||||||
| 1288 | 0 | 0 | @queries = @{$self->_lanl_query}; | |||||
| 0 | 0 | |||||||
| 1289 | } | |||||||
| 1290 | ||||||||
| 1291 | ## utility vars | |||||||
| 1292 | ## search site specific CGI parms | |||||||
| 1293 | 0 | 0 | my @search_pms = ('action'=>'Search'); | |||||
| 1294 | 0 | 0 | my @searchif_pms = ('action'=>'Search Interface'); | |||||
| 1295 | # don't get the actual sequence data here (i.e., the cgi parm | |||||||
| 1296 | # 'incl_seq' remains undefined... | |||||||
| 1297 | 0 | 0 | my @download_pms = ('action Download.x'=>1, 'action Download.y'=>1); | |||||
| 1298 | ||||||||
| 1299 | ## HTML-testing regexps | |||||||
| 1300 | 0 | 0 | my $tags_re = qr{(?:\s*<[^>]+>\s*)}; | |||||
| 1301 | 0 | 0 | my $session_id_re = qr{ | |||||
| 1302 | 0 | 0 | my $search_form_re = qr{ | |||||
| 1303 | 0 | 0 | my $seqs_found_re = qr{Displaying$tags_re*(?:\s*[0-9-]*\s*)*$tags_re*of$tags_re*\s*([0-9]+)$tags_re*sequences found}; | |||||
| 1304 | 0 | 0 | my $no_seqs_found_re = qr{Sorry.*no sequences found}; | |||||
| 1305 | 0 | 0 | my $too_many_re = qr{too many records: $tags_re*([0-9]+)}; | |||||
| 1306 | 0 | 0 | my $sys_error_re = qr{[Ss]ystem error}; | |||||
| 1307 | 0 | 0 | my $sys_error_extract_re = qr{${tags_re}error:.*? | ]+>${tags_re}(.*?) }; |
||||
| 1308 | # find something like: | |||||||
| 1309 | # tables without join: SequenceAccessions |
|||||||
| 1310 | 0 | 0 | my $tbl_no_join_re = qr{tables without join}i; | |||||
| 1311 | # my $sorry_bud_re = qr{}; | |||||||
| 1312 | ||||||||
| 1313 | ||||||||
| 1314 | 0 | 0 | foreach my $q (@queries) { | |||||
| 1315 | 0 | 0 | @query = @$q; | |||||
| 1316 | # default query control parameters | |||||||
| 1317 | 0 | 0 | my %qctrl = ( | |||||
| 1318 | max_rec=>100, | |||||||
| 1319 | sort_dir=>'ASC', | |||||||
| 1320 | translate=>'FALSE' # nucleotides | |||||||
| 1321 | ); | |||||||
| 1322 | ||||||||
| 1323 | # do work... | |||||||
| 1324 | ||||||||
| 1325 | # pull out commands, designated by the COMMAND pseudo-table... | |||||||
| 1326 | 0 | 0 | 0 | my @commands = map { $query[$_] =~ s/^COMMAND\.// ? @query[$_..$_+1] : () } (0..$#query-1); | ||||
| 0 | 0 | |||||||
| 1327 | 0 | 0 | 0 | @query = map { $query[$_] =~ /^COMMAND/ ? () : @query[2*$_..2*$_+1] } (0..($#query-1)/2); | ||||
| 0 | 0 | |||||||
| 1328 | ||||||||
| 1329 | ||||||||
| 1330 | # set control parameters explicitly made in query | |||||||
| 1331 | 0 | 0 | foreach my $cp (keys %qctrl) { | |||||
| 1332 | 0 | 0 | 0 | if (!grep( /^$cp$/, @query)) { | ||||
| 1333 | 0 | 0 | push @query, ($cp, $qctrl{$cp}); | |||||
| 1334 | } | |||||||
| 1335 | } | |||||||
| 1336 | ||||||||
| 1337 | # note that @interface must be an array, since a single 'key' (the table) | |||||||
| 1338 | # can be associated with multiple 'values' (the columns) in the POST | |||||||
| 1339 | ||||||||
| 1340 | # squish fieldnames into hash keys | |||||||
| 1341 | 0 | 0 | my %q = @query; | |||||
| 1342 | 0 | 0 | @interface = grep {defined} map {my ($tbl,$col) = /^(.*)\.(.*)$/} keys %q; | |||||
| 0 | 0 | |||||||
| 0 | 0 | |||||||
| 1343 | 0 | 0 | my $err_val = ""; # to contain informative (ha!) value if error is parsed | |||||
| 1344 | ||||||||
| 1345 | 0 | 0 | eval { # encapsulate communication errors here, defer biothrows... | |||||
| 1346 | ||||||||
| 1347 | #mark the useragent should be setable from outside (so we can modify timeouts, etc) | |||||||
| 1348 | 0 | 0 | my $ua = Bio::WebAgent->new($self->_ua_hash); | |||||
| 1349 | 0 | 0 | my $idPing = $ua->get($self->_map_db_uri); | |||||
| 1350 | 0 | 0 | 0 | $idPing->is_success || do { | ||||
| 1351 | 0 | 0 | $response=$idPing; | |||||
| 1352 | 0 | 0 | die "Connect failed"; | |||||
| 1353 | }; | |||||||
| 1354 | # get the session id | |||||||
| 1355 | 0 | 0 | 0 | if (!$self->_session_id) { | ||||
| 1356 | 0 | 0 | ($self->{'_session_id'}) = ($idPing->content =~ /$session_id_re/); | |||||
| 1357 | 0 | 0 | 0 | $self->_session_id || do { | ||||
| 1358 | 0 | 0 | $response=$idPing; | |||||
| 1359 | 0 | 0 | die "Session not established"; | |||||
| 1360 | }; | |||||||
| 1361 | } | |||||||
| 1362 | # 10/07/08: | |||||||
| 1363 | # strange bug: if action=>'Search+Interface' below (note "+"), | |||||||
| 1364 | # the response to the search (in $searchGet) shows the correct | |||||||
| 1365 | # >number< of sequences found, but also an error "No sequences | |||||||
| 1366 | # match" and an SQL barf. Changing the "+" to a " " sets up the | |||||||
| 1367 | # interface to lead to the actual sequences being delivered as | |||||||
| 1368 | # expected. maj | |||||||
| 1369 | 0 | 0 | $interfGet = $ua->post($self->_make_search_if_uri, [@interface, @searchif_pms, id=>$self->_session_id]); | |||||
| 1370 | 0 | 0 | 0 | $interfGet->is_success || do { | ||||
| 1371 | 0 | 0 | $response=$interfGet; | |||||
| 1372 | 0 | 0 | die "Interface request failed"; | |||||
| 1373 | }; | |||||||
| 1374 | # see if a search form was returned... | |||||||
| 1375 | ||||||||
| 1376 | 0 | 0 | 0 | $interfGet->content =~ /$search_form_re/ || do { | ||||
| 1377 | 0 | 0 | $response=$interfGet; | |||||
| 1378 | 0 | 0 | die "Interface request failed"; | |||||
| 1379 | }; | |||||||
| 1380 | ||||||||
| 1381 | 0 | 0 | $searchGet = $ua->post($self->_search_uri, [@query, @commands, @search_pms, id=>$self->_session_id]); | |||||
| 1382 | 0 | 0 | 0 | $searchGet->is_success || do { | ||||
| 1383 | 0 | 0 | $response = $searchGet; | |||||
| 1384 | 0 | 0 | die "Search failed"; | |||||
| 1385 | }; | |||||||
| 1386 | 0 | 0 | $response = $searchGet; | |||||
| 1387 | 0 | 0 | for ($searchGet->content) { | |||||
| 1388 | 0 | 0 | 0 | /$no_seqs_found_re/ && do { | ||||
| 1389 | 0 | 0 | $err_val = 0; | |||||
| 1390 | 0 | 0 | die "No sequences found"; | |||||
| 1391 | 0 | 0 | last; | |||||
| 1392 | }; | |||||||
| 1393 | 0 | 0 | 0 | /$too_many_re/ && do { | ||||
| 1394 | 0 | 0 | $err_val = $1; | |||||
| 1395 | 0 | 0 | die "Too many records ($1): must be <10000"; | |||||
| 1396 | 0 | 0 | last; | |||||
| 1397 | }; | |||||||
| 1398 | 0 | 0 | 0 | /$tbl_no_join_re/ && do { | ||||
| 1399 | 0 | 0 | die "Some required tables went unjoined to query"; | |||||
| 1400 | 0 | 0 | last; | |||||
| 1401 | }; | |||||||
| 1402 | 0 | 0 | 0 | /$sys_error_re/ && do { | ||||
| 1403 | 0 | 0 | /$sys_error_extract_re/; | |||||
| 1404 | 0 | 0 | $err_val = $1; | |||||
| 1405 | 0 | 0 | die "LANL system error"; | |||||
| 1406 | }; | |||||||
| 1407 | 0 | 0 | 0 | /$seqs_found_re/ && do { | ||||
| 1408 | 0 | 0 | $numseqs = $1; | |||||
| 1409 | 0 | 0 | $count += $numseqs; | |||||
| 1410 | 0 | 0 | last; | |||||
| 1411 | }; | |||||||
| 1412 | # else... | |||||||
| 1413 | 0 | 0 | do { | |||||
| 1414 | 0 | 0 | die "Search failed (response not parsed)"; | |||||
| 1415 | }; | |||||||
| 1416 | } | |||||||
| 1417 | 0 | 0 | $response = $ua->post($self->_search_uri, [@download_pms, id=>$self->_session_id]); | |||||
| 1418 | 0 | 0 | 0 | $response->is_success || die "Query failed"; | ||||
| 1419 | # $response->content is a tab-separated value table of sequences | |||||||
| 1420 | # and metadata, first line starts with \# and contains fieldnames | |||||||
| 1421 | }; | |||||||
| 1422 | 0 | 0 | $self->_lanl_response($response); | |||||
| 1423 | # throw, if necessary | |||||||
| 1424 | 0 | 0 | 0 | if ($@) { | ||||
| 1425 | 0 | 0 | 0 | ($@ !~ "No sequences found") && do { | ||||
| 1426 | 0 | 0 | $self->throw(-class=>'Bio::WebError::Exception', | |||||
| 1427 | -text=>$@, | |||||||
| 1428 | -value=>$err_val); | |||||||
| 1429 | }; | |||||||
| 1430 | } | |||||||
| 1431 | } | |||||||
| 1432 | ||||||||
| 1433 | 0 | 0 | 0 | $self->warn("No sequences found for this query") unless $count; | ||||
| 1434 | 0 | 0 | $self->count($count); | |||||
| 1435 | 0 | 0 | return 1; # made it. | |||||
| 1436 | ||||||||
| 1437 | } | |||||||
| 1438 | ||||||||
| 1439 | =head2 _parse_lanl_response | |||||||
| 1440 | ||||||||
| 1441 | Title : _parse_lanl_response | |||||||
| 1442 | Usage : $hiv_query->_parse_lanl_response() | |||||||
| 1443 | Function: Parse the tab-separated-value response obtained by _do_lanl_request | |||||||
| 1444 | for sequence ids, accessions, and annotations | |||||||
| 1445 | Example : | |||||||
| 1446 | Returns : 1 if successful | |||||||
| 1447 | Args : | |||||||
| 1448 | ||||||||
| 1449 | =cut | |||||||
| 1450 | ||||||||
| 1451 | sub _parse_lanl_response { | |||||||
| 1452 | ||||||||
| 1453 | ### handle parsing and merging multiple responses into the query object | |||||||
| 1454 | ### (ids and annotations) | |||||||
| 1455 | 0 | 0 | 0 | my $self = shift; | ||||
| 1456 | ||||||||
| 1457 | 0 | 0 | my ($seqGet) = (@_); | |||||
| 1458 | 0 | 0 | my (@data, @cols, %antbl, %antype); | |||||
| 1459 | 0 | 0 | my $numseq = 0; | |||||
| 1460 | 0 | 0 | my ($schema, @retseqs, %rec, $ac); | |||||
| 1461 | 0 | 0 | $schema = $self->_schema; | |||||
| 1462 | ||||||||
| 1463 | 0 | 0 | 0 | $self->_lanl_response || | ||||
| 1464 | $self->throw(-class=>"Bio::QueryNotMade::Exception", | |||||||
| 1465 | -text=>"Query not yet performed; call _do_lanl_request()", | |||||||
| 1466 | -value=>""); | |||||||
| 1467 | 0 | 0 | foreach my $rsp (@{$self->_lanl_response}) { | |||||
| 0 | 0 | |||||||
| 1468 | 0 | 0 | @data = split(/\r|\n/, $rsp->content); | |||||
| 1469 | 0 | 0 | my $l; | |||||
| 1470 | 0 | 0 | do { | |||||
| 1471 | 0 | 0 | $l = shift @data; | |||||
| 1472 | } while ($l !~ /Number/); | |||||||
| 1473 | 0 | 0 | $numseq += ( $l =~ /Number.*:\s([0-9]+)/ )[0]; | |||||
| 1474 | 0 | 0 | @cols = split(/\t/, shift(@data)); | |||||
| 1475 | # mappings from column headings to annotation keys | |||||||
| 1476 | # squish into hash keys | |||||||
| 1477 | 0 | 0 | my %q = @{ shift @{$self->_lanl_query} }; | |||||
| 0 | 0 | |||||||
| 0 | 0 | |||||||
| 1478 | 0 | 0 | %antbl = $schema->ankh(keys %q); | |||||
| 1479 | # get the category for each annotation | |||||||
| 1480 | 0 | 0 | map { $antype{ $_->{ankey} } = $_->{antype} } values %antbl; | |||||
| 0 | 0 | |||||||
| 1481 | # normalize column headers | |||||||
| 1482 | 0 | 0 | map { tr/ /_/; $_ = lc; } @cols; | |||||
| 0 | 0 | |||||||
| 0 | 0 | |||||||
| 1483 | 0 | 0 | foreach (@data) { | |||||
| 1484 | 0 | 0 | @rec{@cols} = split /\t/; | |||||
| 1485 | 0 | 0 | my $id = $rec{'se_id'}; | |||||
| 1486 | 0 | 0 | $self->add_id($id); | |||||
| 1487 | 0 | 0 | $ac = Bio::Annotation::Collection->new(); | |||||
| 1488 | #create annotations | |||||||
| 1489 | 0 | 0 | foreach (@cols) { | |||||
| 1490 | 0 | 0 | 0 | next if $_ eq '#'; | ||||
| 1491 | 0 | 0 | 0 | my $t = $antype{$_} || "Unclassified"; | ||||
| 1492 | 0 | 0 | my $d = $rec{$_}; # the data | |||||
| 1493 | 0 | 0 | $ac->put_value(-KEYS=>[$t, $_], -VALUE=>$d); | |||||
| 1494 | } | |||||||
| 1495 | 0 | 0 | $self->add_annotations_for_id($id, $ac); | |||||
| 1496 | } | |||||||
| 1497 | 0 | 0 | 1; | |||||
| 1498 | } | |||||||
| 1499 | 0 | 0 | return 1; # made it. | |||||
| 1500 | } | |||||||
| 1501 | ||||||||
| 1502 | =head2 _parse_query_string | |||||||
| 1503 | ||||||||
| 1504 | Title : _parse_query_string | |||||||
| 1505 | Usage : $hiv_query->_parse_query_string($str) | |||||||
| 1506 | Function: Parses a query string using query language emulator QRY | |||||||
| 1507 | : in L |
|||||||
| 1508 | Example : | |||||||
| 1509 | Returns : arrayref of hash structures suitable for passing to _create_lanl_query | |||||||
| 1510 | Args : a string scalar | |||||||
| 1511 | ||||||||
| 1512 | =cut | |||||||
| 1513 | ||||||||
| 1514 | sub _parse_query_string { | |||||||
| 1515 | 4 | 4 | 5 | my $self = shift; | ||||
| 1516 | 4 | 5 | my $qstring = shift; | |||||
| 1517 | 4 | 4 | my ($ptree, @ret); | |||||
| 1518 | #syntax errors thrown in QRY (in HIVQueryHelper module) | |||||||
| 1519 | 4 | 13 | $ptree = QRY::_parse_q( $qstring ); | |||||
| 1520 | 4 | 12 | @ret = QRY::_make_q($ptree); | |||||
| 1521 | 4 | 62 | return @ret; | |||||
| 1522 | } | |||||||
| 1523 | ||||||||
| 1524 | =head1 Dude, sorry- | |||||||
| 1525 | ||||||||
| 1526 | =head2 _sorry | |||||||
| 1527 | ||||||||
| 1528 | Title : _sorry | |||||||
| 1529 | Usage : $hiv_query->_sorry("-president=>Powell") | |||||||
| 1530 | Function: Throws an exception for unsupported option or parameter | |||||||
| 1531 | Example : | |||||||
| 1532 | Returns : | |||||||
| 1533 | Args : scalar string | |||||||
| 1534 | ||||||||
| 1535 | =cut | |||||||
| 1536 | ||||||||
| 1537 | sub _sorry{ | |||||||
| 1538 | 0 | 0 | my $self = shift; | |||||
| 1539 | 0 | my $parm = shift; | ||||||
| 1540 | 0 | $self->throw(-class=>"Bio::HIVSorry::Exception", | ||||||
| 1541 | -text=>"Sorry, option/parameter \"$parm\" not (yet) supported. See manpage to complain.", | |||||||
| 1542 | -value=>$parm); | |||||||
| 1543 | 0 | return; | ||||||
| 1544 | } | |||||||
| 1545 | ||||||||
| 1546 | 1; |