| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # BioPerl module for Bio::SearchIO::infernal | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Please direct questions and support issues to | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # Cared for by Chris Fields | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # Copyright Chris Fields | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 NAME | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | Bio::SearchIO::infernal - SearchIO-based Infernal parser | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | my $parser = Bio::SearchIO->new(-format => 'infernal', | 
| 21 |  |  |  |  |  |  | -file => 'purine.inf'); | 
| 22 |  |  |  |  |  |  | while( my $result = $parser->next_result ) { | 
| 23 |  |  |  |  |  |  | # general result info, such as model used, Infernal version | 
| 24 |  |  |  |  |  |  | while( my $hit = $result->next_hit ) { | 
| 25 |  |  |  |  |  |  | while( my $hsp = $hit->next_hsp ) { | 
| 26 |  |  |  |  |  |  | # ... | 
| 27 |  |  |  |  |  |  | } | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | This is a SearchIO-based parser for Infernal output from the cmsearch program. | 
| 34 |  |  |  |  |  |  | It currently parses cmsearch output for Infernal versions 0.7-1.1; older | 
| 35 |  |  |  |  |  |  | versions may work but will not be supported. | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | The latest version of Infernal is 1.1. The output has changed substantially | 
| 38 |  |  |  |  |  |  | relative to version 1.0. Versions 1.x are stable releases (and output has | 
| 39 |  |  |  |  |  |  | stabilized) therefore it is highly recommended that users upgrade to using | 
| 40 |  |  |  |  |  |  | the latest Infernal release. Support for the older pre-v.1 developer releases | 
| 41 |  |  |  |  |  |  | will be dropped for future core 1.6 releases. | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =head2 Mailing Lists | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | User feedback is an integral part of the evolution of this and other | 
| 48 |  |  |  |  |  |  | Bioperl modules. Send your comments and suggestions preferably to | 
| 49 |  |  |  |  |  |  | the Bioperl mailing list.  Your participation is much appreciated. | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | bioperl-l@bioperl.org                  - General discussion | 
| 52 |  |  |  |  |  |  | http://bioperl.org/wiki/Mailing_lists  - About the mailing lists | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | =head2 Support | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | I | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 61 |  |  |  |  |  |  | reponsive experts will be able look at the problem and quickly | 
| 62 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 63 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track | 
| 68 |  |  |  |  |  |  | of the bugs and their resolution. Bug reports can be submitted via the | 
| 69 |  |  |  |  |  |  | web: | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | https://github.com/bioperl/bioperl-live/issues | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | =head1 AUTHOR - Chris Fields | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | Email cjfields-at-uiuc-dot-edu | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | =head1 CONTRIBUTORS | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | Jeffrey Barrick, Michigan State University | 
| 80 |  |  |  |  |  |  | Paul Cantalupo, University of Pittsburgh | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | =head1 APPENDIX | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | The rest of the documentation details each of the object methods. | 
| 85 |  |  |  |  |  |  | Internal methods are usually preceded with a _ | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | =cut | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | # Let the code begin... | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | package Bio::SearchIO::infernal; | 
| 92 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 26 |  | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 1 |  |  | 1 |  | 5 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 45 |  | 
| 95 | 1 |  |  | 1 |  | 4 | use base qw(Bio::SearchIO); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 5345 |  | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | our %MODEMAP = ( | 
| 98 |  |  |  |  |  |  | 'Result'             => 'result', | 
| 99 |  |  |  |  |  |  | 'Hit'                => 'hit', | 
| 100 |  |  |  |  |  |  | 'Hsp'                => 'hsp' | 
| 101 |  |  |  |  |  |  | ); | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | our %MAPPING = ( | 
| 104 |  |  |  |  |  |  | 'Hsp_bit-score'   => 'HSP-bits', | 
| 105 |  |  |  |  |  |  | 'Hsp_score'       => 'HSP-score', | 
| 106 |  |  |  |  |  |  | 'Hsp_evalue'      => 'HSP-evalue', # evalues only in v0.81, optional | 
| 107 |  |  |  |  |  |  | 'Hsp_pvalue'      => 'HSP-pvalue', # pvalues only in v0.81, optional | 
| 108 |  |  |  |  |  |  | 'Hsp_query-from'  => 'HSP-query_start', | 
| 109 |  |  |  |  |  |  | 'Hsp_query-to'    => 'HSP-query_end', | 
| 110 |  |  |  |  |  |  | 'Hsp_query-strand'=> 'HSP-query_strand', | 
| 111 |  |  |  |  |  |  | 'Hsp_hit-from'    => 'HSP-hit_start', | 
| 112 |  |  |  |  |  |  | 'Hsp_hit-to'      => 'HSP-hit_end', | 
| 113 |  |  |  |  |  |  | 'Hsp_hit-strand'  => 'HSP-hit_strand', | 
| 114 |  |  |  |  |  |  | 'Hsp_gaps'        => 'HSP-hsp_gaps', | 
| 115 |  |  |  |  |  |  | 'Hsp_hitgaps'     => 'HSP-hit_gaps', | 
| 116 |  |  |  |  |  |  | 'Hsp_querygaps'   => 'HSP-query_gaps', | 
| 117 |  |  |  |  |  |  | 'Hsp_qseq'        => 'HSP-query_seq', | 
| 118 |  |  |  |  |  |  | 'Hsp_ncline'      => 'HSP-nc_seq', | 
| 119 |  |  |  |  |  |  | 'Hsp_hseq'        => 'HSP-hit_seq', | 
| 120 |  |  |  |  |  |  | 'Hsp_midline'     => 'HSP-homology_seq', | 
| 121 |  |  |  |  |  |  | 'Hsp_pline'       => 'HSP-pp_seq', | 
| 122 |  |  |  |  |  |  | 'Hsp_structure'   => 'HSP-meta', | 
| 123 |  |  |  |  |  |  | 'Hsp_align-len'   => 'HSP-hsp_length', | 
| 124 |  |  |  |  |  |  | 'Hsp_stranded'    => 'HSP-stranded', | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | 'Hit_id'        => 'HIT-name', | 
| 127 |  |  |  |  |  |  | 'Hit_len'       => 'HIT-length', | 
| 128 |  |  |  |  |  |  | 'Hit_gi'        => 'HIT-ncbi_gi', | 
| 129 |  |  |  |  |  |  | 'Hit_accession' => 'HIT-accession', | 
| 130 |  |  |  |  |  |  | 'Hit_desc'      => 'HIT-description', | 
| 131 |  |  |  |  |  |  | 'Hit_def'       => 'HIT-description', | 
| 132 |  |  |  |  |  |  | 'Hit_signif'    => 'HIT-significance', # evalues in v1.1 and v0.81, optional | 
| 133 |  |  |  |  |  |  | 'Hit_p'         => 'HIT-p',            # pvalues only in 1.0, optional | 
| 134 |  |  |  |  |  |  | 'Hit_score'     => 'HIT-score', # best HSP bit score (in v1.1, the only HSP bit score) | 
| 135 |  |  |  |  |  |  | 'Hit_bits'      => 'HIT-bits',  # best HSP bit score (ditto) | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | 'Infernal_program'  => 'RESULT-algorithm_name', # get/set | 
| 138 |  |  |  |  |  |  | 'Infernal_version'  => 'RESULT-algorithm_version', # get/set | 
| 139 |  |  |  |  |  |  | 'Infernal_query-def'=> 'RESULT-query_name', # get/set | 
| 140 |  |  |  |  |  |  | 'Infernal_query-len'=> 'RESULT-query_length', | 
| 141 |  |  |  |  |  |  | 'Infernal_query-acc'=> 'RESULT-query_accession', # get/set | 
| 142 |  |  |  |  |  |  | 'Infernal_querydesc'=> 'RESULT-query_description', # get/set | 
| 143 |  |  |  |  |  |  | 'Infernal_cm'       => 'RESULT-cm_name', | 
| 144 |  |  |  |  |  |  | 'Infernal_db'       => 'RESULT-database_name',  # get/set | 
| 145 |  |  |  |  |  |  | 'Infernal_db-len'   => 'RESULT-database_entries', # in v1.1 only | 
| 146 |  |  |  |  |  |  | 'Infernal_db-let'   => 'RESULT-database_letters', # in v1.1 only | 
| 147 |  |  |  |  |  |  | ); | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | my $MINSCORE = 0; | 
| 150 |  |  |  |  |  |  | my $DEFAULT_ALGORITHM = 'cmsearch'; | 
| 151 |  |  |  |  |  |  | my $DEFAULT_VERSION = '1.1'; | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | my @VALID_SYMBOLS = qw(5-prime 3-prime single-strand unknown gap); | 
| 154 |  |  |  |  |  |  | my %STRUCTURE_SYMBOLS = ( | 
| 155 |  |  |  |  |  |  | '5-prime'        => '<', | 
| 156 |  |  |  |  |  |  | '3-prime'        => '>', | 
| 157 |  |  |  |  |  |  | 'single-strand'  => ':', | 
| 158 |  |  |  |  |  |  | 'unknown'        => '?', | 
| 159 |  |  |  |  |  |  | 'gap'            => '.' | 
| 160 |  |  |  |  |  |  | ); | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | =head2 new | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | Title   : new | 
| 165 |  |  |  |  |  |  | Usage   : my $obj = Bio::SearchIO::infernal->new(); | 
| 166 |  |  |  |  |  |  | Function: Builds a new Bio::SearchIO::infernal object | 
| 167 |  |  |  |  |  |  | Returns : Bio::SearchIO::infernal | 
| 168 |  |  |  |  |  |  | Args    : -fh/-file      => cmsearch (infernal) filename | 
| 169 |  |  |  |  |  |  | -format        => 'infernal' | 
| 170 |  |  |  |  |  |  | -model         => query model (Rfam ID) (default undef) | 
| 171 |  |  |  |  |  |  | -database      => database name (default undef) | 
| 172 |  |  |  |  |  |  | -query_acc     => query accession, eg. Rfam accession RF#### | 
| 173 |  |  |  |  |  |  | -query_desc    => query description, eg. Rfam description | 
| 174 |  |  |  |  |  |  | -hsp_minscore  => minimum HSP score cutoff | 
| 175 |  |  |  |  |  |  | -convert_meta  => boolean, set to convert meta string to simple WUSS format | 
| 176 |  |  |  |  |  |  | -symbols       => hash ref of structure symbols to use | 
| 177 |  |  |  |  |  |  | (default symbols in %STRUCTURE_SYMBOLS hash) | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | =cut | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | sub _initialize { | 
| 182 | 7 |  |  | 7 |  | 28 | my ( $self, @args ) = @_; | 
| 183 | 7 |  |  |  |  | 33 | $self->SUPER::_initialize(@args); | 
| 184 | 7 |  |  |  |  | 43 | my ($model, $database, $convert, $symbols, $cutoff, | 
| 185 |  |  |  |  |  |  | $desc, $accession, $algorithm, $version) = | 
| 186 |  |  |  |  |  |  | $self->_rearrange([qw(MODEL | 
| 187 |  |  |  |  |  |  | DATABASE | 
| 188 |  |  |  |  |  |  | CONVERT_META | 
| 189 |  |  |  |  |  |  | SYMBOLS | 
| 190 |  |  |  |  |  |  | HSP_MINSCORE | 
| 191 |  |  |  |  |  |  | QUERY_DESC | 
| 192 |  |  |  |  |  |  | QUERY_ACC | 
| 193 |  |  |  |  |  |  | ALGORITHM | 
| 194 |  |  |  |  |  |  | VERSION)],@args); | 
| 195 | 7 |  |  |  |  | 31 | my $handler = $self->_eventHandler; | 
| 196 | 7 |  |  |  |  | 24 | $handler->register_factory( | 
| 197 |  |  |  |  |  |  | 'result', | 
| 198 |  |  |  |  |  |  | Bio::Factory::ObjectFactory->new( | 
| 199 |  |  |  |  |  |  | -type      => 'Bio::Search::Result::INFERNALResult', | 
| 200 |  |  |  |  |  |  | -interface => 'Bio::Search::Result::ResultI', | 
| 201 |  |  |  |  |  |  | -verbose   => $self->verbose | 
| 202 |  |  |  |  |  |  | ) | 
| 203 |  |  |  |  |  |  | ); | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 7 |  |  |  |  | 27 | $handler->register_factory( | 
| 206 |  |  |  |  |  |  | 'hit', | 
| 207 |  |  |  |  |  |  | Bio::Factory::ObjectFactory->new( | 
| 208 |  |  |  |  |  |  | -type      => 'Bio::Search::Hit::ModelHit', | 
| 209 |  |  |  |  |  |  | -interface => 'Bio::Search::Hit::HitI', | 
| 210 |  |  |  |  |  |  | -verbose   => $self->verbose | 
| 211 |  |  |  |  |  |  | ) | 
| 212 |  |  |  |  |  |  | ); | 
| 213 |  |  |  |  |  |  |  | 
| 214 | 7 |  |  |  |  | 26 | $handler->register_factory( | 
| 215 |  |  |  |  |  |  | 'hsp', | 
| 216 |  |  |  |  |  |  | Bio::Factory::ObjectFactory->new( | 
| 217 |  |  |  |  |  |  | -type      => 'Bio::Search::HSP::ModelHSP', | 
| 218 |  |  |  |  |  |  | -interface => 'Bio::Search::HSP::HSPI', | 
| 219 |  |  |  |  |  |  | -verbose   => $self->verbose | 
| 220 |  |  |  |  |  |  | ) | 
| 221 |  |  |  |  |  |  | ); | 
| 222 |  |  |  |  |  |  |  | 
| 223 | 7 | 100 |  |  |  | 34 | defined $model     && $self->model($model); | 
| 224 | 7 | 100 |  |  |  | 29 | defined $database  && $self->database($database); | 
| 225 | 7 | 100 |  |  |  | 33 | defined $accession && $self->query_accession($accession); | 
| 226 | 7 | 100 |  |  |  | 28 | defined $convert   && $self->convert_meta($convert); | 
| 227 | 7 | 100 |  |  |  | 27 | defined $desc      && $self->query_description($desc); | 
| 228 |  |  |  |  |  |  |  | 
| 229 | 7 |  | 66 |  |  | 25 | $version ||= $DEFAULT_VERSION; | 
| 230 | 7 |  |  |  |  | 24 | $self->version($version); | 
| 231 | 7 |  | 100 |  |  | 30 | $symbols ||= \%STRUCTURE_SYMBOLS; | 
| 232 | 7 |  |  |  |  | 25 | $self->structure_symbols($symbols); | 
| 233 | 7 |  | 66 |  |  | 28 | $cutoff ||= $MINSCORE; | 
| 234 | 7 |  |  |  |  | 23 | $self->hsp_minscore($cutoff); | 
| 235 | 7 |  | 33 |  |  | 29 | $algorithm ||= $DEFAULT_ALGORITHM; | 
| 236 | 7 |  |  |  |  | 22 | $self->algorithm($algorithm); | 
| 237 |  |  |  |  |  |  | } | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | =head2 next_result | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | Title   : next_result | 
| 242 |  |  |  |  |  |  | Usage   : my $hit = $searchio->next_result; | 
| 243 |  |  |  |  |  |  | Function: Returns the next Result from a search | 
| 244 |  |  |  |  |  |  | Returns : Bio::Search::Result::ResultI object | 
| 245 |  |  |  |  |  |  | Args    : none | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | =cut | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | sub next_result { | 
| 250 | 9 |  |  | 9 | 1 | 601 | my ($self) = @_; | 
| 251 | 9 | 100 |  |  |  | 27 | unless (exists $self->{'_handlerset'}) { | 
| 252 | 7 |  |  |  |  | 10 | my $line; | 
| 253 | 7 |  |  |  |  | 28 | while ($line = $self->_readline) { | 
| 254 |  |  |  |  |  |  | # advance to first line | 
| 255 | 9 | 100 |  |  |  | 48 | next if $line =~ m{^\s*$}; | 
| 256 |  |  |  |  |  |  | # newer output starts with model name | 
| 257 | 7 | 100 |  |  |  | 43 | if ($line =~ m{^\#\s+cmsearch\s}) { | 
|  |  | 100 |  |  |  |  |  | 
| 258 | 4 |  |  |  |  | 9 | my $secondline = $self->_readline; | 
| 259 | 4 | 100 |  |  |  | 12 | if ($secondline =~ m{INFERNAL 1\.1}) { | 
| 260 | 3 |  |  |  |  | 6 | $self->{'_handlerset'} = '1.1'; | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  | else { | 
| 263 | 1 |  |  |  |  | 3 | $self->{'_handlerset'} = 'latest';  # v1.0 | 
| 264 |  |  |  |  |  |  | } | 
| 265 | 4 |  |  |  |  | 13 | $self->_pushback($secondline); | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  | elsif ($line =~ m{^CM\s\d+:}) { | 
| 268 | 1 |  |  |  |  | 8 | $self->{'_handlerset'} = 'pre-1.0'; | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  | else { | 
| 271 | 2 |  |  |  |  | 6 | $self->{'_handlerset'} ='old'; | 
| 272 |  |  |  |  |  |  | } | 
| 273 | 7 |  |  |  |  | 12 | last; | 
| 274 |  |  |  |  |  |  | } | 
| 275 | 7 |  |  |  |  | 24 | $self->_pushback($line); | 
| 276 |  |  |  |  |  |  | #if ($self->{'_handlerset'} ne '1.0') { | 
| 277 |  |  |  |  |  |  | #	$self->deprecated( | 
| 278 |  |  |  |  |  |  | #	-message => "Parsing of Infernal pre-1.0 release is deprecated;\n". | 
| 279 |  |  |  |  |  |  | #		"upgrading to Infernal 1.0 or above is highly recommended", | 
| 280 |  |  |  |  |  |  | #	-version => 1.007); | 
| 281 |  |  |  |  |  |  | #} | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  | return ($self->{'_handlerset'} eq '1.1')     ? $self->_parse_v1_1 : | 
| 284 |  |  |  |  |  |  | ($self->{'_handlerset'} eq 'latest')  ? $self->_parse_latest : | 
| 285 | 9 | 100 |  |  |  | 60 | ($self->{'_handlerset'} eq 'pre-1.0') ? $self->_parse_pre : | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | $self->_parse_old; | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | sub _parse_v1_1 { | 
| 291 | 5 |  |  | 5 |  | 8 | my ($self) = @_; | 
| 292 | 5 |  |  |  |  | 8 | my $seentop = 0; | 
| 293 | 5 |  |  |  |  | 20 | local $/ = "\n"; | 
| 294 | 5 |  |  |  |  | 14 | my ($accession, $description) = ($self->query_accession, $self->query_description); | 
| 295 | 5 |  |  |  |  | 11 | my ($buffer, $last, %modelcounter, @hit_list, %hitindex, | 
| 296 |  |  |  |  |  |  | @hsp_list, %hspindex); | 
| 297 | 5 |  |  |  |  | 15 | $self->start_document(); | 
| 298 | 5 |  |  |  |  | 13 | $buffer = $self->_readline; | 
| 299 | 5 | 100 | 66 |  |  | 25 | if ( !defined $buffer || $buffer =~ m/^\[ok/ ) {  # end of report | 
| 300 | 1 |  |  |  |  | 8 | return undef; | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  | else { | 
| 303 | 4 |  |  |  |  | 7 | $self->_pushback($buffer); | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | PARSER: # Parse each line of report | 
| 307 | 4 |  |  |  |  | 6 | while ( defined( $buffer = $self->_readline ) ) { | 
| 308 | 52 |  |  |  |  | 54 | my $hit_counter = 0; | 
| 309 | 52 |  |  |  |  | 54 | my $lineorig = $buffer; | 
| 310 | 52 |  |  |  |  | 54 | chomp $buffer; | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | # INFERNAL program name | 
| 313 | 52 | 100 | 66 |  |  | 300 | if ( $buffer =~ m/^\#\s(\S+)\s\:\:\s/ ) { | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 314 | 3 |  |  |  |  | 5 | $seentop = 1; | 
| 315 | 3 |  |  |  |  | 6 | my $prog = $1; | 
| 316 | 3 |  |  |  |  | 19 | $self->start_element( { 'Name' => 'Result' } ); | 
| 317 | 3 |  |  |  |  | 14 | $self->element_hash( { 'Infernal_program' => uc($prog) } ); | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | # INFERNAL version and release date | 
| 321 |  |  |  |  |  |  | elsif ( $buffer =~ m/^\#\sINFERNAL\s+(\S+)\s+\((.+)\)/ ) { | 
| 322 | 3 |  |  |  |  | 7 | my $version     = $1; | 
| 323 | 3 |  |  |  |  | 5 | my $versiondate = $2; | 
| 324 | 3 |  |  |  |  | 6 | $self->{'_cmidline'} = $buffer; | 
| 325 | 3 |  |  |  |  | 7 | $self->element_hash( { 'Infernal_version' => $version } ); | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | # Query info | 
| 329 |  |  |  |  |  |  | elsif ( $buffer =~ /^\#\squery (?:\w+ )?file\:\s+(\S+)/ ) { | 
| 330 | 3 |  |  |  |  | 8 | $self->{'_cmfileline'} = $lineorig; | 
| 331 | 3 |  |  |  |  | 23 | $self->element_hash( { 'Infernal_cm' => $1 } ); | 
| 332 |  |  |  |  |  |  | } | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | # Database info | 
| 335 |  |  |  |  |  |  | elsif ( $buffer =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) { | 
| 336 | 3 |  |  |  |  | 18 | $self->{'_cmseqline'} = $lineorig; | 
| 337 | 3 |  |  |  |  | 11 | $self->element_hash( { 'Infernal_db' => $1 } ); | 
| 338 |  |  |  |  |  |  | } | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | # Query data | 
| 341 |  |  |  |  |  |  | elsif ( $buffer =~ m/^Query:\s+(\S+)\s+\[CLEN=(\d+)\]$/ ) { | 
| 342 | 4 |  |  |  |  | 23 | $self->element_hash( { 'Infernal_query-def' => $1, | 
| 343 |  |  |  |  |  |  | 'Infernal_query-len' => $2, | 
| 344 |  |  |  |  |  |  | 'Infernal_query-acc' => $accession, | 
| 345 |  |  |  |  |  |  | 'Infernal_querydesc' => $description, | 
| 346 |  |  |  |  |  |  | } ); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | # Get query accession | 
| 350 |  |  |  |  |  |  | elsif ( $buffer =~ s/^Accession:\s+// && ! $accession) { | 
| 351 | 3 |  |  |  |  | 9 | $buffer =~ s/\s+$//; | 
| 352 | 3 |  |  |  |  | 8 | $self->element_hash( { 'Infernal_query-acc' => $buffer } ); | 
| 353 |  |  |  |  |  |  | } | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | # Get query description | 
| 356 |  |  |  |  |  |  | elsif ( $buffer =~ s/^Description:\s+// && ! $description) { | 
| 357 | 2 |  |  |  |  | 6 | $buffer =~ s/\s+$//; | 
| 358 | 2 |  |  |  |  | 6 | $self->element_hash( { 'Infernal_querydesc' => $buffer } ); | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | # Process hit table - including those below inclusion threshold | 
| 362 |  |  |  |  |  |  | elsif ( $buffer =~ m/^Hit scores:/) { | 
| 363 | 4 |  |  |  |  | 6 | @hit_list = ();   # here is case there are multi-query reports | 
| 364 | 4 |  |  |  |  | 9 | while ( defined( $buffer = $self->_readline ) ) { | 
| 365 | 38 | 100 | 100 |  |  | 180 | if ( $buffer =~ m/^Hit alignments:/ ) { | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  |  | 100 |  |  |  |  | 
| 366 | 4 |  |  |  |  | 9 | $self->_pushback($buffer); | 
| 367 | 4 |  |  |  |  | 4 | last; | 
| 368 |  |  |  |  |  |  | } | 
| 369 |  |  |  |  |  |  | elsif (   $buffer =~ m/^\s+rank\s+E-value/ | 
| 370 |  |  |  |  |  |  | || $buffer =~ m/\-\-\-/ | 
| 371 |  |  |  |  |  |  | || $buffer =~ m/^$/ | 
| 372 |  |  |  |  |  |  | || $buffer =~ m/No hits detected/ ) { | 
| 373 | 19 |  |  |  |  | 32 | next; | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | # Process hit | 
| 377 | 15 |  |  |  |  | 18 | $hit_counter++; | 
| 378 | 15 |  |  |  |  | 69 | my ($rank, $threshold, $eval, $score, | 
| 379 |  |  |  |  |  |  | $bias, $hitid, $start, $end, $strand, | 
| 380 |  |  |  |  |  |  | $mdl, $truc, $gc, @desc) = split( " ", $buffer ); | 
| 381 | 15 |  |  |  |  | 31 | my $desc = join " ", @desc; | 
| 382 | 15 | 50 |  |  |  | 21 | $desc = '' if ( !defined($desc) ); | 
| 383 |  |  |  |  |  |  |  | 
| 384 | 15 |  |  |  |  | 30 | push @hit_list, [ $hitid, $desc, $eval, $score ]; | 
| 385 | 15 |  |  |  |  | 49 | $hitindex{ $hitid.$hit_counter } = $#hit_list; | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | # Process hit alignments | 
| 390 |  |  |  |  |  |  | elsif ( $buffer =~ /^Hit alignments:/ ) { | 
| 391 | 4 |  |  |  |  | 5 | my $hitid; | 
| 392 | 4 |  |  |  |  | 5 | my $align_counter = 0; | 
| 393 | 4 |  |  |  |  | 8 | while ( defined( $buffer = $self->_readline ) ) { | 
| 394 | 38 | 100 |  |  |  | 59 | if ( $buffer =~ /^Internal CM pipeline statistics summary/ ) { | 
| 395 | 4 |  |  |  |  | 10 | $self->_pushback($buffer); | 
| 396 | 4 |  |  |  |  | 5 | last; | 
| 397 |  |  |  |  |  |  | } | 
| 398 | 34 | 100 |  |  |  | 92 | if ( $buffer =~ m/^\>\>\s(\S*)\s+(.*)/ ) {  # defline of hit | 
|  |  | 100 |  |  |  |  |  | 
| 399 | 15 |  |  |  |  | 26 | $hitid    = $1; | 
| 400 | 15 |  |  |  |  | 20 | my $desc = $2; | 
| 401 | 15 |  |  |  |  | 15 | $align_counter++; | 
| 402 | 15 |  |  |  |  | 21 | my $hitid_alignctr = $hitid.$align_counter; | 
| 403 | 15 |  |  |  |  | 23 | $modelcounter{$hitid_alignctr} = 0; | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | # The Hit Description from the Hit table can be truncated if | 
| 406 |  |  |  |  |  |  | # it is too long, so use the '>>' line description instead | 
| 407 | 15 |  |  |  |  | 26 | $hit_list[ $hitindex{$hitid_alignctr} ][1] = $desc; | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | # Process hit information table | 
| 410 | 15 |  |  |  |  | 24 | while ( defined( $buffer = $self->_readline ) ) { | 
| 411 | 75 | 100 | 66 |  |  | 430 | if (   $buffer =~ m/^Internal CM pipeline statistics/ | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  |  | 100 |  |  |  |  | 
|  |  |  | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 412 |  |  |  |  |  |  | || $buffer =~ m/NC$/ | 
| 413 |  |  |  |  |  |  | || $buffer =~ m/^\>\>/ ) { | 
| 414 | 15 |  |  |  |  | 31 | $self->_pushback($buffer); | 
| 415 | 15 |  |  |  |  | 29 | last; | 
| 416 |  |  |  |  |  |  | } | 
| 417 |  |  |  |  |  |  | elsif (   $buffer =~ m/^\s+rank\s+E-value/ | 
| 418 |  |  |  |  |  |  | || $buffer =~ m/^\s----/ | 
| 419 |  |  |  |  |  |  | || $buffer =~ m/^$/ | 
| 420 |  |  |  |  |  |  | || $buffer =~ m/No hits detected/ ) { | 
| 421 | 45 |  |  |  |  | 80 | next; | 
| 422 |  |  |  |  |  |  | } | 
| 423 |  |  |  |  |  |  |  | 
| 424 |  |  |  |  |  |  | # Get hsp data from table, push into @hsp; | 
| 425 | 15 |  |  |  |  | 80 | my ( $rank,      $threshold, $eval, | 
| 426 |  |  |  |  |  |  | $score,     $bias,      $model, | 
| 427 |  |  |  |  |  |  | $cm_start,  $cm_stop,   $cm_cov, | 
| 428 |  |  |  |  |  |  | $seq_start, $seq_stop,  $seq_strand, $seq_cov, | 
| 429 |  |  |  |  |  |  | $acc,       $trunc,     $gc, | 
| 430 |  |  |  |  |  |  | ) = split( " ", $buffer ); | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | # Try to get the Hit Length from the alignment information. | 
| 433 |  |  |  |  |  |  | # For cmsearch, if sequence coverage ends in ']' it means that the | 
| 434 |  |  |  |  |  |  | # alignment runs full-length flush to the end of the target. | 
| 435 | 15 | 100 |  |  |  | 39 | my $hitlength = ( $seq_cov =~ m/\]$/ ) ? $seq_stop : 0; | 
| 436 |  |  |  |  |  |  |  | 
| 437 | 15 |  |  |  |  | 23 | my $tmphit = $hit_list[ $hitindex{$hitid_alignctr} ]; | 
| 438 | 15 | 50 |  |  |  | 24 | if ( !defined $tmphit ) { | 
| 439 | 0 |  |  |  |  | 0 | $self->warn("Incomplete information: can't find HSP $hitid in list of hits\n"); | 
| 440 | 0 |  |  |  |  | 0 | next; | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 | 15 |  |  |  |  | 39 | push @hsp_list, [ $hitid, | 
| 444 |  |  |  |  |  |  | $cm_start, $cm_stop, | 
| 445 |  |  |  |  |  |  | $seq_start, $seq_stop, | 
| 446 |  |  |  |  |  |  | $score,     $eval, | 
| 447 |  |  |  |  |  |  | $hitlength]; | 
| 448 | 15 |  |  |  |  | 17 | $modelcounter{$hitid_alignctr}++; | 
| 449 | 15 |  |  |  |  | 29 | my $hsp_key = $hitid_alignctr . "_" . $modelcounter{$hitid_alignctr}; | 
| 450 | 15 |  |  |  |  | 45 | $hspindex{$hsp_key} = $#hsp_list; | 
| 451 |  |  |  |  |  |  | } | 
| 452 |  |  |  |  |  |  | } | 
| 453 |  |  |  |  |  |  | elsif ( $buffer =~ m/NC$/ ) { # start of HSP | 
| 454 |  |  |  |  |  |  | # need CS line to get number of spaces before structure data | 
| 455 | 15 |  |  |  |  | 24 | my $csline = $self->_readline; | 
| 456 | 15 |  |  |  |  | 45 | $csline =~ m/^(\s+)\S+ CS$/; | 
| 457 | 15 |  |  |  |  | 31 | my $offset = length($1); | 
| 458 | 15 |  |  |  |  | 32 | $self->_pushback($csline); | 
| 459 | 15 |  |  |  |  | 25 | $self->_pushback($buffer); # set up for loop | 
| 460 |  |  |  |  |  |  |  | 
| 461 | 15 |  |  |  |  | 19 | my ($ct, $strln) = 0; | 
| 462 | 15 |  |  |  |  | 16 | my $hspdata; | 
| 463 | 15 |  |  |  |  | 44 | HSP: | 
| 464 |  |  |  |  |  |  | my %hspline = ('0' => 'nc',    '1' => 'meta', | 
| 465 |  |  |  |  |  |  | '2' => 'query', '3' => 'midline', | 
| 466 |  |  |  |  |  |  | '4' => 'hit',   '5' => 'pp'); | 
| 467 |  |  |  |  |  |  | HSP: | 
| 468 | 15 |  |  |  |  | 27 | while (defined ($buffer = $self->_readline)) { | 
| 469 | 168 |  |  |  |  | 187 | chomp $buffer; | 
| 470 | 168 | 100 | 100 |  |  | 537 | if (   $buffer =~ /^>>\s/ | 
|  |  | 100 | 100 |  |  |  |  | 
| 471 |  |  |  |  |  |  | || $buffer =~ /^Internal CM pipeline statistics/) { | 
| 472 | 15 |  |  |  |  | 30 | $self->_pushback($buffer); | 
| 473 | 15 |  |  |  |  | 15 | last HSP; | 
| 474 |  |  |  |  |  |  | } | 
| 475 |  |  |  |  |  |  | elsif ( $ct % 6 == 0 && $buffer =~ /^$/ ) { | 
| 476 | 27 |  |  |  |  | 41 | next; | 
| 477 |  |  |  |  |  |  | } | 
| 478 | 126 |  |  |  |  | 126 | my $iterator = $ct % 6; | 
| 479 |  |  |  |  |  |  | # NC line ends with ' NC' so remove these from the strlen count | 
| 480 | 126 | 100 |  |  |  | 140 | $strln = ( length($buffer) - 3 ) if $iterator == 0; | 
| 481 | 126 |  |  |  |  | 155 | my $data = substr($buffer, $offset, $strln-$offset); | 
| 482 | 126 |  |  |  |  | 265 | $hspdata->{ $hspline{$iterator} } .= $data; | 
| 483 |  |  |  |  |  |  |  | 
| 484 | 126 |  |  |  |  | 191 | $ct++; | 
| 485 |  |  |  |  |  |  | } # 'HSP' while loop | 
| 486 |  |  |  |  |  |  |  | 
| 487 | 15 |  |  |  |  | 18 | my $strlen = 0; | 
| 488 |  |  |  |  |  |  | # catch any insertions and add them into the actual length | 
| 489 | 15 |  |  |  |  | 39 | while ($hspdata->{'query'} =~ m{\*\[\s*(\d+)\s*\]\*}g) { | 
| 490 | 2 |  |  |  |  | 8 | $strlen += $1; | 
| 491 |  |  |  |  |  |  | } | 
| 492 |  |  |  |  |  |  | # add on the actual residues | 
| 493 | 15 |  |  |  |  | 21 | $strlen += $hspdata->{'query'} =~ tr{A-Za-z}{A-Za-z}; | 
| 494 |  |  |  |  |  |  | my $metastr = ($self->convert_meta) ? ($self->simple_meta($hspdata->{'meta'})) : | 
| 495 | 15 | 50 |  |  |  | 30 | $hspdata->{'meta'}; | 
| 496 |  |  |  |  |  |  |  | 
| 497 | 15 |  |  |  |  | 22 | my $hitid_alignctr = $hitid . $align_counter; | 
| 498 | 15 |  |  |  |  | 21 | my $hsp_key = $hitid_alignctr . "_" . $modelcounter{$hitid_alignctr}; | 
| 499 | 15 |  |  |  |  | 22 | my $hsp = $hsp_list[ $hspindex{$hsp_key} ]; | 
| 500 |  |  |  |  |  |  | push (@$hsp, $hspdata->{'nc'},    $metastr, | 
| 501 |  |  |  |  |  |  | $hspdata->{'query'}, $hspdata->{'midline'}, | 
| 502 | 15 |  |  |  |  | 73 | $hspdata->{'hit'},   $hspdata->{'pp'}); | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  | }  # end of 'Hit alignments:' section of report | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | # Process internal pipeline stats (end of report) | 
| 508 |  |  |  |  |  |  | elsif ( $buffer =~ m/Internal CM pipeline statistics summary:/ ) { | 
| 509 | 4 |  |  |  |  | 9 | while ( defined( $buffer = $self->_readline ) ) { | 
| 510 | 68 | 100 |  |  |  | 114 | last if ( $buffer =~ m!^//! ); | 
| 511 |  |  |  |  |  |  |  | 
| 512 | 64 | 100 |  |  |  | 110 | if ( $buffer =~ /^Target sequences:\s+(\d+)\s+\((\d+) residues/ ) { | 
| 513 | 4 |  |  |  |  | 19 | $self->element_hash( { 'Infernal_db-len' => $1, | 
| 514 |  |  |  |  |  |  | 'Infernal_db-let' => $2, } ); | 
| 515 |  |  |  |  |  |  | } | 
| 516 |  |  |  |  |  |  | } | 
| 517 | 4 |  |  |  |  | 7 | last;    # of the outer while defined $self->readline | 
| 518 |  |  |  |  |  |  | } | 
| 519 |  |  |  |  |  |  |  | 
| 520 |  |  |  |  |  |  | # Leftovers | 
| 521 |  |  |  |  |  |  | else { | 
| 522 |  |  |  |  |  |  | #print STDERR "Missed line: $buffer\n"; | 
| 523 | 19 |  |  |  |  | 36 | $self->debug($buffer); | 
| 524 |  |  |  |  |  |  | } | 
| 525 | 48 |  |  |  |  | 105 | $last = $buffer; | 
| 526 |  |  |  |  |  |  | } # PARSER end | 
| 527 |  |  |  |  |  |  |  | 
| 528 |  |  |  |  |  |  | # Final processing of hits and hsps | 
| 529 | 4 |  |  |  |  | 7 | my $hit_counter = 0; | 
| 530 | 4 |  |  |  |  | 7 | foreach my $hit ( @hit_list ) { | 
| 531 | 15 |  |  |  |  | 16 | $hit_counter++; | 
| 532 | 15 |  |  |  |  | 26 | my ($hit_name, $hit_desc, $hit_signif, $hit_score) = @$hit; | 
| 533 | 15 |  | 50 |  |  | 37 | my $num_hsp = $modelcounter{$hit_name . $hit_counter} || 0; | 
| 534 |  |  |  |  |  |  |  | 
| 535 | 15 |  |  |  |  | 36 | $self->start_element( { 'Name' => 'Hit' } ); | 
| 536 | 15 |  |  |  |  | 66 | $self->element_hash( {'Hit_id'    => $hit_name, | 
| 537 |  |  |  |  |  |  | 'Hit_desc'  => $hit_desc, | 
| 538 |  |  |  |  |  |  | 'Hit_signif'=> $hit_signif, | 
| 539 |  |  |  |  |  |  | 'Hit_score' => $hit_score, | 
| 540 |  |  |  |  |  |  | 'Hit_bits'  => $hit_score, } ); | 
| 541 | 15 |  |  |  |  | 40 | for my $i ( 1 .. $num_hsp ) { | 
| 542 | 15 |  |  |  |  | 31 | my $hsp_key = $hit_name . $hit_counter . "_" . $i; | 
| 543 | 15 |  |  |  |  | 18 | my $hsp = $hsp_list[ $hspindex{$hsp_key} ]; | 
| 544 | 15 | 50 |  |  |  | 21 | if ( defined $hsp ) { | 
| 545 | 15 |  |  |  |  | 20 | my $hspid = shift @$hsp; | 
| 546 |  |  |  |  |  |  |  | 
| 547 | 15 |  |  |  |  | 40 | my ($cm_start,  $cm_stop, $seq_start, $seq_stop, | 
| 548 |  |  |  |  |  |  | $score,     $eval,    $hitlength, $ncline, | 
| 549 |  |  |  |  |  |  | $csline, $qseq, $midline, $hseq, $pline) = @$hsp; | 
| 550 | 15 | 100 |  |  |  | 32 | if ( $hitlength != 0 ) { | 
| 551 | 10 |  |  |  |  | 27 | $self->element( | 
| 552 |  |  |  |  |  |  | { 'Name' => 'Hit_len', 'Data' => $hitlength } | 
| 553 |  |  |  |  |  |  | ); | 
| 554 |  |  |  |  |  |  | } | 
| 555 |  |  |  |  |  |  |  | 
| 556 | 15 |  |  |  |  | 38 | $self->start_element( { 'Name' => 'Hsp' } ); | 
| 557 | 15 |  |  |  |  | 119 | $self->element_hash( { 'Hsp_stranded'   => 'HIT', | 
| 558 |  |  |  |  |  |  | 'Hsp_query-from' => $cm_start, | 
| 559 |  |  |  |  |  |  | 'Hsp_query-to'   => $cm_stop, | 
| 560 |  |  |  |  |  |  | 'Hsp_hit-from'   => $seq_start, | 
| 561 |  |  |  |  |  |  | 'Hsp_hit-to'     => $seq_stop, | 
| 562 |  |  |  |  |  |  | 'Hsp_score'      => $score, | 
| 563 |  |  |  |  |  |  | 'Hsp_bit-score'  => $score, | 
| 564 |  |  |  |  |  |  | 'Hsp_evalue'     => $eval, | 
| 565 |  |  |  |  |  |  | 'Hsp_ncline'     => $ncline, | 
| 566 |  |  |  |  |  |  | 'Hsp_structure'  => $csline, | 
| 567 |  |  |  |  |  |  | 'Hsp_qseq'       => $qseq, | 
| 568 |  |  |  |  |  |  | 'Hsp_midline'    => $midline, | 
| 569 |  |  |  |  |  |  | 'Hsp_hseq'       => $hseq, | 
| 570 |  |  |  |  |  |  | 'Hsp_pline'      => $pline, | 
| 571 |  |  |  |  |  |  | } ); | 
| 572 |  |  |  |  |  |  |  | 
| 573 | 15 |  |  |  |  | 68 | $self->end_element( { 'Name' => 'Hsp' } ); | 
| 574 |  |  |  |  |  |  | } | 
| 575 |  |  |  |  |  |  | } | 
| 576 | 15 |  |  |  |  | 31 | $self->end_element( { 'Name' => 'Hit' } ); | 
| 577 |  |  |  |  |  |  | } | 
| 578 |  |  |  |  |  |  |  | 
| 579 | 4 |  |  |  |  | 12 | $self->end_element( { 'Name' => 'Result' } ); | 
| 580 | 4 |  |  |  |  | 12 | my $result = $self->end_document(); | 
| 581 | 4 |  |  |  |  | 43 | return $result; | 
| 582 |  |  |  |  |  |  | } | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  |  | 
| 585 |  |  |  |  |  |  | =head2 start_element | 
| 586 |  |  |  |  |  |  |  | 
| 587 |  |  |  |  |  |  | Title   : start_element | 
| 588 |  |  |  |  |  |  | Usage   : $eventgenerator->start_element | 
| 589 |  |  |  |  |  |  | Function: Handles a start element event | 
| 590 |  |  |  |  |  |  | Returns : none | 
| 591 |  |  |  |  |  |  | Args    : hashref with at least 2 keys 'Data' and 'Name' | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | =cut | 
| 595 |  |  |  |  |  |  |  | 
| 596 |  |  |  |  |  |  | sub start_element { | 
| 597 | 69 |  |  | 69 | 1 | 104 | my ( $self, $data ) = @_; | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | # we currently don't care about attributes | 
| 600 | 69 |  |  |  |  | 95 | my $nm   = $data->{'Name'}; | 
| 601 | 69 |  |  |  |  | 99 | my $type = $MODEMAP{$nm}; | 
| 602 | 69 | 50 |  |  |  | 106 | if ($type) { | 
| 603 | 69 | 50 |  |  |  | 156 | if ( $self->_eventHandler->will_handle($type) ) { | 
| 604 | 69 |  |  |  |  | 221 | my $func = sprintf( "start_%s", lc $type ); | 
| 605 | 69 |  |  |  |  | 121 | $self->_eventHandler->$func( $data->{'Attributes'} ); | 
| 606 |  |  |  |  |  |  | } | 
| 607 | 69 |  |  |  |  | 129 | unshift @{ $self->{'_elements'} }, $type; | 
|  | 69 |  |  |  |  | 150 |  | 
| 608 |  |  |  |  |  |  | } | 
| 609 | 69 | 100 | 66 |  |  | 293 | if ( defined $type | 
| 610 |  |  |  |  |  |  | && $type eq 'result' ) | 
| 611 |  |  |  |  |  |  | { | 
| 612 | 7 |  |  |  |  | 14 | $self->{'_values'} = {}; | 
| 613 | 7 |  |  |  |  | 20 | $self->{'_result'} = undef; | 
| 614 |  |  |  |  |  |  | } | 
| 615 |  |  |  |  |  |  | } | 
| 616 |  |  |  |  |  |  |  | 
| 617 |  |  |  |  |  |  | =head2 end_element | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | Title   : start_element | 
| 620 |  |  |  |  |  |  | Usage   : $eventgenerator->end_element | 
| 621 |  |  |  |  |  |  | Function: Handles an end element event | 
| 622 |  |  |  |  |  |  | Returns : none | 
| 623 |  |  |  |  |  |  | Args    : hashref with at least 2 keys, 'Data' and 'Name' | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  | =cut | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | sub end_element { | 
| 628 | 83 |  |  | 83 | 1 | 126 | my ( $self, $data ) = @_; | 
| 629 | 83 |  |  |  |  | 126 | my $nm   = $data->{'Name'}; | 
| 630 | 83 |  |  |  |  | 112 | my $type = $MODEMAP{$nm}; | 
| 631 | 83 |  |  |  |  | 109 | my $rc; | 
| 632 |  |  |  |  |  |  |  | 
| 633 | 83 | 100 |  |  |  | 127 | if ($type) { | 
|  |  | 50 |  |  |  |  |  | 
| 634 | 70 | 50 |  |  |  | 221 | if ( $self->_eventHandler->will_handle($type) ) { | 
| 635 | 70 |  |  |  |  | 218 | my $func = sprintf( "end_%s", lc $type ); | 
| 636 |  |  |  |  |  |  | $rc = $self->_eventHandler->$func( $self->{'_reporttype'}, | 
| 637 | 70 |  |  |  |  | 127 | $self->{'_values'} ); | 
| 638 |  |  |  |  |  |  | } | 
| 639 | 70 |  |  |  |  | 160 | my $lastelem = shift @{ $self->{'_elements'} }; | 
|  | 70 |  |  |  |  | 132 |  | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  | # Infernal 1.1 allows one to know hit->length in some instances | 
| 642 |  |  |  |  |  |  | # so remove it so it doesn't carry over to next hit. Tried flushing | 
| 643 |  |  |  |  |  |  | # all 'type' values from {_values} buffer but it breaks legacy tests | 
| 644 | 70 | 100 |  |  |  | 159 | if ($type eq 'hit' ) { | 
| 645 | 23 |  |  |  |  | 37 | delete $self->{_values}{'HIT-length'}; | 
| 646 | 23 |  |  |  |  | 36 | delete $self->{_values}{'HSP-hit_length'}; | 
| 647 |  |  |  |  |  |  | } | 
| 648 |  |  |  |  |  |  | } | 
| 649 |  |  |  |  |  |  | elsif ( $MAPPING{$nm} ) { | 
| 650 | 13 | 50 |  |  |  | 23 | if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { | 
| 651 | 0 |  |  |  |  | 0 | my $key = ( keys %{ $MAPPING{$nm} } )[0]; | 
|  | 0 |  |  |  |  | 0 |  | 
| 652 |  |  |  |  |  |  | $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = | 
| 653 | 0 |  |  |  |  | 0 | $self->{'_last_data'}; | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  | else { | 
| 656 | 13 |  |  |  |  | 35 | $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'}; | 
| 657 |  |  |  |  |  |  | } | 
| 658 |  |  |  |  |  |  | } | 
| 659 |  |  |  |  |  |  | else { | 
| 660 | 0 |  |  |  |  | 0 | $self->debug("unknown nm $nm, ignoring\n"); | 
| 661 |  |  |  |  |  |  | } | 
| 662 | 83 |  |  |  |  | 120 | $self->{'_last_data'} = '';    # remove read data if we are at | 
| 663 |  |  |  |  |  |  | # end of an element | 
| 664 | 83 | 100 | 100 |  |  | 263 | $self->{'_result'} = $rc if ( defined $type && $type eq 'result' ); | 
| 665 | 83 |  |  |  |  | 285 | return $rc; | 
| 666 |  |  |  |  |  |  | } | 
| 667 |  |  |  |  |  |  |  | 
| 668 |  |  |  |  |  |  | =head2 element | 
| 669 |  |  |  |  |  |  |  | 
| 670 |  |  |  |  |  |  | Title   : element | 
| 671 |  |  |  |  |  |  | Usage   : $eventhandler->element({'Name' => $name, 'Data' => $str}); | 
| 672 |  |  |  |  |  |  | Function: Convenience method that calls start_element, characters, end_element | 
| 673 |  |  |  |  |  |  | Returns : none | 
| 674 |  |  |  |  |  |  | Args    : Hash ref with the keys 'Name' and 'Data' | 
| 675 |  |  |  |  |  |  |  | 
| 676 |  |  |  |  |  |  | =cut | 
| 677 |  |  |  |  |  |  |  | 
| 678 |  |  |  |  |  |  | sub element { | 
| 679 | 13 |  |  | 13 | 1 | 20 | my ( $self, $data ) = @_; | 
| 680 |  |  |  |  |  |  | # simple data calls (%MAPPING) do not need start_element | 
| 681 | 13 |  |  |  |  | 29 | $self->characters($data); | 
| 682 | 13 |  |  |  |  | 26 | $self->end_element($data); | 
| 683 |  |  |  |  |  |  | } | 
| 684 |  |  |  |  |  |  |  | 
| 685 |  |  |  |  |  |  | =head2 element_hash | 
| 686 |  |  |  |  |  |  |  | 
| 687 |  |  |  |  |  |  | Title   : element | 
| 688 |  |  |  |  |  |  | Usage   : $eventhandler->element_hash({'Hsp_hit-from' => $start, | 
| 689 |  |  |  |  |  |  | 'Hsp_hit-to'   => $end, | 
| 690 |  |  |  |  |  |  | 'Hsp_score'    => $lastscore}); | 
| 691 |  |  |  |  |  |  | Function: Convenience method that takes multiple simple data elements and | 
| 692 |  |  |  |  |  |  | maps to appropriate parameters | 
| 693 |  |  |  |  |  |  | Returns : none | 
| 694 |  |  |  |  |  |  | Args    : Hash ref with the mapped key (in %MAPPING) and value | 
| 695 |  |  |  |  |  |  |  | 
| 696 |  |  |  |  |  |  | =cut | 
| 697 |  |  |  |  |  |  |  | 
| 698 |  |  |  |  |  |  | sub element_hash { | 
| 699 | 124 |  |  | 124 | 1 | 234 | my ($self, $data) = @_; | 
| 700 | 124 | 50 | 33 |  |  | 403 | $self->throw("Must provide data hash ref") if !$data || !ref($data); | 
| 701 | 124 |  |  |  |  | 154 | for my $nm (sort keys %{$data}) { | 
|  | 124 |  |  |  |  | 594 |  | 
| 702 | 715 | 100 | 100 |  |  | 2242 | next if $data->{$nm} && $data->{$nm} =~ m{^\s*$}o; | 
| 703 | 707 | 50 |  |  |  | 1079 | if ( $MAPPING{$nm} ) { | 
| 704 | 707 | 50 |  |  |  | 915 | if ( ref( $MAPPING{$nm} ) =~ /hash/i ) { | 
| 705 | 0 |  |  |  |  | 0 | my $key = ( keys %{ $MAPPING{$nm} } )[0]; | 
|  | 0 |  |  |  |  | 0 |  | 
| 706 |  |  |  |  |  |  | $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } = | 
| 707 | 0 |  |  |  |  | 0 | $data->{$nm}; | 
| 708 |  |  |  |  |  |  | } | 
| 709 |  |  |  |  |  |  | else { | 
| 710 | 707 |  |  |  |  | 1361 | $self->{'_values'}->{ $MAPPING{$nm} } = $data->{$nm}; | 
| 711 |  |  |  |  |  |  | } | 
| 712 |  |  |  |  |  |  | } | 
| 713 |  |  |  |  |  |  | } | 
| 714 |  |  |  |  |  |  | } | 
| 715 |  |  |  |  |  |  |  | 
| 716 |  |  |  |  |  |  | =head2 characters | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | Title   : characters | 
| 719 |  |  |  |  |  |  | Usage   : $eventgenerator->characters($str) | 
| 720 |  |  |  |  |  |  | Function: Send a character events | 
| 721 |  |  |  |  |  |  | Returns : none | 
| 722 |  |  |  |  |  |  | Args    : string | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  |  | 
| 725 |  |  |  |  |  |  | =cut | 
| 726 |  |  |  |  |  |  |  | 
| 727 |  |  |  |  |  |  | sub characters { | 
| 728 | 13 |  |  | 13 | 1 | 16 | my ( $self, $data ) = @_; | 
| 729 | 13 | 50 | 33 |  |  | 81 | return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o ); | 
| 730 | 13 |  |  |  |  | 25 | $self->{'_last_data'} = $data->{'Data'}; | 
| 731 |  |  |  |  |  |  | } | 
| 732 |  |  |  |  |  |  |  | 
| 733 |  |  |  |  |  |  | =head2 within_element | 
| 734 |  |  |  |  |  |  |  | 
| 735 |  |  |  |  |  |  | Title   : within_element | 
| 736 |  |  |  |  |  |  | Usage   : if( $eventgenerator->within_element($element) ) {} | 
| 737 |  |  |  |  |  |  | Function: Test if we are within a particular element | 
| 738 |  |  |  |  |  |  | This is different than 'in' because within can be tested | 
| 739 |  |  |  |  |  |  | for a whole block. | 
| 740 |  |  |  |  |  |  | Returns : boolean | 
| 741 |  |  |  |  |  |  | Args    : string element name | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | =cut | 
| 744 |  |  |  |  |  |  |  | 
| 745 |  |  |  |  |  |  | sub within_element { | 
| 746 | 43 |  |  | 43 | 1 | 84 | my ( $self, $name ) = @_; | 
| 747 |  |  |  |  |  |  | return 0 | 
| 748 |  |  |  |  |  |  | if ( !defined $name | 
| 749 |  |  |  |  |  |  | || !defined $self->{'_elements'} | 
| 750 | 43 | 100 | 33 |  |  | 181 | || scalar @{ $self->{'_elements'} } == 0 ); | 
|  | 43 |  | 66 |  |  | 132 |  | 
| 751 | 40 |  |  |  |  | 58 | foreach ( @{ $self->{'_elements'} } ) { | 
|  | 40 |  |  |  |  | 94 |  | 
| 752 | 52 | 100 |  |  |  | 164 | return 1 if ( $_ eq $name ); | 
| 753 |  |  |  |  |  |  | } | 
| 754 | 12 |  |  |  |  | 40 | return 0; | 
| 755 |  |  |  |  |  |  | } | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  | =head2 in_element | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | Title   : in_element | 
| 760 |  |  |  |  |  |  | Usage   : if( $eventgenerator->in_element($element) ) {} | 
| 761 |  |  |  |  |  |  | Function: Test if we are in a particular element | 
| 762 |  |  |  |  |  |  | This is different than 'within' because 'in' only | 
| 763 |  |  |  |  |  |  | tests its immediate parent. | 
| 764 |  |  |  |  |  |  | Returns : boolean | 
| 765 |  |  |  |  |  |  | Args    : string element name | 
| 766 |  |  |  |  |  |  |  | 
| 767 |  |  |  |  |  |  | =cut | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | sub in_element { | 
| 770 | 86 |  |  | 86 | 1 | 165 | my ( $self, $name ) = @_; | 
| 771 | 86 | 50 |  |  |  | 211 | return 0 if !defined $self->{'_elements'}->[0]; | 
| 772 | 86 |  |  |  |  | 364 | return ( $self->{'_elements'}->[0] eq $name ); | 
| 773 |  |  |  |  |  |  | } | 
| 774 |  |  |  |  |  |  |  | 
| 775 |  |  |  |  |  |  | =head2 start_document | 
| 776 |  |  |  |  |  |  |  | 
| 777 |  |  |  |  |  |  | Title   : start_document | 
| 778 |  |  |  |  |  |  | Usage   : $eventgenerator->start_document | 
| 779 |  |  |  |  |  |  | Function: Handle a start document event | 
| 780 |  |  |  |  |  |  | Returns : none | 
| 781 |  |  |  |  |  |  | Args    : none | 
| 782 |  |  |  |  |  |  |  | 
| 783 |  |  |  |  |  |  | =cut | 
| 784 |  |  |  |  |  |  |  | 
| 785 |  |  |  |  |  |  | sub start_document { | 
| 786 | 9 |  |  | 9 | 1 | 14 | my ($self) = @_; | 
| 787 | 9 |  |  |  |  | 18 | $self->{'_lasttype'} = ''; | 
| 788 | 9 |  |  |  |  | 40 | $self->{'_values'}   = {}; | 
| 789 | 9 |  |  |  |  | 19 | $self->{'_result'}   = undef; | 
| 790 | 9 |  |  |  |  | 20 | $self->{'_elements'} = []; | 
| 791 |  |  |  |  |  |  | } | 
| 792 |  |  |  |  |  |  |  | 
| 793 |  |  |  |  |  |  | =head2 end_document | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  | Title   : end_document | 
| 796 |  |  |  |  |  |  | Usage   : $eventgenerator->end_document | 
| 797 |  |  |  |  |  |  | Function: Handles an end document event | 
| 798 |  |  |  |  |  |  | Returns : Bio::Search::Result::ResultI object | 
| 799 |  |  |  |  |  |  | Args    : none | 
| 800 |  |  |  |  |  |  |  | 
| 801 |  |  |  |  |  |  | =cut | 
| 802 |  |  |  |  |  |  |  | 
| 803 |  |  |  |  |  |  | sub end_document { | 
| 804 | 8 |  |  | 8 | 1 | 14 | my ($self) = @_; | 
| 805 | 8 |  |  |  |  | 52 | return $self->{'_result'}; | 
| 806 |  |  |  |  |  |  | } | 
| 807 |  |  |  |  |  |  |  | 
| 808 |  |  |  |  |  |  | =head2 result_count | 
| 809 |  |  |  |  |  |  |  | 
| 810 |  |  |  |  |  |  | Title   : result_count | 
| 811 |  |  |  |  |  |  | Usage   : my $count = $searchio->result_count | 
| 812 |  |  |  |  |  |  | Function: Returns the number of results we have processed | 
| 813 |  |  |  |  |  |  | Returns : integer | 
| 814 |  |  |  |  |  |  | Args    : none | 
| 815 |  |  |  |  |  |  |  | 
| 816 |  |  |  |  |  |  | =cut | 
| 817 |  |  |  |  |  |  |  | 
| 818 |  |  |  |  |  |  | sub result_count { | 
| 819 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 820 | 0 |  |  |  |  | 0 | return $self->{'_result_count'}; | 
| 821 |  |  |  |  |  |  | } | 
| 822 |  |  |  |  |  |  |  | 
| 823 |  |  |  |  |  |  | =head2 model | 
| 824 |  |  |  |  |  |  |  | 
| 825 |  |  |  |  |  |  | Title   : model | 
| 826 |  |  |  |  |  |  | Usage   : my $model = $parser->model(); | 
| 827 |  |  |  |  |  |  | Function: Get/Set model; Infernal currently does not output | 
| 828 |  |  |  |  |  |  | the model name (Rfam ID) | 
| 829 |  |  |  |  |  |  | Returns : String (name of model) | 
| 830 |  |  |  |  |  |  | Args    : [optional] String (name of model) | 
| 831 |  |  |  |  |  |  |  | 
| 832 |  |  |  |  |  |  | =cut | 
| 833 |  |  |  |  |  |  |  | 
| 834 |  |  |  |  |  |  | sub model { | 
| 835 | 5 |  |  | 5 | 1 | 10 | my $self = shift; | 
| 836 | 5 | 100 |  |  |  | 21 | return $self->{'_model'} = shift if @_; | 
| 837 | 2 |  |  |  |  | 8 | return $self->{'_model'}; | 
| 838 |  |  |  |  |  |  | } | 
| 839 |  |  |  |  |  |  |  | 
| 840 |  |  |  |  |  |  | =head2 database | 
| 841 |  |  |  |  |  |  |  | 
| 842 |  |  |  |  |  |  | Title   : database | 
| 843 |  |  |  |  |  |  | Usage   : my $database = $parser->database(); | 
| 844 |  |  |  |  |  |  | Function: Get/Set database; pre-v.1 versions of Infernal do not output | 
| 845 |  |  |  |  |  |  | the database name | 
| 846 |  |  |  |  |  |  | Returns : String (database name) | 
| 847 |  |  |  |  |  |  | Args    : [optional] String (database name) | 
| 848 |  |  |  |  |  |  |  | 
| 849 |  |  |  |  |  |  | =cut | 
| 850 |  |  |  |  |  |  |  | 
| 851 |  |  |  |  |  |  | sub database { | 
| 852 | 6 |  |  | 6 | 1 | 11 | my $self = shift; | 
| 853 | 6 | 100 |  |  |  | 25 | return $self->{'_database'} = shift if @_; | 
| 854 | 3 |  |  |  |  | 22 | return $self->{'_database'}; | 
| 855 |  |  |  |  |  |  | } | 
| 856 |  |  |  |  |  |  |  | 
| 857 |  |  |  |  |  |  | =head2 algorithm | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | Title   : algorithm | 
| 860 |  |  |  |  |  |  | Usage   : my $algorithm = $parser->algorithm(); | 
| 861 |  |  |  |  |  |  | Function: Get/Set algorithm; pre-v.1 versions of Infernal do not output | 
| 862 |  |  |  |  |  |  | the algorithm name | 
| 863 |  |  |  |  |  |  | Returns : String (algorithm name) | 
| 864 |  |  |  |  |  |  | Args    : [optional] String (algorithm name) | 
| 865 |  |  |  |  |  |  |  | 
| 866 |  |  |  |  |  |  | =cut | 
| 867 |  |  |  |  |  |  |  | 
| 868 |  |  |  |  |  |  | sub algorithm { | 
| 869 | 10 |  |  | 10 | 1 | 18 | my $self = shift; | 
| 870 | 10 | 100 |  |  |  | 51 | return $self->{'_algorithm'} = shift if @_; | 
| 871 | 3 |  |  |  |  | 14 | return $self->{'_algorithm'}; | 
| 872 |  |  |  |  |  |  | } | 
| 873 |  |  |  |  |  |  |  | 
| 874 |  |  |  |  |  |  | =head2 query_accession | 
| 875 |  |  |  |  |  |  |  | 
| 876 |  |  |  |  |  |  | Title   : query_accession | 
| 877 |  |  |  |  |  |  | Usage   : my $acc = $parser->query_accession(); | 
| 878 |  |  |  |  |  |  | Function: Get/Set query (model) accession; pre-v1.1 Infernal does not output | 
| 879 |  |  |  |  |  |  | the accession number (Rfam accession #) | 
| 880 |  |  |  |  |  |  | Returns : String (accession) | 
| 881 |  |  |  |  |  |  | Args    : [optional] String (accession) | 
| 882 |  |  |  |  |  |  |  | 
| 883 |  |  |  |  |  |  | =cut | 
| 884 |  |  |  |  |  |  |  | 
| 885 |  |  |  |  |  |  | sub query_accession { | 
| 886 | 13 |  |  | 13 | 1 | 23 | my $self = shift; | 
| 887 | 13 | 100 |  |  |  | 34 | return $self->{'_query_accession'} = shift if @_; | 
| 888 | 9 |  |  |  |  | 30 | return $self->{'_query_accession'}; | 
| 889 |  |  |  |  |  |  | } | 
| 890 |  |  |  |  |  |  |  | 
| 891 |  |  |  |  |  |  | =head2 query_description | 
| 892 |  |  |  |  |  |  |  | 
| 893 |  |  |  |  |  |  | Title   : query_description | 
| 894 |  |  |  |  |  |  | Usage   : my $acc = $parser->query_description(); | 
| 895 |  |  |  |  |  |  | Function: Get/Set query (model) description; pre-v1.1 Infernal does not output | 
| 896 |  |  |  |  |  |  | the Rfam description | 
| 897 |  |  |  |  |  |  | Returns : String (description) | 
| 898 |  |  |  |  |  |  | Args    : [optional] String (description) | 
| 899 |  |  |  |  |  |  |  | 
| 900 |  |  |  |  |  |  | =cut | 
| 901 |  |  |  |  |  |  |  | 
| 902 |  |  |  |  |  |  | sub query_description { | 
| 903 | 13 |  |  | 13 | 1 | 21 | my $self = shift; | 
| 904 | 13 | 100 |  |  |  | 26 | return $self->{'_query_description'} = shift if @_; | 
| 905 | 9 |  |  |  |  | 24 | return $self->{'_query_description'}; | 
| 906 |  |  |  |  |  |  | } | 
| 907 |  |  |  |  |  |  |  | 
| 908 |  |  |  |  |  |  | =head2 hsp_minscore | 
| 909 |  |  |  |  |  |  |  | 
| 910 |  |  |  |  |  |  | Title   : hsp_minscore | 
| 911 |  |  |  |  |  |  | Usage   : my $cutoff = $parser->hsp_minscore(); | 
| 912 |  |  |  |  |  |  | Function: Get/Set min bit score cutoff (for generating Hits/HSPs) | 
| 913 |  |  |  |  |  |  | Returns : score (number) | 
| 914 |  |  |  |  |  |  | Args    : [optional] score (number) | 
| 915 |  |  |  |  |  |  |  | 
| 916 |  |  |  |  |  |  | =cut | 
| 917 |  |  |  |  |  |  |  | 
| 918 |  |  |  |  |  |  | sub hsp_minscore { | 
| 919 | 9 |  |  | 9 | 1 | 12 | my $self = shift; | 
| 920 | 9 | 100 |  |  |  | 33 | return $self->{'_hsp_minscore'} = shift if @_; | 
| 921 | 2 |  |  |  |  | 5 | return $self->{'_hsp_minscore'}; | 
| 922 |  |  |  |  |  |  | } | 
| 923 |  |  |  |  |  |  |  | 
| 924 |  |  |  |  |  |  | =head2 convert_meta | 
| 925 |  |  |  |  |  |  |  | 
| 926 |  |  |  |  |  |  | Title   : convert_meta | 
| 927 |  |  |  |  |  |  | Usage   : $parser->convert_meta(1); | 
| 928 |  |  |  |  |  |  | Function: Get/Set boolean flag for converting Infernal WUSS format | 
| 929 |  |  |  |  |  |  | to a simple bracketed format (simple WUSS by default) | 
| 930 |  |  |  |  |  |  | Returns : boolean flag (TRUE or FALSE) | 
| 931 |  |  |  |  |  |  | Args    : [optional] boolean (eval's to TRUE or FALSE) | 
| 932 |  |  |  |  |  |  |  | 
| 933 |  |  |  |  |  |  | =cut | 
| 934 |  |  |  |  |  |  |  | 
| 935 |  |  |  |  |  |  | sub convert_meta { | 
| 936 | 42 |  |  | 42 | 1 | 67 | my $self = shift; | 
| 937 | 42 | 100 |  |  |  | 80 | return $self->{'_convert_meta'} = shift if @_; | 
| 938 | 39 |  |  |  |  | 107 | return $self->{'_convert_meta'}; | 
| 939 |  |  |  |  |  |  | } | 
| 940 |  |  |  |  |  |  |  | 
| 941 |  |  |  |  |  |  | =head2 version | 
| 942 |  |  |  |  |  |  |  | 
| 943 |  |  |  |  |  |  | Title   : version | 
| 944 |  |  |  |  |  |  | Usage   : $parser->version(); | 
| 945 |  |  |  |  |  |  | Function: Set the Infernal cmsearch version | 
| 946 |  |  |  |  |  |  | Returns : version | 
| 947 |  |  |  |  |  |  | Args    : [optional] version | 
| 948 |  |  |  |  |  |  |  | 
| 949 |  |  |  |  |  |  | =cut | 
| 950 |  |  |  |  |  |  |  | 
| 951 |  |  |  |  |  |  | sub version { | 
| 952 | 9 |  |  | 9 | 1 | 14 | my $self = shift; | 
| 953 | 9 | 100 |  |  |  | 28 | return $self->{'_version'} = shift if @_; | 
| 954 | 2 |  |  |  |  | 10 | return $self->{'_version'}; | 
| 955 |  |  |  |  |  |  | } | 
| 956 |  |  |  |  |  |  |  | 
| 957 |  |  |  |  |  |  | =head2 structure_symbols | 
| 958 |  |  |  |  |  |  |  | 
| 959 |  |  |  |  |  |  | Title   : structure_symbols | 
| 960 |  |  |  |  |  |  | Usage   : my $hashref = $parser->structure_symbols(); | 
| 961 |  |  |  |  |  |  | Function: Get/Set RNA structure symbols | 
| 962 |  |  |  |  |  |  | Returns : Hash ref of delimiters (5' stem, 3' stem, single-strand, etc) | 
| 963 |  |  |  |  |  |  | : default = < (5-prime) | 
| 964 |  |  |  |  |  |  | > (3-prime) | 
| 965 |  |  |  |  |  |  | : (single-strand) | 
| 966 |  |  |  |  |  |  | ? (unknown) | 
| 967 |  |  |  |  |  |  | . (gap) | 
| 968 |  |  |  |  |  |  | Args    : Hash ref of substitute delimiters, using above keys. | 
| 969 |  |  |  |  |  |  |  | 
| 970 |  |  |  |  |  |  | =cut | 
| 971 |  |  |  |  |  |  |  | 
| 972 |  |  |  |  |  |  | sub structure_symbols { | 
| 973 | 10 |  |  | 10 | 1 | 21 | my ($self, $delim) = @_; | 
| 974 | 10 | 100 |  |  |  | 31 | if ($delim) { | 
| 975 | 7 | 50 |  |  |  | 36 | if (ref($delim) =~ m{HASH}) { | 
| 976 | 7 |  |  |  |  | 11 | my %data = %{ $delim }; | 
|  | 7 |  |  |  |  | 52 |  | 
| 977 | 7 |  |  |  |  | 20 | for my $d (@VALID_SYMBOLS) { | 
| 978 | 35 | 50 |  |  |  | 61 | if ( exists $data{$d} ) { | 
| 979 | 35 |  |  |  |  | 79 | $self->{'_delimiter'}->{$d} = $data{$d}; | 
| 980 |  |  |  |  |  |  | } | 
| 981 |  |  |  |  |  |  | } | 
| 982 |  |  |  |  |  |  | } else { | 
| 983 | 0 |  |  |  |  | 0 | $self->throw("Args to helix_delimiters() should be in a hash reference"); | 
| 984 |  |  |  |  |  |  | } | 
| 985 |  |  |  |  |  |  | } | 
| 986 | 10 |  |  |  |  | 24 | return $self->{'_delimiter'}; | 
| 987 |  |  |  |  |  |  | } | 
| 988 |  |  |  |  |  |  |  | 
| 989 |  |  |  |  |  |  | =head2 simple_meta | 
| 990 |  |  |  |  |  |  |  | 
| 991 |  |  |  |  |  |  | Title   : simple_meta | 
| 992 |  |  |  |  |  |  | Usage   : my $string = $parser->simple_meta($str); | 
| 993 |  |  |  |  |  |  | Function: converts more complex WUSS meta format into simple bracket format | 
| 994 |  |  |  |  |  |  | using symbols defined in structure_symbols() | 
| 995 |  |  |  |  |  |  | Returns : converted string | 
| 996 |  |  |  |  |  |  | Args    : [required] string to convert | 
| 997 |  |  |  |  |  |  | Note    : This is a very simple conversion method to get simple bracketed | 
| 998 |  |  |  |  |  |  | format from Infernal data.  If the convert_meta() flag is set, | 
| 999 |  |  |  |  |  |  | this is the method used to convert the strings. | 
| 1000 |  |  |  |  |  |  |  | 
| 1001 |  |  |  |  |  |  | =cut | 
| 1002 |  |  |  |  |  |  |  | 
| 1003 |  |  |  |  |  |  | sub simple_meta { | 
| 1004 | 3 |  |  | 3 | 1 | 12 | my ($self, $str) = @_; | 
| 1005 | 3 | 50 |  |  |  | 9 | $self->throw("No string arg sent!") if !$str; | 
| 1006 | 3 |  |  |  |  | 11 | my $structs = $self->structure_symbols(); | 
| 1007 |  |  |  |  |  |  | my ($ls, $rs, $ss, $unk, $gap) = ($structs->{'5-prime'}, $structs->{'3-prime'}, | 
| 1008 |  |  |  |  |  |  | $structs->{'single-strand'}, $structs->{'unknown'}, | 
| 1009 | 3 |  |  |  |  | 16 | $structs->{'gap'}); | 
| 1010 | 3 |  |  |  |  | 56 | $str =~ s{[\(\<\[\{]}{$ls}g; | 
| 1011 | 3 |  |  |  |  | 50 | $str =~ s{[\)\>\]\}]}{$rs}g; | 
| 1012 | 3 |  |  |  |  | 84 | $str =~ s{[:,_-]}{$ss}g; | 
| 1013 | 3 |  |  |  |  | 15 | $str =~ s{\.}{$gap}g; | 
| 1014 |  |  |  |  |  |  | # unknown not handled yet | 
| 1015 | 3 |  |  |  |  | 13 | return $str; | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 |  |  |  |  |  |  |  | 
| 1018 |  |  |  |  |  |  | ## private methods | 
| 1019 |  |  |  |  |  |  |  | 
| 1020 |  |  |  |  |  |  | # this is a hack which guesses the format and sets the handler for parsing in | 
| 1021 |  |  |  |  |  |  | # an instance; it'll be taken out when infernal 1.0 is released | 
| 1022 |  |  |  |  |  |  |  | 
| 1023 |  |  |  |  |  |  | sub _parse_latest { | 
| 1024 | 1 |  |  | 1 |  | 2 | my ($self) = @_; | 
| 1025 | 1 |  |  |  |  | 2 | my $seentop = 0; | 
| 1026 | 1 |  |  |  |  | 4 | local $/ = "\n"; | 
| 1027 | 1 |  |  |  |  | 3 | my ($accession, $description) = ($self->query_accession, $self->query_description); | 
| 1028 | 1 |  |  |  |  | 2 | my ($maxscore, $mineval, $minpval); | 
| 1029 | 1 |  |  |  |  | 3 | $self->start_document(); | 
| 1030 | 1 |  |  |  |  | 2 | my ($lasthit, $lastscore, $lasteval, $lastpval, $laststart, $lastend); | 
| 1031 |  |  |  |  |  |  | PARSER: | 
| 1032 | 1 |  |  |  |  | 2 | while (my $line = $self->_readline) { | 
| 1033 | 55 | 100 |  |  |  | 123 | next if $line =~ m{^\s+$}; | 
| 1034 |  |  |  |  |  |  | # stats aren't parsed yet... | 
| 1035 | 48 | 100 |  |  |  | 290 | if ($line =~ m{^\#\s+cmsearch}xms) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1036 | 1 |  |  |  |  | 2 | $seentop = 1; | 
| 1037 | 1 |  |  |  |  | 3 | $self->start_element({'Name' => 'Result'}); | 
| 1038 | 1 |  |  |  |  | 4 | $self->element_hash({ | 
| 1039 |  |  |  |  |  |  | 'Infernal_program'   => 'CMSEARCH' | 
| 1040 |  |  |  |  |  |  | }); | 
| 1041 |  |  |  |  |  |  | } | 
| 1042 |  |  |  |  |  |  | elsif ($line =~ m{^\#\sINFERNAL\s+(\d+\.\d+)}xms) { | 
| 1043 | 1 |  |  |  |  | 3 | $self->element_hash({ | 
| 1044 |  |  |  |  |  |  | 'Infernal_version'   => $1, | 
| 1045 |  |  |  |  |  |  | }); | 
| 1046 |  |  |  |  |  |  | } | 
| 1047 |  |  |  |  |  |  | elsif ($line =~ m{^\#\scommand:.*?\s(\S+)$}xms) { | 
| 1048 | 1 |  |  |  |  | 5 | $self->element_hash({ | 
| 1049 |  |  |  |  |  |  | 'Infernal_db'   => $1, | 
| 1050 |  |  |  |  |  |  | }); | 
| 1051 |  |  |  |  |  |  | } | 
| 1052 |  |  |  |  |  |  | elsif ($line =~ m{^\#\s+dbsize\(Mb\):\s+(\d+\.\d+)}xms) { | 
| 1053 |  |  |  |  |  |  | # store absolute DB length | 
| 1054 | 1 |  |  |  |  | 8 | $self->element_hash({ | 
| 1055 |  |  |  |  |  |  | 'Infernal_db-let'   => $1 * 1e6 | 
| 1056 |  |  |  |  |  |  | }); | 
| 1057 |  |  |  |  |  |  | } | 
| 1058 |  |  |  |  |  |  | elsif ($line =~ m{^CM(?:\s(\d+))?:\s*(\S+)}xms) { | 
| 1059 |  |  |  |  |  |  | # not sure, but it's possible single reports may contain multiple | 
| 1060 |  |  |  |  |  |  | # models; if so, they should be rolled over into a new ResultI | 
| 1061 |  |  |  |  |  |  | #print STDERR "ACC: $accession\nDESC: $description\n"; | 
| 1062 | 1 |  |  |  |  | 7 | $self->element_hash({ | 
| 1063 |  |  |  |  |  |  | 'Infernal_query-def' => $2, # present in output now | 
| 1064 |  |  |  |  |  |  | 'Infernal_query-acc' => $accession, | 
| 1065 |  |  |  |  |  |  | 'Infernal_querydesc' => $description | 
| 1066 |  |  |  |  |  |  | }); | 
| 1067 |  |  |  |  |  |  | } | 
| 1068 |  |  |  |  |  |  | elsif ($line =~ m{^>\s*(\S+)} ){ | 
| 1069 |  |  |  |  |  |  | #$self->debug("Start Hit: Found hit:$1\n"); | 
| 1070 | 1 | 50 |  |  |  | 3 | if ($self->in_element('hit')) { | 
| 1071 | 0 |  |  |  |  | 0 | $self->element_hash({'Hit_score' => $maxscore, | 
| 1072 |  |  |  |  |  |  | 'Hit_bits'  => $maxscore}); | 
| 1073 | 0 |  |  |  |  | 0 | ($maxscore, $minpval, $mineval) = undef; | 
| 1074 | 0 |  |  |  |  | 0 | $self->end_element({'Name' => 'Hit'}); | 
| 1075 |  |  |  |  |  |  | } | 
| 1076 | 1 |  |  |  |  | 4 | $lasthit = $1; | 
| 1077 |  |  |  |  |  |  | } | 
| 1078 |  |  |  |  |  |  | elsif ($line =~ m{ | 
| 1079 |  |  |  |  |  |  | ^\sQuery\s=\s\d+\s-\s\d+,\s   # Query start/end | 
| 1080 |  |  |  |  |  |  | Target\s=\s(\d+)\s-\s(\d+)    # Target start/end | 
| 1081 |  |  |  |  |  |  | }xmso) { | 
| 1082 |  |  |  |  |  |  | # Query (model) start/end always the same, determined from | 
| 1083 |  |  |  |  |  |  | # the HSP length | 
| 1084 | 3 |  |  |  |  | 11 | ($laststart, $lastend) = ($1, $2); | 
| 1085 |  |  |  |  |  |  | #$self->debug("Found hit coords:$laststart - $lastend\n"); | 
| 1086 |  |  |  |  |  |  | } elsif ($line =~ m{ | 
| 1087 |  |  |  |  |  |  | ^\sScore\s=\s([\d\.]+),\s   # Score = Bitscore (for now) | 
| 1088 |  |  |  |  |  |  | (?:E\s=\s([\d\.e-]+),\s     # E-val optional | 
| 1089 |  |  |  |  |  |  | P\s=\s([\d\.e-]+),\s)?     # P-val optional | 
| 1090 |  |  |  |  |  |  | GC\s=                       # GC not captured | 
| 1091 |  |  |  |  |  |  | }xmso | 
| 1092 |  |  |  |  |  |  | ) { | 
| 1093 | 3 |  |  |  |  | 8 | ($lastscore, $lasteval, $lastpval) = ($1, $2, $3); | 
| 1094 |  |  |  |  |  |  | #$self->debug(sprintf("Found hit data:Score:%s,Eval:%s,Pval:%s\n",$lastscore, $lasteval||'', $lastpval||'')); | 
| 1095 | 3 |  | 66 |  |  | 9 | $maxscore ||= $lastscore; | 
| 1096 | 3 | 50 | 33 |  |  | 10 | if ($lasteval && $lastpval) { | 
| 1097 | 3 |  | 66 |  |  | 6 | $mineval ||= $lasteval; | 
| 1098 | 3 |  | 66 |  |  | 7 | $minpval ||= $lastpval; | 
| 1099 | 3 | 50 |  |  |  | 9 | $mineval = ($mineval > $lasteval)  ? $lasteval : | 
| 1100 |  |  |  |  |  |  | $mineval; | 
| 1101 | 3 | 50 |  |  |  | 7 | $minpval = ($minpval > $lastpval)  ? $lastpval : | 
| 1102 |  |  |  |  |  |  | $minpval; | 
| 1103 |  |  |  |  |  |  | } | 
| 1104 | 3 | 50 |  |  |  | 7 | $maxscore = ($maxscore < $lastscore)  ? $lastscore : | 
| 1105 |  |  |  |  |  |  | $maxscore; | 
| 1106 | 3 | 100 |  |  |  | 8 | if (!$self->within_element('hit')) { | 
| 1107 | 1 |  |  |  |  | 8 | my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit); | 
| 1108 | 1 |  |  |  |  | 5 | $self->start_element({'Name' => 'Hit'}); | 
| 1109 | 1 | 50 |  |  |  | 7 | $self->element_hash({ | 
|  |  | 50 |  |  |  |  |  | 
| 1110 |  |  |  |  |  |  | 'Hit_id'           => $lasthit, | 
| 1111 |  |  |  |  |  |  | 'Hit_accession'    => $ver ? "$acc.$ver" : | 
| 1112 |  |  |  |  |  |  | $acc ? $acc : $lasthit, | 
| 1113 |  |  |  |  |  |  | 'Hit_gi'           => $gi | 
| 1114 |  |  |  |  |  |  | }); | 
| 1115 |  |  |  |  |  |  | } | 
| 1116 | 3 | 50 |  |  |  | 8 | if (!$self->in_element('hsp')) { | 
| 1117 | 3 |  |  |  |  | 8 | $self->start_element({'Name' => 'Hsp'}); | 
| 1118 |  |  |  |  |  |  | } | 
| 1119 |  |  |  |  |  |  |  | 
| 1120 |  |  |  |  |  |  | # hsp is similar to older output | 
| 1121 |  |  |  |  |  |  | } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP | 
| 1122 | 3 |  |  |  |  | 6 | $self->_pushback($line); # set up for loop | 
| 1123 |  |  |  |  |  |  | #$self->debug("Start HSP\n"); | 
| 1124 |  |  |  |  |  |  | # what is length of the gap to the structure data? | 
| 1125 | 3 |  |  |  |  | 7 | my $offset = length($1); | 
| 1126 | 3 |  |  |  |  | 4 | my ($ct, $strln) = 0; | 
| 1127 | 3 |  |  |  |  | 4 | my $hsp; | 
| 1128 | 3 |  |  |  |  | 10 | HSP: | 
| 1129 |  |  |  |  |  |  | my %hsp_key = ('0' => 'meta', | 
| 1130 |  |  |  |  |  |  | '1' => 'query', | 
| 1131 |  |  |  |  |  |  | '2' => 'midline', | 
| 1132 |  |  |  |  |  |  | '3' => 'hit'); | 
| 1133 |  |  |  |  |  |  | HSP: | 
| 1134 | 3 |  |  |  |  | 6 | while (defined ($line = $self->_readline)) { | 
| 1135 | 31 |  |  |  |  | 33 | chomp $line; | 
| 1136 | 31 | 100 |  |  |  | 39 | next if (!$line); # toss empty lines | 
| 1137 |  |  |  |  |  |  | # next if $line =~ m{^\s*$}; # toss empty lines | 
| 1138 |  |  |  |  |  |  | # it is possible to have homology lines consisting | 
| 1139 |  |  |  |  |  |  | # entirely of spaces if the subject has a large | 
| 1140 |  |  |  |  |  |  | # insertion where nothing matches the model | 
| 1141 |  |  |  |  |  |  |  | 
| 1142 |  |  |  |  |  |  | # exit loop if at end of file or upon next hit/HSP | 
| 1143 | 23 | 100 |  |  |  | 48 | if ($line =~ m{^\s{0,2}\S+}) { | 
| 1144 | 3 |  |  |  |  | 8 | $self->_pushback($line); | 
| 1145 | 3 |  |  |  |  | 4 | last HSP; | 
| 1146 |  |  |  |  |  |  | } | 
| 1147 |  |  |  |  |  |  | # iterate to keep track of each line (4 lines per hsp block) | 
| 1148 | 20 |  |  |  |  | 25 | my $iterator = $ct % 4; | 
| 1149 |  |  |  |  |  |  | # strlen set only with structure lines (proper length) | 
| 1150 | 20 | 100 |  |  |  | 22 | $strln = length($line) if $iterator == 0; | 
| 1151 |  |  |  |  |  |  | # only grab the data needed (hit start and stop in hit line above) | 
| 1152 |  |  |  |  |  |  |  | 
| 1153 | 20 |  |  |  |  | 28 | my $data = substr($line, $offset, $strln-$offset); | 
| 1154 | 20 |  |  |  |  | 37 | $hsp->{ $hsp_key{$iterator} } .= $data; | 
| 1155 |  |  |  |  |  |  |  | 
| 1156 | 20 |  |  |  |  | 33 | $ct++; | 
| 1157 |  |  |  |  |  |  | } | 
| 1158 |  |  |  |  |  |  |  | 
| 1159 |  |  |  |  |  |  | # query start, end are from the actual query length (entire hit is | 
| 1160 |  |  |  |  |  |  | # mapped to CM data, so all CM data is represented) | 
| 1161 |  |  |  |  |  |  | # works for now... | 
| 1162 | 3 | 50 |  |  |  | 6 | if ($self->in_element('hsp')) { | 
| 1163 |  |  |  |  |  |  | # In some cases with HSPs unaligned residues are present in | 
| 1164 |  |  |  |  |  |  | # the hit or query (Ex: '*[ 8]*' is 8 unaligned residues). | 
| 1165 |  |  |  |  |  |  | # This info needs to be passed on unmodifed to the HSP class | 
| 1166 |  |  |  |  |  |  | # and handled there as it is subjectively changed based on | 
| 1167 |  |  |  |  |  |  | # use. | 
| 1168 | 3 |  |  |  |  | 4 | my $strlen = 0; | 
| 1169 |  |  |  |  |  |  |  | 
| 1170 |  |  |  |  |  |  | # catch any insertions and add them into the actual length | 
| 1171 | 3 |  |  |  |  | 12 | while ($hsp->{'query'} =~ m{\*\[\s*(\d+)\s*\]\*}g) { | 
| 1172 | 2 |  |  |  |  | 9 | $strlen += $1; | 
| 1173 |  |  |  |  |  |  | } | 
| 1174 |  |  |  |  |  |  | # add on the actual residues | 
| 1175 | 3 |  |  |  |  | 4 | $strlen += $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z}; | 
| 1176 |  |  |  |  |  |  |  | 
| 1177 |  |  |  |  |  |  | my $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) : | 
| 1178 | 3 | 50 |  |  |  | 8 | $hsp->{'meta'}; | 
| 1179 |  |  |  |  |  |  | $self->element_hash( | 
| 1180 |  |  |  |  |  |  | {'Hsp_stranded'  => 'HIT', | 
| 1181 |  |  |  |  |  |  | 'Hsp_qseq'      => $hsp->{'query'}, | 
| 1182 |  |  |  |  |  |  | 'Hsp_hseq'      => $hsp->{'hit'}, | 
| 1183 | 3 |  |  |  |  | 34 | 'Hsp_midline'   => $hsp->{'midline'}, | 
| 1184 |  |  |  |  |  |  | 'Hsp_structure' => $metastr, | 
| 1185 |  |  |  |  |  |  | 'Hsp_query-from' => 1, | 
| 1186 |  |  |  |  |  |  | 'Infernal_query-len' => $strlen, | 
| 1187 |  |  |  |  |  |  | 'Hsp_query-to'   => $strlen, | 
| 1188 |  |  |  |  |  |  | 'Hsp_hit-from'  => $laststart, | 
| 1189 |  |  |  |  |  |  | 'Hsp_hit-to'    => $lastend, | 
| 1190 |  |  |  |  |  |  | 'Hsp_score'     => $lastscore, | 
| 1191 |  |  |  |  |  |  | 'Hsp_bit-score' => $lastscore, | 
| 1192 |  |  |  |  |  |  | }); | 
| 1193 | 3 | 50 | 33 |  |  | 21 | $self->element_hash( | 
| 1194 |  |  |  |  |  |  | {'Hsp_evalue'    => $lasteval, | 
| 1195 |  |  |  |  |  |  | 'Hsp_pvalue'    => $lastpval, | 
| 1196 |  |  |  |  |  |  | }) if ($lasteval && $lastpval); | 
| 1197 | 3 |  |  |  |  | 9 | $self->end_element({'Name' => 'Hsp'}); | 
| 1198 |  |  |  |  |  |  | } | 
| 1199 |  |  |  |  |  |  | # result now ends with // and 'Fin' | 
| 1200 |  |  |  |  |  |  | } elsif ($line =~ m{^//}xms )  { | 
| 1201 | 1 | 50 | 33 |  |  | 3 | if ($self->within_element('result') && $seentop) { | 
| 1202 | 1 | 50 |  |  |  | 2 | if ($self->in_element('hit')) { | 
| 1203 | 1 |  |  |  |  | 5 | $self->element_hash({'Hit_score'    => $maxscore, | 
| 1204 |  |  |  |  |  |  | 'Hit_bits'     => $maxscore}); | 
| 1205 |  |  |  |  |  |  | # don't know where to put minpval yet | 
| 1206 | 1 | 50 |  |  |  | 6 | $self->element_hash({'Hit_signif'   => $mineval}) if $mineval; | 
| 1207 | 1 | 50 |  |  |  | 5 | $self->element_hash({'Hit_p'        => $minpval}) if $minpval; | 
| 1208 | 1 |  |  |  |  | 3 | $self->end_element({'Name' => 'Hit'}); | 
| 1209 |  |  |  |  |  |  | } | 
| 1210 | 1 |  |  |  |  | 4 | last PARSER; | 
| 1211 |  |  |  |  |  |  | } | 
| 1212 |  |  |  |  |  |  | } | 
| 1213 |  |  |  |  |  |  | } | 
| 1214 | 1 | 50 |  |  |  | 4 | $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } ); | 
| 1215 | 1 | 50 |  |  |  | 5 | $self->end_element( { 'Name' => 'Result' } ) if $seentop; | 
| 1216 | 1 |  |  |  |  | 4 | return $self->end_document(); | 
| 1217 |  |  |  |  |  |  | } | 
| 1218 |  |  |  |  |  |  |  | 
| 1219 |  |  |  |  |  |  | # cmsearch 0.81 (pre-1.0) | 
| 1220 |  |  |  |  |  |  | sub _parse_pre { | 
| 1221 | 1 |  |  | 1 |  | 4 | my ($self) = @_; | 
| 1222 | 1 |  |  |  |  | 4 | my $seentop = 0; | 
| 1223 | 1 |  |  |  |  | 9 | local $/ = "\n"; | 
| 1224 | 1 |  |  |  |  | 6 | my ($accession, $db, $algorithm, $description, $version) = | 
| 1225 |  |  |  |  |  |  | ($self->query_accession, $self->database, $self->algorithm, | 
| 1226 |  |  |  |  |  |  | $self->query_description, '0.81'); | 
| 1227 | 1 |  |  |  |  | 3 | my ($maxscore, $mineval, $minpval); | 
| 1228 | 1 |  |  |  |  | 7 | $self->start_document(); | 
| 1229 | 1 |  |  |  |  | 4 | my ($lasthit, $lastscore, $lasteval, $lastpval, $laststart, $lastend); | 
| 1230 |  |  |  |  |  |  | PARSER: | 
| 1231 | 1 |  |  |  |  | 7 | while (my $line = $self->_readline) { | 
| 1232 | 85 | 100 |  |  |  | 370 | next if $line =~ m{^\s+$}; | 
| 1233 |  |  |  |  |  |  | # stats aren't parsed yet... | 
| 1234 | 60 | 100 |  |  |  | 405 | if ($line =~ m{CM\s\d+:\s*(\S+)}xms) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1235 |  |  |  |  |  |  | #$self->debug("Start Result: Found model:$1\n"); | 
| 1236 | 1 | 50 |  |  |  | 5 | if (!$self->within_element('result')) { | 
| 1237 | 1 |  |  |  |  | 4 | $seentop = 1; | 
| 1238 | 1 |  |  |  |  | 9 | $self->start_element({'Name' => 'Result'}); | 
| 1239 | 1 |  |  |  |  | 16 | $self->element_hash({ | 
| 1240 |  |  |  |  |  |  | 'Infernal_program'   => $algorithm, | 
| 1241 |  |  |  |  |  |  | 'Infernal_query-def' => $1, # present in output now | 
| 1242 |  |  |  |  |  |  | 'Infernal_query-acc' => $accession, | 
| 1243 |  |  |  |  |  |  | 'Infernal_querydesc' => $description, | 
| 1244 |  |  |  |  |  |  | 'Infernal_db'        => $db | 
| 1245 |  |  |  |  |  |  | }); | 
| 1246 |  |  |  |  |  |  | } | 
| 1247 |  |  |  |  |  |  | } elsif ($line =~ m{^>\s*(\S+)} ){ | 
| 1248 |  |  |  |  |  |  | #$self->debug("Start Hit: Found hit:$1\n"); | 
| 1249 | 3 | 100 |  |  |  | 10 | if ($self->in_element('hit')) { | 
| 1250 | 2 |  |  |  |  | 13 | $self->element_hash({'Hit_score' => $maxscore, | 
| 1251 |  |  |  |  |  |  | 'Hit_bits'  => $maxscore}); | 
| 1252 | 2 |  |  |  |  | 6 | ($maxscore, $minpval, $mineval) = undef; | 
| 1253 | 2 |  |  |  |  | 7 | $self->end_element({'Name' => 'Hit'}); | 
| 1254 |  |  |  |  |  |  | } | 
| 1255 | 3 |  |  |  |  | 20 | $lasthit = $1; | 
| 1256 |  |  |  |  |  |  | } | 
| 1257 |  |  |  |  |  |  | elsif ($line =~ m{ | 
| 1258 |  |  |  |  |  |  | ^\sQuery\s=\s\d+\s-\s\d+,\s   # Query start/end | 
| 1259 |  |  |  |  |  |  | Target\s=\s(\d+)\s-\s(\d+)    # Target start/end | 
| 1260 |  |  |  |  |  |  | }xmso) { | 
| 1261 |  |  |  |  |  |  | # Query (model) start/end always the same, determined from | 
| 1262 |  |  |  |  |  |  | # the HSP length | 
| 1263 | 15 |  |  |  |  | 79 | ($laststart, $lastend) = ($1, $2); | 
| 1264 |  |  |  |  |  |  | #$self->debug("Found hit coords:$laststart - $lastend\n"); | 
| 1265 |  |  |  |  |  |  | } elsif ($line =~ m{ | 
| 1266 |  |  |  |  |  |  | ^\sScore\s=\s([\d\.]+),\s   # Score = Bitscore (for now) | 
| 1267 |  |  |  |  |  |  | (?:E\s=\s([\d\.e-]+),\s     # E-val optional | 
| 1268 |  |  |  |  |  |  | P\s=\s([\d\.e-]+),\s)?     # P-val optional | 
| 1269 |  |  |  |  |  |  | GC\s=                       # GC not captured | 
| 1270 |  |  |  |  |  |  | }xmso | 
| 1271 |  |  |  |  |  |  | ) { | 
| 1272 | 15 |  |  |  |  | 58 | ($lastscore, $lasteval, $lastpval) = ($1, $2, $3); | 
| 1273 |  |  |  |  |  |  | #$self->debug(sprintf("Found hit data:Score:%s,Eval:%s,Pval:%s\n",$lastscore, $lasteval||'', $lastpval||'')); | 
| 1274 | 15 |  | 66 |  |  | 44 | $maxscore ||= $lastscore; | 
| 1275 | 15 | 50 | 33 |  |  | 56 | if ($lasteval && $lastpval) { | 
| 1276 | 15 |  | 66 |  |  | 34 | $mineval ||= $lasteval; | 
| 1277 | 15 |  | 66 |  |  | 38 | $minpval ||= $lastpval; | 
| 1278 | 15 | 50 |  |  |  | 57 | $mineval = ($mineval > $lasteval)  ? $lasteval : | 
| 1279 |  |  |  |  |  |  | $mineval; | 
| 1280 | 15 | 50 |  |  |  | 40 | $minpval = ($minpval > $lastpval)  ? $lastpval : | 
| 1281 |  |  |  |  |  |  | $minpval; | 
| 1282 |  |  |  |  |  |  | } | 
| 1283 | 15 | 50 |  |  |  | 31 | $maxscore = ($maxscore < $lastscore)  ? $lastscore : | 
| 1284 |  |  |  |  |  |  | $maxscore; | 
| 1285 | 15 | 100 |  |  |  | 37 | if (!$self->within_element('hit')) { | 
| 1286 | 3 |  |  |  |  | 17 | my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit); | 
| 1287 | 3 |  |  |  |  | 18 | $self->start_element({'Name' => 'Hit'}); | 
| 1288 | 3 | 0 |  |  |  | 32 | $self->element_hash({ | 
|  |  | 50 |  |  |  |  |  | 
| 1289 |  |  |  |  |  |  | 'Hit_id'           => $lasthit, | 
| 1290 |  |  |  |  |  |  | 'Hit_accession'    => $ver ? "$acc.$ver" : | 
| 1291 |  |  |  |  |  |  | $acc ? $acc : $lasthit, | 
| 1292 |  |  |  |  |  |  | 'Hit_gi'           => $gi | 
| 1293 |  |  |  |  |  |  | }); | 
| 1294 |  |  |  |  |  |  | } | 
| 1295 | 15 | 50 |  |  |  | 39 | if (!$self->in_element('hsp')) { | 
| 1296 | 15 |  |  |  |  | 45 | $self->start_element({'Name' => 'Hsp'}); | 
| 1297 |  |  |  |  |  |  | } | 
| 1298 |  |  |  |  |  |  |  | 
| 1299 |  |  |  |  |  |  | # hsp is similar to older output | 
| 1300 |  |  |  |  |  |  | } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP | 
| 1301 | 15 |  |  |  |  | 41 | $self->_pushback($line); # set up for loop | 
| 1302 |  |  |  |  |  |  | #$self->debug("Start HSP\n"); | 
| 1303 |  |  |  |  |  |  | # what is length of the gap to the structure data? | 
| 1304 | 15 |  |  |  |  | 40 | my $offset = length($1); | 
| 1305 | 15 |  |  |  |  | 30 | my ($ct, $strln) = 0; | 
| 1306 | 15 |  |  |  |  | 20 | my $hsp; | 
| 1307 | 15 |  |  |  |  | 58 | HSP: | 
| 1308 |  |  |  |  |  |  | my %hsp_key = ('0' => 'meta', | 
| 1309 |  |  |  |  |  |  | '1' => 'query', | 
| 1310 |  |  |  |  |  |  | '2' => 'midline', | 
| 1311 |  |  |  |  |  |  | '3' => 'hit'); | 
| 1312 |  |  |  |  |  |  | HSP: | 
| 1313 | 15 |  |  |  |  | 45 | while (defined ($line = $self->_readline)) { | 
| 1314 | 180 |  |  |  |  | 263 | chomp $line; | 
| 1315 | 180 | 100 |  |  |  | 301 | next if (!$line); # toss empty lines | 
| 1316 |  |  |  |  |  |  | # next if $line =~ m{^\s*$}; # toss empty lines | 
| 1317 |  |  |  |  |  |  | # it is possible to have homology lines consisting | 
| 1318 |  |  |  |  |  |  | # entirely of spaces if the subject has a large | 
| 1319 |  |  |  |  |  |  | # insertion where nothing matches the model | 
| 1320 |  |  |  |  |  |  |  | 
| 1321 |  |  |  |  |  |  | # exit loop if at end of file or upon next hit/HSP | 
| 1322 | 135 | 100 |  |  |  | 370 | if ($line =~ m{^\s{0,2}\S+}) { | 
| 1323 | 15 |  |  |  |  | 40 | $self->_pushback($line); | 
| 1324 | 15 |  |  |  |  | 21 | last HSP; | 
| 1325 |  |  |  |  |  |  | } | 
| 1326 |  |  |  |  |  |  | # iterate to keep track of each line (4 lines per hsp block) | 
| 1327 | 120 |  |  |  |  | 167 | my $iterator = $ct%4; | 
| 1328 |  |  |  |  |  |  | # strlen set only with structure lines (proper length) | 
| 1329 | 120 | 100 |  |  |  | 170 | $strln = length($line) if $iterator == 0; | 
| 1330 |  |  |  |  |  |  | # only grab the data needed (hit start and stop in hit line above) | 
| 1331 |  |  |  |  |  |  |  | 
| 1332 | 120 |  |  |  |  | 220 | my $data = substr($line, $offset, $strln-$offset); | 
| 1333 | 120 |  |  |  |  | 342 | $hsp->{ $hsp_key{$iterator} } .= $data; | 
| 1334 |  |  |  |  |  |  |  | 
| 1335 | 120 |  |  |  |  | 281 | $ct++; | 
| 1336 |  |  |  |  |  |  | } | 
| 1337 |  |  |  |  |  |  |  | 
| 1338 |  |  |  |  |  |  | # query start, end are from the actual query length (entire hit is | 
| 1339 |  |  |  |  |  |  | # mapped to CM data, so all CM data is represented) | 
| 1340 |  |  |  |  |  |  | # works for now... | 
| 1341 | 15 | 50 |  |  |  | 36 | if ($self->in_element('hsp')) { | 
| 1342 | 15 |  |  |  |  | 30 | my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z}; | 
| 1343 |  |  |  |  |  |  |  | 
| 1344 | 15 |  |  |  |  | 25 | my $metastr; | 
| 1345 |  |  |  |  |  |  | $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) : | 
| 1346 | 15 | 50 |  |  |  | 31 | ($hsp->{'meta'}); | 
| 1347 |  |  |  |  |  |  | $self->element_hash( | 
| 1348 |  |  |  |  |  |  | {'Hsp_stranded'  => 'HIT', | 
| 1349 |  |  |  |  |  |  | 'Hsp_qseq'      => $hsp->{'query'}, | 
| 1350 |  |  |  |  |  |  | 'Hsp_hseq'      => $hsp->{'hit'}, | 
| 1351 | 15 |  |  |  |  | 153 | 'Hsp_midline'   => $hsp->{'midline'}, | 
| 1352 |  |  |  |  |  |  | 'Hsp_structure' => $metastr, | 
| 1353 |  |  |  |  |  |  | 'Hsp_query-from' => 1, | 
| 1354 |  |  |  |  |  |  | 'Infernal_query-len' => $strlen, | 
| 1355 |  |  |  |  |  |  | 'Hsp_query-to'   => $strlen, | 
| 1356 |  |  |  |  |  |  | 'Hsp_hit-from'  => $laststart, | 
| 1357 |  |  |  |  |  |  | 'Hsp_hit-to'    => $lastend, | 
| 1358 |  |  |  |  |  |  | 'Hsp_score'     => $lastscore, | 
| 1359 |  |  |  |  |  |  | 'Hsp_bit-score' => $lastscore, | 
| 1360 |  |  |  |  |  |  | }); | 
| 1361 | 15 | 50 | 33 |  |  | 142 | $self->element_hash( | 
| 1362 |  |  |  |  |  |  | {'Hsp_evalue'    => $lasteval, | 
| 1363 |  |  |  |  |  |  | 'Hsp_pvalue'    => $lastpval, | 
| 1364 |  |  |  |  |  |  | }) if ($lasteval && $lastpval); | 
| 1365 | 15 |  |  |  |  | 70 | $self->end_element({'Name' => 'Hsp'}); | 
| 1366 |  |  |  |  |  |  | } | 
| 1367 |  |  |  |  |  |  | # result now ends with // and 'Fin' | 
| 1368 |  |  |  |  |  |  | } elsif ($line =~ m{^//}xms )  { | 
| 1369 | 1 | 50 | 33 |  |  | 4 | if ($self->within_element('result') && $seentop) { | 
| 1370 | 1 |  |  |  |  | 7 | $self->element( | 
| 1371 |  |  |  |  |  |  | {'Name' => 'Infernal_version', | 
| 1372 |  |  |  |  |  |  | 'Data' => $version} | 
| 1373 |  |  |  |  |  |  | ); | 
| 1374 | 1 | 50 |  |  |  | 3 | if ($self->in_element('hit')) { | 
| 1375 | 1 |  |  |  |  | 4 | $self->element_hash({'Hit_score'    => $maxscore, | 
| 1376 |  |  |  |  |  |  | 'Hit_bits'     => $maxscore}); | 
| 1377 |  |  |  |  |  |  | # don't know where to put minpval yet | 
| 1378 | 1 | 50 |  |  |  | 14 | $self->element_hash({'Hit_signif'   => $mineval}) if $mineval; | 
| 1379 | 1 |  |  |  |  | 5 | $self->end_element({'Name' => 'Hit'}); | 
| 1380 |  |  |  |  |  |  | } | 
| 1381 | 1 |  |  |  |  | 3 | last PARSER; | 
| 1382 |  |  |  |  |  |  | } | 
| 1383 |  |  |  |  |  |  | } | 
| 1384 |  |  |  |  |  |  | } | 
| 1385 | 1 | 50 |  |  |  | 4 | $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } ); | 
| 1386 | 1 | 50 |  |  |  | 6 | $self->end_element( { 'Name' => 'Result' } ) if $seentop; | 
| 1387 | 1 |  |  |  |  | 5 | return $self->end_document(); | 
| 1388 |  |  |  |  |  |  | } | 
| 1389 |  |  |  |  |  |  |  | 
| 1390 |  |  |  |  |  |  | # cmsearch 0.72 and below; will likely be dropped when Infernal 1.0 is released | 
| 1391 |  |  |  |  |  |  | sub _parse_old { | 
| 1392 | 2 |  |  | 2 |  | 6 | my ($self) = @_; | 
| 1393 | 2 |  |  |  |  | 5 | my $seentop = 0; | 
| 1394 | 2 |  |  |  |  | 14 | local $/ = "\n"; | 
| 1395 | 2 |  |  |  |  | 8 | my ($accession, $db, $algorithm, $model, $description, $version) = | 
| 1396 |  |  |  |  |  |  | ($self->query_accession, $self->database, $self->algorithm, | 
| 1397 |  |  |  |  |  |  | $self->model, $self->query_description, $self->version); | 
| 1398 | 2 |  |  |  |  | 5 | my $maxscore; | 
| 1399 | 2 |  |  |  |  | 5 | my $cutoff = $self->hsp_minscore; | 
| 1400 | 2 |  |  |  |  | 9 | $self->start_document(); | 
| 1401 | 2 |  |  |  |  | 4 | local ($_); | 
| 1402 | 2 |  |  |  |  | 3 | my $line; | 
| 1403 | 2 |  |  |  |  | 7 | my ($lasthit, $lastscore, $laststart, $lastend); | 
| 1404 | 2 |  |  |  |  | 0 | my $hitline; | 
| 1405 |  |  |  |  |  |  | PARSER: | 
| 1406 | 2 |  |  |  |  | 8 | while ( defined( $line = $self->_readline ) ) { | 
| 1407 | 88 | 50 |  |  |  | 348 | next if $line =~ m{^\s+$}; | 
| 1408 |  |  |  |  |  |  | # bypass this for now... | 
| 1409 | 88 | 50 |  |  |  | 208 | next if $line =~ m{^HMM\shit}; | 
| 1410 |  |  |  |  |  |  | # pre-0.81 | 
| 1411 | 88 | 100 | 66 |  |  | 507 | if ($line =~ m{^sequence:\s+(\S+)} ){ | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1412 | 4 | 100 |  |  |  | 16 | if (!$self->within_element('result')) { | 
| 1413 | 2 |  |  |  |  | 5 | $seentop = 1; | 
| 1414 | 2 |  |  |  |  | 13 | $self->start_element({'Name' => 'Result'}); | 
| 1415 | 2 |  |  |  |  | 19 | $self->element_hash({ | 
| 1416 |  |  |  |  |  |  | 'Infernal_program'   => $algorithm, | 
| 1417 |  |  |  |  |  |  | 'Infernal_query-def' => $model, | 
| 1418 |  |  |  |  |  |  | 'Infernal_query-acc' => $accession, | 
| 1419 |  |  |  |  |  |  | 'Infernal_querydesc' => $description, | 
| 1420 |  |  |  |  |  |  | 'Infernal_db'        => $db | 
| 1421 |  |  |  |  |  |  | }); | 
| 1422 |  |  |  |  |  |  | } | 
| 1423 | 4 | 100 |  |  |  | 17 | if ($self->in_element('hit')) { | 
| 1424 | 2 |  |  |  |  | 14 | $self->element_hash({'Hit_score' => $maxscore, | 
| 1425 |  |  |  |  |  |  | 'Hit_bits'  => $maxscore}); | 
| 1426 | 2 |  |  |  |  | 9 | $maxscore = undef; | 
| 1427 | 2 |  |  |  |  | 11 | $self->end_element({'Name' => 'Hit'}); | 
| 1428 |  |  |  |  |  |  | } | 
| 1429 | 4 |  |  |  |  | 24 | $lasthit = $1; | 
| 1430 |  |  |  |  |  |  | } elsif ($line =~ m{^hit\s+\d+\s+:\s+(\d+)\s+(\d+)\s+(\d+\.\d+)\s+bits}xms) { | 
| 1431 | 52 |  |  |  |  | 192 | ($laststart, $lastend, $lastscore) = ($1, $2, $3); | 
| 1432 | 52 | 100 |  |  |  | 123 | $maxscore = $lastscore unless $maxscore; | 
| 1433 | 52 | 100 |  |  |  | 483 | if ($lastscore > $cutoff) { | 
| 1434 | 12 | 100 |  |  |  | 32 | if (!$self->within_element('hit')) { | 
| 1435 | 4 |  |  |  |  | 18 | my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit); | 
| 1436 | 4 |  |  |  |  | 30 | $self->start_element({'Name' => 'Hit'}); | 
| 1437 | 4 | 0 |  |  |  | 40 | $self->element_hash({ | 
|  |  | 50 |  |  |  |  |  | 
| 1438 |  |  |  |  |  |  | 'Hit_id'           => $lasthit, | 
| 1439 |  |  |  |  |  |  | 'Hit_accession'    => $ver ? "$acc.$ver" : | 
| 1440 |  |  |  |  |  |  | $acc ? $acc : $lasthit, | 
| 1441 |  |  |  |  |  |  | 'Hit_gi'           => $gi | 
| 1442 |  |  |  |  |  |  | }); | 
| 1443 |  |  |  |  |  |  | } | 
| 1444 |  |  |  |  |  |  | # necessary as infernal 0.71 has repeated hit line | 
| 1445 | 12 | 100 |  |  |  | 38 | if (!$self->in_element('hsp')) { | 
| 1446 | 6 |  |  |  |  | 23 | $self->start_element({'Name' => 'Hsp'}); | 
| 1447 |  |  |  |  |  |  | } | 
| 1448 | 12 | 100 |  |  |  | 61 | $maxscore = ($maxscore < $lastscore)  ? $lastscore : | 
| 1449 |  |  |  |  |  |  | $maxscore; | 
| 1450 |  |  |  |  |  |  | } | 
| 1451 |  |  |  |  |  |  | } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP | 
| 1452 | 26 |  |  |  |  | 84 | $self->_pushback($line); # set up for loop | 
| 1453 |  |  |  |  |  |  | # what is length of the gap to the structure data? | 
| 1454 | 26 |  |  |  |  | 62 | my $offset = length($1); | 
| 1455 | 26 |  |  |  |  | 48 | my ($ct, $strln) = 0; | 
| 1456 | 26 |  |  |  |  | 33 | my $hsp; | 
| 1457 | 26 |  |  |  |  | 101 | HSP: | 
| 1458 |  |  |  |  |  |  | my %hsp_key = ('0' => 'meta', | 
| 1459 |  |  |  |  |  |  | '1' => 'query', | 
| 1460 |  |  |  |  |  |  | '2' => 'midline', | 
| 1461 |  |  |  |  |  |  | '3' => 'hit'); | 
| 1462 |  |  |  |  |  |  | HSP: | 
| 1463 | 26 |  |  |  |  | 71 | while ($line = $self->_readline) { | 
| 1464 | 288 | 100 |  |  |  | 1127 | next if $line =~ m{^\s*$}; # toss empty lines | 
| 1465 | 234 |  |  |  |  | 355 | chomp $line; | 
| 1466 |  |  |  |  |  |  | # exit loop if at end of file or upon next hit/HSP | 
| 1467 | 234 | 100 | 66 |  |  | 834 | if (!defined($line) || $line =~ m{^\S+}) { | 
| 1468 | 26 |  |  |  |  | 87 | $self->_pushback($line); | 
| 1469 | 26 |  |  |  |  | 51 | last HSP; | 
| 1470 |  |  |  |  |  |  | } | 
| 1471 |  |  |  |  |  |  | # iterate to keep track of each line (4 lines per hsp block) | 
| 1472 | 208 |  |  |  |  | 370 | my $iterator = $ct%4; | 
| 1473 |  |  |  |  |  |  | # strlen set only with structure lines (proper length) | 
| 1474 | 208 | 100 |  |  |  | 359 | $strln = length($line) if $iterator == 0; | 
| 1475 |  |  |  |  |  |  | # only grab the data needed (hit start and stop in hit line above) | 
| 1476 |  |  |  |  |  |  |  | 
| 1477 | 208 |  |  |  |  | 427 | my $data = substr($line, $offset, $strln-$offset); | 
| 1478 | 208 |  |  |  |  | 714 | $hsp->{ $hsp_key{$iterator} } .= $data; | 
| 1479 | 208 |  |  |  |  | 667 | $ct++; | 
| 1480 |  |  |  |  |  |  | } | 
| 1481 |  |  |  |  |  |  | # query start, end are from the actual query length (entire hit is | 
| 1482 |  |  |  |  |  |  | # mapped to CM data, so all CM data is represented) | 
| 1483 |  |  |  |  |  |  | # works for now... | 
| 1484 | 26 | 100 |  |  |  | 64 | if ($self->in_element('hsp')) { | 
| 1485 | 6 |  |  |  |  | 20 | my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z}; | 
| 1486 |  |  |  |  |  |  |  | 
| 1487 | 6 |  |  |  |  | 10 | my $metastr; | 
| 1488 |  |  |  |  |  |  | # Ugh...these should be passed in a hash | 
| 1489 |  |  |  |  |  |  | $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) : | 
| 1490 | 6 | 100 |  |  |  | 17 | ($hsp->{'meta'}); | 
| 1491 |  |  |  |  |  |  | $self->element_hash( | 
| 1492 |  |  |  |  |  |  | {'Hsp_stranded'  => 'HIT', | 
| 1493 |  |  |  |  |  |  | 'Hsp_qseq'      => $hsp->{'query'}, | 
| 1494 |  |  |  |  |  |  | 'Hsp_hseq'      => $hsp->{'hit'}, | 
| 1495 | 6 |  |  |  |  | 81 | 'Hsp_midline'   => $hsp->{'midline'}, | 
| 1496 |  |  |  |  |  |  | 'Hsp_structure' => $metastr, | 
| 1497 |  |  |  |  |  |  | 'Hsp_query-from' => 1, | 
| 1498 |  |  |  |  |  |  | 'Infernal_query-len' => $strlen, | 
| 1499 |  |  |  |  |  |  | 'Hsp_query-to'   => $strlen, | 
| 1500 |  |  |  |  |  |  | 'Hsp_hit-from'  => $laststart, | 
| 1501 |  |  |  |  |  |  | 'Hsp_hit-to'    => $lastend, | 
| 1502 |  |  |  |  |  |  | 'Hsp_score'     => $lastscore, | 
| 1503 |  |  |  |  |  |  | 'Hsp_bit-score' => $lastscore | 
| 1504 |  |  |  |  |  |  | }); | 
| 1505 | 6 |  |  |  |  | 50 | $self->end_element({'Name' => 'Hsp'}); | 
| 1506 |  |  |  |  |  |  | } | 
| 1507 |  |  |  |  |  |  | } elsif ($line =~ m{^memory}xms || $line =~ m{^CYK\smemory}xms )  { | 
| 1508 | 2 | 50 | 33 |  |  | 8 | if ($self->within_element('result') && $seentop) { | 
| 1509 | 2 |  |  |  |  | 14 | $self->element( | 
| 1510 |  |  |  |  |  |  | {'Name' => 'Infernal_version', | 
| 1511 |  |  |  |  |  |  | 'Data' => $version} | 
| 1512 |  |  |  |  |  |  | ); | 
| 1513 | 2 | 50 |  |  |  | 9 | if ($self->in_element('hit')) { | 
| 1514 | 2 |  |  |  |  | 12 | $self->element_hash({'Hit_score'    => $maxscore, | 
| 1515 |  |  |  |  |  |  | 'Hit_bits'     => $maxscore}); | 
| 1516 | 2 |  |  |  |  | 13 | $self->end_element({'Name' => 'Hit'}); | 
| 1517 |  |  |  |  |  |  | } | 
| 1518 | 2 |  |  |  |  | 7 | last PARSER; | 
| 1519 |  |  |  |  |  |  | } | 
| 1520 |  |  |  |  |  |  | } | 
| 1521 |  |  |  |  |  |  | } | 
| 1522 | 2 | 50 |  |  |  | 7 | $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } ); | 
| 1523 | 2 | 50 |  |  |  | 13 | $self->end_element( { 'Name' => 'Result' } ) if $seentop; | 
| 1524 | 2 |  |  |  |  | 12 | return $self->end_document(); | 
| 1525 |  |  |  |  |  |  | } | 
| 1526 |  |  |  |  |  |  |  | 
| 1527 |  |  |  |  |  |  | 1; |