File Coverage

Bio/SearchIO/IteratedSearchResultEventBuilder.pm
Criterion Covered Total %
statement 89 92 96.7
branch 28 28 100.0
condition 9 23 39.1
subroutine 10 10 100.0
pod 5 5 100.0
total 141 158 89.2


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------
2             #
3             # BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Steve Chervitz and Jason Stajich
8             #
9             # Copyright Steve Chervitz
10             #
11             # You may distribute this module under the same terms as perl itself
12             #------------------------------------------------------------------
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::SearchIO::IteratedSearchResultEventBuilder - Event Handler for
19             SearchIO events.
20              
21             =head1 SYNOPSIS
22              
23             # Do not use this object directly, this object is part of the SearchIO
24             # event based parsing system.
25              
26             =head1 DESCRIPTION
27              
28             This object handles Search Events generated by the SearchIO classes
29             and build appropriate Bio::Search::* objects from them.
30              
31             =head1 FEEDBACK
32              
33             =head2 Mailing Lists
34              
35             User feedback is an integral part of the evolution of this and other
36             Bioperl modules. Send your comments and suggestions preferably to
37             the Bioperl mailing list. Your participation is much appreciated.
38              
39             bioperl-l@bioperl.org - General discussion
40             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41              
42             =head2 Support
43              
44             Please direct usage questions or support issues to the mailing list:
45              
46             I
47              
48             rather than to the module maintainer directly. Many experienced and
49             reponsive experts will be able look at the problem and quickly
50             address it. Please include a thorough description of the problem
51             with code and data examples if at all possible.
52              
53             =head2 Reporting Bugs
54              
55             Report bugs to the Bioperl bug tracking system to help us keep track
56             of the bugs and their resolution. Bug reports can be submitted via the
57             web:
58              
59             https://github.com/bioperl/bioperl-live/issues
60              
61             =head1 AUTHOR - Steve Chervitz
62              
63             Email sac-at-bioperl.org
64              
65             =head1 CONTRIBUTORS
66              
67             Parts of code based on SearchResultEventBuilder by Jason Stajich
68             jason@bioperl.org
69              
70             Sendu Bala, bix@sendu.me.uk
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object methods.
75             Internal methods are usually preceded with a _
76              
77             =cut
78              
79              
80             # Let the code begin...
81              
82              
83             package Bio::SearchIO::IteratedSearchResultEventBuilder;
84              
85 12     12   44 use strict;
  12         13  
  12         286  
86              
87 12     12   37 use Bio::Factory::ObjectFactory;
  12         12  
  12         354  
88              
89 12     12   35 use base qw(Bio::SearchIO::SearchResultEventBuilder);
  12         12  
  12         8731  
90              
91             =head2 new
92              
93             Title : new
94             Usage : my $obj = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
95             Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object
96             Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
97             Args : -hsp_factory => Bio::Factory::ObjectFactoryI
98             -hit_factory => Bio::Factory::ObjectFactoryI
99             -result_factory => Bio::Factory::ObjectFactoryI
100             -iteration_factory => Bio::Factory::ObjectFactoryI
101             -inclusion_threshold => e-value threshold for inclusion in the
102             PSI-BLAST score matrix model (blastpgp)
103             -signif => float or scientific notation number to be used
104             as a P- or Expect value cutoff
105             -score => integer or scientific notation number to be used
106             as a blast score value cutoff
107             -bits => integer or scientific notation number to be used
108             as a bit score value cutoff
109             -hit_filter => reference to a function to be used for
110             filtering hits based on arbitrary criteria.
111              
112             See L for more information
113              
114             =cut
115              
116             sub new {
117 92     92 1 232 my ($class,@args) = @_;
118 92         352 my $self = $class->SUPER::new(@args);
119 92         490 my ($resultF, $iterationF, $hitF, $hspF) =
120             $self->_rearrange([qw(RESULT_FACTORY
121             ITERATION_FACTORY
122             HIT_FACTORY
123             HSP_FACTORY)],@args);
124 92         330 $self->_init_parse_params(@args);
125              
126             # Note that we need to override the setting of result and factories here
127             # so that we can set different default factories than are set by the super class.
128 92   33     632 $self->register_factory('result', $resultF ||
129             Bio::Factory::ObjectFactory->new(
130             -type => 'Bio::Search::Result::BlastResult',
131             -interface => 'Bio::Search::Result::ResultI'));
132              
133 92   33     225 $self->register_factory('hit', $hitF ||
134             Bio::Factory::ObjectFactory->new(
135             -type => 'Bio::Search::Hit::BlastHit',
136             -interface => 'Bio::Search::Hit::HitI'));
137              
138 92   33     128 $self->register_factory('hsp', $hspF ||
139             Bio::Factory::ObjectFactory->new(
140             -type => 'Bio::Search::HSP::GenericHSP',
141             -interface => 'Bio::Search::HSP::HSPI'));
142              
143             # TODO: Change this to BlastIteration (maybe)
144 92   33     164 $self->register_factory('iteration', $iterationF ||
145             Bio::Factory::ObjectFactory->new(
146             -type => 'Bio::Search::Iteration::GenericIteration',
147             -interface => 'Bio::Search::Iteration::IterationI'));
148              
149 92         261 return $self;
150             }
151              
152             =head2 will_handle
153              
154             Title : will_handle
155             Usage : if( $handler->will_handle($event_type) ) { ... }
156             Function: Tests if this event builder knows how to process a specific event
157             Returns : boolean
158             Args : event type name
159              
160             =cut
161              
162             sub will_handle{
163 338     338 1 326 my ($self,$type) = @_;
164             # these are the events we recognize
165 338   33     2591 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result'
166             || $type eq 'iteration' || $type eq 'newhits' || $type eq 'oldhits' );
167             }
168              
169             =head2 SAX methods
170              
171             =cut
172              
173             =head2 start_result
174              
175             Title : start_result
176             Usage : $handler->start_result($resulttype)
177             Function: Begins a result event cycle
178             Returns : none
179             Args : Type of Report
180              
181             =cut
182              
183             sub start_result {
184 96     96 1 149 my $self = shift;
185             #print STDERR "ISREB: start_result()\n";
186 96         439 $self->SUPER::start_result(@_);
187 96         292 $self->{'_iterations'} = [];
188 96         189 $self->{'_iteration_count'} = 0;
189 96         160 $self->{'_old_hit_names'} = undef;
190 96         185 $self->{'_hit_names_below'} = undef;
191 96         339 return;
192             }
193              
194             =head2 start_iteration
195              
196             Title : start_iteration
197             Usage : $handler->start_iteration()
198             Function: Starts an Iteration event cycle
199             Returns : none
200             Args : type of event and associated hashref
201              
202             =cut
203              
204             sub start_iteration {
205 93     93 1 193 my ($self,$type) = @_;
206              
207             #print STDERR "ISREB: start_iteration()\n";
208 93         161 $self->{'_iteration_count'}++;
209              
210             # Reset arrays for the various classes of hits.
211             # $self->{'_newhits_unclassified'} = [];
212 93         257 $self->{'_newhits_below'} = [];
213 93         673 $self->{'_newhits_not_below'} = [];
214 93         302 $self->{'_oldhits_below'} = [];
215 93         183 $self->{'_oldhits_newly_below'} = [];
216 93         274 $self->{'_oldhits_not_below'} = [];
217 93         134 $self->{'_hitcount'} = 0;
218 93         204 return;
219             }
220              
221              
222             =head2 end_iteration
223              
224             Title : end_iteration
225             Usage : $handler->end_iteration()
226             Function: Ends an Iteration event cycle
227             Returns : Bio::Search::Iteration object
228             Args : type of event and associated hashref
229              
230             =cut
231              
232             sub end_iteration {
233 93     93 1 195 my ($self,$type,$data) = @_;
234              
235             # print STDERR "ISREB: end_iteration()\n";
236              
237 0         0 my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
  0         0  
  0         0  
238 93         143 grep { /^ITERATION/ } keys %{$data};
  2842         2439  
  93         494  
239              
240 93         305 $args{'-number'} = $self->{'_iteration_count'};
241 93         204 $args{'-oldhits_below'} = $self->{'_oldhits_below'};
242 93         223 $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
243 93         175 $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
244 93         167 $args{'-newhits_below'} = $self->{'_newhits_below'};
245 93         152 $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
246 93         273 $args{'-hit_factory'} = $self->factory('hit');
247              
248 93         260 my $it = $self->factory('iteration')->create_object(%args);
249 93         184 push @{$self->{'_iterations'}}, $it;
  93         281  
250 93         308 return $it;
251             }
252              
253             # Title : _add_hit (private function for internal use only)
254             # Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
255             # Argument: Bio::Search::Hit::HitI object
256              
257             sub _add_hit {
258 2446     2446   2116 my ($self, $hit) = @_;
259              
260 2446         3977 my $hit_name = uc($hit->{-name});
261 2446         2937 my $hit_signif = $hit->{-significance};
262 2446         2349 my $ithresh = $self->{'_inclusion_threshold'};
263              
264             # Test significance using custom function (if supplied)
265 2446         1848 my $add_hit = 1;
266              
267 2446         2252 my $hit_filter = $self->{'_hit_filter'};
268              
269 2446 100       2946 if($hit_filter) {
270             # since &hit_filter is out of our control and would expect a HitI object,
271             # we're forced to make one for it
272 4         10 $hit = $self->factory('hit')->create_object(%{$hit});
  4         21  
273 4 100       15 $add_hit = 0 unless &$hit_filter($hit);
274             }
275             else {
276 2442 100       3952 if($self->{'_confirm_significance'}) {
277 4 100       19 $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
278             }
279 2442 100       3851 if($self->{'_confirm_score'}) {
280 4   33     21 my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
281 4 100       13 $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
282             }
283 2442 100       3558 if($self->{'_confirm_bits'}) {
284 4   33     89 my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
285 4 100       26 $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
286             }
287             }
288              
289 2446 100       5079 $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
290             # Building hit lookup hashes for determining if the hit is old/new and
291             # above/below threshold.
292 2446         4195 $self->{'_old_hit_names'}->{$hit_name}++;
293 2446 100       7614 $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
294             }
295              
296             # Title : _store_hit (private function for internal use only)
297             # Purpose : Collects hit objects into defined sets that are useful for
298             # analyzing PSI-blast results.
299             # These are ultimately added to the iteration object in end_iteration().
300             #
301             # Strategy:
302             # Primary split = old vs. new
303             # Secondary split = below vs. above threshold
304             # 1. Has this hit occurred in a previous iteration?
305             # 1.1. If yes, was it below threshold?
306             # 1.1.1. If yes, ---> [oldhits_below]
307             # 1.1.2. If no, is it now below threshold?
308             # 1.1.2.1. If yes, ---> [oldhits_newly_below]
309             # 1.1.2.2. If no, ---> [oldhits_not_below]
310             # 1.2. If no, is it below threshold?
311             # 1.2.1. If yes, ---> [newhits_below]
312             # 1.2.2. If no, ---> [newhits_not_below]
313             # 1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
314             # Note: As long as there's a default inclusion threshold,
315             # there won't be an unclassified set.
316             #
317             # For the first iteration, it might be nice to detect non-PSI blast reports
318             # and put the hits in the unclassified set.
319             # However, it shouldn't matter where the hits get put for the first iteration
320             # for non-PSI blast reports since they'll get flattened out in the
321             # result and iteration search objects.
322              
323             sub _store_hit {
324 2438     2438   3233 my ($self, $hit, $hit_name, $hit_signif) = @_;
325              
326 2438         2162 my $ithresh = $self->{'_inclusion_threshold'};
327              
328             # This is the assumption leading to Bug 1986. The assumption here is that
329             # the hit name is unique (and thus new), therefore any subsequent encounters
330             # with a hit containing the same name are filed as old hits. This isn't
331             # always true (see the bug report for a few examples). Adding an explicit
332             # check for the presence of iterations, adding to new hits otherwise.
333              
334 2438 100 100     5951 if (exists $self->{'_old_hit_names'}->{$hit_name}
335 121         183 && scalar @{$self->{_iterations}}) {
336 117 100       264 if (exists $self->{'_hit_names_below'}->{$hit_name}) {
    100          
337 109         89 push @{$self->{'_oldhits_below'}}, $hit;
  109         159  
338             } elsif ($hit_signif <= $ithresh) {
339 3         5 push @{$self->{'_oldhits_newly_below'}}, $hit;
  3         7  
340             } else {
341 5         6 push @{$self->{'_oldhits_not_below'}}, $hit;
  5         9  
342             }
343             } else {
344 2321 100       5879 if ($hit_signif <= $ithresh) {
345 1773         1296 push @{$self->{'_newhits_below'}}, $hit;
  1773         2788  
346             } else {
347 548         451 push @{$self->{'_newhits_not_below'}}, $hit;
  548         1007  
348             }
349             }
350 2438         3292 $self->{'_hitcount'}++;
351             }
352              
353             1;