File Coverage

Bio/SearchIO/IteratedSearchResultEventBuilder.pm
Criterion Covered Total %
statement 89 92 96.7
branch 28 28 100.0
condition 9 23 39.1
subroutine 10 10 100.0
pod 5 5 100.0
total 141 158 89.2


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------
2             #
3             # BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Steve Chervitz and Jason Stajich
8             #
9             # Copyright Steve Chervitz
10             #
11             # You may distribute this module under the same terms as perl itself
12             #------------------------------------------------------------------
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::SearchIO::IteratedSearchResultEventBuilder - Event Handler for
19             SearchIO events.
20              
21             =head1 SYNOPSIS
22              
23             # Do not use this object directly, this object is part of the SearchIO
24             # event based parsing system.
25              
26             =head1 DESCRIPTION
27              
28             This object handles Search Events generated by the SearchIO classes
29             and build appropriate Bio::Search::* objects from them.
30              
31             =head1 FEEDBACK
32              
33             =head2 Mailing Lists
34              
35             User feedback is an integral part of the evolution of this and other
36             Bioperl modules. Send your comments and suggestions preferably to
37             the Bioperl mailing list. Your participation is much appreciated.
38              
39             bioperl-l@bioperl.org - General discussion
40             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41              
42             =head2 Support
43              
44             Please direct usage questions or support issues to the mailing list:
45              
46             I
47              
48             rather than to the module maintainer directly. Many experienced and
49             reponsive experts will be able look at the problem and quickly
50             address it. Please include a thorough description of the problem
51             with code and data examples if at all possible.
52              
53             =head2 Reporting Bugs
54              
55             Report bugs to the Bioperl bug tracking system to help us keep track
56             of the bugs and their resolution. Bug reports can be submitted via the
57             web:
58              
59             https://github.com/bioperl/bioperl-live/issues
60              
61             =head1 AUTHOR - Steve Chervitz
62              
63             Email sac-at-bioperl.org
64              
65             =head1 CONTRIBUTORS
66              
67             Parts of code based on SearchResultEventBuilder by Jason Stajich
68             jason@bioperl.org
69              
70             Sendu Bala, bix@sendu.me.uk
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object methods.
75             Internal methods are usually preceded with a _
76              
77             =cut
78              
79              
80             # Let the code begin...
81              
82              
83             package Bio::SearchIO::IteratedSearchResultEventBuilder;
84              
85 12     12   72 use strict;
  12         24  
  12         357  
86              
87 12     12   59 use Bio::Factory::ObjectFactory;
  12         23  
  12         379  
88              
89 12     12   56 use base qw(Bio::SearchIO::SearchResultEventBuilder);
  12         21  
  12         10657  
90              
91             =head2 new
92              
93             Title : new
94             Usage : my $obj = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
95             Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object
96             Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
97             Args : -hsp_factory => Bio::Factory::ObjectFactoryI
98             -hit_factory => Bio::Factory::ObjectFactoryI
99             -result_factory => Bio::Factory::ObjectFactoryI
100             -iteration_factory => Bio::Factory::ObjectFactoryI
101             -inclusion_threshold => e-value threshold for inclusion in the
102             PSI-BLAST score matrix model (blastpgp)
103             -signif => float or scientific notation number to be used
104             as a P- or Expect value cutoff
105             -score => integer or scientific notation number to be used
106             as a blast score value cutoff
107             -bits => integer or scientific notation number to be used
108             as a bit score value cutoff
109             -hit_filter => reference to a function to be used for
110             filtering hits based on arbitrary criteria.
111              
112             See L for more information
113              
114             =cut
115              
116             sub new {
117 92     92 1 322 my ($class,@args) = @_;
118 92         412 my $self = $class->SUPER::new(@args);
119 92         486 my ($resultF, $iterationF, $hitF, $hspF) =
120             $self->_rearrange([qw(RESULT_FACTORY
121             ITERATION_FACTORY
122             HIT_FACTORY
123             HSP_FACTORY)],@args);
124 92         409 $self->_init_parse_params(@args);
125              
126             # Note that we need to override the setting of result and factories here
127             # so that we can set different default factories than are set by the super class.
128 92   33     510 $self->register_factory('result', $resultF ||
129             Bio::Factory::ObjectFactory->new(
130             -type => 'Bio::Search::Result::BlastResult',
131             -interface => 'Bio::Search::Result::ResultI'));
132              
133 92   33     597 $self->register_factory('hit', $hitF ||
134             Bio::Factory::ObjectFactory->new(
135             -type => 'Bio::Search::Hit::BlastHit',
136             -interface => 'Bio::Search::Hit::HitI'));
137              
138 92   33     565 $self->register_factory('hsp', $hspF ||
139             Bio::Factory::ObjectFactory->new(
140             -type => 'Bio::Search::HSP::GenericHSP',
141             -interface => 'Bio::Search::HSP::HSPI'));
142              
143             # TODO: Change this to BlastIteration (maybe)
144 92   33     533 $self->register_factory('iteration', $iterationF ||
145             Bio::Factory::ObjectFactory->new(
146             -type => 'Bio::Search::Iteration::GenericIteration',
147             -interface => 'Bio::Search::Iteration::IterationI'));
148              
149 92         335 return $self;
150             }
151              
152             =head2 will_handle
153              
154             Title : will_handle
155             Usage : if( $handler->will_handle($event_type) ) { ... }
156             Function: Tests if this event builder knows how to process a specific event
157             Returns : boolean
158             Args : event type name
159              
160             =cut
161              
162             sub will_handle{
163 338     338 1 639 my ($self,$type) = @_;
164             # these are the events we recognize
165 338   33     2168 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result'
166             || $type eq 'iteration' || $type eq 'newhits' || $type eq 'oldhits' );
167             }
168              
169             =head2 SAX methods
170              
171             =cut
172              
173             =head2 start_result
174              
175             Title : start_result
176             Usage : $handler->start_result($resulttype)
177             Function: Begins a result event cycle
178             Returns : none
179             Args : Type of Report
180              
181             =cut
182              
183             sub start_result {
184 96     96 1 180 my $self = shift;
185             #print STDERR "ISREB: start_result()\n";
186 96         464 $self->SUPER::start_result(@_);
187 96         301 $self->{'_iterations'} = [];
188 96         182 $self->{'_iteration_count'} = 0;
189 96         234 $self->{'_old_hit_names'} = undef;
190 96         192 $self->{'_hit_names_below'} = undef;
191 96         352 return;
192             }
193              
194             =head2 start_iteration
195              
196             Title : start_iteration
197             Usage : $handler->start_iteration()
198             Function: Starts an Iteration event cycle
199             Returns : none
200             Args : type of event and associated hashref
201              
202             =cut
203              
204             sub start_iteration {
205 93     93 1 327 my ($self,$type) = @_;
206              
207             #print STDERR "ISREB: start_iteration()\n";
208 93         193 $self->{'_iteration_count'}++;
209              
210             # Reset arrays for the various classes of hits.
211             # $self->{'_newhits_unclassified'} = [];
212 93         675 $self->{'_newhits_below'} = [];
213 93         347 $self->{'_newhits_not_below'} = [];
214 93         223 $self->{'_oldhits_below'} = [];
215 93         198 $self->{'_oldhits_newly_below'} = [];
216 93         338 $self->{'_oldhits_not_below'} = [];
217 93         174 $self->{'_hitcount'} = 0;
218 93         245 return;
219             }
220              
221              
222             =head2 end_iteration
223              
224             Title : end_iteration
225             Usage : $handler->end_iteration()
226             Function: Ends an Iteration event cycle
227             Returns : Bio::Search::Iteration object
228             Args : type of event and associated hashref
229              
230             =cut
231              
232             sub end_iteration {
233 93     93 1 265 my ($self,$type,$data) = @_;
234              
235             # print STDERR "ISREB: end_iteration()\n";
236              
237 0         0 my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
  0         0  
  0         0  
238 93         181 grep { /^ITERATION/ } keys %{$data};
  2842         3699  
  93         516  
239              
240 93         321 $args{'-number'} = $self->{'_iteration_count'};
241 93         244 $args{'-oldhits_below'} = $self->{'_oldhits_below'};
242 93         192 $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
243 93         200 $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
244 93         204 $args{'-newhits_below'} = $self->{'_newhits_below'};
245 93         184 $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
246 93         336 $args{'-hit_factory'} = $self->factory('hit');
247              
248 93         257 my $it = $self->factory('iteration')->create_object(%args);
249 93         233 push @{$self->{'_iterations'}}, $it;
  93         246  
250 93         313 return $it;
251             }
252              
253             # Title : _add_hit (private function for internal use only)
254             # Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
255             # Argument: Bio::Search::Hit::HitI object
256              
257             sub _add_hit {
258 2446     2446   3725 my ($self, $hit) = @_;
259              
260 2446         4046 my $hit_name = uc($hit->{-name});
261 2446         3147 my $hit_signif = $hit->{-significance};
262 2446         3134 my $ithresh = $self->{'_inclusion_threshold'};
263              
264             # Test significance using custom function (if supplied)
265 2446         2623 my $add_hit = 1;
266              
267 2446         2865 my $hit_filter = $self->{'_hit_filter'};
268              
269 2446 100       3638 if($hit_filter) {
270             # since &hit_filter is out of our control and would expect a HitI object,
271             # we're forced to make one for it
272 4         11 $hit = $self->factory('hit')->create_object(%{$hit});
  4         19  
273 4 100       16 $add_hit = 0 unless &$hit_filter($hit);
274             }
275             else {
276 2442 100       4145 if($self->{'_confirm_significance'}) {
277 4 100       19 $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
278             }
279 2442 100       3822 if($self->{'_confirm_score'}) {
280 4   33     16 my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
281 4 100       14 $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
282             }
283 2442 100       3964 if($self->{'_confirm_bits'}) {
284 4   33     16 my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
285 4 100       16 $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
286             }
287             }
288              
289 2446 100       6352 $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
290             # Building hit lookup hashes for determining if the hit is old/new and
291             # above/below threshold.
292 2446         5487 $self->{'_old_hit_names'}->{$hit_name}++;
293 2446 100       7929 $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
294             }
295              
296             # Title : _store_hit (private function for internal use only)
297             # Purpose : Collects hit objects into defined sets that are useful for
298             # analyzing PSI-blast results.
299             # These are ultimately added to the iteration object in end_iteration().
300             #
301             # Strategy:
302             # Primary split = old vs. new
303             # Secondary split = below vs. above threshold
304             # 1. Has this hit occurred in a previous iteration?
305             # 1.1. If yes, was it below threshold?
306             # 1.1.1. If yes, ---> [oldhits_below]
307             # 1.1.2. If no, is it now below threshold?
308             # 1.1.2.1. If yes, ---> [oldhits_newly_below]
309             # 1.1.2.2. If no, ---> [oldhits_not_below]
310             # 1.2. If no, is it below threshold?
311             # 1.2.1. If yes, ---> [newhits_below]
312             # 1.2.2. If no, ---> [newhits_not_below]
313             # 1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
314             # Note: As long as there's a default inclusion threshold,
315             # there won't be an unclassified set.
316             #
317             # For the first iteration, it might be nice to detect non-PSI blast reports
318             # and put the hits in the unclassified set.
319             # However, it shouldn't matter where the hits get put for the first iteration
320             # for non-PSI blast reports since they'll get flattened out in the
321             # result and iteration search objects.
322              
323             sub _store_hit {
324 2438     2438   3983 my ($self, $hit, $hit_name, $hit_signif) = @_;
325              
326 2438         3168 my $ithresh = $self->{'_inclusion_threshold'};
327              
328             # This is the assumption leading to Bug 1986. The assumption here is that
329             # the hit name is unique (and thus new), therefore any subsequent encounters
330             # with a hit containing the same name are filed as old hits. This isn't
331             # always true (see the bug report for a few examples). Adding an explicit
332             # check for the presence of iterations, adding to new hits otherwise.
333              
334 2438 100 100     6074 if (exists $self->{'_old_hit_names'}->{$hit_name}
335 121         341 && scalar @{$self->{_iterations}}) {
336 117 100       266 if (exists $self->{'_hit_names_below'}->{$hit_name}) {
    100          
337 109         115 push @{$self->{'_oldhits_below'}}, $hit;
  109         184  
338             } elsif ($hit_signif <= $ithresh) {
339 3         4 push @{$self->{'_oldhits_newly_below'}}, $hit;
  3         7  
340             } else {
341 5         8 push @{$self->{'_oldhits_not_below'}}, $hit;
  5         13  
342             }
343             } else {
344 2321 100       6998 if ($hit_signif <= $ithresh) {
345 1773         1991 push @{$self->{'_newhits_below'}}, $hit;
  1773         3566  
346             } else {
347 548         709 push @{$self->{'_newhits_not_below'}}, $hit;
  548         1208  
348             }
349             }
350 2438         4238 $self->{'_hitcount'}++;
351             }
352              
353             1;