File Coverage

Bio/Search/Tiling/MapTiling.pm
Criterion Covered Total %
statement 396 473 83.7
branch 93 136 68.3
condition 26 51 50.9
subroutine 43 47 91.4
pod 22 24 91.6
total 580 731 79.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Tiling::MapTiling
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Mark A. Jensen
7             #
8             # Copyright Mark A. Jensen
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Tiling::MapTiling - An implementation of an HSP tiling
17             algorithm, with methods to obtain frequently-requested statistics
18              
19             =head1 SYNOPSIS
20              
21             # get a BLAST $hit from somewhere, then
22             $tiling = Bio::Search::Tiling::MapTiling->new($hit);
23              
24             # stats
25             $numID = $tiling->identities();
26             $numCons = $tiling->conserved();
27             $query_length = $tiling->length('query');
28             $subject_length = $tiling->length('subject'); # or...
29             $subject_length = $tiling->length('hit');
30              
31             # get a visual on the coverage map
32             print $tiling->coverage_map_as_text('query',$context,'LEGEND');
33              
34             # tilings
35             $context = $tiling->_context( -type => 'subject', -strand=> 1, -frame=>1);
36             @covering_hsps_for_subject = $tiling->next_tiling('subject',$context);
37             $context = $tiling->_context( -type => 'query', -strand=> -1, -frame=>0);
38             @covering_hsps_for_query = $tiling->next_tiling('query', $context);
39              
40             =head1 DESCRIPTION
41              
42             Frequently, users want to use a set of high-scoring pairs (HSPs)
43             obtained from a BLAST or other search to assess the overall level of
44             identity, conservation, or coverage represented by matches between a
45             subject and a query sequence. Because a set of HSPs frequently
46             describes multiple overlapping sequence fragments, a simple summation of
47             statistics over the HSPs will generally overestimate those
48             statistics. To obtain an accurate estimate of global hit statistics, a
49             'tiling' of HSPs onto either the subject or the query sequence must be
50             performed, in order to properly correct for this.
51              
52             This module will execute a tiling algorithm on a given hit based on an
53             interval decomposition I'm calling the "coverage map". Internal object
54             methods compute the various statistics, which are then stored in
55             appropriately-named public object attributes. See
56             L for more info on the algorithm.
57              
58             =head2 STRAND/FRAME CONTEXTS
59              
60             In BLASTX, TBLASTN, and TBLASTX reports, strand and frame information
61             are reported for the query, subject, or query and subject,
62             respectively, for each HSP. Tilings for these sequence types are only
63             meaningful when they include HSPs in the same strand and frame, or
64             "context". So, in these situations, the context must be specified
65             in the method calls or the methods will throw.
66              
67             Contexts are specified as strings: C<[ 'all' | [m|p][_|0|1|2] ]>, where
68             C = all HSPs (will throw if context must be specified), C = minus
69             strand, C

= plus strand, and C<_> = no frame info, C<0,1,2> = respective

70             (absolute) frame. The L method will convert a (strand,
71             frame) specification to a context string, e.g.:
72              
73             $context = $self->_context(-type=>'query', -strand=>-1, -frame=>-2);
74              
75             returns C.
76              
77             The contexts present among the HSPs in a hit are identified and stored
78             for convenience upon object construction. These are accessed off the
79             object with the L method. If contexts don't apply for the
80             given report, this returns C<('all')>.
81              
82             =head1 TILED ALIGNMENTS
83              
84             The experimental method L will use a tiling
85             to concatenate tiled hsps into a series of L
86             objects:
87              
88             @alns = $tiling->get_tiled_alns($type, $context);
89              
90             Each alignment contains two sequences with ids 'query' and 'subject',
91             and consists of a concatenation of tiling HSPs which overlap or are
92             directly adjacent. The alignment are returned in C<$type> sequence
93             order. When HSPs overlap, the alignment sequence is taken from the HSP
94             which comes first in the coverage map array.
95              
96             The sequences in each alignment contain features (even though they are
97             L objects) which map the original query/subject
98             coordinates to the new alignment sequence coordinates. You can
99             determine the original BLAST fragments this way:
100              
101             $aln = ($tiling->get_tiled_alns)[0];
102             $qseq = $aln->get_seq_by_id('query');
103             $hseq = $aln->get_seq_by_id('subject');
104             foreach my $feat ($qseq->get_SeqFeatures) {
105             $org_start = ($feat->get_tag_values('query_start'))[0];
106             $org_end = ($feat->get_tag_values('query_end'))[0];
107             # original fragment as represented in the tiled alignment:
108             $org_fragment = $feat->seq;
109             }
110             foreach my $feat ($hseq->get_SeqFeatures) {
111             $org_start = ($feat->get_tag_values('subject_start'))[0];
112             $org_end = ($feat->get_tag_values('subject_end'))[0];
113             # original fragment as represented in the tiled alignment:
114             $org_fragment = $feat->seq;
115             }
116              
117             =head1 DESIGN NOTE
118              
119             The major calculations are made just-in-time, and then memoized. So,
120             for example, for a given MapTiling object, a coverage map would
121             usually be calculated only once (for the query), and at most twice (if
122             the subject perspective is also desired), and then only when a
123             statistic is first accessed. Afterward, the map and/or any statistic
124             is read from storage. So feel free to call the statistic methods
125             frequently if it suits you.
126              
127             =head1 FEEDBACK
128              
129             =head2 Mailing Lists
130              
131             User feedback is an integral part of the evolution of this and other
132             Bioperl modules. Send your comments and suggestions preferably to
133             the Bioperl mailing list. Your participation is much appreciated.
134              
135             bioperl-l@bioperl.org - General discussion
136             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
137              
138             =head2 Support
139              
140             Please direct usage questions or support issues to the mailing list:
141              
142             I
143              
144             rather than to the module maintainer directly. Many experienced and
145             reponsive experts will be able look at the problem and quickly
146             address it. Please include a thorough description of the problem
147             with code and data examples if at all possible.
148              
149             =head2 Reporting Bugs
150              
151             Report bugs to the Bioperl bug tracking system to help us keep track
152             of the bugs and their resolution. Bug reports can be submitted via
153             the web:
154              
155             https://github.com/bioperl/bioperl-live/issues
156              
157             =head1 AUTHOR - Mark A. Jensen
158              
159             Email maj -at- fortinbras -dot- us
160              
161             =head1 APPENDIX
162              
163             The rest of the documentation details each of the object methods.
164             Internal methods are usually preceded with a _
165              
166             =cut
167              
168             # Let the code begin...
169              
170             package Bio::Search::Tiling::MapTiling;
171 1     1   826 use strict;
  1         2  
  1         24  
172 1     1   2 use warnings;
  1         1  
  1         25  
173              
174             # Object preamble - inherits from Bio::Root::Root
175             #use lib '../../..';
176              
177 1     1   390 use Bio::Root::Root;
  1         2  
  1         32  
178 1     1   479 use Bio::Search::Tiling::TilingI;
  1         1  
  1         20  
179 1     1   433 use Bio::Search::Tiling::MapTileUtils;
  1         2  
  1         67  
180 1     1   419 use Bio::LocatableSeq;
  1         3  
  1         32  
181              
182             # use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
183 1     1   5 use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
  1         1  
  1         3974  
184              
185             =head1 CONSTRUCTOR
186              
187             =head2 new
188              
189             Title : new
190             Usage : my $obj = new Bio::Search::Tiling::GenericTiling();
191             Function: Builds a new Bio::Search::Tiling::GenericTiling object
192             Returns : an instance of Bio::Search::Tiling::GenericTiling
193             Args : -hit => $a_Bio_Search_Hit_HitI_object
194             general filter function:
195             -hsp_filter => sub { my $this_hsp = shift;
196             ...;
197             return 1 if $wanted;
198             return 0; }
199              
200             =cut
201              
202             sub new {
203 519     519 1 3735 my $class = shift;
204 519         816 my @args = @_;
205 519         1241 my $self = $class->SUPER::new(@args);
206 519         1528 my($hit, $filter) = $self->_rearrange( [qw( HIT HSP_FILTER)],@args );
207              
208 519 50       1183 $self->throw("HitI object required") unless $hit;
209 519 50 33     2666 $self->throw("Argument must be HitI object") unless ( ref $hit && $hit->isa('Bio::Search::Hit::HitI') );
210 519         632 $self->{hit} = $hit;
211 519         1214 $self->_set_attributes();
212 519         806 $self->{"_algorithm"} = $hit->algorithm;
213              
214 519         880 my @hsps = $hit->hsps;
215             # apply filter function if requested
216 519 50       964 if ( defined $filter ) {
217 0 0       0 if ( ref($filter) eq 'CODE' ) {
218 0 0       0 @hsps = map { $filter->($_) ? $_ : () } @hsps;
  0         0  
219             }
220             else {
221 0         0 $self->warn("-filter is not a coderef; ignoring");
222             }
223             }
224            
225             # identify available contexts
226 519         641 for my $t (qw( query hit )) {
227 1038         761 my %contexts;
228 1038         1509 for my $i (0..$#hsps) {
229 546         1666 my $ctxt = $self->_context(
230             -type => $t,
231             -strand => $hsps[$i]->strand($t),
232             -frame => $hsps[$i]->frame($t));
233 546   100     1617 $contexts{$ctxt} ||= [];
234 546         375 push @{$contexts{$ctxt}}, $i;
  546         1102  
235             }
236 1038         2039 $self->{"_contexts_${t}"} = \%contexts;
237             }
238              
239 519 100       1506 $self->warn("No HSPs present in hit after filtering") unless (@hsps);
240 519         954 $self->hsps(\@hsps);
241 519         1864 return $self;
242             }
243              
244             # a tiling is based on the set of hsps contained in a single hit.
245             # check all the boundaries - zero hsps, one hsp, all disjoint hsps
246              
247             =head1 TILING ITERATORS
248              
249             =head2 next_tiling
250              
251             Title : next_tiling
252             Usage : @hsps = $self->next_tiling($type);
253             Function: Obtain a tiling: a minimal set of HSPs covering the $type
254             ('hit', 'subject', 'query') sequence
255             Example :
256             Returns : an array of HSPI objects
257             Args : scalar $type: one of 'hit', 'subject', 'query', with
258             'subject' an alias for 'hit'
259              
260             =cut
261              
262             sub next_tiling{
263 268     268 1 1962 my $self = shift;
264 268         205 my ($type, $context) = @_;
265 268         285 $self->_check_type_arg(\$type);
266 268         373 $self->_check_context_arg($type, \$context);
267 268         332 return $self->_tiling_iterator($type, $context)->();
268             }
269              
270             =head2 rewind_tilings
271              
272             Title : rewind_tilings
273             Usage : $self->rewind_tilings($type)
274             Function: Reset the next_tilings($type) iterator
275             Example :
276             Returns : True on success
277             Args : scalar $type: one of 'hit', 'subject', 'query';
278             default is 'query'
279              
280             =cut
281              
282             sub rewind_tilings{
283 2     2 1 314 my $self = shift;
284 2         4 my ($type,$context) = @_;
285 2         6 $self->_check_type_arg(\$type);
286 2         6 $self->_check_context_arg($type, \$context);
287 2         6 return $self->_tiling_iterator($type, $context)->('REWIND');
288             }
289              
290             =head1 ALIGNMENTS
291              
292             =head2 get_tiled_alns()
293              
294             Title : get_tiled_alns
295             Usage : @alns = $tiling->get_tiled_alns($type, $context)
296             Function: Use a tiling to construct a minimal set of alignment
297             objects covering the region specified by $type/$context
298             by splicing adjacent HSP tiles
299             Returns : an array of Bio::SimpleAlign objects; see Note below
300             Args : scalar $type: one of 'hit', 'subject', 'query'
301             default is 'query'
302             scalar $context: strand/frame context string
303             Following $type and $context, an array of
304             ordered, tiled HSP objects can be specified; this is
305             the tiling that will directly the alignment construction
306             default -- the first tiling provided by a tiling iterator
307             Notes : Each returned alignment is a concatenation of adjacent tiles.
308             The set of alignments will cover all regions described by the
309             $type/$context pair in the hit. The pair of sequences in each
310             alignment have ids 'query' and 'subject', and each sequence
311             possesses SeqFeatures that map the original query or subject
312             coordinates to the sequence coordinates in the tiled alignment.
313            
314             =cut
315              
316             sub get_tiled_alns {
317 1     1 1 3 my $self = shift;
318 1         2 my ($type, $context) = @_;
319 1         3 $self->_check_type_arg(\$type);
320 1         4 $self->_check_context_arg($type, \$context);
321 1         2 my $t = shift; # first arg after type/context, arrayref to a tiling
322 1         2 my @tiling;
323 1 50 33     6 if ($t && (ref($t) eq 'ARRAY')) {
324 0         0 @tiling = @$t;
325             }
326             else { # otherwise, take the first tiling available
327              
328 1         3 @tiling = $self->_make_tiling_iterator($type,$context)->();
329             }
330 1         11 my @ret;
331              
332 1         4 my @map = $self->coverage_map($type, $context);
333 1         3 my @intervals = map {$_->[0]} @map; # disjoint decomp
  13         11  
334             # divide into disjoint covering groups
335 1         6 my @groups = covering_groups(@intervals);
336              
337 1         1053 require Bio::SimpleAlign;
338 1         9 require Bio::SeqFeature::Generic;
339             # cut hsp aligns along the disjoint decomp
340             # look for gaps...or add gaps?
341 1         2 my ($q_start, $h_start, $q_strand, $h_strand);
342             # build seqs
343 1         4 for my $grp (@groups) {
344 6         18 my $taln = Bio::SimpleAlign->new();
345 6         9 my (@qfeats, @hfeats);
346 6         7 my $query_string = '';
347 6         4 my $hit_string = '';
348 6         8 my ($qlen,$hlen) = (0,0);
349 6         5 my ($qinc, $hinc, $qstart, $hstart);
350 6         11 for my $intvl (@$grp) {
351             # following just chooses the first available hsp containing the
352             # interval -- this is arbitrary, could be smarter.
353 12         37 my $aln = ( containing_hsps($intvl, @tiling) )[0]->get_aln;
354 12         29 my $qseq = $aln->get_seq_by_pos(1);
355 12         22 my $hseq = $aln->get_seq_by_pos(2);
356 12   66     35 $qstart ||= $qseq->start;
357 12   66     30 $hstart ||= $hseq->start;
358             # calculate the slice boundaries
359 12         14 my ($beg, $end);
360 12         18 for ($type) {
361 12 50       30 /query/ && do {
362 12         25 $beg = $aln->column_from_residue_number($qseq->id, $intvl->[0]);
363 12         29 $end = $aln->column_from_residue_number($qseq->id, $intvl->[1]);
364 12         21 last;
365             };
366 0 0       0 /subject|hit/ && do {
367 0         0 $beg = $aln->column_from_residue_number($hseq->id, $intvl->[0]);
368 0         0 $end = $aln->column_from_residue_number($hseq->id, $intvl->[1]);
369 0         0 last;
370             };
371             }
372 12         31 $aln = $aln->slice($beg, $end);
373 12         35 $qseq = $aln->get_seq_by_pos(1);
374 12         23 $hseq = $aln->get_seq_by_pos(2);
375 12         20 $qinc = $qseq->length - $qseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
376 12         22 $hinc = $hseq->length - $hseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
377              
378 12 50 33     41 push @qfeats, Bio::SeqFeature::Generic->new(
    50 33        
379             -start => $qlen+1,
380             -end => $qlen+$qinc,
381             -strand => $qseq->strand,
382             -primary => 'query',
383             -source_tag => 'BLAST',
384             -display_name => 'query coordinates',
385             -tag => { query_id => $qseq->id,
386             query_desc => $qseq->desc,
387             query_start => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*$qlen,
388             query_end => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*($qlen+$qinc-1),
389             }
390             );
391 12 50 33     58 push @hfeats, Bio::SeqFeature::Generic->new(
    50 33        
392             -start => $hlen+1,
393             -end => $hlen+$hinc,
394             -strand => $hseq->strand,
395             -primary => 'subject/hit',
396             -source_tag => 'BLAST',
397             -display_name => 'subject/hit coordinates',
398             -tag => { subject_id => $hseq->id,
399             subject_desc => $hseq->desc,
400             subject_start => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*$hlen,
401             subject_end => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*($hlen+$hinc-1)
402             }
403             );
404 12         45 $query_string .= $qseq->seq;
405 12         23 $hit_string .= $hseq->seq;
406 12         14 $qlen += $qinc;
407 12         35 $hlen += $hinc;
408             }
409             # create the LocatableSeqs and add the features to each
410             # then add the seqs to the new aln
411             # note in MapTileUtils, Bio::FeatureHolderI methods have been
412             # mixed into Bio::LocatableSeq
413 6         25 my $qseq = Bio::LocatableSeq->new( -id => 'query',
414             -seq => $query_string);
415 6         24 $qseq->add_SeqFeature(@qfeats);
416 6         23 my $hseq = Bio::LocatableSeq->new( -id => 'subject',
417             -seq => $hit_string );
418 6         17 $hseq->add_SeqFeature(@hfeats);
419 6         19 $taln->add_seq($qseq);
420 6         12 $taln->add_seq($hseq);
421 6         17 push @ret, $taln;
422             }
423            
424 1         20 return @ret;
425             }
426              
427             =head1 STATISTICS
428              
429             =head2 identities
430              
431             Title : identities
432             Usage : $tiling->identities($type, $action, $context)
433             Function: Retrieve the calculated number of identities for the invocant
434             Example :
435             Returns : value of identities (a scalar)
436             Args : scalar $type: one of 'hit', 'subject', 'query'
437             default is 'query'
438             option scalar $action: one of 'exact', 'est', 'fast', 'max'
439             default is 'exact'
440             option scalar $context: strand/frame context string
441             Note : getter only
442              
443             =cut
444              
445             sub identities{
446 538     538 1 1037 my $self = shift;
447 538         691 my ($type, $action, $context) = @_;
448 538         732 $self->_check_type_arg(\$type);
449 538         780 $self->_check_action_arg(\$action);
450 538         939 $self->_check_context_arg($type, \$context);
451 538 100       1609 if (!defined $self->{"identities_${type}_${action}_${context}"}) {
452 531         932 $self->_calc_stats($type, $action, $context);
453             }
454 538         1179 return $self->{"identities_${type}_${action}_${context}"};
455             }
456              
457             =head2 conserved
458              
459             Title : conserved
460             Usage : $tiling->conserved($type, $action)
461             Function: Retrieve the calculated number of conserved sites for the invocant
462             Example :
463             Returns : value of conserved (a scalar)
464             Args : scalar $type: one of 'hit', 'subject', 'query'
465             default is 'query'
466             option scalar $action: one of 'exact', 'est', 'fast', 'max'
467             default is 'exact'
468             option scalar $context: strand/frame context string
469             Note : getter only
470              
471             =cut
472              
473             sub conserved{
474 538     538 1 1234 my $self = shift;
475 538         615 my ($type, $action, $context) = @_;
476 538         794 $self->_check_type_arg(\$type);
477 538         912 $self->_check_action_arg(\$action);
478 538         835 $self->_check_context_arg($type, \$context);
479 538 50       1591 if (!defined $self->{"conserved_${type}_${action}_${context}"}) {
480 0         0 $self->_calc_stats($type, $action, $context);
481             }
482 538         1007 return $self->{"conserved_${type}_${action}_${context}"};
483             }
484              
485             =head2 length
486              
487             Title : length
488             Usage : $tiling->length($type, $action, $context)
489             Function: Retrieve the total length of aligned residues for
490             the seq $type
491             Example :
492             Returns : value of length (a scalar)
493             Args : scalar $type: one of 'hit', 'subject', 'query'
494             default is 'query'
495             option scalar $action: one of 'exact', 'est', 'fast', 'max'
496             default is 'exact'
497             option scalar $context: strand/frame context string
498             Note : getter only
499              
500             =cut
501              
502             sub length{
503 1051     1051 1 1162 my $self = shift;
504 1051         1170 my ($type,$action,$context) = @_;
505 1051         1507 $self->_check_type_arg(\$type);
506 1051         1432 $self->_check_action_arg(\$action);
507 1051         1710 $self->_check_context_arg($type, \$context);
508 1051 100       2436 if (!defined $self->{"length_${type}_${action}_${context}"}) {
509 1         5 $self->_calc_stats($type, $action, $context);
510             }
511 1051         3608 return $self->{"length_${type}_${action}_${context}"};
512             }
513              
514             =head2 frac
515              
516             Title : frac
517             Usage : $tiling->frac($type, $denom, $action, $context, $method)
518             Function: Return the fraction of sequence length consisting
519             of desired kinds of pairs (given by $method),
520             with respect to $denom
521             Returns : scalar float
522             Args : -type => one of 'hit', 'subject', 'query'
523             -denom => one of 'total', 'aligned'
524             -action => one of 'exact', 'est', 'fast', 'max'
525             -context => strand/frame context string
526             -method => one of 'identical', 'conserved'
527             Note : $denom == 'aligned', return desired_stat/num_aligned
528             $denom == 'total', return desired_stat/_reported_length
529             (i.e., length of the original input sequences)
530             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
531             reported lengths of translated dna are reduced by
532             a factor of 3, to provide fractions relative to
533             amino acid coordinates.
534              
535             =cut
536              
537             sub frac {
538 1058     1058 1 151968 my $self = shift;
539 1058         2014 my @args = @_;
540 1058         3454 my ($type, $denom, $action, $context, $method) = $self->_rearrange([qw(TYPE DENOM ACTION CONTEXT METHOD)],@args);
541 1058         2600 $self->_check_type_arg(\$type);
542 1058         1733 $self->_check_action_arg(\$action);
543 1058         2016 $self->_check_context_arg($type, \$context);
544 1058 50 33     9097 unless ($method and grep(/^$method$/, qw( identical conserved ))) {
545 0         0 $self->throw("-method must specified; one of ('identical', 'conserved')");
546             }
547 1058   50     1441 $denom ||= 'total';
548 1058 50       3635 unless (grep /^$denom/, qw( total aligned )) {
549 0         0 $self->throw("Denominator selection must be one of ('total', 'aligned'), not '$denom'");
550             }
551 1058         2213 my $key = "frac_${method}_${type}_${denom}_${action}_${context}";
552 1058         826 my $stat;
553 1058         1385 for ($method) {
554 1058 100       1761 $_ eq 'identical' && do {
555 529         1003 $stat = $self->identities($type, $action, $context);
556 529         685 last;
557             };
558 529 50       858 $_ eq 'conserved' && do {
559 529         879 $stat = $self->conserved($type, $action, $context);
560 529         647 last;
561             };
562 0         0 do {
563 0         0 $self->throw("What are YOU doing here?");
564             };
565             }
566 1058 100       2162 if (!defined $self->{$key}) {
567 1046         1395 for ($denom) {
568 1046 50       2176 /total/ && do {
569 0         0 $self->{$key} =
570             $stat/$self->_reported_length($type); # need fudge fac??
571 0         0 last;
572             };
573 1046 50       1980 /aligned/ && do {
574 1046         1890 $self->{$key} =
575             $stat/$self->length($type,$action,$context);
576 1046         1151 last;
577             };
578 0         0 do {
579 0         0 $self->throw("What are YOU doing here?");
580             };
581             }
582             }
583 1058         3297 return $self->{$key};
584             }
585              
586             =head2 frac_identical
587              
588             Title : frac_identical
589             Usage : $tiling->frac_identical($type, $denom, $action, $context)
590             Function: Return the fraction of sequence length consisting
591             of identical pairs, with respect to $denom
592             Returns : scalar float
593             Args : -type => one of 'hit', 'subject', 'query'
594             -denom => one of 'total', 'aligned'
595             -action => one of 'exact', 'est', 'fast', 'max'
596             -context => strand/frame context string
597             Note : $denom == 'aligned', return conserved/num_aligned
598             $denom == 'total', return conserved/_reported_length
599             (i.e., length of the original input sequences)
600             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
601             reported lengths of translated dna are reduced by
602             a factor of 3, to provide fractions relative to
603             amino acid coordinates.
604             Note : This an alias that calls frac()
605              
606             =cut
607              
608             sub frac_identical{
609 3     3 1 4 my $self = shift;
610 3         8 my @args = @_;
611 3         14 my ($type, $denom, $action,$context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
612 3         38 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -method=>'identical', -context=>$context);
613             }
614              
615             =head2 frac_conserved
616              
617             Title : frac_conserved
618             Usage : $tiling->frac_conserved($type, $denom, $action, $context)
619             Function: Return the fraction of sequence length consisting
620             of conserved pairs, with respect to $denom
621             Returns : scalar float
622             Args : -type => one of 'hit', 'subject', 'query'
623             -denom => one of 'total', 'aligned'
624             -action => one of 'exact', 'est', 'fast', 'max'
625             -context => strand/frame context string
626             Note : $denom == 'aligned', return conserved/num_aligned
627             $denom == 'total', return conserved/_reported_length
628             (i.e., length of the original input sequences)
629             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
630             reported lengths of translated dna are reduced by
631             a factor of 3, to provide fractions relative to
632             amino acid coordinates.
633             Note : This an alias that calls frac()
634              
635             =cut
636              
637             sub frac_conserved{
638 3     3 1 6 my $self = shift;
639 3         7 my @args = @_;
640 3         13 my ($type, $denom, $action, $context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
641 3         17 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -context=>$context, -method=>'conserved');
642             }
643              
644             =head2 frac_aligned
645              
646             Title : frac_aligned
647             Aliases : frac_aligned_query - frac_aligned(-type=>'query',...)
648             frac_aligned_hit - frac_aligned(-type=>'hit',...)
649             Usage : $tiling->frac_aligned(-type=>$type,
650             -action=>$action,
651             -context=>$context)
652             Function: Return the fraction of input sequence length
653             that was aligned by the algorithm
654             Returns : scalar float
655             Args : -type => one of 'hit', 'subject', 'query'
656             -action => one of 'exact', 'est', 'fast', 'max'
657             -context => strand/frame context string
658              
659             =cut
660              
661             sub frac_aligned{
662 4     4 1 12 my ($self, @args) = @_;
663 4         18 my ($type, $action, $context) = $self->_rearrange([qw(TYPE ACTION CONTEXT)],@args);
664 4         16 $self->_check_type_arg(\$type);
665 4         9 $self->_check_action_arg(\$action);
666 4         12 $self->_check_context_arg($type, \$context);
667 4 50       14 if (!$self->{"frac_aligned_${type}_${action}_${context}"}) {
668 4         9 $self->{"frac_aligned_${type}_${action}_${context}"} = $self->num_aligned($type,$action,$context)/$self->_reported_length($type);
669             }
670 4         74 return $self->{"frac_aligned_${type}_${action}_${context}"};
671             }
672              
673 2     2 0 10 sub frac_aligned_query { shift->frac_aligned(-type=>'query', @_) }
674 2     2 0 24 sub frac_aligned_hit { shift->frac_aligned(-type=>'hit', @_) }
675            
676              
677             =head2 num_aligned
678              
679             Title : num_aligned
680             Usage : $tiling->num_aligned(-type=>$type)
681             Function: Return the number of residues of sequence $type
682             that were aligned by the algorithm
683             Returns : scalar int
684             Args : -type => one of 'hit', 'subject', 'query'
685             -action => one of 'exact', 'est', 'fast', 'max'
686             -context => strand/frame context string
687             Note : Since this is calculated from reported coordinates,
688             not symbol string counts, it is already in terms of
689             "logical length"
690             Note : Aliases length()
691              
692             =cut
693              
694 4     4 1 12 sub num_aligned { shift->length( @_ ) };
695              
696             =head2 num_unaligned
697              
698             Title : num_unaligned
699             Usage : $tiling->num_unaligned(-type=>$type)
700             Function: Return the number of residues of sequence $type
701             that were left unaligned by the algorithm
702             Returns : scalar int
703             Args : -type => one of 'hit', 'subject', 'query'
704             -action => one of 'exact', 'est', 'fast', 'max'
705             -context => strand/frame context string
706             Note : Since this is calculated from reported coordinates,
707             not symbol string counts, it is already in terms of
708             "logical length"
709              
710             =cut
711              
712             sub num_unaligned {
713 0     0 1 0 my $self = shift;
714 0         0 my ($type,$action,$context) = @_;
715 0         0 my $ret;
716 0         0 $self->_check_type_arg(\$type);
717 0         0 $self->_check_action_arg(\$action);
718 0         0 $self->_check_context_arg($type, \$context);
719 0 0       0 if (!defined $self->{"num_unaligned_${type}_${action}_${context}"}) {
720 0         0 $self->{"num_unaligned_${type}_${action}_${context}"} = $self->_reported_length($type)-$self->num_aligned($type,$action,$context);
721             }
722 0         0 return $self->{"num_unaligned_${type}_${action}_${context}"};
723             }
724            
725              
726             =head2 range
727              
728             Title : range
729             Usage : $tiling->range(-type=>$type)
730             Function: Returns the extent of the longest tiling
731             as ($min_coord, $max_coord)
732             Returns : array of two scalar integers
733             Args : -type => one of 'hit', 'subject', 'query'
734             -context => strand/frame context string
735              
736             =cut
737              
738             sub range {
739 8     8 1 1785 my ($self, $type, $context) = @_;
740 8         20 $self->_check_type_arg(\$type);
741 8         19 $self->_check_context_arg($type, \$context);
742 8         22 my @a = $self->_contig_intersection($type,$context);
743 8         48 return ($a[0][0], $a[-1][1]);
744             }
745              
746              
747              
748             =head1 ACCESSORS
749              
750             =head2 coverage_map
751              
752             Title : coverage_map
753             Usage : $map = $tiling->coverage_map($type)
754             Function: Property to contain the coverage map calculated
755             by _calc_coverage_map() - see that for
756             details
757             Example :
758             Returns : value of coverage_map_$type as an array
759             Args : scalar $type: one of 'hit', 'subject', 'query'
760             default is 'query'
761             Note : getter
762              
763             =cut
764              
765             sub coverage_map{
766 332     332 1 278 my $self = shift;
767 332         423 my ($type, $context) = @_;
768 332         557 $self->_check_type_arg(\$type);
769 332         563 $self->_check_context_arg($type, \$context);
770              
771 332 100       921 if (!defined $self->{"coverage_map_${type}_${context}"}) {
772             # following calculates coverage maps in all strands/frames
773             # if necessary
774 203         372 $self->_calc_coverage_map($type, $context);
775             }
776             # if undef is returned, then there were no hsps for given strand/frame
777 332 50       964 if (!defined $self->{"coverage_map_${type}_${context}"}) {
778 0         0 $self->warn("No HSPS present for type '$type' in context '$context' for this hit");
779 0         0 return undef;
780             }
781 332         273 return @{$self->{"coverage_map_${type}_${context}"}};
  332         943  
782             }
783              
784             =head2 coverage_map_as_text
785              
786             Title : coverage_map_as_text
787             Usage : $tiling->coverage_map_as_text($type, $legend_flag)
788             Function: Format a text-graphic representation of the
789             coverage map
790             Returns : an array of scalar strings, suitable for printing
791             Args : $type: one of 'query', 'hit', 'subject'
792             $context: strand/frame context string
793             $legend_flag: boolean; add a legend indicating
794             the actual interval coordinates for each component
795             interval and hsp (in the $type sequence context)
796             Example : print $tiling->coverage_map_as_text('query',1);
797              
798             =cut
799              
800             sub coverage_map_as_text{
801 0     0 1 0 my $self = shift;
802 0         0 my ($type, $context, $legend_q) = @_;
803 0         0 $self->_check_type_arg(\$type);
804 0         0 $self->_check_context_arg($type, \$context);
805              
806 0         0 my @map = $self->coverage_map($type, $context);
807 0         0 my @ret;
808 0         0 my @hsps = $self->hit->hsps;
809 0         0 my %hsps_i;
810 0         0 require Tie::RefHash;
811 0         0 tie %hsps_i, 'Tie::RefHash';
812 0         0 @hsps_i{@hsps} = (0..$#hsps);
813 0         0 my @mx;
814 0         0 foreach (0..$#map) {
815 0         0 my @hspx = ('') x @hsps;
816 0         0 my @these_hsps = @{$map[$_]->[1]};
  0         0  
817 0         0 @hspx[@hsps_i{@these_hsps}] = ('*') x @these_hsps;
818 0         0 $mx[$_] = \@hspx;
819             }
820 0         0 untie %hsps_i;
821              
822 0         0 push @ret, "\tIntvl\n";
823 0         0 push @ret, "HSPS\t", join ("\t", (0..$#map)), "\n";
824 0         0 foreach my $h (0..$#hsps) {
825 0         0 push @ret, join("\t", $h, map { $mx[$_][$h] } (0..$#map) ),"\n";
  0         0  
826             }
827 0 0       0 if ($legend_q) {
828 0         0 push @ret, "Interval legend\n";
829 0         0 foreach (0..$#map) {
830 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$map[$_][0]});
  0         0  
831             }
832 0         0 push @ret, "HSP legend\n";
833 0         0 my @ints = get_intervals_from_hsps($type,@hsps);
834 0         0 foreach (0..$#hsps) {
835 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$ints[$_]});
  0         0  
836             }
837             }
838 0         0 return @ret;
839             }
840              
841             =head2 hit
842              
843             Title : hit
844             Usage : $tiling->hit
845             Function:
846             Example :
847             Returns : The HitI object associated with the invocant
848             Args : none
849             Note : getter only
850              
851             =cut
852              
853             sub hit{
854 5229     5229 1 3321 my $self = shift;
855 5229 50       6209 $self->warn("Getter only") if @_;
856 5229         10182 return $self->{'hit'};
857             }
858              
859             =head2 hsps
860              
861             Title : hsps
862             Usage : $tiling->hsps()
863             Function: Container for the HSP objects associated with invocant
864             Example :
865             Returns : an array of hsps associated with the hit
866             Args : on set, new value (an arrayref or undef, optional)
867              
868             =cut
869              
870             sub hsps{
871 1768     1768 1 1302 my $self = shift;
872 1768 100       2953 return $self->{'hsps'} = shift if @_;
873 1249         894 return @{$self->{'hsps'}};
  1249         3225  
874             }
875              
876             =head2 contexts
877              
878             Title : contexts
879             Usage : @contexts = $tiling->context($type) or
880             @indices = $tiling->context($type, $context)
881             Function: Retrieve the set of available contexts in the hit,
882             or the indices of hsps having the given context
883             (integer indices for the array returned by $self->hsps)
884             Returns : array of scalar context strings or
885             array of scalar positive integers
886             undef if no hsps in given context
887             Args : $type: one of 'query', 'hit', 'subject'
888             optional $context: context string
889              
890             =cut
891              
892             sub contexts{
893 1032     1032 1 64327 my $self = shift;
894 1032         962 my ($type, $context) = @_;
895 1032         1305 $self->_check_type_arg(\$type);
896 1032 100       1413 return keys %{$self->{"_contexts_$type"}} unless defined $context;
  403         1368  
897 629 50       1387 return undef unless $self->{"_contexts_$type"}{$context};
898 629         421 return @{$self->{"_contexts_$type"}{$context}};
  629         1540  
899             }
900              
901             =head2 mapping
902              
903             Title : mapping
904             Usage : $tiling->mapping($type)
905             Function: Retrieve the mapping coefficient for the sequence type
906             based on the underlying algorithm
907             Returns : scalar integer (mapping coefficient)
908             Args : $type: one of 'query', 'hit', 'subject'
909             Note : getter only (set in constructor)
910              
911             =cut
912              
913             sub mapping{
914 505     505 1 381 my $self = shift;
915 505         378 my $type = shift;
916 505         653 $self->_check_type_arg(\$type);
917 505         1020 return $self->{"_mapping_${type}"};
918             }
919              
920             =head2 default_context
921              
922             Title : default_context
923             Usage : $tiling->default_context($type)
924             Function: Retrieve the default strand/frame context string
925             for the sequence type based on the underlying algorithm
926             Returns : scalar string (context string)
927             Args : $type: one of 'query', 'hit', 'subject'
928             Note : getter only (set in constructor)
929              
930             =cut
931              
932             sub default_context{
933 298     298 1 194 my $self = shift;
934 298         186 my $type = shift;
935 298         327 $self->_check_type_arg(\$type);
936 298         477 return $self->{"_def_context_${type}"};
937             }
938              
939             =head2 algorithm
940              
941             Title : algorithm
942             Usage : $tiling->algorithm
943             Function: Retrieve the algorithm name associated with the
944             invocant's hit object
945             Returns : scalar string
946             Args : none
947             Note : getter only (set in constructor)
948              
949             =cut
950              
951             sub algorithm{
952 0     0 1 0 my $self = shift;
953 0 0       0 $self->warn("Getter only") if @_;
954 0         0 return $self->{"_algorithm"};
955             }
956              
957             =head1 "PRIVATE" METHODS
958              
959             =head2 Calculators
960              
961             See L for lower level
962             calculation methods.
963              
964             =head2 _calc_coverage_map
965              
966             Title : _calc_coverage_map
967             Usage : $tiling->_calc_coverage_map($type)
968             Function: Calculates the coverage map for the object's associated
969             hit from the perspective of the desired $type (see Args:)
970             and sets the coverage_map() property
971             Returns : True on success
972             Args : optional scalar $type: one of 'hit'|'subject'|'query'
973             default is 'query'
974             Note : The "coverage map" is an array with the following format:
975             ( [ $component_interval => [ @containing_hsps ] ], ... ),
976             where $component_interval is a closed interval (see
977             DESCRIPTION) of the form [$a0, $a1] with $a0 <= $a1, and
978             @containing_hsps is an array of all HspI objects in the hit
979             which completely contain the $component_interval.
980             The set of $component_interval's is a disjoint decomposition
981             of the minimum set of minimal intervals that completely
982             cover the hit's HSPs (from the perspective of the $type)
983             Note : This calculates the map for all strand/frame contexts available
984             in the hit
985              
986             =cut
987              
988             sub _calc_coverage_map {
989 207     207   192 my $self = shift;
990 207         271 my ($type) = @_;
991 207         338 $self->_check_type_arg(\$type);
992              
993             # obtain the [start, end] intervals for all hsps in the hit (relative
994             # to the type)
995 207 50       447 unless ($self->{'hsps'}) {
996 0         0 $self->warn("No HSPs for this hit");
997 0         0 return;
998             }
999              
1000 207         182 my (@map, @hsps, %filters, @intervals);
1001            
1002              
1003             # conversion here?
1004 207         362 my $c = $self->mapping($type);
1005            
1006             # create the possible maps
1007 207         339 for my $context ($self->contexts($type)) {
1008 219         307 @map = ();
1009 219         354 @hsps = ($self->hsps)[$self->contexts($type, $context)];
1010 219         846 @intervals = get_intervals_from_hsps( $type, @hsps );
1011             # the "frame"
1012 219         502 my $f = ($intervals[0]->[0] - 1) % $c;
1013              
1014             # convert interval endpoints...
1015 219         352 for (@intervals) {
1016 536         675 $$_[0] = ($$_[0] - $f + $c - 1)/$c;
1017 536         667 $$_[1] = ($$_[1] - $f)/$c;
1018             }
1019            
1020             # determine the minimal set of disjoint intervals that cover the
1021             # set of hsp intervals
1022 219         592 my @dj_set = interval_tiling(\@intervals);
1023              
1024             # decompose each disjoint interval into another set of disjoint
1025             # intervals, each of which is completely contained within the
1026             # original hsp intervals with which it overlaps
1027 219         198 my $i=0;
1028 219         186 my @decomp;
1029 219         285 for my $dj_elt (@dj_set) {
1030 432         527 my ($covering, $indices) = @$dj_elt;
1031 432         702 my @covering_hsps = @hsps[@$indices];
1032 432         491 my @coverers = @intervals[@$indices];
1033 432         779 @decomp = decompose_interval( \@coverers );
1034 432         564 for (@decomp) {
1035 638         442 my ($component, $container_indices) = @{$_};
  638         703  
1036 638         1286 push @map, [ $component,
1037             [@covering_hsps[@$container_indices]] ];
1038             }
1039 432         535 1;
1040             }
1041            
1042             # unconvert the components:
1043             #####
1044 219         335 foreach (@map) {
1045 638         904 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1046 638         798 $$_[0][1] = $c*$$_[0][1] + $f;
1047             }
1048 219         327 foreach (@dj_set) {
1049 432         590 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1050 432         497 $$_[0][1] = $c*$$_[0][1] + $f;
1051             }
1052              
1053             # sort the map on the interval left-ends
1054 219         401 @map = sort { $a->[0][0]<=>$b->[0][0] } @map;
  980         696  
1055 219         694 $self->{"coverage_map_${type}_${context}"} = [@map];
1056             # set the _contig_intersection attribute here (side effect)
1057 219         313 $self->{"_contig_intersection_${type}_${context}"} = [map { $$_[0] } @map];
  638         1507  
1058             }
1059              
1060 207         550 return 1; # success
1061             }
1062              
1063             =head2 _calc_stats
1064              
1065             Title : _calc_stats
1066             Usage : $tiling->_calc_stats($type, $action, $context)
1067             Function: Calculates [estimated] tiling statistics (identities, conserved sites
1068             length) and sets the public accessors
1069             Returns : True on success
1070             Args : scalar $type: one of 'hit', 'subject', 'query'
1071             default is 'query'
1072             optional scalar $action: requests calculation method
1073             currently one of 'exact', 'est', 'fast', 'max'
1074             option scalar $context: strand/frame context string
1075             Note : Action: The statistics are calculated by summing quantities
1076             over the disjoint component intervals, taking into account
1077             coverage of those intervals by multiple HSPs. The action
1078             tells the algorithm how to obtain those quantities--
1079             'exact' will use Bio::Search::HSP::HSPI::matches
1080             to count the appropriate segment of the homology string;
1081             'est' will estimate the statistics by multiplying the
1082             fraction of the HSP overlapped by the component interval
1083             (see MapTileUtils) by the BLAST-reported identities/postives
1084             (this may be convenient for BLAST summary report formats)
1085             * Both exact and est take the average over the number of HSPs
1086             that overlap the component interval.
1087             'max' uses the exact method to calculate the statistics,
1088             and returns only the maximum identites/positives over
1089             overlapping HSP for the component interval. No averaging
1090             is involved here.
1091             'fast' doesn't involve tiling at all (hence the name),
1092             but it seems like a very good estimate, and uses only
1093             reported values, and so does not require sequence data. It
1094             calculates an average of reported identities, conserved
1095             sites, and lengths, over unmodified hsps in the hit,
1096             weighted by the length of the hsps.
1097              
1098             =cut
1099              
1100             sub _calc_stats {
1101 532     532   423 my $self = shift;
1102 532         595 my ($type, $action, $context) = @_;
1103             # need to check args here, in case method is called internally.
1104 532         665 $self->_check_type_arg(\$type);
1105 532         727 $self->_check_action_arg(\$action);
1106 532         724 $self->_check_context_arg($type, \$context);
1107 532         630 my ($ident, $cons, $length) = (0,0,0);
1108              
1109             # fast : avoid coverage map altogether, get a pretty damn
1110             # good estimate with a weighted average of reported hsp
1111             # statistics
1112              
1113 532 100       941 ($action eq 'fast') && do {
1114 204         385 my @hsps = $self->hit->hsps;
1115 204         442 @hsps = @hsps[$self->contexts($type, $context)];
1116             # weights for averages
1117 204         352 my @wt = map {$_->length($type)} @hsps;
  450         829  
1118 204         11969 my $sum = eval( join('+',@wt) );
1119 204         962 $_ /= $sum for (@wt);
1120 204         279 for (@hsps) {
1121 450         566 my $wt = shift @wt;
1122 450         1164 $ident += $wt*$_->matches_MT($type,'identities');
1123 450         921 $cons += $wt*$_->matches_MT($type,'conserved');
1124 450         955 $length += $wt*$_->length($type);
1125             }
1126             };
1127              
1128             # or, do tiling
1129              
1130             # calculate identities/conserved sites in tiling
1131             # estimate based on the fraction of the component interval covered
1132             # and ident/cons reported by the HSPs
1133 532 100       839 ($action ne 'fast') && do {
1134 328         682 foreach ($self->coverage_map($type, $context)) {
1135 1384         1189 my ($intvl, $hsps) = @{$_};
  1384         1958  
1136 1384         2067 my $len = ($$intvl[1]-$$intvl[0]+1);
1137 1384 100       2040 my $ncover = ($action eq 'max') ? 1 : scalar @$hsps;
1138 1384         1251 my ($acc_i, $acc_c) = (0,0);
1139 1384         1353 foreach my $hsp (@$hsps) {
1140 1764         1570 for ($action) {
1141 1764 100       2579 ($_ eq 'est') && do {
1142 788         2549 my ($inc_i, $inc_c) = $hsp->matches_MT(
1143             -type => $type,
1144             -action => 'searchutils',
1145             );
1146 788         1734 my $frac = $len/$hsp->length($type);
1147 788         881 $acc_i += $inc_i * $frac;
1148 788         573 $acc_c += $inc_c * $frac;
1149 788         1202 last;
1150             };
1151 976 100       1326 ($_ eq 'max') && do {
1152 483         1587 my ($inc_i, $inc_c) = $hsp->matches_MT(
1153             -type => $type,
1154             -action => 'searchutils',
1155             -start => $$intvl[0],
1156             -end => $$intvl[1]
1157             );
1158 483 100       1003 $acc_i = ($acc_i > $inc_i) ? $acc_i : $inc_i;
1159 483 100       552 $acc_c = ($acc_c > $inc_c) ? $acc_c : $inc_c;
1160 483         730 last;
1161             };
1162 493 50 33     1453 (!$_ || ($_ eq 'exact')) && do {
1163 493         1705 my ($inc_i, $inc_c) = $hsp->matches_MT(
1164             -type => $type,
1165             -action => 'searchutils',
1166             -start => $$intvl[0],
1167             -end => $$intvl[1]
1168             );
1169 493         611 $acc_i += $inc_i;
1170 493         297 $acc_c += $inc_c;
1171 493         714 last;
1172             };
1173             }
1174             }
1175 1384         1695 $ident += ($acc_i/$ncover);
1176 1384         1003 $cons += ($acc_c/$ncover);
1177 1384         1596 $length += $len;
1178             }
1179             };
1180            
1181 532         1700 $self->{"identities_${type}_${action}_${context}"} = $ident;
1182 532         1152 $self->{"conserved_${type}_${action}_${context}"} = $cons;
1183 532         1105 $self->{"length_${type}_${action}_${context}"} = $length;
1184            
1185 532         758 return 1;
1186             }
1187              
1188             =head2 Tiling Helper Methods
1189              
1190             =cut
1191              
1192             # coverage_map is of the form
1193             # ( [ $interval, \@containing_hsps ], ... )
1194              
1195             # so, for each interval, pick one of the containing hsps,
1196             # and return the union of all the picks.
1197              
1198             # use the combinatorial generating iterator, with
1199             # the urns containing the @containing_hsps for each
1200             # interval
1201              
1202             =head2 _make_tiling_iterator
1203              
1204             Title : _make_tiling_iterator
1205             Usage : $self->_make_tiling_iterator($type)
1206             Function: Create an iterator code ref that will step through all
1207             minimal combinations of HSPs that produce complete coverage
1208             of the $type ('hit', 'subject', 'query') sequence,
1209             and set the correct iterator property of the invocant
1210             Example :
1211             Returns : The iterator
1212             Args : scalar $type, one of 'hit', 'subject', 'query';
1213             default is 'query'
1214              
1215             =cut
1216              
1217             sub _make_tiling_iterator {
1218             ### create the urns
1219 3     3   7 my $self = shift;
1220 3         3 my ($type, $context) = @_;
1221 3         7 $self->_check_type_arg(\$type);
1222 3         6 $self->_check_context_arg($type, \$context);
1223              
1224             # initialize the urns
1225 3         13 my @urns = map { [0, $$_[1]] } $self->coverage_map($type, $context);
  43         49  
1226              
1227 3         6 my $FINISHED = 0;
1228             my $iter = sub {
1229             # rewind?
1230 271 100   271   333 if (my $rewind = shift) {
1231             # reinitialize urn indices
1232 2         9 $$_[0] = 0 for (@urns);
1233 2         4 $FINISHED = 0;
1234 2         5 return 1;
1235             }
1236             # check if done...
1237 269 100       295 return if $FINISHED;
1238              
1239 266         183 my $finished_incrementing = 0;
1240             # @ret is the collector of urn choices
1241 266         166 my @ret;
1242              
1243 266         258 for my $urn (@urns) {
1244 4482         3133 my ($n, $hsps) = @$urn;
1245 4482         3042 push @ret, $$hsps[$n];
1246 4482 100       4914 unless ($finished_incrementing) {
1247 1086 100       994 if ($n == $#$hsps) { $$urn[0] = 0; }
  823         708  
1248 263         193 else { ($$urn[0])++; $finished_incrementing = 1 }
  263         245  
1249             }
1250             }
1251              
1252             # backstop...
1253 266 100       312 $FINISHED = 1 unless $finished_incrementing;
1254             # uniquify @ret
1255             # $hsp->rank is a unique identifier for an hsp in a hit.
1256             # preserve order in @ret
1257            
1258 266         137 my (%order, %uniq);
1259 266         1369 @order{(0..$#ret)} = @ret;
1260 266         990 $uniq{$order{$_}->rank} = $_ for (0..$#ret);
1261 266         534 @ret = @order{ sort {$a<=>$b} values %uniq };
  5818         3883  
1262              
1263 266         818 return @ret;
1264 3         21 };
1265 3         14 return $iter;
1266             }
1267              
1268             =head2 _tiling_iterator
1269              
1270             Title : _tiling_iterator
1271             Usage : $tiling->_tiling_iterator($type,$context)
1272             Function: Retrieve the tiling iterator coderef for the requested
1273             $type ('hit', 'subject', 'query')
1274             Example :
1275             Returns : coderef to the desired iterator
1276             Args : scalar $type, one of 'hit', 'subject', 'query'
1277             default is 'query'
1278             option scalar $context: strand/frame context string
1279             Note : getter only
1280              
1281             =cut
1282              
1283             sub _tiling_iterator {
1284 270     270   175 my $self = shift;
1285 270         216 my ($type, $context) = @_;
1286 270         252 $self->_check_type_arg(\$type);
1287 270         330 $self->_check_context_arg($type, \$context);
1288              
1289 270 100       516 if (!defined $self->{"_tiling_iterator_${type}_${context}"}) {
1290 2         9 $self->{"_tiling_iterator_${type}_${context}"} =
1291             $self->_make_tiling_iterator($type,$context);
1292             }
1293 270         438 return $self->{"_tiling_iterator_${type}_${context}"};
1294             }
1295             =head2 Construction Helper Methods
1296              
1297             See also L.
1298              
1299             =cut
1300              
1301             sub _check_type_arg {
1302 8297     8297   5338 my $self = shift;
1303 8297         5781 my $typeref = shift;
1304 8297   100     10661 $$typeref ||= 'query';
1305 8297 50       43148 $self->throw("Unknown type '$$typeref'") unless grep(/^$$typeref$/, qw( hit query subject ));
1306 8297 100       10233 $$typeref = 'hit' if $$typeref eq 'subject';
1307 8297         6088 return 1;
1308             }
1309              
1310             sub _check_action_arg {
1311 3721     3721   2965 my $self = shift;
1312 3721         2532 my $actionref = shift;
1313 3721 100       4361 if (!$$actionref) {
1314 17 100       44 $$actionref = ($self->_has_sequence_data ? 'exact' : 'est');
1315             }
1316             else {
1317 3704 50       23527 $self->throw("Calc action '$$actionref' unrecognized") unless grep /^$$actionref$/, qw( est exact fast max );
1318 3704 100 100     7246 if ($$actionref ne 'est' and !$self->_has_sequence_data) {
1319 12         47 $self->warn("Blast file did not possess sequence data; defaulting to 'est' action");
1320 12         14 $$actionref = 'est';
1321             }
1322             }
1323 3721         3682 return 1;
1324             }
1325              
1326             sub _check_context_arg {
1327 4613     4613   3776 my $self = shift;
1328 4613         3792 my ($type, $contextref) = @_;
1329 4613 100       5619 if (!$$contextref) {
1330 298 50       320 $self->throw("Type '$type' requires strand/frame context for algorithm ".$self->algorithm) unless ($self->mapping($type) == 1);
1331             # set default according to default_context attrib
1332 298         339 $$contextref = $self->default_context($type);
1333             }
1334             else {
1335 4315 50       7345 ($$contextref =~ /^[mp]$/) && do { $$contextref .= '_' };
  0         0  
1336 4315 50       8204 $self->throw("Context '$$contextref' unrecognized") unless
1337             $$contextref =~ /all|[mp][0-2_]/;
1338             }
1339            
1340             }
1341              
1342             =head2 _make_context_key
1343              
1344             Title : _make_context_key
1345             Alias : _context
1346             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1347             Function: create a string indicating strand/frame context; serves as
1348             component of memoizing hash keys
1349             Returns : scalar string
1350             Args : -type => one of ('query', 'hit', 'subject')
1351             -strand => one of (1,0,-1)
1352             -frame => one of (-2, 1, 0, 1, -2)
1353             called w/o args: returns 'all'
1354              
1355             =cut
1356              
1357             sub _make_context_key {
1358 546     546   389 my $self = shift;
1359 546         1065 my @args = @_;
1360 546         1762 my ($type, $strand, $frame) = $self->_rearrange([qw(TYPE STRAND FRAME)], @args);
1361 546         1286 _check_type_arg(\$type);
1362 546 50 33     941 return 'all' unless (defined $strand or defined $frame);
1363 546 100 66     1508 if ( defined $strand && $self->_has_strand($type) ) {
1364 211 100 66     572 if (defined $frame && $self->_has_frame($type)) {
1365 94 100       365 return ($strand >= 0 ? 'p' : 'm').abs($frame);
1366             }
1367             else {
1368 117 100       379 return ($strand >= 0 ? 'p_' : 'm_');
1369             }
1370             }
1371             else {
1372 335 50 33     827 if (defined $frame && $self->_has_frame($type)) {
1373 0         0 $self->warn("Frame defined without strand; punting with plus strand");
1374 0         0 return 'p'.abs($frame);
1375             }
1376             else {
1377 335         683 return 'all';
1378             }
1379             }
1380             }
1381              
1382             =head2 _context
1383              
1384             Title : _context
1385             Alias : _make_context_key
1386             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1387             Function: create a string indicating strand/frame context; serves as
1388             component of memoizing hash keys
1389             Returns : scalar string
1390             Args : -type => one of ('query', 'hit', 'subject')
1391             -strand => one of (1,0,-1)
1392             -frame => one of (-2, 1, 0, 1, -2)
1393             called w/o args: returns 'all'
1394              
1395             =cut
1396              
1397 546     546   959 sub _context { shift->_make_context_key(@_) }
1398              
1399             =head2 Predicates
1400              
1401             Most based on a reading of the algorithm name with a configuration lookup.
1402              
1403             =over
1404              
1405             =item _has_sequence_data()
1406              
1407             =cut
1408              
1409             sub _has_sequence_data {
1410 2253     2253   1629 my $self = shift;
1411 2253 50       2835 $self->throw("Hit attribute not yet set") unless defined $self->hit;
1412 2253 100       2746 return (($self->hit->hsps)[0]->seq_str('match') ? 1 : 0);
1413             }
1414              
1415             =item _has_logical_length()
1416              
1417             =cut
1418              
1419             sub _has_logical_length {
1420 0     0   0 my $self = shift;
1421 0         0 my $type = shift;
1422 0         0 $self->_check_type_arg(\$type);
1423             # based on mapping coeff
1424 0 0       0 $self->throw("Mapping coefficients not yet set") unless defined $self->mapping($type);
1425 0         0 return ($self->mapping($type) > 1);
1426             }
1427              
1428             =item _has_strand()
1429              
1430             =cut
1431              
1432             sub _has_strand {
1433 546     546   539 my $self = shift;
1434 546         491 my $type = shift;
1435 546         795 $self->_check_type_arg(\$type);
1436 546         1834 return $self->{"_has_strand_${type}"};
1437             }
1438              
1439             =item _has_frame()
1440              
1441             =cut
1442              
1443             sub _has_frame {
1444 546     546   633 my $self = shift;
1445 546         519 my $type = shift;
1446 546         677 $self->_check_type_arg(\$type);
1447 546         1671 return $self->{"_has_frame_${type}"};
1448             }
1449              
1450             =back
1451              
1452             =head1 Private Accessors
1453              
1454             =head2 _contig_intersection
1455              
1456             Title : _contig_intersection
1457             Usage : $tiling->_contig_intersection($type)
1458             Function: Return the minimal set of $type coordinate intervals
1459             covered by the invocant's HSPs
1460             Returns : array of intervals (2-member arrayrefs; see MapTileUtils)
1461             Args : scalar $type: one of 'query', 'hit', 'subject'
1462              
1463             =cut
1464              
1465             sub _contig_intersection {
1466 8     8   11 my $self = shift;
1467 8         11 my ($type, $context) = @_;
1468 8         11 $self->_check_type_arg(\$type);
1469 8         17 $self->_check_context_arg($type, \$context);
1470 8 100       27 if (!defined $self->{"_contig_intersection_${type}_${context}"}) {
1471 4         15 $self->_calc_coverage_map($type);
1472             }
1473 8         11 return @{$self->{"_contig_intersection_${type}_${context}"}};
  8         27  
1474             }
1475              
1476             =head2 _reported_length
1477              
1478             Title : _reported_length
1479             Usage : $tiling->_reported_length($type)
1480             Function: Get the total length of the seq $type
1481             for the invocant's hit object, as reported
1482             by (not calculated from) the input data file
1483             Returns : scalar int
1484             Args : scalar $type: one of 'query', 'hit', 'subject'
1485             Note : This is kludgy; the hit object does not currently
1486             maintain accessors for these values, but the
1487             hsps possess these attributes. This is a wrapper
1488             that allows a consistent access method in the
1489             MapTiling code.
1490             Note : Since this number is based on a reported length,
1491             it is already a "logical length".
1492              
1493             =cut
1494              
1495             sub _reported_length {
1496 4     4   5 my $self = shift;
1497 4         7 my $type = shift;
1498 4         12 $self->_check_type_arg(\$type);
1499 4         9 my $key = uc( $type."_LENGTH" );
1500 4         13 return ($self->hsps)[0]->{$key};
1501             }
1502              
1503             1;
1504