File Coverage

Bio/Search/Tiling/MapTiling.pm
Criterion Covered Total %
statement 396 473 83.7
branch 93 136 68.3
condition 26 51 50.9
subroutine 43 47 91.4
pod 22 24 91.6
total 580 731 79.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Tiling::MapTiling
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Mark A. Jensen
7             #
8             # Copyright Mark A. Jensen
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Tiling::MapTiling - An implementation of an HSP tiling
17             algorithm, with methods to obtain frequently-requested statistics
18              
19             =head1 SYNOPSIS
20              
21             # get a BLAST $hit from somewhere, then
22             $tiling = Bio::Search::Tiling::MapTiling->new($hit);
23              
24             # stats
25             $numID = $tiling->identities();
26             $numCons = $tiling->conserved();
27             $query_length = $tiling->length('query');
28             $subject_length = $tiling->length('subject'); # or...
29             $subject_length = $tiling->length('hit');
30              
31             # get a visual on the coverage map
32             print $tiling->coverage_map_as_text('query',$context,'LEGEND');
33              
34             # tilings
35             $context = $tiling->_context( -type => 'subject', -strand=> 1, -frame=>1);
36             @covering_hsps_for_subject = $tiling->next_tiling('subject',$context);
37             $context = $tiling->_context( -type => 'query', -strand=> -1, -frame=>0);
38             @covering_hsps_for_query = $tiling->next_tiling('query', $context);
39              
40             =head1 DESCRIPTION
41              
42             Frequently, users want to use a set of high-scoring pairs (HSPs)
43             obtained from a BLAST or other search to assess the overall level of
44             identity, conservation, or coverage represented by matches between a
45             subject and a query sequence. Because a set of HSPs frequently
46             describes multiple overlapping sequence fragments, a simple summation of
47             statistics over the HSPs will generally overestimate those
48             statistics. To obtain an accurate estimate of global hit statistics, a
49             'tiling' of HSPs onto either the subject or the query sequence must be
50             performed, in order to properly correct for this.
51              
52             This module will execute a tiling algorithm on a given hit based on an
53             interval decomposition I'm calling the "coverage map". Internal object
54             methods compute the various statistics, which are then stored in
55             appropriately-named public object attributes. See
56             L for more info on the algorithm.
57              
58             =head2 STRAND/FRAME CONTEXTS
59              
60             In BLASTX, TBLASTN, and TBLASTX reports, strand and frame information
61             are reported for the query, subject, or query and subject,
62             respectively, for each HSP. Tilings for these sequence types are only
63             meaningful when they include HSPs in the same strand and frame, or
64             "context". So, in these situations, the context must be specified
65             in the method calls or the methods will throw.
66              
67             Contexts are specified as strings: C<[ 'all' | [m|p][_|0|1|2] ]>, where
68             C = all HSPs (will throw if context must be specified), C = minus
69             strand, C

= plus strand, and C<_> = no frame info, C<0,1,2> = respective

70             (absolute) frame. The L method will convert a (strand,
71             frame) specification to a context string, e.g.:
72              
73             $context = $self->_context(-type=>'query', -strand=>-1, -frame=>-2);
74              
75             returns C.
76              
77             The contexts present among the HSPs in a hit are identified and stored
78             for convenience upon object construction. These are accessed off the
79             object with the L method. If contexts don't apply for the
80             given report, this returns C<('all')>.
81              
82             =head1 TILED ALIGNMENTS
83              
84             The experimental method L will use a tiling
85             to concatenate tiled hsps into a series of L
86             objects:
87              
88             @alns = $tiling->get_tiled_alns($type, $context);
89              
90             Each alignment contains two sequences with ids 'query' and 'subject',
91             and consists of a concatenation of tiling HSPs which overlap or are
92             directly adjacent. The alignment are returned in C<$type> sequence
93             order. When HSPs overlap, the alignment sequence is taken from the HSP
94             which comes first in the coverage map array.
95              
96             The sequences in each alignment contain features (even though they are
97             L objects) which map the original query/subject
98             coordinates to the new alignment sequence coordinates. You can
99             determine the original BLAST fragments this way:
100              
101             $aln = ($tiling->get_tiled_alns)[0];
102             $qseq = $aln->get_seq_by_id('query');
103             $hseq = $aln->get_seq_by_id('subject');
104             foreach my $feat ($qseq->get_SeqFeatures) {
105             $org_start = ($feat->get_tag_values('query_start'))[0];
106             $org_end = ($feat->get_tag_values('query_end'))[0];
107             # original fragment as represented in the tiled alignment:
108             $org_fragment = $feat->seq;
109             }
110             foreach my $feat ($hseq->get_SeqFeatures) {
111             $org_start = ($feat->get_tag_values('subject_start'))[0];
112             $org_end = ($feat->get_tag_values('subject_end'))[0];
113             # original fragment as represented in the tiled alignment:
114             $org_fragment = $feat->seq;
115             }
116              
117             =head1 DESIGN NOTE
118              
119             The major calculations are made just-in-time, and then memoized. So,
120             for example, for a given MapTiling object, a coverage map would
121             usually be calculated only once (for the query), and at most twice (if
122             the subject perspective is also desired), and then only when a
123             statistic is first accessed. Afterward, the map and/or any statistic
124             is read from storage. So feel free to call the statistic methods
125             frequently if it suits you.
126              
127             =head1 FEEDBACK
128              
129             =head2 Mailing Lists
130              
131             User feedback is an integral part of the evolution of this and other
132             Bioperl modules. Send your comments and suggestions preferably to
133             the Bioperl mailing list. Your participation is much appreciated.
134              
135             bioperl-l@bioperl.org - General discussion
136             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
137              
138             =head2 Support
139              
140             Please direct usage questions or support issues to the mailing list:
141              
142             I
143              
144             rather than to the module maintainer directly. Many experienced and
145             reponsive experts will be able look at the problem and quickly
146             address it. Please include a thorough description of the problem
147             with code and data examples if at all possible.
148              
149             =head2 Reporting Bugs
150              
151             Report bugs to the Bioperl bug tracking system to help us keep track
152             of the bugs and their resolution. Bug reports can be submitted via
153             the web:
154              
155             https://github.com/bioperl/bioperl-live/issues
156              
157             =head1 AUTHOR - Mark A. Jensen
158              
159             Email maj -at- fortinbras -dot- us
160              
161             =head1 APPENDIX
162              
163             The rest of the documentation details each of the object methods.
164             Internal methods are usually preceded with a _
165              
166             =cut
167              
168             # Let the code begin...
169              
170             package Bio::Search::Tiling::MapTiling;
171 1     1   853 use strict;
  1         1  
  1         23  
172 1     1   2 use warnings;
  1         1  
  1         22  
173              
174             # Object preamble - inherits from Bio::Root::Root
175             #use lib '../../..';
176              
177 1     1   385 use Bio::Root::Root;
  1         2  
  1         29  
178 1     1   462 use Bio::Search::Tiling::TilingI;
  1         2  
  1         21  
179 1     1   433 use Bio::Search::Tiling::MapTileUtils;
  1         2  
  1         67  
180 1     1   430 use Bio::LocatableSeq;
  1         3  
  1         32  
181              
182             # use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
183 1     1   5 use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
  1         1  
  1         3809  
184              
185             =head1 CONSTRUCTOR
186              
187             =head2 new
188              
189             Title : new
190             Usage : my $obj = new Bio::Search::Tiling::GenericTiling();
191             Function: Builds a new Bio::Search::Tiling::GenericTiling object
192             Returns : an instance of Bio::Search::Tiling::GenericTiling
193             Args : -hit => $a_Bio_Search_Hit_HitI_object
194             general filter function:
195             -hsp_filter => sub { my $this_hsp = shift;
196             ...;
197             return 1 if $wanted;
198             return 0; }
199              
200             =cut
201              
202             sub new {
203 519     519 1 3443 my $class = shift;
204 519         715 my @args = @_;
205 519         1142 my $self = $class->SUPER::new(@args);
206 519         1459 my($hit, $filter) = $self->_rearrange( [qw( HIT HSP_FILTER)],@args );
207              
208 519 50       1087 $self->throw("HitI object required") unless $hit;
209 519 50 33     2477 $self->throw("Argument must be HitI object") unless ( ref $hit && $hit->isa('Bio::Search::Hit::HitI') );
210 519         604 $self->{hit} = $hit;
211 519         1145 $self->_set_attributes();
212 519         861 $self->{"_algorithm"} = $hit->algorithm;
213              
214 519         1267 my @hsps = $hit->hsps;
215             # apply filter function if requested
216 519 50       914 if ( defined $filter ) {
217 0 0       0 if ( ref($filter) eq 'CODE' ) {
218 0 0       0 @hsps = map { $filter->($_) ? $_ : () } @hsps;
  0         0  
219             }
220             else {
221 0         0 $self->warn("-filter is not a coderef; ignoring");
222             }
223             }
224            
225             # identify available contexts
226 519         533 for my $t (qw( query hit )) {
227 1038         861 my %contexts;
228 1038         1586 for my $i (0..$#hsps) {
229 546         1600 my $ctxt = $self->_context(
230             -type => $t,
231             -strand => $hsps[$i]->strand($t),
232             -frame => $hsps[$i]->frame($t));
233 546   100     1552 $contexts{$ctxt} ||= [];
234 546         456 push @{$contexts{$ctxt}}, $i;
  546         1117  
235             }
236 1038         1941 $self->{"_contexts_${t}"} = \%contexts;
237             }
238              
239 519 100       1422 $self->warn("No HSPs present in hit after filtering") unless (@hsps);
240 519         868 $self->hsps(\@hsps);
241 519         1678 return $self;
242             }
243              
244             # a tiling is based on the set of hsps contained in a single hit.
245             # check all the boundaries - zero hsps, one hsp, all disjoint hsps
246              
247             =head1 TILING ITERATORS
248              
249             =head2 next_tiling
250              
251             Title : next_tiling
252             Usage : @hsps = $self->next_tiling($type);
253             Function: Obtain a tiling: a minimal set of HSPs covering the $type
254             ('hit', 'subject', 'query') sequence
255             Example :
256             Returns : an array of HSPI objects
257             Args : scalar $type: one of 'hit', 'subject', 'query', with
258             'subject' an alias for 'hit'
259              
260             =cut
261              
262             sub next_tiling{
263 268     268 1 1872 my $self = shift;
264 268         233 my ($type, $context) = @_;
265 268         273 $self->_check_type_arg(\$type);
266 268         389 $self->_check_context_arg($type, \$context);
267 268         323 return $self->_tiling_iterator($type, $context)->();
268             }
269              
270             =head2 rewind_tilings
271              
272             Title : rewind_tilings
273             Usage : $self->rewind_tilings($type)
274             Function: Reset the next_tilings($type) iterator
275             Example :
276             Returns : True on success
277             Args : scalar $type: one of 'hit', 'subject', 'query';
278             default is 'query'
279              
280             =cut
281              
282             sub rewind_tilings{
283 2     2 1 294 my $self = shift;
284 2         3 my ($type,$context) = @_;
285 2         6 $self->_check_type_arg(\$type);
286 2         5 $self->_check_context_arg($type, \$context);
287 2         5 return $self->_tiling_iterator($type, $context)->('REWIND');
288             }
289              
290             =head1 ALIGNMENTS
291              
292             =head2 get_tiled_alns()
293              
294             Title : get_tiled_alns
295             Usage : @alns = $tiling->get_tiled_alns($type, $context)
296             Function: Use a tiling to construct a minimal set of alignment
297             objects covering the region specified by $type/$context
298             by splicing adjacent HSP tiles
299             Returns : an array of Bio::SimpleAlign objects; see Note below
300             Args : scalar $type: one of 'hit', 'subject', 'query'
301             default is 'query'
302             scalar $context: strand/frame context string
303             Following $type and $context, an array of
304             ordered, tiled HSP objects can be specified; this is
305             the tiling that will directly the alignment construction
306             default -- the first tiling provided by a tiling iterator
307             Notes : Each returned alignment is a concatenation of adjacent tiles.
308             The set of alignments will cover all regions described by the
309             $type/$context pair in the hit. The pair of sequences in each
310             alignment have ids 'query' and 'subject', and each sequence
311             possesses SeqFeatures that map the original query or subject
312             coordinates to the sequence coordinates in the tiled alignment.
313            
314             =cut
315              
316             sub get_tiled_alns {
317 1     1 1 2 my $self = shift;
318 1         2 my ($type, $context) = @_;
319 1         3 $self->_check_type_arg(\$type);
320 1         4 $self->_check_context_arg($type, \$context);
321 1         1 my $t = shift; # first arg after type/context, arrayref to a tiling
322 1         2 my @tiling;
323 1 50 33     5 if ($t && (ref($t) eq 'ARRAY')) {
324 0         0 @tiling = @$t;
325             }
326             else { # otherwise, take the first tiling available
327              
328 1         4 @tiling = $self->_make_tiling_iterator($type,$context)->();
329             }
330 1         11 my @ret;
331              
332 1         4 my @map = $self->coverage_map($type, $context);
333 1         3 my @intervals = map {$_->[0]} @map; # disjoint decomp
  13         12  
334             # divide into disjoint covering groups
335 1         5 my @groups = covering_groups(@intervals);
336              
337 1         995 require Bio::SimpleAlign;
338 1         11 require Bio::SeqFeature::Generic;
339             # cut hsp aligns along the disjoint decomp
340             # look for gaps...or add gaps?
341 1         3 my ($q_start, $h_start, $q_strand, $h_strand);
342             # build seqs
343 1         4 for my $grp (@groups) {
344 6         19 my $taln = Bio::SimpleAlign->new();
345 6         8 my (@qfeats, @hfeats);
346 6         6 my $query_string = '';
347 6         6 my $hit_string = '';
348 6         7 my ($qlen,$hlen) = (0,0);
349 6         7 my ($qinc, $hinc, $qstart, $hstart);
350 6         12 for my $intvl (@$grp) {
351             # following just chooses the first available hsp containing the
352             # interval -- this is arbitrary, could be smarter.
353 12         33 my $aln = ( containing_hsps($intvl, @tiling) )[0]->get_aln;
354 12         25 my $qseq = $aln->get_seq_by_pos(1);
355 12         20 my $hseq = $aln->get_seq_by_pos(2);
356 12   66     30 $qstart ||= $qseq->start;
357 12   66     27 $hstart ||= $hseq->start;
358             # calculate the slice boundaries
359 12         11 my ($beg, $end);
360 12         16 for ($type) {
361 12 50       33 /query/ && do {
362 12         22 $beg = $aln->column_from_residue_number($qseq->id, $intvl->[0]);
363 12         19 $end = $aln->column_from_residue_number($qseq->id, $intvl->[1]);
364 12         22 last;
365             };
366 0 0       0 /subject|hit/ && do {
367 0         0 $beg = $aln->column_from_residue_number($hseq->id, $intvl->[0]);
368 0         0 $end = $aln->column_from_residue_number($hseq->id, $intvl->[1]);
369 0         0 last;
370             };
371             }
372 12         26 $aln = $aln->slice($beg, $end);
373 12         32 $qseq = $aln->get_seq_by_pos(1);
374 12         20 $hseq = $aln->get_seq_by_pos(2);
375 12         19 $qinc = $qseq->length - $qseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
376 12         22 $hinc = $hseq->length - $hseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
377              
378 12 50 33     33 push @qfeats, Bio::SeqFeature::Generic->new(
    50 33        
379             -start => $qlen+1,
380             -end => $qlen+$qinc,
381             -strand => $qseq->strand,
382             -primary => 'query',
383             -source_tag => 'BLAST',
384             -display_name => 'query coordinates',
385             -tag => { query_id => $qseq->id,
386             query_desc => $qseq->desc,
387             query_start => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*$qlen,
388             query_end => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*($qlen+$qinc-1),
389             }
390             );
391 12 50 33     60 push @hfeats, Bio::SeqFeature::Generic->new(
    50 33        
392             -start => $hlen+1,
393             -end => $hlen+$hinc,
394             -strand => $hseq->strand,
395             -primary => 'subject/hit',
396             -source_tag => 'BLAST',
397             -display_name => 'subject/hit coordinates',
398             -tag => { subject_id => $hseq->id,
399             subject_desc => $hseq->desc,
400             subject_start => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*$hlen,
401             subject_end => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*($hlen+$hinc-1)
402             }
403             );
404 12         49 $query_string .= $qseq->seq;
405 12         21 $hit_string .= $hseq->seq;
406 12         13 $qlen += $qinc;
407 12         32 $hlen += $hinc;
408             }
409             # create the LocatableSeqs and add the features to each
410             # then add the seqs to the new aln
411             # note in MapTileUtils, Bio::FeatureHolderI methods have been
412             # mixed into Bio::LocatableSeq
413 6         22 my $qseq = Bio::LocatableSeq->new( -id => 'query',
414             -seq => $query_string);
415 6         19 $qseq->add_SeqFeature(@qfeats);
416 6         19 my $hseq = Bio::LocatableSeq->new( -id => 'subject',
417             -seq => $hit_string );
418 6         16 $hseq->add_SeqFeature(@hfeats);
419 6         17 $taln->add_seq($qseq);
420 6         13 $taln->add_seq($hseq);
421 6         17 push @ret, $taln;
422             }
423            
424 1         16 return @ret;
425             }
426              
427             =head1 STATISTICS
428              
429             =head2 identities
430              
431             Title : identities
432             Usage : $tiling->identities($type, $action, $context)
433             Function: Retrieve the calculated number of identities for the invocant
434             Example :
435             Returns : value of identities (a scalar)
436             Args : scalar $type: one of 'hit', 'subject', 'query'
437             default is 'query'
438             option scalar $action: one of 'exact', 'est', 'fast', 'max'
439             default is 'exact'
440             option scalar $context: strand/frame context string
441             Note : getter only
442              
443             =cut
444              
445             sub identities{
446 538     538 1 1038 my $self = shift;
447 538         564 my ($type, $action, $context) = @_;
448 538         766 $self->_check_type_arg(\$type);
449 538         794 $self->_check_action_arg(\$action);
450 538         798 $self->_check_context_arg($type, \$context);
451 538 100       1321 if (!defined $self->{"identities_${type}_${action}_${context}"}) {
452 531         935 $self->_calc_stats($type, $action, $context);
453             }
454 538         1088 return $self->{"identities_${type}_${action}_${context}"};
455             }
456              
457             =head2 conserved
458              
459             Title : conserved
460             Usage : $tiling->conserved($type, $action)
461             Function: Retrieve the calculated number of conserved sites for the invocant
462             Example :
463             Returns : value of conserved (a scalar)
464             Args : scalar $type: one of 'hit', 'subject', 'query'
465             default is 'query'
466             option scalar $action: one of 'exact', 'est', 'fast', 'max'
467             default is 'exact'
468             option scalar $context: strand/frame context string
469             Note : getter only
470              
471             =cut
472              
473             sub conserved{
474 538     538 1 933 my $self = shift;
475 538         595 my ($type, $action, $context) = @_;
476 538         762 $self->_check_type_arg(\$type);
477 538         803 $self->_check_action_arg(\$action);
478 538         793 $self->_check_context_arg($type, \$context);
479 538 50       1358 if (!defined $self->{"conserved_${type}_${action}_${context}"}) {
480 0         0 $self->_calc_stats($type, $action, $context);
481             }
482 538         970 return $self->{"conserved_${type}_${action}_${context}"};
483             }
484              
485             =head2 length
486              
487             Title : length
488             Usage : $tiling->length($type, $action, $context)
489             Function: Retrieve the total length of aligned residues for
490             the seq $type
491             Example :
492             Returns : value of length (a scalar)
493             Args : scalar $type: one of 'hit', 'subject', 'query'
494             default is 'query'
495             option scalar $action: one of 'exact', 'est', 'fast', 'max'
496             default is 'exact'
497             option scalar $context: strand/frame context string
498             Note : getter only
499              
500             =cut
501              
502             sub length{
503 1051     1051 1 1137 my $self = shift;
504 1051         985 my ($type,$action,$context) = @_;
505 1051         1549 $self->_check_type_arg(\$type);
506 1051         1393 $self->_check_action_arg(\$action);
507 1051         1474 $self->_check_context_arg($type, \$context);
508 1051 100       2332 if (!defined $self->{"length_${type}_${action}_${context}"}) {
509 1         4 $self->_calc_stats($type, $action, $context);
510             }
511 1051         3245 return $self->{"length_${type}_${action}_${context}"};
512             }
513              
514             =head2 frac
515              
516             Title : frac
517             Usage : $tiling->frac($type, $denom, $action, $context, $method)
518             Function: Return the fraction of sequence length consisting
519             of desired kinds of pairs (given by $method),
520             with respect to $denom
521             Returns : scalar float
522             Args : -type => one of 'hit', 'subject', 'query'
523             -denom => one of 'total', 'aligned'
524             -action => one of 'exact', 'est', 'fast', 'max'
525             -context => strand/frame context string
526             -method => one of 'identical', 'conserved'
527             Note : $denom == 'aligned', return desired_stat/num_aligned
528             $denom == 'total', return desired_stat/_reported_length
529             (i.e., length of the original input sequences)
530             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
531             reported lengths of translated dna are reduced by
532             a factor of 3, to provide fractions relative to
533             amino acid coordinates.
534              
535             =cut
536              
537             sub frac {
538 1058     1058 1 131452 my $self = shift;
539 1058         1921 my @args = @_;
540 1058         3064 my ($type, $denom, $action, $context, $method) = $self->_rearrange([qw(TYPE DENOM ACTION CONTEXT METHOD)],@args);
541 1058         2929 $self->_check_type_arg(\$type);
542 1058         1614 $self->_check_action_arg(\$action);
543 1058         2083 $self->_check_context_arg($type, \$context);
544 1058 50 33     9090 unless ($method and grep(/^$method$/, qw( identical conserved ))) {
545 0         0 $self->throw("-method must specified; one of ('identical', 'conserved')");
546             }
547 1058   50     1641 $denom ||= 'total';
548 1058 50       3409 unless (grep /^$denom/, qw( total aligned )) {
549 0         0 $self->throw("Denominator selection must be one of ('total', 'aligned'), not '$denom'");
550             }
551 1058         1843 my $key = "frac_${method}_${type}_${denom}_${action}_${context}";
552 1058         751 my $stat;
553 1058         1265 for ($method) {
554 1058 100       1636 $_ eq 'identical' && do {
555 529         875 $stat = $self->identities($type, $action, $context);
556 529         638 last;
557             };
558 529 50       891 $_ eq 'conserved' && do {
559 529         835 $stat = $self->conserved($type, $action, $context);
560 529         657 last;
561             };
562 0         0 do {
563 0         0 $self->throw("What are YOU doing here?");
564             };
565             }
566 1058 100       1995 if (!defined $self->{$key}) {
567 1046         1217 for ($denom) {
568 1046 50       1891 /total/ && do {
569 0         0 $self->{$key} =
570             $stat/$self->_reported_length($type); # need fudge fac??
571 0         0 last;
572             };
573 1046 50       1773 /aligned/ && do {
574 1046         1635 $self->{$key} =
575             $stat/$self->length($type,$action,$context);
576 1046         1127 last;
577             };
578 0         0 do {
579 0         0 $self->throw("What are YOU doing here?");
580             };
581             }
582             }
583 1058         3367 return $self->{$key};
584             }
585              
586             =head2 frac_identical
587              
588             Title : frac_identical
589             Usage : $tiling->frac_identical($type, $denom, $action, $context)
590             Function: Return the fraction of sequence length consisting
591             of identical pairs, with respect to $denom
592             Returns : scalar float
593             Args : -type => one of 'hit', 'subject', 'query'
594             -denom => one of 'total', 'aligned'
595             -action => one of 'exact', 'est', 'fast', 'max'
596             -context => strand/frame context string
597             Note : $denom == 'aligned', return conserved/num_aligned
598             $denom == 'total', return conserved/_reported_length
599             (i.e., length of the original input sequences)
600             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
601             reported lengths of translated dna are reduced by
602             a factor of 3, to provide fractions relative to
603             amino acid coordinates.
604             Note : This an alias that calls frac()
605              
606             =cut
607              
608             sub frac_identical{
609 3     3 1 5 my $self = shift;
610 3         6 my @args = @_;
611 3         12 my ($type, $denom, $action,$context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
612 3         16 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -method=>'identical', -context=>$context);
613             }
614              
615             =head2 frac_conserved
616              
617             Title : frac_conserved
618             Usage : $tiling->frac_conserved($type, $denom, $action, $context)
619             Function: Return the fraction of sequence length consisting
620             of conserved pairs, with respect to $denom
621             Returns : scalar float
622             Args : -type => one of 'hit', 'subject', 'query'
623             -denom => one of 'total', 'aligned'
624             -action => one of 'exact', 'est', 'fast', 'max'
625             -context => strand/frame context string
626             Note : $denom == 'aligned', return conserved/num_aligned
627             $denom == 'total', return conserved/_reported_length
628             (i.e., length of the original input sequences)
629             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
630             reported lengths of translated dna are reduced by
631             a factor of 3, to provide fractions relative to
632             amino acid coordinates.
633             Note : This an alias that calls frac()
634              
635             =cut
636              
637             sub frac_conserved{
638 3     3 1 6 my $self = shift;
639 3         7 my @args = @_;
640 3         13 my ($type, $denom, $action, $context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
641 3         16 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -context=>$context, -method=>'conserved');
642             }
643              
644             =head2 frac_aligned
645              
646             Title : frac_aligned
647             Aliases : frac_aligned_query - frac_aligned(-type=>'query',...)
648             frac_aligned_hit - frac_aligned(-type=>'hit',...)
649             Usage : $tiling->frac_aligned(-type=>$type,
650             -action=>$action,
651             -context=>$context)
652             Function: Return the fraction of input sequence length
653             that was aligned by the algorithm
654             Returns : scalar float
655             Args : -type => one of 'hit', 'subject', 'query'
656             -action => one of 'exact', 'est', 'fast', 'max'
657             -context => strand/frame context string
658              
659             =cut
660              
661             sub frac_aligned{
662 4     4 1 8 my ($self, @args) = @_;
663 4         14 my ($type, $action, $context) = $self->_rearrange([qw(TYPE ACTION CONTEXT)],@args);
664 4         12 $self->_check_type_arg(\$type);
665 4         8 $self->_check_action_arg(\$action);
666 4         10 $self->_check_context_arg($type, \$context);
667 4 50       14 if (!$self->{"frac_aligned_${type}_${action}_${context}"}) {
668 4         11 $self->{"frac_aligned_${type}_${action}_${context}"} = $self->num_aligned($type,$action,$context)/$self->_reported_length($type);
669             }
670 4         51 return $self->{"frac_aligned_${type}_${action}_${context}"};
671             }
672              
673 2     2 0 10 sub frac_aligned_query { shift->frac_aligned(-type=>'query', @_) }
674 2     2 0 8 sub frac_aligned_hit { shift->frac_aligned(-type=>'hit', @_) }
675            
676              
677             =head2 num_aligned
678              
679             Title : num_aligned
680             Usage : $tiling->num_aligned(-type=>$type)
681             Function: Return the number of residues of sequence $type
682             that were aligned by the algorithm
683             Returns : scalar int
684             Args : -type => one of 'hit', 'subject', 'query'
685             -action => one of 'exact', 'est', 'fast', 'max'
686             -context => strand/frame context string
687             Note : Since this is calculated from reported coordinates,
688             not symbol string counts, it is already in terms of
689             "logical length"
690             Note : Aliases length()
691              
692             =cut
693              
694 4     4 1 8 sub num_aligned { shift->length( @_ ) };
695              
696             =head2 num_unaligned
697              
698             Title : num_unaligned
699             Usage : $tiling->num_unaligned(-type=>$type)
700             Function: Return the number of residues of sequence $type
701             that were left unaligned by the algorithm
702             Returns : scalar int
703             Args : -type => one of 'hit', 'subject', 'query'
704             -action => one of 'exact', 'est', 'fast', 'max'
705             -context => strand/frame context string
706             Note : Since this is calculated from reported coordinates,
707             not symbol string counts, it is already in terms of
708             "logical length"
709              
710             =cut
711              
712             sub num_unaligned {
713 0     0 1 0 my $self = shift;
714 0         0 my ($type,$action,$context) = @_;
715 0         0 my $ret;
716 0         0 $self->_check_type_arg(\$type);
717 0         0 $self->_check_action_arg(\$action);
718 0         0 $self->_check_context_arg($type, \$context);
719 0 0       0 if (!defined $self->{"num_unaligned_${type}_${action}_${context}"}) {
720 0         0 $self->{"num_unaligned_${type}_${action}_${context}"} = $self->_reported_length($type)-$self->num_aligned($type,$action,$context);
721             }
722 0         0 return $self->{"num_unaligned_${type}_${action}_${context}"};
723             }
724            
725              
726             =head2 range
727              
728             Title : range
729             Usage : $tiling->range(-type=>$type)
730             Function: Returns the extent of the longest tiling
731             as ($min_coord, $max_coord)
732             Returns : array of two scalar integers
733             Args : -type => one of 'hit', 'subject', 'query'
734             -context => strand/frame context string
735              
736             =cut
737              
738             sub range {
739 8     8 1 1673 my ($self, $type, $context) = @_;
740 8         18 $self->_check_type_arg(\$type);
741 8         17 $self->_check_context_arg($type, \$context);
742 8         17 my @a = $self->_contig_intersection($type,$context);
743 8         43 return ($a[0][0], $a[-1][1]);
744             }
745              
746              
747              
748             =head1 ACCESSORS
749              
750             =head2 coverage_map
751              
752             Title : coverage_map
753             Usage : $map = $tiling->coverage_map($type)
754             Function: Property to contain the coverage map calculated
755             by _calc_coverage_map() - see that for
756             details
757             Example :
758             Returns : value of coverage_map_$type as an array
759             Args : scalar $type: one of 'hit', 'subject', 'query'
760             default is 'query'
761             Note : getter
762              
763             =cut
764              
765             sub coverage_map{
766 332     332 1 268 my $self = shift;
767 332         291 my ($type, $context) = @_;
768 332         385 $self->_check_type_arg(\$type);
769 332         490 $self->_check_context_arg($type, \$context);
770              
771 332 100       852 if (!defined $self->{"coverage_map_${type}_${context}"}) {
772             # following calculates coverage maps in all strands/frames
773             # if necessary
774 203         316 $self->_calc_coverage_map($type, $context);
775             }
776             # if undef is returned, then there were no hsps for given strand/frame
777 332 50       704 if (!defined $self->{"coverage_map_${type}_${context}"}) {
778 0         0 $self->warn("No HSPS present for type '$type' in context '$context' for this hit");
779 0         0 return undef;
780             }
781 332         252 return @{$self->{"coverage_map_${type}_${context}"}};
  332         743  
782             }
783              
784             =head2 coverage_map_as_text
785              
786             Title : coverage_map_as_text
787             Usage : $tiling->coverage_map_as_text($type, $legend_flag)
788             Function: Format a text-graphic representation of the
789             coverage map
790             Returns : an array of scalar strings, suitable for printing
791             Args : $type: one of 'query', 'hit', 'subject'
792             $context: strand/frame context string
793             $legend_flag: boolean; add a legend indicating
794             the actual interval coordinates for each component
795             interval and hsp (in the $type sequence context)
796             Example : print $tiling->coverage_map_as_text('query',1);
797              
798             =cut
799              
800             sub coverage_map_as_text{
801 0     0 1 0 my $self = shift;
802 0         0 my ($type, $context, $legend_q) = @_;
803 0         0 $self->_check_type_arg(\$type);
804 0         0 $self->_check_context_arg($type, \$context);
805              
806 0         0 my @map = $self->coverage_map($type, $context);
807 0         0 my @ret;
808 0         0 my @hsps = $self->hit->hsps;
809 0         0 my %hsps_i;
810 0         0 require Tie::RefHash;
811 0         0 tie %hsps_i, 'Tie::RefHash';
812 0         0 @hsps_i{@hsps} = (0..$#hsps);
813 0         0 my @mx;
814 0         0 foreach (0..$#map) {
815 0         0 my @hspx = ('') x @hsps;
816 0         0 my @these_hsps = @{$map[$_]->[1]};
  0         0  
817 0         0 @hspx[@hsps_i{@these_hsps}] = ('*') x @these_hsps;
818 0         0 $mx[$_] = \@hspx;
819             }
820 0         0 untie %hsps_i;
821              
822 0         0 push @ret, "\tIntvl\n";
823 0         0 push @ret, "HSPS\t", join ("\t", (0..$#map)), "\n";
824 0         0 foreach my $h (0..$#hsps) {
825 0         0 push @ret, join("\t", $h, map { $mx[$_][$h] } (0..$#map) ),"\n";
  0         0  
826             }
827 0 0       0 if ($legend_q) {
828 0         0 push @ret, "Interval legend\n";
829 0         0 foreach (0..$#map) {
830 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$map[$_][0]});
  0         0  
831             }
832 0         0 push @ret, "HSP legend\n";
833 0         0 my @ints = get_intervals_from_hsps($type,@hsps);
834 0         0 foreach (0..$#hsps) {
835 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$ints[$_]});
  0         0  
836             }
837             }
838 0         0 return @ret;
839             }
840              
841             =head2 hit
842              
843             Title : hit
844             Usage : $tiling->hit
845             Function:
846             Example :
847             Returns : The HitI object associated with the invocant
848             Args : none
849             Note : getter only
850              
851             =cut
852              
853             sub hit{
854 5229     5229 1 4031 my $self = shift;
855 5229 50       6826 $self->warn("Getter only") if @_;
856 5229         9772 return $self->{'hit'};
857             }
858              
859             =head2 hsps
860              
861             Title : hsps
862             Usage : $tiling->hsps()
863             Function: Container for the HSP objects associated with invocant
864             Example :
865             Returns : an array of hsps associated with the hit
866             Args : on set, new value (an arrayref or undef, optional)
867              
868             =cut
869              
870             sub hsps{
871 1768     1768 1 1342 my $self = shift;
872 1768 100       2917 return $self->{'hsps'} = shift if @_;
873 1249         1051 return @{$self->{'hsps'}};
  1249         3321  
874             }
875              
876             =head2 contexts
877              
878             Title : contexts
879             Usage : @contexts = $tiling->context($type) or
880             @indices = $tiling->context($type, $context)
881             Function: Retrieve the set of available contexts in the hit,
882             or the indices of hsps having the given context
883             (integer indices for the array returned by $self->hsps)
884             Returns : array of scalar context strings or
885             array of scalar positive integers
886             undef if no hsps in given context
887             Args : $type: one of 'query', 'hit', 'subject'
888             optional $context: context string
889              
890             =cut
891              
892             sub contexts{
893 1032     1032 1 61400 my $self = shift;
894 1032         946 my ($type, $context) = @_;
895 1032         1228 $self->_check_type_arg(\$type);
896 1032 100       1512 return keys %{$self->{"_contexts_$type"}} unless defined $context;
  403         1346  
897 629 50       1386 return undef unless $self->{"_contexts_$type"}{$context};
898 629         462 return @{$self->{"_contexts_$type"}{$context}};
  629         1545  
899             }
900              
901             =head2 mapping
902              
903             Title : mapping
904             Usage : $tiling->mapping($type)
905             Function: Retrieve the mapping coefficient for the sequence type
906             based on the underlying algorithm
907             Returns : scalar integer (mapping coefficient)
908             Args : $type: one of 'query', 'hit', 'subject'
909             Note : getter only (set in constructor)
910              
911             =cut
912              
913             sub mapping{
914 505     505 1 402 my $self = shift;
915 505         351 my $type = shift;
916 505         534 $self->_check_type_arg(\$type);
917 505         961 return $self->{"_mapping_${type}"};
918             }
919              
920             =head2 default_context
921              
922             Title : default_context
923             Usage : $tiling->default_context($type)
924             Function: Retrieve the default strand/frame context string
925             for the sequence type based on the underlying algorithm
926             Returns : scalar string (context string)
927             Args : $type: one of 'query', 'hit', 'subject'
928             Note : getter only (set in constructor)
929              
930             =cut
931              
932             sub default_context{
933 298     298 1 226 my $self = shift;
934 298         191 my $type = shift;
935 298         305 $self->_check_type_arg(\$type);
936 298         485 return $self->{"_def_context_${type}"};
937             }
938              
939             =head2 algorithm
940              
941             Title : algorithm
942             Usage : $tiling->algorithm
943             Function: Retrieve the algorithm name associated with the
944             invocant's hit object
945             Returns : scalar string
946             Args : none
947             Note : getter only (set in constructor)
948              
949             =cut
950              
951             sub algorithm{
952 0     0 1 0 my $self = shift;
953 0 0       0 $self->warn("Getter only") if @_;
954 0         0 return $self->{"_algorithm"};
955             }
956              
957             =head1 "PRIVATE" METHODS
958              
959             =head2 Calculators
960              
961             See L for lower level
962             calculation methods.
963              
964             =head2 _calc_coverage_map
965              
966             Title : _calc_coverage_map
967             Usage : $tiling->_calc_coverage_map($type)
968             Function: Calculates the coverage map for the object's associated
969             hit from the perspective of the desired $type (see Args:)
970             and sets the coverage_map() property
971             Returns : True on success
972             Args : optional scalar $type: one of 'hit'|'subject'|'query'
973             default is 'query'
974             Note : The "coverage map" is an array with the following format:
975             ( [ $component_interval => [ @containing_hsps ] ], ... ),
976             where $component_interval is a closed interval (see
977             DESCRIPTION) of the form [$a0, $a1] with $a0 <= $a1, and
978             @containing_hsps is an array of all HspI objects in the hit
979             which completely contain the $component_interval.
980             The set of $component_interval's is a disjoint decomposition
981             of the minimum set of minimal intervals that completely
982             cover the hit's HSPs (from the perspective of the $type)
983             Note : This calculates the map for all strand/frame contexts available
984             in the hit
985              
986             =cut
987              
988             sub _calc_coverage_map {
989 207     207   155 my $self = shift;
990 207         168 my ($type) = @_;
991 207         290 $self->_check_type_arg(\$type);
992              
993             # obtain the [start, end] intervals for all hsps in the hit (relative
994             # to the type)
995 207 50       376 unless ($self->{'hsps'}) {
996 0         0 $self->warn("No HSPs for this hit");
997 0         0 return;
998             }
999              
1000 207         172 my (@map, @hsps, %filters, @intervals);
1001            
1002              
1003             # conversion here?
1004 207         297 my $c = $self->mapping($type);
1005            
1006             # create the possible maps
1007 207         353 for my $context ($self->contexts($type)) {
1008 219         239 @map = ();
1009 219         349 @hsps = ($self->hsps)[$self->contexts($type, $context)];
1010 219         611 @intervals = get_intervals_from_hsps( $type, @hsps );
1011             # the "frame"
1012 219         446 my $f = ($intervals[0]->[0] - 1) % $c;
1013              
1014             # convert interval endpoints...
1015 219         307 for (@intervals) {
1016 536         719 $$_[0] = ($$_[0] - $f + $c - 1)/$c;
1017 536         500 $$_[1] = ($$_[1] - $f)/$c;
1018             }
1019            
1020             # determine the minimal set of disjoint intervals that cover the
1021             # set of hsp intervals
1022 219         472 my @dj_set = interval_tiling(\@intervals);
1023              
1024             # decompose each disjoint interval into another set of disjoint
1025             # intervals, each of which is completely contained within the
1026             # original hsp intervals with which it overlaps
1027 219         196 my $i=0;
1028 219         155 my @decomp;
1029 219         259 for my $dj_elt (@dj_set) {
1030 432         449 my ($covering, $indices) = @$dj_elt;
1031 432         632 my @covering_hsps = @hsps[@$indices];
1032 432         403 my @coverers = @intervals[@$indices];
1033 432         692 @decomp = decompose_interval( \@coverers );
1034 432         586 for (@decomp) {
1035 638         409 my ($component, $container_indices) = @{$_};
  638         742  
1036 638         1188 push @map, [ $component,
1037             [@covering_hsps[@$container_indices]] ];
1038             }
1039 432         478 1;
1040             }
1041            
1042             # unconvert the components:
1043             #####
1044 219         253 foreach (@map) {
1045 638         745 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1046 638         702 $$_[0][1] = $c*$$_[0][1] + $f;
1047             }
1048 219         267 foreach (@dj_set) {
1049 432         541 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1050 432         511 $$_[0][1] = $c*$$_[0][1] + $f;
1051             }
1052              
1053             # sort the map on the interval left-ends
1054 219         296 @map = sort { $a->[0][0]<=>$b->[0][0] } @map;
  980         701  
1055 219         613 $self->{"coverage_map_${type}_${context}"} = [@map];
1056             # set the _contig_intersection attribute here (side effect)
1057 219         245 $self->{"_contig_intersection_${type}_${context}"} = [map { $$_[0] } @map];
  638         1468  
1058             }
1059              
1060 207         527 return 1; # success
1061             }
1062              
1063             =head2 _calc_stats
1064              
1065             Title : _calc_stats
1066             Usage : $tiling->_calc_stats($type, $action, $context)
1067             Function: Calculates [estimated] tiling statistics (identities, conserved sites
1068             length) and sets the public accessors
1069             Returns : True on success
1070             Args : scalar $type: one of 'hit', 'subject', 'query'
1071             default is 'query'
1072             optional scalar $action: requests calculation method
1073             currently one of 'exact', 'est', 'fast', 'max'
1074             option scalar $context: strand/frame context string
1075             Note : Action: The statistics are calculated by summing quantities
1076             over the disjoint component intervals, taking into account
1077             coverage of those intervals by multiple HSPs. The action
1078             tells the algorithm how to obtain those quantities--
1079             'exact' will use Bio::Search::HSP::HSPI::matches
1080             to count the appropriate segment of the homology string;
1081             'est' will estimate the statistics by multiplying the
1082             fraction of the HSP overlapped by the component interval
1083             (see MapTileUtils) by the BLAST-reported identities/postives
1084             (this may be convenient for BLAST summary report formats)
1085             * Both exact and est take the average over the number of HSPs
1086             that overlap the component interval.
1087             'max' uses the exact method to calculate the statistics,
1088             and returns only the maximum identites/positives over
1089             overlapping HSP for the component interval. No averaging
1090             is involved here.
1091             'fast' doesn't involve tiling at all (hence the name),
1092             but it seems like a very good estimate, and uses only
1093             reported values, and so does not require sequence data. It
1094             calculates an average of reported identities, conserved
1095             sites, and lengths, over unmodified hsps in the hit,
1096             weighted by the length of the hsps.
1097              
1098             =cut
1099              
1100             sub _calc_stats {
1101 532     532   451 my $self = shift;
1102 532         514 my ($type, $action, $context) = @_;
1103             # need to check args here, in case method is called internally.
1104 532         635 $self->_check_type_arg(\$type);
1105 532         821 $self->_check_action_arg(\$action);
1106 532         763 $self->_check_context_arg($type, \$context);
1107 532         591 my ($ident, $cons, $length) = (0,0,0);
1108              
1109             # fast : avoid coverage map altogether, get a pretty damn
1110             # good estimate with a weighted average of reported hsp
1111             # statistics
1112              
1113 532 100       788 ($action eq 'fast') && do {
1114 204         321 my @hsps = $self->hit->hsps;
1115 204         402 @hsps = @hsps[$self->contexts($type, $context)];
1116             # weights for averages
1117 204         312 my @wt = map {$_->length($type)} @hsps;
  450         757  
1118 204         10409 my $sum = eval( join('+',@wt) );
1119 204         877 $_ /= $sum for (@wt);
1120 204         235 for (@hsps) {
1121 450         473 my $wt = shift @wt;
1122 450         963 $ident += $wt*$_->matches_MT($type,'identities');
1123 450         760 $cons += $wt*$_->matches_MT($type,'conserved');
1124 450         787 $length += $wt*$_->length($type);
1125             }
1126             };
1127              
1128             # or, do tiling
1129              
1130             # calculate identities/conserved sites in tiling
1131             # estimate based on the fraction of the component interval covered
1132             # and ident/cons reported by the HSPs
1133 532 100       992 ($action ne 'fast') && do {
1134 328         567 foreach ($self->coverage_map($type, $context)) {
1135 1384         1160 my ($intvl, $hsps) = @{$_};
  1384         1715  
1136 1384         1753 my $len = ($$intvl[1]-$$intvl[0]+1);
1137 1384 100       1942 my $ncover = ($action eq 'max') ? 1 : scalar @$hsps;
1138 1384         1242 my ($acc_i, $acc_c) = (0,0);
1139 1384         1321 foreach my $hsp (@$hsps) {
1140 1764         1582 for ($action) {
1141 1764 100       2319 ($_ eq 'est') && do {
1142 788         2247 my ($inc_i, $inc_c) = $hsp->matches_MT(
1143             -type => $type,
1144             -action => 'searchutils',
1145             );
1146 788         1454 my $frac = $len/$hsp->length($type);
1147 788         797 $acc_i += $inc_i * $frac;
1148 788         547 $acc_c += $inc_c * $frac;
1149 788         1248 last;
1150             };
1151 976 100       1430 ($_ eq 'max') && do {
1152 483         1417 my ($inc_i, $inc_c) = $hsp->matches_MT(
1153             -type => $type,
1154             -action => 'searchutils',
1155             -start => $$intvl[0],
1156             -end => $$intvl[1]
1157             );
1158 483 100       899 $acc_i = ($acc_i > $inc_i) ? $acc_i : $inc_i;
1159 483 100       603 $acc_c = ($acc_c > $inc_c) ? $acc_c : $inc_c;
1160 483         666 last;
1161             };
1162 493 50 33     1472 (!$_ || ($_ eq 'exact')) && do {
1163 493         1495 my ($inc_i, $inc_c) = $hsp->matches_MT(
1164             -type => $type,
1165             -action => 'searchutils',
1166             -start => $$intvl[0],
1167             -end => $$intvl[1]
1168             );
1169 493         580 $acc_i += $inc_i;
1170 493         313 $acc_c += $inc_c;
1171 493         665 last;
1172             };
1173             }
1174             }
1175 1384         1513 $ident += ($acc_i/$ncover);
1176 1384         1040 $cons += ($acc_c/$ncover);
1177 1384         1500 $length += $len;
1178             }
1179             };
1180            
1181 532         1514 $self->{"identities_${type}_${action}_${context}"} = $ident;
1182 532         981 $self->{"conserved_${type}_${action}_${context}"} = $cons;
1183 532         831 $self->{"length_${type}_${action}_${context}"} = $length;
1184            
1185 532         746 return 1;
1186             }
1187              
1188             =head2 Tiling Helper Methods
1189              
1190             =cut
1191              
1192             # coverage_map is of the form
1193             # ( [ $interval, \@containing_hsps ], ... )
1194              
1195             # so, for each interval, pick one of the containing hsps,
1196             # and return the union of all the picks.
1197              
1198             # use the combinatorial generating iterator, with
1199             # the urns containing the @containing_hsps for each
1200             # interval
1201              
1202             =head2 _make_tiling_iterator
1203              
1204             Title : _make_tiling_iterator
1205             Usage : $self->_make_tiling_iterator($type)
1206             Function: Create an iterator code ref that will step through all
1207             minimal combinations of HSPs that produce complete coverage
1208             of the $type ('hit', 'subject', 'query') sequence,
1209             and set the correct iterator property of the invocant
1210             Example :
1211             Returns : The iterator
1212             Args : scalar $type, one of 'hit', 'subject', 'query';
1213             default is 'query'
1214              
1215             =cut
1216              
1217             sub _make_tiling_iterator {
1218             ### create the urns
1219 3     3   4 my $self = shift;
1220 3         4 my ($type, $context) = @_;
1221 3         6 $self->_check_type_arg(\$type);
1222 3         6 $self->_check_context_arg($type, \$context);
1223              
1224             # initialize the urns
1225 3         8 my @urns = map { [0, $$_[1]] } $self->coverage_map($type, $context);
  43         43  
1226              
1227 3         5 my $FINISHED = 0;
1228             my $iter = sub {
1229             # rewind?
1230 271 100   271   338 if (my $rewind = shift) {
1231             # reinitialize urn indices
1232 2         9 $$_[0] = 0 for (@urns);
1233 2         2 $FINISHED = 0;
1234 2         5 return 1;
1235             }
1236             # check if done...
1237 269 100       310 return if $FINISHED;
1238              
1239 266         179 my $finished_incrementing = 0;
1240             # @ret is the collector of urn choices
1241 266         164 my @ret;
1242              
1243 266         241 for my $urn (@urns) {
1244 4482         3382 my ($n, $hsps) = @$urn;
1245 4482         2974 push @ret, $$hsps[$n];
1246 4482 100       5106 unless ($finished_incrementing) {
1247 1086 100       969 if ($n == $#$hsps) { $$urn[0] = 0; }
  823         717  
1248 263         151 else { ($$urn[0])++; $finished_incrementing = 1 }
  263         266  
1249             }
1250             }
1251              
1252             # backstop...
1253 266 100       290 $FINISHED = 1 unless $finished_incrementing;
1254             # uniquify @ret
1255             # $hsp->rank is a unique identifier for an hsp in a hit.
1256             # preserve order in @ret
1257            
1258 266         144 my (%order, %uniq);
1259 266         1304 @order{(0..$#ret)} = @ret;
1260 266         977 $uniq{$order{$_}->rank} = $_ for (0..$#ret);
1261 266         529 @ret = @order{ sort {$a<=>$b} values %uniq };
  5869         3856  
1262              
1263 266         848 return @ret;
1264 3         19 };
1265 3         12 return $iter;
1266             }
1267              
1268             =head2 _tiling_iterator
1269              
1270             Title : _tiling_iterator
1271             Usage : $tiling->_tiling_iterator($type,$context)
1272             Function: Retrieve the tiling iterator coderef for the requested
1273             $type ('hit', 'subject', 'query')
1274             Example :
1275             Returns : coderef to the desired iterator
1276             Args : scalar $type, one of 'hit', 'subject', 'query'
1277             default is 'query'
1278             option scalar $context: strand/frame context string
1279             Note : getter only
1280              
1281             =cut
1282              
1283             sub _tiling_iterator {
1284 270     270   181 my $self = shift;
1285 270         210 my ($type, $context) = @_;
1286 270         242 $self->_check_type_arg(\$type);
1287 270         300 $self->_check_context_arg($type, \$context);
1288              
1289 270 100       500 if (!defined $self->{"_tiling_iterator_${type}_${context}"}) {
1290 2         7 $self->{"_tiling_iterator_${type}_${context}"} =
1291             $self->_make_tiling_iterator($type,$context);
1292             }
1293 270         450 return $self->{"_tiling_iterator_${type}_${context}"};
1294             }
1295             =head2 Construction Helper Methods
1296              
1297             See also L.
1298              
1299             =cut
1300              
1301             sub _check_type_arg {
1302 8297     8297   5425 my $self = shift;
1303 8297         5270 my $typeref = shift;
1304 8297   100     11042 $$typeref ||= 'query';
1305 8297 50       43536 $self->throw("Unknown type '$$typeref'") unless grep(/^$$typeref$/, qw( hit query subject ));
1306 8297 100       11221 $$typeref = 'hit' if $$typeref eq 'subject';
1307 8297         7281 return 1;
1308             }
1309              
1310             sub _check_action_arg {
1311 3721     3721   3049 my $self = shift;
1312 3721         2463 my $actionref = shift;
1313 3721 100       4186 if (!$$actionref) {
1314 17 100       30 $$actionref = ($self->_has_sequence_data ? 'exact' : 'est');
1315             }
1316             else {
1317 3704 50       22444 $self->throw("Calc action '$$actionref' unrecognized") unless grep /^$$actionref$/, qw( est exact fast max );
1318 3704 100 100     8334 if ($$actionref ne 'est' and !$self->_has_sequence_data) {
1319 12         42 $self->warn("Blast file did not possess sequence data; defaulting to 'est' action");
1320 12         11 $$actionref = 'est';
1321             }
1322             }
1323 3721         3382 return 1;
1324             }
1325              
1326             sub _check_context_arg {
1327 4613     4613   3802 my $self = shift;
1328 4613         3666 my ($type, $contextref) = @_;
1329 4613 100       5225 if (!$$contextref) {
1330 298 50       326 $self->throw("Type '$type' requires strand/frame context for algorithm ".$self->algorithm) unless ($self->mapping($type) == 1);
1331             # set default according to default_context attrib
1332 298         358 $$contextref = $self->default_context($type);
1333             }
1334             else {
1335 4315 50       7236 ($$contextref =~ /^[mp]$/) && do { $$contextref .= '_' };
  0         0  
1336 4315 50       8484 $self->throw("Context '$$contextref' unrecognized") unless
1337             $$contextref =~ /all|[mp][0-2_]/;
1338             }
1339            
1340             }
1341              
1342             =head2 _make_context_key
1343              
1344             Title : _make_context_key
1345             Alias : _context
1346             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1347             Function: create a string indicating strand/frame context; serves as
1348             component of memoizing hash keys
1349             Returns : scalar string
1350             Args : -type => one of ('query', 'hit', 'subject')
1351             -strand => one of (1,0,-1)
1352             -frame => one of (-2, 1, 0, 1, -2)
1353             called w/o args: returns 'all'
1354              
1355             =cut
1356              
1357             sub _make_context_key {
1358 546     546   374 my $self = shift;
1359 546         1094 my @args = @_;
1360 546         1582 my ($type, $strand, $frame) = $self->_rearrange([qw(TYPE STRAND FRAME)], @args);
1361 546         1263 _check_type_arg(\$type);
1362 546 50 33     1066 return 'all' unless (defined $strand or defined $frame);
1363 546 100 66     1329 if ( defined $strand && $self->_has_strand($type) ) {
1364 211 100 66     549 if (defined $frame && $self->_has_frame($type)) {
1365 94 100       318 return ($strand >= 0 ? 'p' : 'm').abs($frame);
1366             }
1367             else {
1368 117 100       350 return ($strand >= 0 ? 'p_' : 'm_');
1369             }
1370             }
1371             else {
1372 335 50 33     790 if (defined $frame && $self->_has_frame($type)) {
1373 0         0 $self->warn("Frame defined without strand; punting with plus strand");
1374 0         0 return 'p'.abs($frame);
1375             }
1376             else {
1377 335         696 return 'all';
1378             }
1379             }
1380             }
1381              
1382             =head2 _context
1383              
1384             Title : _context
1385             Alias : _make_context_key
1386             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1387             Function: create a string indicating strand/frame context; serves as
1388             component of memoizing hash keys
1389             Returns : scalar string
1390             Args : -type => one of ('query', 'hit', 'subject')
1391             -strand => one of (1,0,-1)
1392             -frame => one of (-2, 1, 0, 1, -2)
1393             called w/o args: returns 'all'
1394              
1395             =cut
1396              
1397 546     546   822 sub _context { shift->_make_context_key(@_) }
1398              
1399             =head2 Predicates
1400              
1401             Most based on a reading of the algorithm name with a configuration lookup.
1402              
1403             =over
1404              
1405             =item _has_sequence_data()
1406              
1407             =cut
1408              
1409             sub _has_sequence_data {
1410 2253     2253   1835 my $self = shift;
1411 2253 50       2488 $self->throw("Hit attribute not yet set") unless defined $self->hit;
1412 2253 100       2589 return (($self->hit->hsps)[0]->seq_str('match') ? 1 : 0);
1413             }
1414              
1415             =item _has_logical_length()
1416              
1417             =cut
1418              
1419             sub _has_logical_length {
1420 0     0   0 my $self = shift;
1421 0         0 my $type = shift;
1422 0         0 $self->_check_type_arg(\$type);
1423             # based on mapping coeff
1424 0 0       0 $self->throw("Mapping coefficients not yet set") unless defined $self->mapping($type);
1425 0         0 return ($self->mapping($type) > 1);
1426             }
1427              
1428             =item _has_strand()
1429              
1430             =cut
1431              
1432             sub _has_strand {
1433 546     546   428 my $self = shift;
1434 546         450 my $type = shift;
1435 546         829 $self->_check_type_arg(\$type);
1436 546         1758 return $self->{"_has_strand_${type}"};
1437             }
1438              
1439             =item _has_frame()
1440              
1441             =cut
1442              
1443             sub _has_frame {
1444 546     546   441 my $self = shift;
1445 546         390 my $type = shift;
1446 546         616 $self->_check_type_arg(\$type);
1447 546         1466 return $self->{"_has_frame_${type}"};
1448             }
1449              
1450             =back
1451              
1452             =head1 Private Accessors
1453              
1454             =head2 _contig_intersection
1455              
1456             Title : _contig_intersection
1457             Usage : $tiling->_contig_intersection($type)
1458             Function: Return the minimal set of $type coordinate intervals
1459             covered by the invocant's HSPs
1460             Returns : array of intervals (2-member arrayrefs; see MapTileUtils)
1461             Args : scalar $type: one of 'query', 'hit', 'subject'
1462              
1463             =cut
1464              
1465             sub _contig_intersection {
1466 8     8   9 my $self = shift;
1467 8         8 my ($type, $context) = @_;
1468 8         12 $self->_check_type_arg(\$type);
1469 8         13 $self->_check_context_arg($type, \$context);
1470 8 100       23 if (!defined $self->{"_contig_intersection_${type}_${context}"}) {
1471 4         9 $self->_calc_coverage_map($type);
1472             }
1473 8         8 return @{$self->{"_contig_intersection_${type}_${context}"}};
  8         22  
1474             }
1475              
1476             =head2 _reported_length
1477              
1478             Title : _reported_length
1479             Usage : $tiling->_reported_length($type)
1480             Function: Get the total length of the seq $type
1481             for the invocant's hit object, as reported
1482             by (not calculated from) the input data file
1483             Returns : scalar int
1484             Args : scalar $type: one of 'query', 'hit', 'subject'
1485             Note : This is kludgy; the hit object does not currently
1486             maintain accessors for these values, but the
1487             hsps possess these attributes. This is a wrapper
1488             that allows a consistent access method in the
1489             MapTiling code.
1490             Note : Since this number is based on a reported length,
1491             it is already a "logical length".
1492              
1493             =cut
1494              
1495             sub _reported_length {
1496 4     4   7 my $self = shift;
1497 4         5 my $type = shift;
1498 4         7 $self->_check_type_arg(\$type);
1499 4         10 my $key = uc( $type."_LENGTH" );
1500 4         9 return ($self->hsps)[0]->{$key};
1501             }
1502              
1503             1;
1504