File Coverage

Bio/Search/Tiling/MapTiling.pm
Criterion Covered Total %
statement 396 473 83.7
branch 93 136 68.3
condition 26 51 50.9
subroutine 43 47 91.4
pod 22 24 91.6
total 580 731 79.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Tiling::MapTiling
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Mark A. Jensen
7             #
8             # Copyright Mark A. Jensen
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Tiling::MapTiling - An implementation of an HSP tiling
17             algorithm, with methods to obtain frequently-requested statistics
18              
19             =head1 SYNOPSIS
20              
21             # get a BLAST $hit from somewhere, then
22             $tiling = Bio::Search::Tiling::MapTiling->new($hit);
23              
24             # stats
25             $numID = $tiling->identities();
26             $numCons = $tiling->conserved();
27             $query_length = $tiling->length('query');
28             $subject_length = $tiling->length('subject'); # or...
29             $subject_length = $tiling->length('hit');
30              
31             # get a visual on the coverage map
32             print $tiling->coverage_map_as_text('query',$context,'LEGEND');
33              
34             # tilings
35             $context = $tiling->_context( -type => 'subject', -strand=> 1, -frame=>1);
36             @covering_hsps_for_subject = $tiling->next_tiling('subject',$context);
37             $context = $tiling->_context( -type => 'query', -strand=> -1, -frame=>0);
38             @covering_hsps_for_query = $tiling->next_tiling('query', $context);
39              
40             =head1 DESCRIPTION
41              
42             Frequently, users want to use a set of high-scoring pairs (HSPs)
43             obtained from a BLAST or other search to assess the overall level of
44             identity, conservation, or coverage represented by matches between a
45             subject and a query sequence. Because a set of HSPs frequently
46             describes multiple overlapping sequence fragments, a simple summation of
47             statistics over the HSPs will generally overestimate those
48             statistics. To obtain an accurate estimate of global hit statistics, a
49             'tiling' of HSPs onto either the subject or the query sequence must be
50             performed, in order to properly correct for this.
51              
52             This module will execute a tiling algorithm on a given hit based on an
53             interval decomposition I'm calling the "coverage map". Internal object
54             methods compute the various statistics, which are then stored in
55             appropriately-named public object attributes. See
56             L for more info on the algorithm.
57              
58             =head2 STRAND/FRAME CONTEXTS
59              
60             In BLASTX, TBLASTN, and TBLASTX reports, strand and frame information
61             are reported for the query, subject, or query and subject,
62             respectively, for each HSP. Tilings for these sequence types are only
63             meaningful when they include HSPs in the same strand and frame, or
64             "context". So, in these situations, the context must be specified
65             in the method calls or the methods will throw.
66              
67             Contexts are specified as strings: C<[ 'all' | [m|p][_|0|1|2] ]>, where
68             C = all HSPs (will throw if context must be specified), C = minus
69             strand, C

= plus strand, and C<_> = no frame info, C<0,1,2> = respective

70             (absolute) frame. The L method will convert a (strand,
71             frame) specification to a context string, e.g.:
72              
73             $context = $self->_context(-type=>'query', -strand=>-1, -frame=>-2);
74              
75             returns C.
76              
77             The contexts present among the HSPs in a hit are identified and stored
78             for convenience upon object construction. These are accessed off the
79             object with the L method. If contexts don't apply for the
80             given report, this returns C<('all')>.
81              
82             =head1 TILED ALIGNMENTS
83              
84             The experimental method L will use a tiling
85             to concatenate tiled hsps into a series of L
86             objects:
87              
88             @alns = $tiling->get_tiled_alns($type, $context);
89              
90             Each alignment contains two sequences with ids 'query' and 'subject',
91             and consists of a concatenation of tiling HSPs which overlap or are
92             directly adjacent. The alignment are returned in C<$type> sequence
93             order. When HSPs overlap, the alignment sequence is taken from the HSP
94             which comes first in the coverage map array.
95              
96             The sequences in each alignment contain features (even though they are
97             L objects) which map the original query/subject
98             coordinates to the new alignment sequence coordinates. You can
99             determine the original BLAST fragments this way:
100              
101             $aln = ($tiling->get_tiled_alns)[0];
102             $qseq = $aln->get_seq_by_id('query');
103             $hseq = $aln->get_seq_by_id('subject');
104             foreach my $feat ($qseq->get_SeqFeatures) {
105             $org_start = ($feat->get_tag_values('query_start'))[0];
106             $org_end = ($feat->get_tag_values('query_end'))[0];
107             # original fragment as represented in the tiled alignment:
108             $org_fragment = $feat->seq;
109             }
110             foreach my $feat ($hseq->get_SeqFeatures) {
111             $org_start = ($feat->get_tag_values('subject_start'))[0];
112             $org_end = ($feat->get_tag_values('subject_end'))[0];
113             # original fragment as represented in the tiled alignment:
114             $org_fragment = $feat->seq;
115             }
116              
117             =head1 DESIGN NOTE
118              
119             The major calculations are made just-in-time, and then memoized. So,
120             for example, for a given MapTiling object, a coverage map would
121             usually be calculated only once (for the query), and at most twice (if
122             the subject perspective is also desired), and then only when a
123             statistic is first accessed. Afterward, the map and/or any statistic
124             is read from storage. So feel free to call the statistic methods
125             frequently if it suits you.
126              
127             =head1 FEEDBACK
128              
129             =head2 Mailing Lists
130              
131             User feedback is an integral part of the evolution of this and other
132             Bioperl modules. Send your comments and suggestions preferably to
133             the Bioperl mailing list. Your participation is much appreciated.
134              
135             bioperl-l@bioperl.org - General discussion
136             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
137              
138             =head2 Support
139              
140             Please direct usage questions or support issues to the mailing list:
141              
142             I
143              
144             rather than to the module maintainer directly. Many experienced and
145             reponsive experts will be able look at the problem and quickly
146             address it. Please include a thorough description of the problem
147             with code and data examples if at all possible.
148              
149             =head2 Reporting Bugs
150              
151             Report bugs to the Bioperl bug tracking system to help us keep track
152             of the bugs and their resolution. Bug reports can be submitted via
153             the web:
154              
155             https://github.com/bioperl/bioperl-live/issues
156              
157             =head1 AUTHOR - Mark A. Jensen
158              
159             Email maj -at- fortinbras -dot- us
160              
161             =head1 APPENDIX
162              
163             The rest of the documentation details each of the object methods.
164             Internal methods are usually preceded with a _
165              
166             =cut
167              
168             # Let the code begin...
169              
170             package Bio::Search::Tiling::MapTiling;
171 1     1   937 use strict;
  1         1  
  1         26  
172 1     1   4 use warnings;
  1         2  
  1         23  
173              
174             # Object preamble - inherits from Bio::Root::Root
175             #use lib '../../..';
176              
177 1     1   397 use Bio::Root::Root;
  1         3  
  1         29  
178 1     1   458 use Bio::Search::Tiling::TilingI;
  1         2  
  1         24  
179 1     1   429 use Bio::Search::Tiling::MapTileUtils;
  1         2  
  1         74  
180 1     1   446 use Bio::LocatableSeq;
  1         5  
  1         38  
181              
182             # use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
183 1     1   6 use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI);
  1         2  
  1         4313  
184              
185             =head1 CONSTRUCTOR
186              
187             =head2 new
188              
189             Title : new
190             Usage : my $obj = new Bio::Search::Tiling::GenericTiling();
191             Function: Builds a new Bio::Search::Tiling::GenericTiling object
192             Returns : an instance of Bio::Search::Tiling::GenericTiling
193             Args : -hit => $a_Bio_Search_Hit_HitI_object
194             general filter function:
195             -hsp_filter => sub { my $this_hsp = shift;
196             ...;
197             return 1 if $wanted;
198             return 0; }
199              
200             =cut
201              
202             sub new {
203 519     519 1 3929 my $class = shift;
204 519         1005 my @args = @_;
205 519         1595 my $self = $class->SUPER::new(@args);
206 519         1681 my($hit, $filter) = $self->_rearrange( [qw( HIT HSP_FILTER)],@args );
207              
208 519 50       1383 $self->throw("HitI object required") unless $hit;
209 519 50 33     2825 $self->throw("Argument must be HitI object") unless ( ref $hit && $hit->isa('Bio::Search::Hit::HitI') );
210 519         1035 $self->{hit} = $hit;
211 519         1476 $self->_set_attributes();
212 519         1335 $self->{"_algorithm"} = $hit->algorithm;
213              
214 519         1172 my @hsps = $hit->hsps;
215             # apply filter function if requested
216 519 50       1041 if ( defined $filter ) {
217 0 0       0 if ( ref($filter) eq 'CODE' ) {
218 0 0       0 @hsps = map { $filter->($_) ? $_ : () } @hsps;
  0         0  
219             }
220             else {
221 0         0 $self->warn("-filter is not a coderef; ignoring");
222             }
223             }
224            
225             # identify available contexts
226 519         895 for my $t (qw( query hit )) {
227 1038         1113 my %contexts;
228 1038         1822 for my $i (0..$#hsps) {
229 546         1665 my $ctxt = $self->_context(
230             -type => $t,
231             -strand => $hsps[$i]->strand($t),
232             -frame => $hsps[$i]->frame($t));
233 546   100     1793 $contexts{$ctxt} ||= [];
234 546         630 push @{$contexts{$ctxt}}, $i;
  546         1252  
235             }
236 1038         2456 $self->{"_contexts_${t}"} = \%contexts;
237             }
238              
239 519 100       1628 $self->warn("No HSPs present in hit after filtering") unless (@hsps);
240 519         1401 $self->hsps(\@hsps);
241 519         2163 return $self;
242             }
243              
244             # a tiling is based on the set of hsps contained in a single hit.
245             # check all the boundaries - zero hsps, one hsp, all disjoint hsps
246              
247             =head1 TILING ITERATORS
248              
249             =head2 next_tiling
250              
251             Title : next_tiling
252             Usage : @hsps = $self->next_tiling($type);
253             Function: Obtain a tiling: a minimal set of HSPs covering the $type
254             ('hit', 'subject', 'query') sequence
255             Example :
256             Returns : an array of HSPI objects
257             Args : scalar $type: one of 'hit', 'subject', 'query', with
258             'subject' an alias for 'hit'
259              
260             =cut
261              
262             sub next_tiling{
263 268     268 1 2053 my $self = shift;
264 268         352 my ($type, $context) = @_;
265 268         505 $self->_check_type_arg(\$type);
266 268         624 $self->_check_context_arg($type, \$context);
267 268         500 return $self->_tiling_iterator($type, $context)->();
268             }
269              
270             =head2 rewind_tilings
271              
272             Title : rewind_tilings
273             Usage : $self->rewind_tilings($type)
274             Function: Reset the next_tilings($type) iterator
275             Example :
276             Returns : True on success
277             Args : scalar $type: one of 'hit', 'subject', 'query';
278             default is 'query'
279              
280             =cut
281              
282             sub rewind_tilings{
283 2     2 1 309 my $self = shift;
284 2         7 my ($type,$context) = @_;
285 2         7 $self->_check_type_arg(\$type);
286 2         7 $self->_check_context_arg($type, \$context);
287 2         6 return $self->_tiling_iterator($type, $context)->('REWIND');
288             }
289              
290             =head1 ALIGNMENTS
291              
292             =head2 get_tiled_alns()
293              
294             Title : get_tiled_alns
295             Usage : @alns = $tiling->get_tiled_alns($type, $context)
296             Function: Use a tiling to construct a minimal set of alignment
297             objects covering the region specified by $type/$context
298             by splicing adjacent HSP tiles
299             Returns : an array of Bio::SimpleAlign objects; see Note below
300             Args : scalar $type: one of 'hit', 'subject', 'query'
301             default is 'query'
302             scalar $context: strand/frame context string
303             Following $type and $context, an array of
304             ordered, tiled HSP objects can be specified; this is
305             the tiling that will directly the alignment construction
306             default -- the first tiling provided by a tiling iterator
307             Notes : Each returned alignment is a concatenation of adjacent tiles.
308             The set of alignments will cover all regions described by the
309             $type/$context pair in the hit. The pair of sequences in each
310             alignment have ids 'query' and 'subject', and each sequence
311             possesses SeqFeatures that map the original query or subject
312             coordinates to the sequence coordinates in the tiled alignment.
313            
314             =cut
315              
316             sub get_tiled_alns {
317 1     1 1 6 my $self = shift;
318 1         3 my ($type, $context) = @_;
319 1         8 $self->_check_type_arg(\$type);
320 1         5 $self->_check_context_arg($type, \$context);
321 1         2 my $t = shift; # first arg after type/context, arrayref to a tiling
322 1         1 my @tiling;
323 1 50 33     5 if ($t && (ref($t) eq 'ARRAY')) {
324 0         0 @tiling = @$t;
325             }
326             else { # otherwise, take the first tiling available
327              
328 1         4 @tiling = $self->_make_tiling_iterator($type,$context)->();
329             }
330 1         12 my @ret;
331              
332 1         4 my @map = $self->coverage_map($type, $context);
333 1         2 my @intervals = map {$_->[0]} @map; # disjoint decomp
  13         15  
334             # divide into disjoint covering groups
335 1         7 my @groups = covering_groups(@intervals);
336              
337 1         927 require Bio::SimpleAlign;
338 1         11 require Bio::SeqFeature::Generic;
339             # cut hsp aligns along the disjoint decomp
340             # look for gaps...or add gaps?
341 1         4 my ($q_start, $h_start, $q_strand, $h_strand);
342             # build seqs
343 1         4 for my $grp (@groups) {
344 6         27 my $taln = Bio::SimpleAlign->new();
345 6         12 my (@qfeats, @hfeats);
346 6         8 my $query_string = '';
347 6         10 my $hit_string = '';
348 6         10 my ($qlen,$hlen) = (0,0);
349 6         9 my ($qinc, $hinc, $qstart, $hstart);
350 6         17 for my $intvl (@$grp) {
351             # following just chooses the first available hsp containing the
352             # interval -- this is arbitrary, could be smarter.
353 12         60 my $aln = ( containing_hsps($intvl, @tiling) )[0]->get_aln;
354 12         48 my $qseq = $aln->get_seq_by_pos(1);
355 12         33 my $hseq = $aln->get_seq_by_pos(2);
356 12   66     48 $qstart ||= $qseq->start;
357 12   66     44 $hstart ||= $hseq->start;
358             # calculate the slice boundaries
359 12         28 my ($beg, $end);
360 12         24 for ($type) {
361 12 50       47 /query/ && do {
362 12         40 $beg = $aln->column_from_residue_number($qseq->id, $intvl->[0]);
363 12         41 $end = $aln->column_from_residue_number($qseq->id, $intvl->[1]);
364 12         28 last;
365             };
366 0 0       0 /subject|hit/ && do {
367 0         0 $beg = $aln->column_from_residue_number($hseq->id, $intvl->[0]);
368 0         0 $end = $aln->column_from_residue_number($hseq->id, $intvl->[1]);
369 0         0 last;
370             };
371             }
372 12         53 $aln = $aln->slice($beg, $end);
373 12         44 $qseq = $aln->get_seq_by_pos(1);
374 12         43 $hseq = $aln->get_seq_by_pos(2);
375 12         42 $qinc = $qseq->length - $qseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
376 12         36 $hinc = $hseq->length - $hseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS);
377              
378 12 50 33     43 push @qfeats, Bio::SeqFeature::Generic->new(
    50 33        
379             -start => $qlen+1,
380             -end => $qlen+$qinc,
381             -strand => $qseq->strand,
382             -primary => 'query',
383             -source_tag => 'BLAST',
384             -display_name => 'query coordinates',
385             -tag => { query_id => $qseq->id,
386             query_desc => $qseq->desc,
387             query_start => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*$qlen,
388             query_end => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*($qlen+$qinc-1),
389             }
390             );
391 12 50 33     84 push @hfeats, Bio::SeqFeature::Generic->new(
    50 33        
392             -start => $hlen+1,
393             -end => $hlen+$hinc,
394             -strand => $hseq->strand,
395             -primary => 'subject/hit',
396             -source_tag => 'BLAST',
397             -display_name => 'subject/hit coordinates',
398             -tag => { subject_id => $hseq->id,
399             subject_desc => $hseq->desc,
400             subject_start => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*$hlen,
401             subject_end => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*($hlen+$hinc-1)
402             }
403             );
404 12         68 $query_string .= $qseq->seq;
405 12         31 $hit_string .= $hseq->seq;
406 12         23 $qlen += $qinc;
407 12         46 $hlen += $hinc;
408             }
409             # create the LocatableSeqs and add the features to each
410             # then add the seqs to the new aln
411             # note in MapTileUtils, Bio::FeatureHolderI methods have been
412             # mixed into Bio::LocatableSeq
413 6         25 my $qseq = Bio::LocatableSeq->new( -id => 'query',
414             -seq => $query_string);
415 6         31 $qseq->add_SeqFeature(@qfeats);
416 6         21 my $hseq = Bio::LocatableSeq->new( -id => 'subject',
417             -seq => $hit_string );
418 6         24 $hseq->add_SeqFeature(@hfeats);
419 6         22 $taln->add_seq($qseq);
420 6         18 $taln->add_seq($hseq);
421 6         22 push @ret, $taln;
422             }
423            
424 1         21 return @ret;
425             }
426              
427             =head1 STATISTICS
428              
429             =head2 identities
430              
431             Title : identities
432             Usage : $tiling->identities($type, $action, $context)
433             Function: Retrieve the calculated number of identities for the invocant
434             Example :
435             Returns : value of identities (a scalar)
436             Args : scalar $type: one of 'hit', 'subject', 'query'
437             default is 'query'
438             option scalar $action: one of 'exact', 'est', 'fast', 'max'
439             default is 'exact'
440             option scalar $context: strand/frame context string
441             Note : getter only
442              
443             =cut
444              
445             sub identities{
446 538     538 1 1250 my $self = shift;
447 538         901 my ($type, $action, $context) = @_;
448 538         1290 $self->_check_type_arg(\$type);
449 538         1143 $self->_check_action_arg(\$action);
450 538         1146 $self->_check_context_arg($type, \$context);
451 538 100       1620 if (!defined $self->{"identities_${type}_${action}_${context}"}) {
452 531         1123 $self->_calc_stats($type, $action, $context);
453             }
454 538         1461 return $self->{"identities_${type}_${action}_${context}"};
455             }
456              
457             =head2 conserved
458              
459             Title : conserved
460             Usage : $tiling->conserved($type, $action)
461             Function: Retrieve the calculated number of conserved sites for the invocant
462             Example :
463             Returns : value of conserved (a scalar)
464             Args : scalar $type: one of 'hit', 'subject', 'query'
465             default is 'query'
466             option scalar $action: one of 'exact', 'est', 'fast', 'max'
467             default is 'exact'
468             option scalar $context: strand/frame context string
469             Note : getter only
470              
471             =cut
472              
473             sub conserved{
474 538     538 1 1054 my $self = shift;
475 538         888 my ($type, $action, $context) = @_;
476 538         1062 $self->_check_type_arg(\$type);
477 538         979 $self->_check_action_arg(\$action);
478 538         1118 $self->_check_context_arg($type, \$context);
479 538 50       1546 if (!defined $self->{"conserved_${type}_${action}_${context}"}) {
480 0         0 $self->_calc_stats($type, $action, $context);
481             }
482 538         1194 return $self->{"conserved_${type}_${action}_${context}"};
483             }
484              
485             =head2 length
486              
487             Title : length
488             Usage : $tiling->length($type, $action, $context)
489             Function: Retrieve the total length of aligned residues for
490             the seq $type
491             Example :
492             Returns : value of length (a scalar)
493             Args : scalar $type: one of 'hit', 'subject', 'query'
494             default is 'query'
495             option scalar $action: one of 'exact', 'est', 'fast', 'max'
496             default is 'exact'
497             option scalar $context: strand/frame context string
498             Note : getter only
499              
500             =cut
501              
502             sub length{
503 1051     1051 1 1976 my $self = shift;
504 1051         1699 my ($type,$action,$context) = @_;
505 1051         2350 $self->_check_type_arg(\$type);
506 1051         2055 $self->_check_action_arg(\$action);
507 1051         2210 $self->_check_context_arg($type, \$context);
508 1051 100       2602 if (!defined $self->{"length_${type}_${action}_${context}"}) {
509 1         5 $self->_calc_stats($type, $action, $context);
510             }
511 1051         3735 return $self->{"length_${type}_${action}_${context}"};
512             }
513              
514             =head2 frac
515              
516             Title : frac
517             Usage : $tiling->frac($type, $denom, $action, $context, $method)
518             Function: Return the fraction of sequence length consisting
519             of desired kinds of pairs (given by $method),
520             with respect to $denom
521             Returns : scalar float
522             Args : -type => one of 'hit', 'subject', 'query'
523             -denom => one of 'total', 'aligned'
524             -action => one of 'exact', 'est', 'fast', 'max'
525             -context => strand/frame context string
526             -method => one of 'identical', 'conserved'
527             Note : $denom == 'aligned', return desired_stat/num_aligned
528             $denom == 'total', return desired_stat/_reported_length
529             (i.e., length of the original input sequences)
530             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
531             reported lengths of translated dna are reduced by
532             a factor of 3, to provide fractions relative to
533             amino acid coordinates.
534              
535             =cut
536              
537             sub frac {
538 1058     1058 1 150463 my $self = shift;
539 1058         2566 my @args = @_;
540 1058         3765 my ($type, $denom, $action, $context, $method) = $self->_rearrange([qw(TYPE DENOM ACTION CONTEXT METHOD)],@args);
541 1058         3407 $self->_check_type_arg(\$type);
542 1058         2482 $self->_check_action_arg(\$action);
543 1058         2405 $self->_check_context_arg($type, \$context);
544 1058 50 33     9796 unless ($method and grep(/^$method$/, qw( identical conserved ))) {
545 0         0 $self->throw("-method must specified; one of ('identical', 'conserved')");
546             }
547 1058   50     2047 $denom ||= 'total';
548 1058 50       4202 unless (grep /^$denom/, qw( total aligned )) {
549 0         0 $self->throw("Denominator selection must be one of ('total', 'aligned'), not '$denom'");
550             }
551 1058         2407 my $key = "frac_${method}_${type}_${denom}_${action}_${context}";
552 1058         1395 my $stat;
553 1058         1527 for ($method) {
554 1058 100       1864 $_ eq 'identical' && do {
555 529         1256 $stat = $self->identities($type, $action, $context);
556 529         890 last;
557             };
558 529 50       1040 $_ eq 'conserved' && do {
559 529         1048 $stat = $self->conserved($type, $action, $context);
560 529         707 last;
561             };
562 0         0 do {
563 0         0 $self->throw("What are YOU doing here?");
564             };
565             }
566 1058 100       2303 if (!defined $self->{$key}) {
567 1046         1703 for ($denom) {
568 1046 50       2246 /total/ && do {
569 0         0 $self->{$key} =
570             $stat/$self->_reported_length($type); # need fudge fac??
571 0         0 last;
572             };
573 1046 50       2208 /aligned/ && do {
574 1046         2004 $self->{$key} =
575             $stat/$self->length($type,$action,$context);
576 1046         1527 last;
577             };
578 0         0 do {
579 0         0 $self->throw("What are YOU doing here?");
580             };
581             }
582             }
583 1058         3816 return $self->{$key};
584             }
585              
586             =head2 frac_identical
587              
588             Title : frac_identical
589             Usage : $tiling->frac_identical($type, $denom, $action, $context)
590             Function: Return the fraction of sequence length consisting
591             of identical pairs, with respect to $denom
592             Returns : scalar float
593             Args : -type => one of 'hit', 'subject', 'query'
594             -denom => one of 'total', 'aligned'
595             -action => one of 'exact', 'est', 'fast', 'max'
596             -context => strand/frame context string
597             Note : $denom == 'aligned', return conserved/num_aligned
598             $denom == 'total', return conserved/_reported_length
599             (i.e., length of the original input sequences)
600             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
601             reported lengths of translated dna are reduced by
602             a factor of 3, to provide fractions relative to
603             amino acid coordinates.
604             Note : This an alias that calls frac()
605              
606             =cut
607              
608             sub frac_identical{
609 3     3 1 15 my $self = shift;
610 3         11 my @args = @_;
611 3         13 my ($type, $denom, $action,$context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
612 3         14 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -method=>'identical', -context=>$context);
613             }
614              
615             =head2 frac_conserved
616              
617             Title : frac_conserved
618             Usage : $tiling->frac_conserved($type, $denom, $action, $context)
619             Function: Return the fraction of sequence length consisting
620             of conserved pairs, with respect to $denom
621             Returns : scalar float
622             Args : -type => one of 'hit', 'subject', 'query'
623             -denom => one of 'total', 'aligned'
624             -action => one of 'exact', 'est', 'fast', 'max'
625             -context => strand/frame context string
626             Note : $denom == 'aligned', return conserved/num_aligned
627             $denom == 'total', return conserved/_reported_length
628             (i.e., length of the original input sequences)
629             Note : In keeping with the spirit of Bio::Search::HSP::HSPI,
630             reported lengths of translated dna are reduced by
631             a factor of 3, to provide fractions relative to
632             amino acid coordinates.
633             Note : This an alias that calls frac()
634              
635             =cut
636              
637             sub frac_conserved{
638 3     3 1 7 my $self = shift;
639 3         10 my @args = @_;
640 3         16 my ($type, $denom, $action, $context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args );
641 3         17 $self->frac( -type=>$type, -denom=>$denom, -action=>$action, -context=>$context, -method=>'conserved');
642             }
643              
644             =head2 frac_aligned
645              
646             Title : frac_aligned
647             Aliases : frac_aligned_query - frac_aligned(-type=>'query',...)
648             frac_aligned_hit - frac_aligned(-type=>'hit',...)
649             Usage : $tiling->frac_aligned(-type=>$type,
650             -action=>$action,
651             -context=>$context)
652             Function: Return the fraction of input sequence length
653             that was aligned by the algorithm
654             Returns : scalar float
655             Args : -type => one of 'hit', 'subject', 'query'
656             -action => one of 'exact', 'est', 'fast', 'max'
657             -context => strand/frame context string
658              
659             =cut
660              
661             sub frac_aligned{
662 4     4 1 14 my ($self, @args) = @_;
663 4         18 my ($type, $action, $context) = $self->_rearrange([qw(TYPE ACTION CONTEXT)],@args);
664 4         19 $self->_check_type_arg(\$type);
665 4         12 $self->_check_action_arg(\$action);
666 4         11 $self->_check_context_arg($type, \$context);
667 4 50       16 if (!$self->{"frac_aligned_${type}_${action}_${context}"}) {
668 4         10 $self->{"frac_aligned_${type}_${action}_${context}"} = $self->num_aligned($type,$action,$context)/$self->_reported_length($type);
669             }
670 4         51 return $self->{"frac_aligned_${type}_${action}_${context}"};
671             }
672              
673 2     2 0 11 sub frac_aligned_query { shift->frac_aligned(-type=>'query', @_) }
674 2     2 0 9 sub frac_aligned_hit { shift->frac_aligned(-type=>'hit', @_) }
675            
676              
677             =head2 num_aligned
678              
679             Title : num_aligned
680             Usage : $tiling->num_aligned(-type=>$type)
681             Function: Return the number of residues of sequence $type
682             that were aligned by the algorithm
683             Returns : scalar int
684             Args : -type => one of 'hit', 'subject', 'query'
685             -action => one of 'exact', 'est', 'fast', 'max'
686             -context => strand/frame context string
687             Note : Since this is calculated from reported coordinates,
688             not symbol string counts, it is already in terms of
689             "logical length"
690             Note : Aliases length()
691              
692             =cut
693              
694 4     4 1 12 sub num_aligned { shift->length( @_ ) };
695              
696             =head2 num_unaligned
697              
698             Title : num_unaligned
699             Usage : $tiling->num_unaligned(-type=>$type)
700             Function: Return the number of residues of sequence $type
701             that were left unaligned by the algorithm
702             Returns : scalar int
703             Args : -type => one of 'hit', 'subject', 'query'
704             -action => one of 'exact', 'est', 'fast', 'max'
705             -context => strand/frame context string
706             Note : Since this is calculated from reported coordinates,
707             not symbol string counts, it is already in terms of
708             "logical length"
709              
710             =cut
711              
712             sub num_unaligned {
713 0     0 1 0 my $self = shift;
714 0         0 my ($type,$action,$context) = @_;
715 0         0 my $ret;
716 0         0 $self->_check_type_arg(\$type);
717 0         0 $self->_check_action_arg(\$action);
718 0         0 $self->_check_context_arg($type, \$context);
719 0 0       0 if (!defined $self->{"num_unaligned_${type}_${action}_${context}"}) {
720 0         0 $self->{"num_unaligned_${type}_${action}_${context}"} = $self->_reported_length($type)-$self->num_aligned($type,$action,$context);
721             }
722 0         0 return $self->{"num_unaligned_${type}_${action}_${context}"};
723             }
724            
725              
726             =head2 range
727              
728             Title : range
729             Usage : $tiling->range(-type=>$type)
730             Function: Returns the extent of the longest tiling
731             as ($min_coord, $max_coord)
732             Returns : array of two scalar integers
733             Args : -type => one of 'hit', 'subject', 'query'
734             -context => strand/frame context string
735              
736             =cut
737              
738             sub range {
739 8     8 1 1590 my ($self, $type, $context) = @_;
740 8         26 $self->_check_type_arg(\$type);
741 8         26 $self->_check_context_arg($type, \$context);
742 8         20 my @a = $self->_contig_intersection($type,$context);
743 8         52 return ($a[0][0], $a[-1][1]);
744             }
745              
746              
747              
748             =head1 ACCESSORS
749              
750             =head2 coverage_map
751              
752             Title : coverage_map
753             Usage : $map = $tiling->coverage_map($type)
754             Function: Property to contain the coverage map calculated
755             by _calc_coverage_map() - see that for
756             details
757             Example :
758             Returns : value of coverage_map_$type as an array
759             Args : scalar $type: one of 'hit', 'subject', 'query'
760             default is 'query'
761             Note : getter
762              
763             =cut
764              
765             sub coverage_map{
766 332     332 1 431 my $self = shift;
767 332         527 my ($type, $context) = @_;
768 332         782 $self->_check_type_arg(\$type);
769 332         688 $self->_check_context_arg($type, \$context);
770              
771 332 100       852 if (!defined $self->{"coverage_map_${type}_${context}"}) {
772             # following calculates coverage maps in all strands/frames
773             # if necessary
774 203         566 $self->_calc_coverage_map($type, $context);
775             }
776             # if undef is returned, then there were no hsps for given strand/frame
777 332 50       836 if (!defined $self->{"coverage_map_${type}_${context}"}) {
778 0         0 $self->warn("No HSPS present for type '$type' in context '$context' for this hit");
779 0         0 return undef;
780             }
781 332         367 return @{$self->{"coverage_map_${type}_${context}"}};
  332         983  
782             }
783              
784             =head2 coverage_map_as_text
785              
786             Title : coverage_map_as_text
787             Usage : $tiling->coverage_map_as_text($type, $legend_flag)
788             Function: Format a text-graphic representation of the
789             coverage map
790             Returns : an array of scalar strings, suitable for printing
791             Args : $type: one of 'query', 'hit', 'subject'
792             $context: strand/frame context string
793             $legend_flag: boolean; add a legend indicating
794             the actual interval coordinates for each component
795             interval and hsp (in the $type sequence context)
796             Example : print $tiling->coverage_map_as_text('query',1);
797              
798             =cut
799              
800             sub coverage_map_as_text{
801 0     0 1 0 my $self = shift;
802 0         0 my ($type, $context, $legend_q) = @_;
803 0         0 $self->_check_type_arg(\$type);
804 0         0 $self->_check_context_arg($type, \$context);
805              
806 0         0 my @map = $self->coverage_map($type, $context);
807 0         0 my @ret;
808 0         0 my @hsps = $self->hit->hsps;
809 0         0 my %hsps_i;
810 0         0 require Tie::RefHash;
811 0         0 tie %hsps_i, 'Tie::RefHash';
812 0         0 @hsps_i{@hsps} = (0..$#hsps);
813 0         0 my @mx;
814 0         0 foreach (0..$#map) {
815 0         0 my @hspx = ('') x @hsps;
816 0         0 my @these_hsps = @{$map[$_]->[1]};
  0         0  
817 0         0 @hspx[@hsps_i{@these_hsps}] = ('*') x @these_hsps;
818 0         0 $mx[$_] = \@hspx;
819             }
820 0         0 untie %hsps_i;
821              
822 0         0 push @ret, "\tIntvl\n";
823 0         0 push @ret, "HSPS\t", join ("\t", (0..$#map)), "\n";
824 0         0 foreach my $h (0..$#hsps) {
825 0         0 push @ret, join("\t", $h, map { $mx[$_][$h] } (0..$#map) ),"\n";
  0         0  
826             }
827 0 0       0 if ($legend_q) {
828 0         0 push @ret, "Interval legend\n";
829 0         0 foreach (0..$#map) {
830 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$map[$_][0]});
  0         0  
831             }
832 0         0 push @ret, "HSP legend\n";
833 0         0 my @ints = get_intervals_from_hsps($type,@hsps);
834 0         0 foreach (0..$#hsps) {
835 0         0 push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$ints[$_]});
  0         0  
836             }
837             }
838 0         0 return @ret;
839             }
840              
841             =head2 hit
842              
843             Title : hit
844             Usage : $tiling->hit
845             Function:
846             Example :
847             Returns : The HitI object associated with the invocant
848             Args : none
849             Note : getter only
850              
851             =cut
852              
853             sub hit{
854 5229     5229 1 5248 my $self = shift;
855 5229 50       7535 $self->warn("Getter only") if @_;
856 5229         11111 return $self->{'hit'};
857             }
858              
859             =head2 hsps
860              
861             Title : hsps
862             Usage : $tiling->hsps()
863             Function: Container for the HSP objects associated with invocant
864             Example :
865             Returns : an array of hsps associated with the hit
866             Args : on set, new value (an arrayref or undef, optional)
867              
868             =cut
869              
870             sub hsps{
871 1768     1768 1 2534 my $self = shift;
872 1768 100       3664 return $self->{'hsps'} = shift if @_;
873 1249         1622 return @{$self->{'hsps'}};
  1249         4497  
874             }
875              
876             =head2 contexts
877              
878             Title : contexts
879             Usage : @contexts = $tiling->context($type) or
880             @indices = $tiling->context($type, $context)
881             Function: Retrieve the set of available contexts in the hit,
882             or the indices of hsps having the given context
883             (integer indices for the array returned by $self->hsps)
884             Returns : array of scalar context strings or
885             array of scalar positive integers
886             undef if no hsps in given context
887             Args : $type: one of 'query', 'hit', 'subject'
888             optional $context: context string
889              
890             =cut
891              
892             sub contexts{
893 1032     1032 1 68736 my $self = shift;
894 1032         1618 my ($type, $context) = @_;
895 1032         2146 $self->_check_type_arg(\$type);
896 1032 100       1691 return keys %{$self->{"_contexts_$type"}} unless defined $context;
  403         1632  
897 629 50       1609 return undef unless $self->{"_contexts_$type"}{$context};
898 629         672 return @{$self->{"_contexts_$type"}{$context}};
  629         1806  
899             }
900              
901             =head2 mapping
902              
903             Title : mapping
904             Usage : $tiling->mapping($type)
905             Function: Retrieve the mapping coefficient for the sequence type
906             based on the underlying algorithm
907             Returns : scalar integer (mapping coefficient)
908             Args : $type: one of 'query', 'hit', 'subject'
909             Note : getter only (set in constructor)
910              
911             =cut
912              
913             sub mapping{
914 505     505 1 605 my $self = shift;
915 505         577 my $type = shift;
916 505         964 $self->_check_type_arg(\$type);
917 505         1221 return $self->{"_mapping_${type}"};
918             }
919              
920             =head2 default_context
921              
922             Title : default_context
923             Usage : $tiling->default_context($type)
924             Function: Retrieve the default strand/frame context string
925             for the sequence type based on the underlying algorithm
926             Returns : scalar string (context string)
927             Args : $type: one of 'query', 'hit', 'subject'
928             Note : getter only (set in constructor)
929              
930             =cut
931              
932             sub default_context{
933 298     298 1 344 my $self = shift;
934 298         345 my $type = shift;
935 298         560 $self->_check_type_arg(\$type);
936 298         616 return $self->{"_def_context_${type}"};
937             }
938              
939             =head2 algorithm
940              
941             Title : algorithm
942             Usage : $tiling->algorithm
943             Function: Retrieve the algorithm name associated with the
944             invocant's hit object
945             Returns : scalar string
946             Args : none
947             Note : getter only (set in constructor)
948              
949             =cut
950              
951             sub algorithm{
952 0     0 1 0 my $self = shift;
953 0 0       0 $self->warn("Getter only") if @_;
954 0         0 return $self->{"_algorithm"};
955             }
956              
957             =head1 "PRIVATE" METHODS
958              
959             =head2 Calculators
960              
961             See L for lower level
962             calculation methods.
963              
964             =head2 _calc_coverage_map
965              
966             Title : _calc_coverage_map
967             Usage : $tiling->_calc_coverage_map($type)
968             Function: Calculates the coverage map for the object's associated
969             hit from the perspective of the desired $type (see Args:)
970             and sets the coverage_map() property
971             Returns : True on success
972             Args : optional scalar $type: one of 'hit'|'subject'|'query'
973             default is 'query'
974             Note : The "coverage map" is an array with the following format:
975             ( [ $component_interval => [ @containing_hsps ] ], ... ),
976             where $component_interval is a closed interval (see
977             DESCRIPTION) of the form [$a0, $a1] with $a0 <= $a1, and
978             @containing_hsps is an array of all HspI objects in the hit
979             which completely contain the $component_interval.
980             The set of $component_interval's is a disjoint decomposition
981             of the minimum set of minimal intervals that completely
982             cover the hit's HSPs (from the perspective of the $type)
983             Note : This calculates the map for all strand/frame contexts available
984             in the hit
985              
986             =cut
987              
988             sub _calc_coverage_map {
989 207     207   271 my $self = shift;
990 207         329 my ($type) = @_;
991 207         415 $self->_check_type_arg(\$type);
992              
993             # obtain the [start, end] intervals for all hsps in the hit (relative
994             # to the type)
995 207 50       485 unless ($self->{'hsps'}) {
996 0         0 $self->warn("No HSPs for this hit");
997 0         0 return;
998             }
999              
1000 207         337 my (@map, @hsps, %filters, @intervals);
1001            
1002              
1003             # conversion here?
1004 207         429 my $c = $self->mapping($type);
1005            
1006             # create the possible maps
1007 207         488 for my $context ($self->contexts($type)) {
1008 219         382 @map = ();
1009 219         435 @hsps = ($self->hsps)[$self->contexts($type, $context)];
1010 219         929 @intervals = get_intervals_from_hsps( $type, @hsps );
1011             # the "frame"
1012 219         546 my $f = ($intervals[0]->[0] - 1) % $c;
1013              
1014             # convert interval endpoints...
1015 219         429 for (@intervals) {
1016 536         848 $$_[0] = ($$_[0] - $f + $c - 1)/$c;
1017 536         712 $$_[1] = ($$_[1] - $f)/$c;
1018             }
1019            
1020             # determine the minimal set of disjoint intervals that cover the
1021             # set of hsp intervals
1022 219         608 my @dj_set = interval_tiling(\@intervals);
1023              
1024             # decompose each disjoint interval into another set of disjoint
1025             # intervals, each of which is completely contained within the
1026             # original hsp intervals with which it overlaps
1027 219         295 my $i=0;
1028 219         258 my @decomp;
1029 219         402 for my $dj_elt (@dj_set) {
1030 432         597 my ($covering, $indices) = @$dj_elt;
1031 432         753 my @covering_hsps = @hsps[@$indices];
1032 432         565 my @coverers = @intervals[@$indices];
1033 432         840 @decomp = decompose_interval( \@coverers );
1034 432         729 for (@decomp) {
1035 638         619 my ($component, $container_indices) = @{$_};
  638         800  
1036 638         1258 push @map, [ $component,
1037             [@covering_hsps[@$container_indices]] ];
1038             }
1039 432         752 1;
1040             }
1041            
1042             # unconvert the components:
1043             #####
1044 219         313 foreach (@map) {
1045 638         992 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1046 638         870 $$_[0][1] = $c*$$_[0][1] + $f;
1047             }
1048 219         280 foreach (@dj_set) {
1049 432         662 $$_[0][0] = $c*$$_[0][0] - $c + 1 + $f;
1050 432         606 $$_[0][1] = $c*$$_[0][1] + $f;
1051             }
1052              
1053             # sort the map on the interval left-ends
1054 219         467 @map = sort { $a->[0][0]<=>$b->[0][0] } @map;
  980         1085  
1055 219         738 $self->{"coverage_map_${type}_${context}"} = [@map];
1056             # set the _contig_intersection attribute here (side effect)
1057 219         347 $self->{"_contig_intersection_${type}_${context}"} = [map { $$_[0] } @map];
  638         1663  
1058             }
1059              
1060 207         593 return 1; # success
1061             }
1062              
1063             =head2 _calc_stats
1064              
1065             Title : _calc_stats
1066             Usage : $tiling->_calc_stats($type, $action, $context)
1067             Function: Calculates [estimated] tiling statistics (identities, conserved sites
1068             length) and sets the public accessors
1069             Returns : True on success
1070             Args : scalar $type: one of 'hit', 'subject', 'query'
1071             default is 'query'
1072             optional scalar $action: requests calculation method
1073             currently one of 'exact', 'est', 'fast', 'max'
1074             option scalar $context: strand/frame context string
1075             Note : Action: The statistics are calculated by summing quantities
1076             over the disjoint component intervals, taking into account
1077             coverage of those intervals by multiple HSPs. The action
1078             tells the algorithm how to obtain those quantities--
1079             'exact' will use Bio::Search::HSP::HSPI::matches
1080             to count the appropriate segment of the homology string;
1081             'est' will estimate the statistics by multiplying the
1082             fraction of the HSP overlapped by the component interval
1083             (see MapTileUtils) by the BLAST-reported identities/postives
1084             (this may be convenient for BLAST summary report formats)
1085             * Both exact and est take the average over the number of HSPs
1086             that overlap the component interval.
1087             'max' uses the exact method to calculate the statistics,
1088             and returns only the maximum identites/positives over
1089             overlapping HSP for the component interval. No averaging
1090             is involved here.
1091             'fast' doesn't involve tiling at all (hence the name),
1092             but it seems like a very good estimate, and uses only
1093             reported values, and so does not require sequence data. It
1094             calculates an average of reported identities, conserved
1095             sites, and lengths, over unmodified hsps in the hit,
1096             weighted by the length of the hsps.
1097              
1098             =cut
1099              
1100             sub _calc_stats {
1101 532     532   588 my $self = shift;
1102 532         730 my ($type, $action, $context) = @_;
1103             # need to check args here, in case method is called internally.
1104 532         1039 $self->_check_type_arg(\$type);
1105 532         1021 $self->_check_action_arg(\$action);
1106 532         1093 $self->_check_context_arg($type, \$context);
1107 532         971 my ($ident, $cons, $length) = (0,0,0);
1108              
1109             # fast : avoid coverage map altogether, get a pretty damn
1110             # good estimate with a weighted average of reported hsp
1111             # statistics
1112              
1113 532 100       994 ($action eq 'fast') && do {
1114 204         367 my @hsps = $self->hit->hsps;
1115 204         483 @hsps = @hsps[$self->contexts($type, $context)];
1116             # weights for averages
1117 204         406 my @wt = map {$_->length($type)} @hsps;
  450         981  
1118 204         12895 my $sum = eval( join('+',@wt) );
1119 204         1033 $_ /= $sum for (@wt);
1120 204         356 for (@hsps) {
1121 450         759 my $wt = shift @wt;
1122 450         1340 $ident += $wt*$_->matches_MT($type,'identities');
1123 450         1043 $cons += $wt*$_->matches_MT($type,'conserved');
1124 450         1085 $length += $wt*$_->length($type);
1125             }
1126             };
1127              
1128             # or, do tiling
1129              
1130             # calculate identities/conserved sites in tiling
1131             # estimate based on the fraction of the component interval covered
1132             # and ident/cons reported by the HSPs
1133 532 100       1012 ($action ne 'fast') && do {
1134 328         848 foreach ($self->coverage_map($type, $context)) {
1135 1384         1826 my ($intvl, $hsps) = @{$_};
  1384         2167  
1136 1384         2334 my $len = ($$intvl[1]-$$intvl[0]+1);
1137 1384 100       2274 my $ncover = ($action eq 'max') ? 1 : scalar @$hsps;
1138 1384         1824 my ($acc_i, $acc_c) = (0,0);
1139 1384         1722 foreach my $hsp (@$hsps) {
1140 1764         2007 for ($action) {
1141 1764 100       2775 ($_ eq 'est') && do {
1142 788         2485 my ($inc_i, $inc_c) = $hsp->matches_MT(
1143             -type => $type,
1144             -action => 'searchutils',
1145             );
1146 788         1946 my $frac = $len/$hsp->length($type);
1147 788         1275 $acc_i += $inc_i * $frac;
1148 788         881 $acc_c += $inc_c * $frac;
1149 788         1490 last;
1150             };
1151 976 100       1324 ($_ eq 'max') && do {
1152 483         1618 my ($inc_i, $inc_c) = $hsp->matches_MT(
1153             -type => $type,
1154             -action => 'searchutils',
1155             -start => $$intvl[0],
1156             -end => $$intvl[1]
1157             );
1158 483 100       1093 $acc_i = ($acc_i > $inc_i) ? $acc_i : $inc_i;
1159 483 100       614 $acc_c = ($acc_c > $inc_c) ? $acc_c : $inc_c;
1160 483         893 last;
1161             };
1162 493 50 33     1287 (!$_ || ($_ eq 'exact')) && do {
1163 493         1664 my ($inc_i, $inc_c) = $hsp->matches_MT(
1164             -type => $type,
1165             -action => 'searchutils',
1166             -start => $$intvl[0],
1167             -end => $$intvl[1]
1168             );
1169 493         912 $acc_i += $inc_i;
1170 493         534 $acc_c += $inc_c;
1171 493         821 last;
1172             };
1173             }
1174             }
1175 1384         2051 $ident += ($acc_i/$ncover);
1176 1384         1486 $cons += ($acc_c/$ncover);
1177 1384         1963 $length += $len;
1178             }
1179             };
1180            
1181 532         1890 $self->{"identities_${type}_${action}_${context}"} = $ident;
1182 532         1265 $self->{"conserved_${type}_${action}_${context}"} = $cons;
1183 532         1295 $self->{"length_${type}_${action}_${context}"} = $length;
1184            
1185 532         893 return 1;
1186             }
1187              
1188             =head2 Tiling Helper Methods
1189              
1190             =cut
1191              
1192             # coverage_map is of the form
1193             # ( [ $interval, \@containing_hsps ], ... )
1194              
1195             # so, for each interval, pick one of the containing hsps,
1196             # and return the union of all the picks.
1197              
1198             # use the combinatorial generating iterator, with
1199             # the urns containing the @containing_hsps for each
1200             # interval
1201              
1202             =head2 _make_tiling_iterator
1203              
1204             Title : _make_tiling_iterator
1205             Usage : $self->_make_tiling_iterator($type)
1206             Function: Create an iterator code ref that will step through all
1207             minimal combinations of HSPs that produce complete coverage
1208             of the $type ('hit', 'subject', 'query') sequence,
1209             and set the correct iterator property of the invocant
1210             Example :
1211             Returns : The iterator
1212             Args : scalar $type, one of 'hit', 'subject', 'query';
1213             default is 'query'
1214              
1215             =cut
1216              
1217             sub _make_tiling_iterator {
1218             ### create the urns
1219 3     3   6 my $self = shift;
1220 3         7 my ($type, $context) = @_;
1221 3         8 $self->_check_type_arg(\$type);
1222 3         7 $self->_check_context_arg($type, \$context);
1223              
1224             # initialize the urns
1225 3         12 my @urns = map { [0, $$_[1]] } $self->coverage_map($type, $context);
  43         68  
1226              
1227 3         5 my $FINISHED = 0;
1228             my $iter = sub {
1229             # rewind?
1230 271 100   271   401 if (my $rewind = shift) {
1231             # reinitialize urn indices
1232 2         10 $$_[0] = 0 for (@urns);
1233 2         4 $FINISHED = 0;
1234 2         8 return 1;
1235             }
1236             # check if done...
1237 269 100       413 return if $FINISHED;
1238              
1239 266         281 my $finished_incrementing = 0;
1240             # @ret is the collector of urn choices
1241 266         278 my @ret;
1242              
1243 266         356 for my $urn (@urns) {
1244 4482         4794 my ($n, $hsps) = @$urn;
1245 4482         4387 push @ret, $$hsps[$n];
1246 4482 100       5935 unless ($finished_incrementing) {
1247 1086 100       1288 if ($n == $#$hsps) { $$urn[0] = 0; }
  823         1068  
1248 263         254 else { ($$urn[0])++; $finished_incrementing = 1 }
  263         327  
1249             }
1250             }
1251              
1252             # backstop...
1253 266 100       326 $FINISHED = 1 unless $finished_incrementing;
1254             # uniquify @ret
1255             # $hsp->rank is a unique identifier for an hsp in a hit.
1256             # preserve order in @ret
1257            
1258 266         279 my (%order, %uniq);
1259 266         1774 @order{(0..$#ret)} = @ret;
1260 266         1433 $uniq{$order{$_}->rank} = $_ for (0..$#ret);
1261 266         776 @ret = @order{ sort {$a<=>$b} values %uniq };
  5638         5459  
1262              
1263 266         997 return @ret;
1264 3         19 };
1265 3         32 return $iter;
1266             }
1267              
1268             =head2 _tiling_iterator
1269              
1270             Title : _tiling_iterator
1271             Usage : $tiling->_tiling_iterator($type,$context)
1272             Function: Retrieve the tiling iterator coderef for the requested
1273             $type ('hit', 'subject', 'query')
1274             Example :
1275             Returns : coderef to the desired iterator
1276             Args : scalar $type, one of 'hit', 'subject', 'query'
1277             default is 'query'
1278             option scalar $context: strand/frame context string
1279             Note : getter only
1280              
1281             =cut
1282              
1283             sub _tiling_iterator {
1284 270     270   320 my $self = shift;
1285 270         335 my ($type, $context) = @_;
1286 270         475 $self->_check_type_arg(\$type);
1287 270         543 $self->_check_context_arg($type, \$context);
1288              
1289 270 100       603 if (!defined $self->{"_tiling_iterator_${type}_${context}"}) {
1290 2         7 $self->{"_tiling_iterator_${type}_${context}"} =
1291             $self->_make_tiling_iterator($type,$context);
1292             }
1293 270         526 return $self->{"_tiling_iterator_${type}_${context}"};
1294             }
1295             =head2 Construction Helper Methods
1296              
1297             See also L.
1298              
1299             =cut
1300              
1301             sub _check_type_arg {
1302 8297     8297   8109 my $self = shift;
1303 8297         8012 my $typeref = shift;
1304 8297   100     12451 $$typeref ||= 'query';
1305 8297 50       50602 $self->throw("Unknown type '$$typeref'") unless grep(/^$$typeref$/, qw( hit query subject ));
1306 8297 100       13629 $$typeref = 'hit' if $$typeref eq 'subject';
1307 8297         8945 return 1;
1308             }
1309              
1310             sub _check_action_arg {
1311 3721     3721   4169 my $self = shift;
1312 3721         3641 my $actionref = shift;
1313 3721 100       5433 if (!$$actionref) {
1314 17 100       41 $$actionref = ($self->_has_sequence_data ? 'exact' : 'est');
1315             }
1316             else {
1317 3704 50       26390 $self->throw("Calc action '$$actionref' unrecognized") unless grep /^$$actionref$/, qw( est exact fast max );
1318 3704 100 100     8483 if ($$actionref ne 'est' and !$self->_has_sequence_data) {
1319 12         46 $self->warn("Blast file did not possess sequence data; defaulting to 'est' action");
1320 12         14 $$actionref = 'est';
1321             }
1322             }
1323 3721         4305 return 1;
1324             }
1325              
1326             sub _check_context_arg {
1327 4613     4613   4771 my $self = shift;
1328 4613         5924 my ($type, $contextref) = @_;
1329 4613 100       6230 if (!$$contextref) {
1330 298 50       440 $self->throw("Type '$type' requires strand/frame context for algorithm ".$self->algorithm) unless ($self->mapping($type) == 1);
1331             # set default according to default_context attrib
1332 298         501 $$contextref = $self->default_context($type);
1333             }
1334             else {
1335 4315 50       7966 ($$contextref =~ /^[mp]$/) && do { $$contextref .= '_' };
  0         0  
1336 4315 50       8837 $self->throw("Context '$$contextref' unrecognized") unless
1337             $$contextref =~ /all|[mp][0-2_]/;
1338             }
1339            
1340             }
1341              
1342             =head2 _make_context_key
1343              
1344             Title : _make_context_key
1345             Alias : _context
1346             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1347             Function: create a string indicating strand/frame context; serves as
1348             component of memoizing hash keys
1349             Returns : scalar string
1350             Args : -type => one of ('query', 'hit', 'subject')
1351             -strand => one of (1,0,-1)
1352             -frame => one of (-2, 1, 0, 1, -2)
1353             called w/o args: returns 'all'
1354              
1355             =cut
1356              
1357             sub _make_context_key {
1358 546     546   621 my $self = shift;
1359 546         1388 my @args = @_;
1360 546         1941 my ($type, $strand, $frame) = $self->_rearrange([qw(TYPE STRAND FRAME)], @args);
1361 546         1483 _check_type_arg(\$type);
1362 546 50 33     1103 return 'all' unless (defined $strand or defined $frame);
1363 546 100 66     1458 if ( defined $strand && $self->_has_strand($type) ) {
1364 211 100 66     668 if (defined $frame && $self->_has_frame($type)) {
1365 94 100       373 return ($strand >= 0 ? 'p' : 'm').abs($frame);
1366             }
1367             else {
1368 117 100       415 return ($strand >= 0 ? 'p_' : 'm_');
1369             }
1370             }
1371             else {
1372 335 50 33     838 if (defined $frame && $self->_has_frame($type)) {
1373 0         0 $self->warn("Frame defined without strand; punting with plus strand");
1374 0         0 return 'p'.abs($frame);
1375             }
1376             else {
1377 335         900 return 'all';
1378             }
1379             }
1380             }
1381              
1382             =head2 _context
1383              
1384             Title : _context
1385             Alias : _make_context_key
1386             Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame)
1387             Function: create a string indicating strand/frame context; serves as
1388             component of memoizing hash keys
1389             Returns : scalar string
1390             Args : -type => one of ('query', 'hit', 'subject')
1391             -strand => one of (1,0,-1)
1392             -frame => one of (-2, 1, 0, 1, -2)
1393             called w/o args: returns 'all'
1394              
1395             =cut
1396              
1397 546     546   1213 sub _context { shift->_make_context_key(@_) }
1398              
1399             =head2 Predicates
1400              
1401             Most based on a reading of the algorithm name with a configuration lookup.
1402              
1403             =over
1404              
1405             =item _has_sequence_data()
1406              
1407             =cut
1408              
1409             sub _has_sequence_data {
1410 2253     2253   2844 my $self = shift;
1411 2253 50       3349 $self->throw("Hit attribute not yet set") unless defined $self->hit;
1412 2253 100       3358 return (($self->hit->hsps)[0]->seq_str('match') ? 1 : 0);
1413             }
1414              
1415             =item _has_logical_length()
1416              
1417             =cut
1418              
1419             sub _has_logical_length {
1420 0     0   0 my $self = shift;
1421 0         0 my $type = shift;
1422 0         0 $self->_check_type_arg(\$type);
1423             # based on mapping coeff
1424 0 0       0 $self->throw("Mapping coefficients not yet set") unless defined $self->mapping($type);
1425 0         0 return ($self->mapping($type) > 1);
1426             }
1427              
1428             =item _has_strand()
1429              
1430             =cut
1431              
1432             sub _has_strand {
1433 546     546   706 my $self = shift;
1434 546         638 my $type = shift;
1435 546         1070 $self->_check_type_arg(\$type);
1436 546         1959 return $self->{"_has_strand_${type}"};
1437             }
1438              
1439             =item _has_frame()
1440              
1441             =cut
1442              
1443             sub _has_frame {
1444 546     546   678 my $self = shift;
1445 546         604 my $type = shift;
1446 546         1099 $self->_check_type_arg(\$type);
1447 546         1596 return $self->{"_has_frame_${type}"};
1448             }
1449              
1450             =back
1451              
1452             =head1 Private Accessors
1453              
1454             =head2 _contig_intersection
1455              
1456             Title : _contig_intersection
1457             Usage : $tiling->_contig_intersection($type)
1458             Function: Return the minimal set of $type coordinate intervals
1459             covered by the invocant's HSPs
1460             Returns : array of intervals (2-member arrayrefs; see MapTileUtils)
1461             Args : scalar $type: one of 'query', 'hit', 'subject'
1462              
1463             =cut
1464              
1465             sub _contig_intersection {
1466 8     8   13 my $self = shift;
1467 8         14 my ($type, $context) = @_;
1468 8         19 $self->_check_type_arg(\$type);
1469 8         20 $self->_check_context_arg($type, \$context);
1470 8 100       27 if (!defined $self->{"_contig_intersection_${type}_${context}"}) {
1471 4         11 $self->_calc_coverage_map($type);
1472             }
1473 8         11 return @{$self->{"_contig_intersection_${type}_${context}"}};
  8         25  
1474             }
1475              
1476             =head2 _reported_length
1477              
1478             Title : _reported_length
1479             Usage : $tiling->_reported_length($type)
1480             Function: Get the total length of the seq $type
1481             for the invocant's hit object, as reported
1482             by (not calculated from) the input data file
1483             Returns : scalar int
1484             Args : scalar $type: one of 'query', 'hit', 'subject'
1485             Note : This is kludgy; the hit object does not currently
1486             maintain accessors for these values, but the
1487             hsps possess these attributes. This is a wrapper
1488             that allows a consistent access method in the
1489             MapTiling code.
1490             Note : Since this number is based on a reported length,
1491             it is already a "logical length".
1492              
1493             =cut
1494              
1495             sub _reported_length {
1496 4     4   8 my $self = shift;
1497 4         7 my $type = shift;
1498 4         11 $self->_check_type_arg(\$type);
1499 4         8 my $key = uc( $type."_LENGTH" );
1500 4         9 return ($self->hsps)[0]->{$key};
1501             }
1502              
1503             1;
1504