| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Search::Tiling::MapTiling |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Mark A. Jensen |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Mark A. Jensen |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Search::Tiling::MapTiling - An implementation of an HSP tiling |
|
17
|
|
|
|
|
|
|
algorithm, with methods to obtain frequently-requested statistics |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# get a BLAST $hit from somewhere, then |
|
22
|
|
|
|
|
|
|
$tiling = Bio::Search::Tiling::MapTiling->new($hit); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# stats |
|
25
|
|
|
|
|
|
|
$numID = $tiling->identities(); |
|
26
|
|
|
|
|
|
|
$numCons = $tiling->conserved(); |
|
27
|
|
|
|
|
|
|
$query_length = $tiling->length('query'); |
|
28
|
|
|
|
|
|
|
$subject_length = $tiling->length('subject'); # or... |
|
29
|
|
|
|
|
|
|
$subject_length = $tiling->length('hit'); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# get a visual on the coverage map |
|
32
|
|
|
|
|
|
|
print $tiling->coverage_map_as_text('query',$context,'LEGEND'); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# tilings |
|
35
|
|
|
|
|
|
|
$context = $tiling->_context( -type => 'subject', -strand=> 1, -frame=>1); |
|
36
|
|
|
|
|
|
|
@covering_hsps_for_subject = $tiling->next_tiling('subject',$context); |
|
37
|
|
|
|
|
|
|
$context = $tiling->_context( -type => 'query', -strand=> -1, -frame=>0); |
|
38
|
|
|
|
|
|
|
@covering_hsps_for_query = $tiling->next_tiling('query', $context); |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Frequently, users want to use a set of high-scoring pairs (HSPs) |
|
43
|
|
|
|
|
|
|
obtained from a BLAST or other search to assess the overall level of |
|
44
|
|
|
|
|
|
|
identity, conservation, or coverage represented by matches between a |
|
45
|
|
|
|
|
|
|
subject and a query sequence. Because a set of HSPs frequently |
|
46
|
|
|
|
|
|
|
describes multiple overlapping sequence fragments, a simple summation of |
|
47
|
|
|
|
|
|
|
statistics over the HSPs will generally overestimate those |
|
48
|
|
|
|
|
|
|
statistics. To obtain an accurate estimate of global hit statistics, a |
|
49
|
|
|
|
|
|
|
'tiling' of HSPs onto either the subject or the query sequence must be |
|
50
|
|
|
|
|
|
|
performed, in order to properly correct for this. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This module will execute a tiling algorithm on a given hit based on an |
|
53
|
|
|
|
|
|
|
interval decomposition I'm calling the "coverage map". Internal object |
|
54
|
|
|
|
|
|
|
methods compute the various statistics, which are then stored in |
|
55
|
|
|
|
|
|
|
appropriately-named public object attributes. See |
|
56
|
|
|
|
|
|
|
L for more info on the algorithm. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 STRAND/FRAME CONTEXTS |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
In BLASTX, TBLASTN, and TBLASTX reports, strand and frame information |
|
61
|
|
|
|
|
|
|
are reported for the query, subject, or query and subject, |
|
62
|
|
|
|
|
|
|
respectively, for each HSP. Tilings for these sequence types are only |
|
63
|
|
|
|
|
|
|
meaningful when they include HSPs in the same strand and frame, or |
|
64
|
|
|
|
|
|
|
"context". So, in these situations, the context must be specified |
|
65
|
|
|
|
|
|
|
in the method calls or the methods will throw. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Contexts are specified as strings: C<[ 'all' | [m|p][_|0|1|2] ]>, where |
|
68
|
|
|
|
|
|
|
C = all HSPs (will throw if context must be specified), C = minus |
|
69
|
|
|
|
|
|
|
strand, C = plus strand, and C<_> = no frame info, C<0,1,2> = respective |
|
70
|
|
|
|
|
|
|
(absolute) frame. The L method will convert a (strand, |
|
71
|
|
|
|
|
|
|
frame) specification to a context string, e.g.: |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
$context = $self->_context(-type=>'query', -strand=>-1, -frame=>-2); |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
returns C. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The contexts present among the HSPs in a hit are identified and stored |
|
78
|
|
|
|
|
|
|
for convenience upon object construction. These are accessed off the |
|
79
|
|
|
|
|
|
|
object with the L method. If contexts don't apply for the |
|
80
|
|
|
|
|
|
|
given report, this returns C<('all')>. |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 TILED ALIGNMENTS |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The experimental method L will use a tiling |
|
85
|
|
|
|
|
|
|
to concatenate tiled hsps into a series of L |
|
86
|
|
|
|
|
|
|
objects: |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
@alns = $tiling->get_tiled_alns($type, $context); |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Each alignment contains two sequences with ids 'query' and 'subject', |
|
91
|
|
|
|
|
|
|
and consists of a concatenation of tiling HSPs which overlap or are |
|
92
|
|
|
|
|
|
|
directly adjacent. The alignment are returned in C<$type> sequence |
|
93
|
|
|
|
|
|
|
order. When HSPs overlap, the alignment sequence is taken from the HSP |
|
94
|
|
|
|
|
|
|
which comes first in the coverage map array. |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
The sequences in each alignment contain features (even though they are |
|
97
|
|
|
|
|
|
|
L objects) which map the original query/subject |
|
98
|
|
|
|
|
|
|
coordinates to the new alignment sequence coordinates. You can |
|
99
|
|
|
|
|
|
|
determine the original BLAST fragments this way: |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$aln = ($tiling->get_tiled_alns)[0]; |
|
102
|
|
|
|
|
|
|
$qseq = $aln->get_seq_by_id('query'); |
|
103
|
|
|
|
|
|
|
$hseq = $aln->get_seq_by_id('subject'); |
|
104
|
|
|
|
|
|
|
foreach my $feat ($qseq->get_SeqFeatures) { |
|
105
|
|
|
|
|
|
|
$org_start = ($feat->get_tag_values('query_start'))[0]; |
|
106
|
|
|
|
|
|
|
$org_end = ($feat->get_tag_values('query_end'))[0]; |
|
107
|
|
|
|
|
|
|
# original fragment as represented in the tiled alignment: |
|
108
|
|
|
|
|
|
|
$org_fragment = $feat->seq; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
foreach my $feat ($hseq->get_SeqFeatures) { |
|
111
|
|
|
|
|
|
|
$org_start = ($feat->get_tag_values('subject_start'))[0]; |
|
112
|
|
|
|
|
|
|
$org_end = ($feat->get_tag_values('subject_end'))[0]; |
|
113
|
|
|
|
|
|
|
# original fragment as represented in the tiled alignment: |
|
114
|
|
|
|
|
|
|
$org_fragment = $feat->seq; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 DESIGN NOTE |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The major calculations are made just-in-time, and then memoized. So, |
|
120
|
|
|
|
|
|
|
for example, for a given MapTiling object, a coverage map would |
|
121
|
|
|
|
|
|
|
usually be calculated only once (for the query), and at most twice (if |
|
122
|
|
|
|
|
|
|
the subject perspective is also desired), and then only when a |
|
123
|
|
|
|
|
|
|
statistic is first accessed. Afterward, the map and/or any statistic |
|
124
|
|
|
|
|
|
|
is read from storage. So feel free to call the statistic methods |
|
125
|
|
|
|
|
|
|
frequently if it suits you. |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
132
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
|
133
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
136
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 Support |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
I |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
145
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
146
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
147
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
152
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
|
153
|
|
|
|
|
|
|
the web: |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR - Mark A. Jensen |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Email maj -at- fortinbras -dot- us |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 APPENDIX |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
|
164
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Let the code begin... |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
package Bio::Search::Tiling::MapTiling; |
|
171
|
1
|
|
|
1
|
|
826
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
24
|
|
|
172
|
1
|
|
|
1
|
|
2
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
25
|
|
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
|
175
|
|
|
|
|
|
|
#use lib '../../..'; |
|
176
|
|
|
|
|
|
|
|
|
177
|
1
|
|
|
1
|
|
390
|
use Bio::Root::Root; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
32
|
|
|
178
|
1
|
|
|
1
|
|
479
|
use Bio::Search::Tiling::TilingI; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
20
|
|
|
179
|
1
|
|
|
1
|
|
433
|
use Bio::Search::Tiling::MapTileUtils; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
67
|
|
|
180
|
1
|
|
|
1
|
|
419
|
use Bio::LocatableSeq; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
32
|
|
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI); |
|
183
|
1
|
|
|
1
|
|
5
|
use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
3974
|
|
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 new |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Title : new |
|
190
|
|
|
|
|
|
|
Usage : my $obj = new Bio::Search::Tiling::GenericTiling(); |
|
191
|
|
|
|
|
|
|
Function: Builds a new Bio::Search::Tiling::GenericTiling object |
|
192
|
|
|
|
|
|
|
Returns : an instance of Bio::Search::Tiling::GenericTiling |
|
193
|
|
|
|
|
|
|
Args : -hit => $a_Bio_Search_Hit_HitI_object |
|
194
|
|
|
|
|
|
|
general filter function: |
|
195
|
|
|
|
|
|
|
-hsp_filter => sub { my $this_hsp = shift; |
|
196
|
|
|
|
|
|
|
...; |
|
197
|
|
|
|
|
|
|
return 1 if $wanted; |
|
198
|
|
|
|
|
|
|
return 0; } |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub new { |
|
203
|
519
|
|
|
519
|
1
|
3735
|
my $class = shift; |
|
204
|
519
|
|
|
|
|
816
|
my @args = @_; |
|
205
|
519
|
|
|
|
|
1241
|
my $self = $class->SUPER::new(@args); |
|
206
|
519
|
|
|
|
|
1528
|
my($hit, $filter) = $self->_rearrange( [qw( HIT HSP_FILTER)],@args ); |
|
207
|
|
|
|
|
|
|
|
|
208
|
519
|
50
|
|
|
|
1183
|
$self->throw("HitI object required") unless $hit; |
|
209
|
519
|
50
|
33
|
|
|
2666
|
$self->throw("Argument must be HitI object") unless ( ref $hit && $hit->isa('Bio::Search::Hit::HitI') ); |
|
210
|
519
|
|
|
|
|
632
|
$self->{hit} = $hit; |
|
211
|
519
|
|
|
|
|
1214
|
$self->_set_attributes(); |
|
212
|
519
|
|
|
|
|
806
|
$self->{"_algorithm"} = $hit->algorithm; |
|
213
|
|
|
|
|
|
|
|
|
214
|
519
|
|
|
|
|
880
|
my @hsps = $hit->hsps; |
|
215
|
|
|
|
|
|
|
# apply filter function if requested |
|
216
|
519
|
50
|
|
|
|
964
|
if ( defined $filter ) { |
|
217
|
0
|
0
|
|
|
|
0
|
if ( ref($filter) eq 'CODE' ) { |
|
218
|
0
|
0
|
|
|
|
0
|
@hsps = map { $filter->($_) ? $_ : () } @hsps; |
|
|
0
|
|
|
|
|
0
|
|
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
else { |
|
221
|
0
|
|
|
|
|
0
|
$self->warn("-filter is not a coderef; ignoring"); |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# identify available contexts |
|
226
|
519
|
|
|
|
|
641
|
for my $t (qw( query hit )) { |
|
227
|
1038
|
|
|
|
|
761
|
my %contexts; |
|
228
|
1038
|
|
|
|
|
1509
|
for my $i (0..$#hsps) { |
|
229
|
546
|
|
|
|
|
1666
|
my $ctxt = $self->_context( |
|
230
|
|
|
|
|
|
|
-type => $t, |
|
231
|
|
|
|
|
|
|
-strand => $hsps[$i]->strand($t), |
|
232
|
|
|
|
|
|
|
-frame => $hsps[$i]->frame($t)); |
|
233
|
546
|
|
100
|
|
|
1617
|
$contexts{$ctxt} ||= []; |
|
234
|
546
|
|
|
|
|
375
|
push @{$contexts{$ctxt}}, $i; |
|
|
546
|
|
|
|
|
1102
|
|
|
235
|
|
|
|
|
|
|
} |
|
236
|
1038
|
|
|
|
|
2039
|
$self->{"_contexts_${t}"} = \%contexts; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
|
|
239
|
519
|
100
|
|
|
|
1506
|
$self->warn("No HSPs present in hit after filtering") unless (@hsps); |
|
240
|
519
|
|
|
|
|
954
|
$self->hsps(\@hsps); |
|
241
|
519
|
|
|
|
|
1864
|
return $self; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# a tiling is based on the set of hsps contained in a single hit. |
|
245
|
|
|
|
|
|
|
# check all the boundaries - zero hsps, one hsp, all disjoint hsps |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 TILING ITERATORS |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 next_tiling |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Title : next_tiling |
|
252
|
|
|
|
|
|
|
Usage : @hsps = $self->next_tiling($type); |
|
253
|
|
|
|
|
|
|
Function: Obtain a tiling: a minimal set of HSPs covering the $type |
|
254
|
|
|
|
|
|
|
('hit', 'subject', 'query') sequence |
|
255
|
|
|
|
|
|
|
Example : |
|
256
|
|
|
|
|
|
|
Returns : an array of HSPI objects |
|
257
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query', with |
|
258
|
|
|
|
|
|
|
'subject' an alias for 'hit' |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=cut |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub next_tiling{ |
|
263
|
268
|
|
|
268
|
1
|
1962
|
my $self = shift; |
|
264
|
268
|
|
|
|
|
205
|
my ($type, $context) = @_; |
|
265
|
268
|
|
|
|
|
285
|
$self->_check_type_arg(\$type); |
|
266
|
268
|
|
|
|
|
373
|
$self->_check_context_arg($type, \$context); |
|
267
|
268
|
|
|
|
|
332
|
return $self->_tiling_iterator($type, $context)->(); |
|
268
|
|
|
|
|
|
|
} |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 rewind_tilings |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Title : rewind_tilings |
|
273
|
|
|
|
|
|
|
Usage : $self->rewind_tilings($type) |
|
274
|
|
|
|
|
|
|
Function: Reset the next_tilings($type) iterator |
|
275
|
|
|
|
|
|
|
Example : |
|
276
|
|
|
|
|
|
|
Returns : True on success |
|
277
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query'; |
|
278
|
|
|
|
|
|
|
default is 'query' |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub rewind_tilings{ |
|
283
|
2
|
|
|
2
|
1
|
314
|
my $self = shift; |
|
284
|
2
|
|
|
|
|
4
|
my ($type,$context) = @_; |
|
285
|
2
|
|
|
|
|
6
|
$self->_check_type_arg(\$type); |
|
286
|
2
|
|
|
|
|
6
|
$self->_check_context_arg($type, \$context); |
|
287
|
2
|
|
|
|
|
6
|
return $self->_tiling_iterator($type, $context)->('REWIND'); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 ALIGNMENTS |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head2 get_tiled_alns() |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
Title : get_tiled_alns |
|
295
|
|
|
|
|
|
|
Usage : @alns = $tiling->get_tiled_alns($type, $context) |
|
296
|
|
|
|
|
|
|
Function: Use a tiling to construct a minimal set of alignment |
|
297
|
|
|
|
|
|
|
objects covering the region specified by $type/$context |
|
298
|
|
|
|
|
|
|
by splicing adjacent HSP tiles |
|
299
|
|
|
|
|
|
|
Returns : an array of Bio::SimpleAlign objects; see Note below |
|
300
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
301
|
|
|
|
|
|
|
default is 'query' |
|
302
|
|
|
|
|
|
|
scalar $context: strand/frame context string |
|
303
|
|
|
|
|
|
|
Following $type and $context, an array of |
|
304
|
|
|
|
|
|
|
ordered, tiled HSP objects can be specified; this is |
|
305
|
|
|
|
|
|
|
the tiling that will directly the alignment construction |
|
306
|
|
|
|
|
|
|
default -- the first tiling provided by a tiling iterator |
|
307
|
|
|
|
|
|
|
Notes : Each returned alignment is a concatenation of adjacent tiles. |
|
308
|
|
|
|
|
|
|
The set of alignments will cover all regions described by the |
|
309
|
|
|
|
|
|
|
$type/$context pair in the hit. The pair of sequences in each |
|
310
|
|
|
|
|
|
|
alignment have ids 'query' and 'subject', and each sequence |
|
311
|
|
|
|
|
|
|
possesses SeqFeatures that map the original query or subject |
|
312
|
|
|
|
|
|
|
coordinates to the sequence coordinates in the tiled alignment. |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub get_tiled_alns { |
|
317
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
|
318
|
1
|
|
|
|
|
2
|
my ($type, $context) = @_; |
|
319
|
1
|
|
|
|
|
3
|
$self->_check_type_arg(\$type); |
|
320
|
1
|
|
|
|
|
4
|
$self->_check_context_arg($type, \$context); |
|
321
|
1
|
|
|
|
|
2
|
my $t = shift; # first arg after type/context, arrayref to a tiling |
|
322
|
1
|
|
|
|
|
2
|
my @tiling; |
|
323
|
1
|
50
|
33
|
|
|
6
|
if ($t && (ref($t) eq 'ARRAY')) { |
|
324
|
0
|
|
|
|
|
0
|
@tiling = @$t; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
else { # otherwise, take the first tiling available |
|
327
|
|
|
|
|
|
|
|
|
328
|
1
|
|
|
|
|
3
|
@tiling = $self->_make_tiling_iterator($type,$context)->(); |
|
329
|
|
|
|
|
|
|
} |
|
330
|
1
|
|
|
|
|
11
|
my @ret; |
|
331
|
|
|
|
|
|
|
|
|
332
|
1
|
|
|
|
|
4
|
my @map = $self->coverage_map($type, $context); |
|
333
|
1
|
|
|
|
|
3
|
my @intervals = map {$_->[0]} @map; # disjoint decomp |
|
|
13
|
|
|
|
|
11
|
|
|
334
|
|
|
|
|
|
|
# divide into disjoint covering groups |
|
335
|
1
|
|
|
|
|
6
|
my @groups = covering_groups(@intervals); |
|
336
|
|
|
|
|
|
|
|
|
337
|
1
|
|
|
|
|
1053
|
require Bio::SimpleAlign; |
|
338
|
1
|
|
|
|
|
9
|
require Bio::SeqFeature::Generic; |
|
339
|
|
|
|
|
|
|
# cut hsp aligns along the disjoint decomp |
|
340
|
|
|
|
|
|
|
# look for gaps...or add gaps? |
|
341
|
1
|
|
|
|
|
2
|
my ($q_start, $h_start, $q_strand, $h_strand); |
|
342
|
|
|
|
|
|
|
# build seqs |
|
343
|
1
|
|
|
|
|
4
|
for my $grp (@groups) { |
|
344
|
6
|
|
|
|
|
18
|
my $taln = Bio::SimpleAlign->new(); |
|
345
|
6
|
|
|
|
|
9
|
my (@qfeats, @hfeats); |
|
346
|
6
|
|
|
|
|
7
|
my $query_string = ''; |
|
347
|
6
|
|
|
|
|
4
|
my $hit_string = ''; |
|
348
|
6
|
|
|
|
|
8
|
my ($qlen,$hlen) = (0,0); |
|
349
|
6
|
|
|
|
|
5
|
my ($qinc, $hinc, $qstart, $hstart); |
|
350
|
6
|
|
|
|
|
11
|
for my $intvl (@$grp) { |
|
351
|
|
|
|
|
|
|
# following just chooses the first available hsp containing the |
|
352
|
|
|
|
|
|
|
# interval -- this is arbitrary, could be smarter. |
|
353
|
12
|
|
|
|
|
37
|
my $aln = ( containing_hsps($intvl, @tiling) )[0]->get_aln; |
|
354
|
12
|
|
|
|
|
29
|
my $qseq = $aln->get_seq_by_pos(1); |
|
355
|
12
|
|
|
|
|
22
|
my $hseq = $aln->get_seq_by_pos(2); |
|
356
|
12
|
|
66
|
|
|
35
|
$qstart ||= $qseq->start; |
|
357
|
12
|
|
66
|
|
|
30
|
$hstart ||= $hseq->start; |
|
358
|
|
|
|
|
|
|
# calculate the slice boundaries |
|
359
|
12
|
|
|
|
|
14
|
my ($beg, $end); |
|
360
|
12
|
|
|
|
|
18
|
for ($type) { |
|
361
|
12
|
50
|
|
|
|
30
|
/query/ && do { |
|
362
|
12
|
|
|
|
|
25
|
$beg = $aln->column_from_residue_number($qseq->id, $intvl->[0]); |
|
363
|
12
|
|
|
|
|
29
|
$end = $aln->column_from_residue_number($qseq->id, $intvl->[1]); |
|
364
|
12
|
|
|
|
|
21
|
last; |
|
365
|
|
|
|
|
|
|
}; |
|
366
|
0
|
0
|
|
|
|
0
|
/subject|hit/ && do { |
|
367
|
0
|
|
|
|
|
0
|
$beg = $aln->column_from_residue_number($hseq->id, $intvl->[0]); |
|
368
|
0
|
|
|
|
|
0
|
$end = $aln->column_from_residue_number($hseq->id, $intvl->[1]); |
|
369
|
0
|
|
|
|
|
0
|
last; |
|
370
|
|
|
|
|
|
|
}; |
|
371
|
|
|
|
|
|
|
} |
|
372
|
12
|
|
|
|
|
31
|
$aln = $aln->slice($beg, $end); |
|
373
|
12
|
|
|
|
|
35
|
$qseq = $aln->get_seq_by_pos(1); |
|
374
|
12
|
|
|
|
|
23
|
$hseq = $aln->get_seq_by_pos(2); |
|
375
|
12
|
|
|
|
|
20
|
$qinc = $qseq->length - $qseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS); |
|
376
|
12
|
|
|
|
|
22
|
$hinc = $hseq->length - $hseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS); |
|
377
|
|
|
|
|
|
|
|
|
378
|
12
|
50
|
33
|
|
|
41
|
push @qfeats, Bio::SeqFeature::Generic->new( |
|
|
|
50
|
33
|
|
|
|
|
|
379
|
|
|
|
|
|
|
-start => $qlen+1, |
|
380
|
|
|
|
|
|
|
-end => $qlen+$qinc, |
|
381
|
|
|
|
|
|
|
-strand => $qseq->strand, |
|
382
|
|
|
|
|
|
|
-primary => 'query', |
|
383
|
|
|
|
|
|
|
-source_tag => 'BLAST', |
|
384
|
|
|
|
|
|
|
-display_name => 'query coordinates', |
|
385
|
|
|
|
|
|
|
-tag => { query_id => $qseq->id, |
|
386
|
|
|
|
|
|
|
query_desc => $qseq->desc, |
|
387
|
|
|
|
|
|
|
query_start => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*$qlen, |
|
388
|
|
|
|
|
|
|
query_end => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*($qlen+$qinc-1), |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
); |
|
391
|
12
|
50
|
33
|
|
|
58
|
push @hfeats, Bio::SeqFeature::Generic->new( |
|
|
|
50
|
33
|
|
|
|
|
|
392
|
|
|
|
|
|
|
-start => $hlen+1, |
|
393
|
|
|
|
|
|
|
-end => $hlen+$hinc, |
|
394
|
|
|
|
|
|
|
-strand => $hseq->strand, |
|
395
|
|
|
|
|
|
|
-primary => 'subject/hit', |
|
396
|
|
|
|
|
|
|
-source_tag => 'BLAST', |
|
397
|
|
|
|
|
|
|
-display_name => 'subject/hit coordinates', |
|
398
|
|
|
|
|
|
|
-tag => { subject_id => $hseq->id, |
|
399
|
|
|
|
|
|
|
subject_desc => $hseq->desc, |
|
400
|
|
|
|
|
|
|
subject_start => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*$hlen, |
|
401
|
|
|
|
|
|
|
subject_end => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*($hlen+$hinc-1) |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
); |
|
404
|
12
|
|
|
|
|
45
|
$query_string .= $qseq->seq; |
|
405
|
12
|
|
|
|
|
23
|
$hit_string .= $hseq->seq; |
|
406
|
12
|
|
|
|
|
14
|
$qlen += $qinc; |
|
407
|
12
|
|
|
|
|
35
|
$hlen += $hinc; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
# create the LocatableSeqs and add the features to each |
|
410
|
|
|
|
|
|
|
# then add the seqs to the new aln |
|
411
|
|
|
|
|
|
|
# note in MapTileUtils, Bio::FeatureHolderI methods have been |
|
412
|
|
|
|
|
|
|
# mixed into Bio::LocatableSeq |
|
413
|
6
|
|
|
|
|
25
|
my $qseq = Bio::LocatableSeq->new( -id => 'query', |
|
414
|
|
|
|
|
|
|
-seq => $query_string); |
|
415
|
6
|
|
|
|
|
24
|
$qseq->add_SeqFeature(@qfeats); |
|
416
|
6
|
|
|
|
|
23
|
my $hseq = Bio::LocatableSeq->new( -id => 'subject', |
|
417
|
|
|
|
|
|
|
-seq => $hit_string ); |
|
418
|
6
|
|
|
|
|
17
|
$hseq->add_SeqFeature(@hfeats); |
|
419
|
6
|
|
|
|
|
19
|
$taln->add_seq($qseq); |
|
420
|
6
|
|
|
|
|
12
|
$taln->add_seq($hseq); |
|
421
|
6
|
|
|
|
|
17
|
push @ret, $taln; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
|
|
424
|
1
|
|
|
|
|
20
|
return @ret; |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=head1 STATISTICS |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 identities |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Title : identities |
|
432
|
|
|
|
|
|
|
Usage : $tiling->identities($type, $action, $context) |
|
433
|
|
|
|
|
|
|
Function: Retrieve the calculated number of identities for the invocant |
|
434
|
|
|
|
|
|
|
Example : |
|
435
|
|
|
|
|
|
|
Returns : value of identities (a scalar) |
|
436
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
437
|
|
|
|
|
|
|
default is 'query' |
|
438
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
|
439
|
|
|
|
|
|
|
default is 'exact' |
|
440
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
|
441
|
|
|
|
|
|
|
Note : getter only |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=cut |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
sub identities{ |
|
446
|
538
|
|
|
538
|
1
|
1037
|
my $self = shift; |
|
447
|
538
|
|
|
|
|
691
|
my ($type, $action, $context) = @_; |
|
448
|
538
|
|
|
|
|
732
|
$self->_check_type_arg(\$type); |
|
449
|
538
|
|
|
|
|
780
|
$self->_check_action_arg(\$action); |
|
450
|
538
|
|
|
|
|
939
|
$self->_check_context_arg($type, \$context); |
|
451
|
538
|
100
|
|
|
|
1609
|
if (!defined $self->{"identities_${type}_${action}_${context}"}) { |
|
452
|
531
|
|
|
|
|
932
|
$self->_calc_stats($type, $action, $context); |
|
453
|
|
|
|
|
|
|
} |
|
454
|
538
|
|
|
|
|
1179
|
return $self->{"identities_${type}_${action}_${context}"}; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 conserved |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Title : conserved |
|
460
|
|
|
|
|
|
|
Usage : $tiling->conserved($type, $action) |
|
461
|
|
|
|
|
|
|
Function: Retrieve the calculated number of conserved sites for the invocant |
|
462
|
|
|
|
|
|
|
Example : |
|
463
|
|
|
|
|
|
|
Returns : value of conserved (a scalar) |
|
464
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
465
|
|
|
|
|
|
|
default is 'query' |
|
466
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
|
467
|
|
|
|
|
|
|
default is 'exact' |
|
468
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
|
469
|
|
|
|
|
|
|
Note : getter only |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub conserved{ |
|
474
|
538
|
|
|
538
|
1
|
1234
|
my $self = shift; |
|
475
|
538
|
|
|
|
|
615
|
my ($type, $action, $context) = @_; |
|
476
|
538
|
|
|
|
|
794
|
$self->_check_type_arg(\$type); |
|
477
|
538
|
|
|
|
|
912
|
$self->_check_action_arg(\$action); |
|
478
|
538
|
|
|
|
|
835
|
$self->_check_context_arg($type, \$context); |
|
479
|
538
|
50
|
|
|
|
1591
|
if (!defined $self->{"conserved_${type}_${action}_${context}"}) { |
|
480
|
0
|
|
|
|
|
0
|
$self->_calc_stats($type, $action, $context); |
|
481
|
|
|
|
|
|
|
} |
|
482
|
538
|
|
|
|
|
1007
|
return $self->{"conserved_${type}_${action}_${context}"}; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head2 length |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Title : length |
|
488
|
|
|
|
|
|
|
Usage : $tiling->length($type, $action, $context) |
|
489
|
|
|
|
|
|
|
Function: Retrieve the total length of aligned residues for |
|
490
|
|
|
|
|
|
|
the seq $type |
|
491
|
|
|
|
|
|
|
Example : |
|
492
|
|
|
|
|
|
|
Returns : value of length (a scalar) |
|
493
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
494
|
|
|
|
|
|
|
default is 'query' |
|
495
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
|
496
|
|
|
|
|
|
|
default is 'exact' |
|
497
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
|
498
|
|
|
|
|
|
|
Note : getter only |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub length{ |
|
503
|
1051
|
|
|
1051
|
1
|
1162
|
my $self = shift; |
|
504
|
1051
|
|
|
|
|
1170
|
my ($type,$action,$context) = @_; |
|
505
|
1051
|
|
|
|
|
1507
|
$self->_check_type_arg(\$type); |
|
506
|
1051
|
|
|
|
|
1432
|
$self->_check_action_arg(\$action); |
|
507
|
1051
|
|
|
|
|
1710
|
$self->_check_context_arg($type, \$context); |
|
508
|
1051
|
100
|
|
|
|
2436
|
if (!defined $self->{"length_${type}_${action}_${context}"}) { |
|
509
|
1
|
|
|
|
|
5
|
$self->_calc_stats($type, $action, $context); |
|
510
|
|
|
|
|
|
|
} |
|
511
|
1051
|
|
|
|
|
3608
|
return $self->{"length_${type}_${action}_${context}"}; |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 frac |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Title : frac |
|
517
|
|
|
|
|
|
|
Usage : $tiling->frac($type, $denom, $action, $context, $method) |
|
518
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
|
519
|
|
|
|
|
|
|
of desired kinds of pairs (given by $method), |
|
520
|
|
|
|
|
|
|
with respect to $denom |
|
521
|
|
|
|
|
|
|
Returns : scalar float |
|
522
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
523
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
|
524
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
525
|
|
|
|
|
|
|
-context => strand/frame context string |
|
526
|
|
|
|
|
|
|
-method => one of 'identical', 'conserved' |
|
527
|
|
|
|
|
|
|
Note : $denom == 'aligned', return desired_stat/num_aligned |
|
528
|
|
|
|
|
|
|
$denom == 'total', return desired_stat/_reported_length |
|
529
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
|
530
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
|
531
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
|
532
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
|
533
|
|
|
|
|
|
|
amino acid coordinates. |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub frac { |
|
538
|
1058
|
|
|
1058
|
1
|
151968
|
my $self = shift; |
|
539
|
1058
|
|
|
|
|
2014
|
my @args = @_; |
|
540
|
1058
|
|
|
|
|
3454
|
my ($type, $denom, $action, $context, $method) = $self->_rearrange([qw(TYPE DENOM ACTION CONTEXT METHOD)],@args); |
|
541
|
1058
|
|
|
|
|
2600
|
$self->_check_type_arg(\$type); |
|
542
|
1058
|
|
|
|
|
1733
|
$self->_check_action_arg(\$action); |
|
543
|
1058
|
|
|
|
|
2016
|
$self->_check_context_arg($type, \$context); |
|
544
|
1058
|
50
|
33
|
|
|
9097
|
unless ($method and grep(/^$method$/, qw( identical conserved ))) { |
|
545
|
0
|
|
|
|
|
0
|
$self->throw("-method must specified; one of ('identical', 'conserved')"); |
|
546
|
|
|
|
|
|
|
} |
|
547
|
1058
|
|
50
|
|
|
1441
|
$denom ||= 'total'; |
|
548
|
1058
|
50
|
|
|
|
3635
|
unless (grep /^$denom/, qw( total aligned )) { |
|
549
|
0
|
|
|
|
|
0
|
$self->throw("Denominator selection must be one of ('total', 'aligned'), not '$denom'"); |
|
550
|
|
|
|
|
|
|
} |
|
551
|
1058
|
|
|
|
|
2213
|
my $key = "frac_${method}_${type}_${denom}_${action}_${context}"; |
|
552
|
1058
|
|
|
|
|
826
|
my $stat; |
|
553
|
1058
|
|
|
|
|
1385
|
for ($method) { |
|
554
|
1058
|
100
|
|
|
|
1761
|
$_ eq 'identical' && do { |
|
555
|
529
|
|
|
|
|
1003
|
$stat = $self->identities($type, $action, $context); |
|
556
|
529
|
|
|
|
|
685
|
last; |
|
557
|
|
|
|
|
|
|
}; |
|
558
|
529
|
50
|
|
|
|
858
|
$_ eq 'conserved' && do { |
|
559
|
529
|
|
|
|
|
879
|
$stat = $self->conserved($type, $action, $context); |
|
560
|
529
|
|
|
|
|
647
|
last; |
|
561
|
|
|
|
|
|
|
}; |
|
562
|
0
|
|
|
|
|
0
|
do { |
|
563
|
0
|
|
|
|
|
0
|
$self->throw("What are YOU doing here?"); |
|
564
|
|
|
|
|
|
|
}; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
1058
|
100
|
|
|
|
2162
|
if (!defined $self->{$key}) { |
|
567
|
1046
|
|
|
|
|
1395
|
for ($denom) { |
|
568
|
1046
|
50
|
|
|
|
2176
|
/total/ && do { |
|
569
|
0
|
|
|
|
|
0
|
$self->{$key} = |
|
570
|
|
|
|
|
|
|
$stat/$self->_reported_length($type); # need fudge fac?? |
|
571
|
0
|
|
|
|
|
0
|
last; |
|
572
|
|
|
|
|
|
|
}; |
|
573
|
1046
|
50
|
|
|
|
1980
|
/aligned/ && do { |
|
574
|
1046
|
|
|
|
|
1890
|
$self->{$key} = |
|
575
|
|
|
|
|
|
|
$stat/$self->length($type,$action,$context); |
|
576
|
1046
|
|
|
|
|
1151
|
last; |
|
577
|
|
|
|
|
|
|
}; |
|
578
|
0
|
|
|
|
|
0
|
do { |
|
579
|
0
|
|
|
|
|
0
|
$self->throw("What are YOU doing here?"); |
|
580
|
|
|
|
|
|
|
}; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
} |
|
583
|
1058
|
|
|
|
|
3297
|
return $self->{$key}; |
|
584
|
|
|
|
|
|
|
} |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=head2 frac_identical |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Title : frac_identical |
|
589
|
|
|
|
|
|
|
Usage : $tiling->frac_identical($type, $denom, $action, $context) |
|
590
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
|
591
|
|
|
|
|
|
|
of identical pairs, with respect to $denom |
|
592
|
|
|
|
|
|
|
Returns : scalar float |
|
593
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
594
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
|
595
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
596
|
|
|
|
|
|
|
-context => strand/frame context string |
|
597
|
|
|
|
|
|
|
Note : $denom == 'aligned', return conserved/num_aligned |
|
598
|
|
|
|
|
|
|
$denom == 'total', return conserved/_reported_length |
|
599
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
|
600
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
|
601
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
|
602
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
|
603
|
|
|
|
|
|
|
amino acid coordinates. |
|
604
|
|
|
|
|
|
|
Note : This an alias that calls frac() |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=cut |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
sub frac_identical{ |
|
609
|
3
|
|
|
3
|
1
|
4
|
my $self = shift; |
|
610
|
3
|
|
|
|
|
8
|
my @args = @_; |
|
611
|
3
|
|
|
|
|
14
|
my ($type, $denom, $action,$context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args ); |
|
612
|
3
|
|
|
|
|
38
|
$self->frac( -type=>$type, -denom=>$denom, -action=>$action, -method=>'identical', -context=>$context); |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=head2 frac_conserved |
|
616
|
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
Title : frac_conserved |
|
618
|
|
|
|
|
|
|
Usage : $tiling->frac_conserved($type, $denom, $action, $context) |
|
619
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
|
620
|
|
|
|
|
|
|
of conserved pairs, with respect to $denom |
|
621
|
|
|
|
|
|
|
Returns : scalar float |
|
622
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
623
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
|
624
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
625
|
|
|
|
|
|
|
-context => strand/frame context string |
|
626
|
|
|
|
|
|
|
Note : $denom == 'aligned', return conserved/num_aligned |
|
627
|
|
|
|
|
|
|
$denom == 'total', return conserved/_reported_length |
|
628
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
|
629
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
|
630
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
|
631
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
|
632
|
|
|
|
|
|
|
amino acid coordinates. |
|
633
|
|
|
|
|
|
|
Note : This an alias that calls frac() |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=cut |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
sub frac_conserved{ |
|
638
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
|
639
|
3
|
|
|
|
|
7
|
my @args = @_; |
|
640
|
3
|
|
|
|
|
13
|
my ($type, $denom, $action, $context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args ); |
|
641
|
3
|
|
|
|
|
17
|
$self->frac( -type=>$type, -denom=>$denom, -action=>$action, -context=>$context, -method=>'conserved'); |
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=head2 frac_aligned |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Title : frac_aligned |
|
647
|
|
|
|
|
|
|
Aliases : frac_aligned_query - frac_aligned(-type=>'query',...) |
|
648
|
|
|
|
|
|
|
frac_aligned_hit - frac_aligned(-type=>'hit',...) |
|
649
|
|
|
|
|
|
|
Usage : $tiling->frac_aligned(-type=>$type, |
|
650
|
|
|
|
|
|
|
-action=>$action, |
|
651
|
|
|
|
|
|
|
-context=>$context) |
|
652
|
|
|
|
|
|
|
Function: Return the fraction of input sequence length |
|
653
|
|
|
|
|
|
|
that was aligned by the algorithm |
|
654
|
|
|
|
|
|
|
Returns : scalar float |
|
655
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
656
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
657
|
|
|
|
|
|
|
-context => strand/frame context string |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=cut |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
sub frac_aligned{ |
|
662
|
4
|
|
|
4
|
1
|
12
|
my ($self, @args) = @_; |
|
663
|
4
|
|
|
|
|
18
|
my ($type, $action, $context) = $self->_rearrange([qw(TYPE ACTION CONTEXT)],@args); |
|
664
|
4
|
|
|
|
|
16
|
$self->_check_type_arg(\$type); |
|
665
|
4
|
|
|
|
|
9
|
$self->_check_action_arg(\$action); |
|
666
|
4
|
|
|
|
|
12
|
$self->_check_context_arg($type, \$context); |
|
667
|
4
|
50
|
|
|
|
14
|
if (!$self->{"frac_aligned_${type}_${action}_${context}"}) { |
|
668
|
4
|
|
|
|
|
9
|
$self->{"frac_aligned_${type}_${action}_${context}"} = $self->num_aligned($type,$action,$context)/$self->_reported_length($type); |
|
669
|
|
|
|
|
|
|
} |
|
670
|
4
|
|
|
|
|
74
|
return $self->{"frac_aligned_${type}_${action}_${context}"}; |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
|
|
673
|
2
|
|
|
2
|
0
|
10
|
sub frac_aligned_query { shift->frac_aligned(-type=>'query', @_) } |
|
674
|
2
|
|
|
2
|
0
|
24
|
sub frac_aligned_hit { shift->frac_aligned(-type=>'hit', @_) } |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head2 num_aligned |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Title : num_aligned |
|
680
|
|
|
|
|
|
|
Usage : $tiling->num_aligned(-type=>$type) |
|
681
|
|
|
|
|
|
|
Function: Return the number of residues of sequence $type |
|
682
|
|
|
|
|
|
|
that were aligned by the algorithm |
|
683
|
|
|
|
|
|
|
Returns : scalar int |
|
684
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
685
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
686
|
|
|
|
|
|
|
-context => strand/frame context string |
|
687
|
|
|
|
|
|
|
Note : Since this is calculated from reported coordinates, |
|
688
|
|
|
|
|
|
|
not symbol string counts, it is already in terms of |
|
689
|
|
|
|
|
|
|
"logical length" |
|
690
|
|
|
|
|
|
|
Note : Aliases length() |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=cut |
|
693
|
|
|
|
|
|
|
|
|
694
|
4
|
|
|
4
|
1
|
12
|
sub num_aligned { shift->length( @_ ) }; |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=head2 num_unaligned |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Title : num_unaligned |
|
699
|
|
|
|
|
|
|
Usage : $tiling->num_unaligned(-type=>$type) |
|
700
|
|
|
|
|
|
|
Function: Return the number of residues of sequence $type |
|
701
|
|
|
|
|
|
|
that were left unaligned by the algorithm |
|
702
|
|
|
|
|
|
|
Returns : scalar int |
|
703
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
704
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
|
705
|
|
|
|
|
|
|
-context => strand/frame context string |
|
706
|
|
|
|
|
|
|
Note : Since this is calculated from reported coordinates, |
|
707
|
|
|
|
|
|
|
not symbol string counts, it is already in terms of |
|
708
|
|
|
|
|
|
|
"logical length" |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=cut |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub num_unaligned { |
|
713
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
714
|
0
|
|
|
|
|
0
|
my ($type,$action,$context) = @_; |
|
715
|
0
|
|
|
|
|
0
|
my $ret; |
|
716
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
|
717
|
0
|
|
|
|
|
0
|
$self->_check_action_arg(\$action); |
|
718
|
0
|
|
|
|
|
0
|
$self->_check_context_arg($type, \$context); |
|
719
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{"num_unaligned_${type}_${action}_${context}"}) { |
|
720
|
0
|
|
|
|
|
0
|
$self->{"num_unaligned_${type}_${action}_${context}"} = $self->_reported_length($type)-$self->num_aligned($type,$action,$context); |
|
721
|
|
|
|
|
|
|
} |
|
722
|
0
|
|
|
|
|
0
|
return $self->{"num_unaligned_${type}_${action}_${context}"}; |
|
723
|
|
|
|
|
|
|
} |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=head2 range |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
Title : range |
|
729
|
|
|
|
|
|
|
Usage : $tiling->range(-type=>$type) |
|
730
|
|
|
|
|
|
|
Function: Returns the extent of the longest tiling |
|
731
|
|
|
|
|
|
|
as ($min_coord, $max_coord) |
|
732
|
|
|
|
|
|
|
Returns : array of two scalar integers |
|
733
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
|
734
|
|
|
|
|
|
|
-context => strand/frame context string |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=cut |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
sub range { |
|
739
|
8
|
|
|
8
|
1
|
1785
|
my ($self, $type, $context) = @_; |
|
740
|
8
|
|
|
|
|
20
|
$self->_check_type_arg(\$type); |
|
741
|
8
|
|
|
|
|
19
|
$self->_check_context_arg($type, \$context); |
|
742
|
8
|
|
|
|
|
22
|
my @a = $self->_contig_intersection($type,$context); |
|
743
|
8
|
|
|
|
|
48
|
return ($a[0][0], $a[-1][1]); |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=head1 ACCESSORS |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 coverage_map |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Title : coverage_map |
|
753
|
|
|
|
|
|
|
Usage : $map = $tiling->coverage_map($type) |
|
754
|
|
|
|
|
|
|
Function: Property to contain the coverage map calculated |
|
755
|
|
|
|
|
|
|
by _calc_coverage_map() - see that for |
|
756
|
|
|
|
|
|
|
details |
|
757
|
|
|
|
|
|
|
Example : |
|
758
|
|
|
|
|
|
|
Returns : value of coverage_map_$type as an array |
|
759
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
760
|
|
|
|
|
|
|
default is 'query' |
|
761
|
|
|
|
|
|
|
Note : getter |
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=cut |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub coverage_map{ |
|
766
|
332
|
|
|
332
|
1
|
278
|
my $self = shift; |
|
767
|
332
|
|
|
|
|
423
|
my ($type, $context) = @_; |
|
768
|
332
|
|
|
|
|
557
|
$self->_check_type_arg(\$type); |
|
769
|
332
|
|
|
|
|
563
|
$self->_check_context_arg($type, \$context); |
|
770
|
|
|
|
|
|
|
|
|
771
|
332
|
100
|
|
|
|
921
|
if (!defined $self->{"coverage_map_${type}_${context}"}) { |
|
772
|
|
|
|
|
|
|
# following calculates coverage maps in all strands/frames |
|
773
|
|
|
|
|
|
|
# if necessary |
|
774
|
203
|
|
|
|
|
372
|
$self->_calc_coverage_map($type, $context); |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
# if undef is returned, then there were no hsps for given strand/frame |
|
777
|
332
|
50
|
|
|
|
964
|
if (!defined $self->{"coverage_map_${type}_${context}"}) { |
|
778
|
0
|
|
|
|
|
0
|
$self->warn("No HSPS present for type '$type' in context '$context' for this hit"); |
|
779
|
0
|
|
|
|
|
0
|
return undef; |
|
780
|
|
|
|
|
|
|
} |
|
781
|
332
|
|
|
|
|
273
|
return @{$self->{"coverage_map_${type}_${context}"}}; |
|
|
332
|
|
|
|
|
943
|
|
|
782
|
|
|
|
|
|
|
} |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=head2 coverage_map_as_text |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
Title : coverage_map_as_text |
|
787
|
|
|
|
|
|
|
Usage : $tiling->coverage_map_as_text($type, $legend_flag) |
|
788
|
|
|
|
|
|
|
Function: Format a text-graphic representation of the |
|
789
|
|
|
|
|
|
|
coverage map |
|
790
|
|
|
|
|
|
|
Returns : an array of scalar strings, suitable for printing |
|
791
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
|
792
|
|
|
|
|
|
|
$context: strand/frame context string |
|
793
|
|
|
|
|
|
|
$legend_flag: boolean; add a legend indicating |
|
794
|
|
|
|
|
|
|
the actual interval coordinates for each component |
|
795
|
|
|
|
|
|
|
interval and hsp (in the $type sequence context) |
|
796
|
|
|
|
|
|
|
Example : print $tiling->coverage_map_as_text('query',1); |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub coverage_map_as_text{ |
|
801
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
802
|
0
|
|
|
|
|
0
|
my ($type, $context, $legend_q) = @_; |
|
803
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
|
804
|
0
|
|
|
|
|
0
|
$self->_check_context_arg($type, \$context); |
|
805
|
|
|
|
|
|
|
|
|
806
|
0
|
|
|
|
|
0
|
my @map = $self->coverage_map($type, $context); |
|
807
|
0
|
|
|
|
|
0
|
my @ret; |
|
808
|
0
|
|
|
|
|
0
|
my @hsps = $self->hit->hsps; |
|
809
|
0
|
|
|
|
|
0
|
my %hsps_i; |
|
810
|
0
|
|
|
|
|
0
|
require Tie::RefHash; |
|
811
|
0
|
|
|
|
|
0
|
tie %hsps_i, 'Tie::RefHash'; |
|
812
|
0
|
|
|
|
|
0
|
@hsps_i{@hsps} = (0..$#hsps); |
|
813
|
0
|
|
|
|
|
0
|
my @mx; |
|
814
|
0
|
|
|
|
|
0
|
foreach (0..$#map) { |
|
815
|
0
|
|
|
|
|
0
|
my @hspx = ('') x @hsps; |
|
816
|
0
|
|
|
|
|
0
|
my @these_hsps = @{$map[$_]->[1]}; |
|
|
0
|
|
|
|
|
0
|
|
|
817
|
0
|
|
|
|
|
0
|
@hspx[@hsps_i{@these_hsps}] = ('*') x @these_hsps; |
|
818
|
0
|
|
|
|
|
0
|
$mx[$_] = \@hspx; |
|
819
|
|
|
|
|
|
|
} |
|
820
|
0
|
|
|
|
|
0
|
untie %hsps_i; |
|
821
|
|
|
|
|
|
|
|
|
822
|
0
|
|
|
|
|
0
|
push @ret, "\tIntvl\n"; |
|
823
|
0
|
|
|
|
|
0
|
push @ret, "HSPS\t", join ("\t", (0..$#map)), "\n"; |
|
824
|
0
|
|
|
|
|
0
|
foreach my $h (0..$#hsps) { |
|
825
|
0
|
|
|
|
|
0
|
push @ret, join("\t", $h, map { $mx[$_][$h] } (0..$#map) ),"\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
826
|
|
|
|
|
|
|
} |
|
827
|
0
|
0
|
|
|
|
0
|
if ($legend_q) { |
|
828
|
0
|
|
|
|
|
0
|
push @ret, "Interval legend\n"; |
|
829
|
0
|
|
|
|
|
0
|
foreach (0..$#map) { |
|
830
|
0
|
|
|
|
|
0
|
push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$map[$_][0]}); |
|
|
0
|
|
|
|
|
0
|
|
|
831
|
|
|
|
|
|
|
} |
|
832
|
0
|
|
|
|
|
0
|
push @ret, "HSP legend\n"; |
|
833
|
0
|
|
|
|
|
0
|
my @ints = get_intervals_from_hsps($type,@hsps); |
|
834
|
0
|
|
|
|
|
0
|
foreach (0..$#hsps) { |
|
835
|
0
|
|
|
|
|
0
|
push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$ints[$_]}); |
|
|
0
|
|
|
|
|
0
|
|
|
836
|
|
|
|
|
|
|
} |
|
837
|
|
|
|
|
|
|
} |
|
838
|
0
|
|
|
|
|
0
|
return @ret; |
|
839
|
|
|
|
|
|
|
} |
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
=head2 hit |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
Title : hit |
|
844
|
|
|
|
|
|
|
Usage : $tiling->hit |
|
845
|
|
|
|
|
|
|
Function: |
|
846
|
|
|
|
|
|
|
Example : |
|
847
|
|
|
|
|
|
|
Returns : The HitI object associated with the invocant |
|
848
|
|
|
|
|
|
|
Args : none |
|
849
|
|
|
|
|
|
|
Note : getter only |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=cut |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub hit{ |
|
854
|
5229
|
|
|
5229
|
1
|
3321
|
my $self = shift; |
|
855
|
5229
|
50
|
|
|
|
6209
|
$self->warn("Getter only") if @_; |
|
856
|
5229
|
|
|
|
|
10182
|
return $self->{'hit'}; |
|
857
|
|
|
|
|
|
|
} |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=head2 hsps |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
Title : hsps |
|
862
|
|
|
|
|
|
|
Usage : $tiling->hsps() |
|
863
|
|
|
|
|
|
|
Function: Container for the HSP objects associated with invocant |
|
864
|
|
|
|
|
|
|
Example : |
|
865
|
|
|
|
|
|
|
Returns : an array of hsps associated with the hit |
|
866
|
|
|
|
|
|
|
Args : on set, new value (an arrayref or undef, optional) |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=cut |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
sub hsps{ |
|
871
|
1768
|
|
|
1768
|
1
|
1302
|
my $self = shift; |
|
872
|
1768
|
100
|
|
|
|
2953
|
return $self->{'hsps'} = shift if @_; |
|
873
|
1249
|
|
|
|
|
894
|
return @{$self->{'hsps'}}; |
|
|
1249
|
|
|
|
|
3225
|
|
|
874
|
|
|
|
|
|
|
} |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
=head2 contexts |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
Title : contexts |
|
879
|
|
|
|
|
|
|
Usage : @contexts = $tiling->context($type) or |
|
880
|
|
|
|
|
|
|
@indices = $tiling->context($type, $context) |
|
881
|
|
|
|
|
|
|
Function: Retrieve the set of available contexts in the hit, |
|
882
|
|
|
|
|
|
|
or the indices of hsps having the given context |
|
883
|
|
|
|
|
|
|
(integer indices for the array returned by $self->hsps) |
|
884
|
|
|
|
|
|
|
Returns : array of scalar context strings or |
|
885
|
|
|
|
|
|
|
array of scalar positive integers |
|
886
|
|
|
|
|
|
|
undef if no hsps in given context |
|
887
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
|
888
|
|
|
|
|
|
|
optional $context: context string |
|
889
|
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=cut |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub contexts{ |
|
893
|
1032
|
|
|
1032
|
1
|
64327
|
my $self = shift; |
|
894
|
1032
|
|
|
|
|
962
|
my ($type, $context) = @_; |
|
895
|
1032
|
|
|
|
|
1305
|
$self->_check_type_arg(\$type); |
|
896
|
1032
|
100
|
|
|
|
1413
|
return keys %{$self->{"_contexts_$type"}} unless defined $context; |
|
|
403
|
|
|
|
|
1368
|
|
|
897
|
629
|
50
|
|
|
|
1387
|
return undef unless $self->{"_contexts_$type"}{$context}; |
|
898
|
629
|
|
|
|
|
421
|
return @{$self->{"_contexts_$type"}{$context}}; |
|
|
629
|
|
|
|
|
1540
|
|
|
899
|
|
|
|
|
|
|
} |
|
900
|
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=head2 mapping |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
Title : mapping |
|
904
|
|
|
|
|
|
|
Usage : $tiling->mapping($type) |
|
905
|
|
|
|
|
|
|
Function: Retrieve the mapping coefficient for the sequence type |
|
906
|
|
|
|
|
|
|
based on the underlying algorithm |
|
907
|
|
|
|
|
|
|
Returns : scalar integer (mapping coefficient) |
|
908
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
|
909
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
sub mapping{ |
|
914
|
505
|
|
|
505
|
1
|
381
|
my $self = shift; |
|
915
|
505
|
|
|
|
|
378
|
my $type = shift; |
|
916
|
505
|
|
|
|
|
653
|
$self->_check_type_arg(\$type); |
|
917
|
505
|
|
|
|
|
1020
|
return $self->{"_mapping_${type}"}; |
|
918
|
|
|
|
|
|
|
} |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head2 default_context |
|
921
|
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
Title : default_context |
|
923
|
|
|
|
|
|
|
Usage : $tiling->default_context($type) |
|
924
|
|
|
|
|
|
|
Function: Retrieve the default strand/frame context string |
|
925
|
|
|
|
|
|
|
for the sequence type based on the underlying algorithm |
|
926
|
|
|
|
|
|
|
Returns : scalar string (context string) |
|
927
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
|
928
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=cut |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
sub default_context{ |
|
933
|
298
|
|
|
298
|
1
|
194
|
my $self = shift; |
|
934
|
298
|
|
|
|
|
186
|
my $type = shift; |
|
935
|
298
|
|
|
|
|
327
|
$self->_check_type_arg(\$type); |
|
936
|
298
|
|
|
|
|
477
|
return $self->{"_def_context_${type}"}; |
|
937
|
|
|
|
|
|
|
} |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head2 algorithm |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
Title : algorithm |
|
942
|
|
|
|
|
|
|
Usage : $tiling->algorithm |
|
943
|
|
|
|
|
|
|
Function: Retrieve the algorithm name associated with the |
|
944
|
|
|
|
|
|
|
invocant's hit object |
|
945
|
|
|
|
|
|
|
Returns : scalar string |
|
946
|
|
|
|
|
|
|
Args : none |
|
947
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
=cut |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sub algorithm{ |
|
952
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
953
|
0
|
0
|
|
|
|
0
|
$self->warn("Getter only") if @_; |
|
954
|
0
|
|
|
|
|
0
|
return $self->{"_algorithm"}; |
|
955
|
|
|
|
|
|
|
} |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=head1 "PRIVATE" METHODS |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 Calculators |
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
See L for lower level |
|
962
|
|
|
|
|
|
|
calculation methods. |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=head2 _calc_coverage_map |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Title : _calc_coverage_map |
|
967
|
|
|
|
|
|
|
Usage : $tiling->_calc_coverage_map($type) |
|
968
|
|
|
|
|
|
|
Function: Calculates the coverage map for the object's associated |
|
969
|
|
|
|
|
|
|
hit from the perspective of the desired $type (see Args:) |
|
970
|
|
|
|
|
|
|
and sets the coverage_map() property |
|
971
|
|
|
|
|
|
|
Returns : True on success |
|
972
|
|
|
|
|
|
|
Args : optional scalar $type: one of 'hit'|'subject'|'query' |
|
973
|
|
|
|
|
|
|
default is 'query' |
|
974
|
|
|
|
|
|
|
Note : The "coverage map" is an array with the following format: |
|
975
|
|
|
|
|
|
|
( [ $component_interval => [ @containing_hsps ] ], ... ), |
|
976
|
|
|
|
|
|
|
where $component_interval is a closed interval (see |
|
977
|
|
|
|
|
|
|
DESCRIPTION) of the form [$a0, $a1] with $a0 <= $a1, and |
|
978
|
|
|
|
|
|
|
@containing_hsps is an array of all HspI objects in the hit |
|
979
|
|
|
|
|
|
|
which completely contain the $component_interval. |
|
980
|
|
|
|
|
|
|
The set of $component_interval's is a disjoint decomposition |
|
981
|
|
|
|
|
|
|
of the minimum set of minimal intervals that completely |
|
982
|
|
|
|
|
|
|
cover the hit's HSPs (from the perspective of the $type) |
|
983
|
|
|
|
|
|
|
Note : This calculates the map for all strand/frame contexts available |
|
984
|
|
|
|
|
|
|
in the hit |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
=cut |
|
987
|
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
sub _calc_coverage_map { |
|
989
|
207
|
|
|
207
|
|
192
|
my $self = shift; |
|
990
|
207
|
|
|
|
|
271
|
my ($type) = @_; |
|
991
|
207
|
|
|
|
|
338
|
$self->_check_type_arg(\$type); |
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
# obtain the [start, end] intervals for all hsps in the hit (relative |
|
994
|
|
|
|
|
|
|
# to the type) |
|
995
|
207
|
50
|
|
|
|
447
|
unless ($self->{'hsps'}) { |
|
996
|
0
|
|
|
|
|
0
|
$self->warn("No HSPs for this hit"); |
|
997
|
0
|
|
|
|
|
0
|
return; |
|
998
|
|
|
|
|
|
|
} |
|
999
|
|
|
|
|
|
|
|
|
1000
|
207
|
|
|
|
|
182
|
my (@map, @hsps, %filters, @intervals); |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
# conversion here? |
|
1004
|
207
|
|
|
|
|
362
|
my $c = $self->mapping($type); |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# create the possible maps |
|
1007
|
207
|
|
|
|
|
339
|
for my $context ($self->contexts($type)) { |
|
1008
|
219
|
|
|
|
|
307
|
@map = (); |
|
1009
|
219
|
|
|
|
|
354
|
@hsps = ($self->hsps)[$self->contexts($type, $context)]; |
|
1010
|
219
|
|
|
|
|
846
|
@intervals = get_intervals_from_hsps( $type, @hsps ); |
|
1011
|
|
|
|
|
|
|
# the "frame" |
|
1012
|
219
|
|
|
|
|
502
|
my $f = ($intervals[0]->[0] - 1) % $c; |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
# convert interval endpoints... |
|
1015
|
219
|
|
|
|
|
352
|
for (@intervals) { |
|
1016
|
536
|
|
|
|
|
675
|
$$_[0] = ($$_[0] - $f + $c - 1)/$c; |
|
1017
|
536
|
|
|
|
|
667
|
$$_[1] = ($$_[1] - $f)/$c; |
|
1018
|
|
|
|
|
|
|
} |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
# determine the minimal set of disjoint intervals that cover the |
|
1021
|
|
|
|
|
|
|
# set of hsp intervals |
|
1022
|
219
|
|
|
|
|
592
|
my @dj_set = interval_tiling(\@intervals); |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
# decompose each disjoint interval into another set of disjoint |
|
1025
|
|
|
|
|
|
|
# intervals, each of which is completely contained within the |
|
1026
|
|
|
|
|
|
|
# original hsp intervals with which it overlaps |
|
1027
|
219
|
|
|
|
|
198
|
my $i=0; |
|
1028
|
219
|
|
|
|
|
186
|
my @decomp; |
|
1029
|
219
|
|
|
|
|
285
|
for my $dj_elt (@dj_set) { |
|
1030
|
432
|
|
|
|
|
527
|
my ($covering, $indices) = @$dj_elt; |
|
1031
|
432
|
|
|
|
|
702
|
my @covering_hsps = @hsps[@$indices]; |
|
1032
|
432
|
|
|
|
|
491
|
my @coverers = @intervals[@$indices]; |
|
1033
|
432
|
|
|
|
|
779
|
@decomp = decompose_interval( \@coverers ); |
|
1034
|
432
|
|
|
|
|
564
|
for (@decomp) { |
|
1035
|
638
|
|
|
|
|
442
|
my ($component, $container_indices) = @{$_}; |
|
|
638
|
|
|
|
|
703
|
|
|
1036
|
638
|
|
|
|
|
1286
|
push @map, [ $component, |
|
1037
|
|
|
|
|
|
|
[@covering_hsps[@$container_indices]] ]; |
|
1038
|
|
|
|
|
|
|
} |
|
1039
|
432
|
|
|
|
|
535
|
1; |
|
1040
|
|
|
|
|
|
|
} |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
# unconvert the components: |
|
1043
|
|
|
|
|
|
|
##### |
|
1044
|
219
|
|
|
|
|
335
|
foreach (@map) { |
|
1045
|
638
|
|
|
|
|
904
|
$$_[0][0] = $c*$$_[0][0] - $c + 1 + $f; |
|
1046
|
638
|
|
|
|
|
798
|
$$_[0][1] = $c*$$_[0][1] + $f; |
|
1047
|
|
|
|
|
|
|
} |
|
1048
|
219
|
|
|
|
|
327
|
foreach (@dj_set) { |
|
1049
|
432
|
|
|
|
|
590
|
$$_[0][0] = $c*$$_[0][0] - $c + 1 + $f; |
|
1050
|
432
|
|
|
|
|
497
|
$$_[0][1] = $c*$$_[0][1] + $f; |
|
1051
|
|
|
|
|
|
|
} |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
# sort the map on the interval left-ends |
|
1054
|
219
|
|
|
|
|
401
|
@map = sort { $a->[0][0]<=>$b->[0][0] } @map; |
|
|
980
|
|
|
|
|
696
|
|
|
1055
|
219
|
|
|
|
|
694
|
$self->{"coverage_map_${type}_${context}"} = [@map]; |
|
1056
|
|
|
|
|
|
|
# set the _contig_intersection attribute here (side effect) |
|
1057
|
219
|
|
|
|
|
313
|
$self->{"_contig_intersection_${type}_${context}"} = [map { $$_[0] } @map]; |
|
|
638
|
|
|
|
|
1507
|
|
|
1058
|
|
|
|
|
|
|
} |
|
1059
|
|
|
|
|
|
|
|
|
1060
|
207
|
|
|
|
|
550
|
return 1; # success |
|
1061
|
|
|
|
|
|
|
} |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=head2 _calc_stats |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
Title : _calc_stats |
|
1066
|
|
|
|
|
|
|
Usage : $tiling->_calc_stats($type, $action, $context) |
|
1067
|
|
|
|
|
|
|
Function: Calculates [estimated] tiling statistics (identities, conserved sites |
|
1068
|
|
|
|
|
|
|
length) and sets the public accessors |
|
1069
|
|
|
|
|
|
|
Returns : True on success |
|
1070
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
|
1071
|
|
|
|
|
|
|
default is 'query' |
|
1072
|
|
|
|
|
|
|
optional scalar $action: requests calculation method |
|
1073
|
|
|
|
|
|
|
currently one of 'exact', 'est', 'fast', 'max' |
|
1074
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
|
1075
|
|
|
|
|
|
|
Note : Action: The statistics are calculated by summing quantities |
|
1076
|
|
|
|
|
|
|
over the disjoint component intervals, taking into account |
|
1077
|
|
|
|
|
|
|
coverage of those intervals by multiple HSPs. The action |
|
1078
|
|
|
|
|
|
|
tells the algorithm how to obtain those quantities-- |
|
1079
|
|
|
|
|
|
|
'exact' will use Bio::Search::HSP::HSPI::matches |
|
1080
|
|
|
|
|
|
|
to count the appropriate segment of the homology string; |
|
1081
|
|
|
|
|
|
|
'est' will estimate the statistics by multiplying the |
|
1082
|
|
|
|
|
|
|
fraction of the HSP overlapped by the component interval |
|
1083
|
|
|
|
|
|
|
(see MapTileUtils) by the BLAST-reported identities/postives |
|
1084
|
|
|
|
|
|
|
(this may be convenient for BLAST summary report formats) |
|
1085
|
|
|
|
|
|
|
* Both exact and est take the average over the number of HSPs |
|
1086
|
|
|
|
|
|
|
that overlap the component interval. |
|
1087
|
|
|
|
|
|
|
'max' uses the exact method to calculate the statistics, |
|
1088
|
|
|
|
|
|
|
and returns only the maximum identites/positives over |
|
1089
|
|
|
|
|
|
|
overlapping HSP for the component interval. No averaging |
|
1090
|
|
|
|
|
|
|
is involved here. |
|
1091
|
|
|
|
|
|
|
'fast' doesn't involve tiling at all (hence the name), |
|
1092
|
|
|
|
|
|
|
but it seems like a very good estimate, and uses only |
|
1093
|
|
|
|
|
|
|
reported values, and so does not require sequence data. It |
|
1094
|
|
|
|
|
|
|
calculates an average of reported identities, conserved |
|
1095
|
|
|
|
|
|
|
sites, and lengths, over unmodified hsps in the hit, |
|
1096
|
|
|
|
|
|
|
weighted by the length of the hsps. |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=cut |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
sub _calc_stats { |
|
1101
|
532
|
|
|
532
|
|
423
|
my $self = shift; |
|
1102
|
532
|
|
|
|
|
595
|
my ($type, $action, $context) = @_; |
|
1103
|
|
|
|
|
|
|
# need to check args here, in case method is called internally. |
|
1104
|
532
|
|
|
|
|
665
|
$self->_check_type_arg(\$type); |
|
1105
|
532
|
|
|
|
|
727
|
$self->_check_action_arg(\$action); |
|
1106
|
532
|
|
|
|
|
724
|
$self->_check_context_arg($type, \$context); |
|
1107
|
532
|
|
|
|
|
630
|
my ($ident, $cons, $length) = (0,0,0); |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# fast : avoid coverage map altogether, get a pretty damn |
|
1110
|
|
|
|
|
|
|
# good estimate with a weighted average of reported hsp |
|
1111
|
|
|
|
|
|
|
# statistics |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
532
|
100
|
|
|
|
941
|
($action eq 'fast') && do { |
|
1114
|
204
|
|
|
|
|
385
|
my @hsps = $self->hit->hsps; |
|
1115
|
204
|
|
|
|
|
442
|
@hsps = @hsps[$self->contexts($type, $context)]; |
|
1116
|
|
|
|
|
|
|
# weights for averages |
|
1117
|
204
|
|
|
|
|
352
|
my @wt = map {$_->length($type)} @hsps; |
|
|
450
|
|
|
|
|
829
|
|
|
1118
|
204
|
|
|
|
|
11969
|
my $sum = eval( join('+',@wt) ); |
|
1119
|
204
|
|
|
|
|
962
|
$_ /= $sum for (@wt); |
|
1120
|
204
|
|
|
|
|
279
|
for (@hsps) { |
|
1121
|
450
|
|
|
|
|
566
|
my $wt = shift @wt; |
|
1122
|
450
|
|
|
|
|
1164
|
$ident += $wt*$_->matches_MT($type,'identities'); |
|
1123
|
450
|
|
|
|
|
921
|
$cons += $wt*$_->matches_MT($type,'conserved'); |
|
1124
|
450
|
|
|
|
|
955
|
$length += $wt*$_->length($type); |
|
1125
|
|
|
|
|
|
|
} |
|
1126
|
|
|
|
|
|
|
}; |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
# or, do tiling |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# calculate identities/conserved sites in tiling |
|
1131
|
|
|
|
|
|
|
# estimate based on the fraction of the component interval covered |
|
1132
|
|
|
|
|
|
|
# and ident/cons reported by the HSPs |
|
1133
|
532
|
100
|
|
|
|
839
|
($action ne 'fast') && do { |
|
1134
|
328
|
|
|
|
|
682
|
foreach ($self->coverage_map($type, $context)) { |
|
1135
|
1384
|
|
|
|
|
1189
|
my ($intvl, $hsps) = @{$_}; |
|
|
1384
|
|
|
|
|
1958
|
|
|
1136
|
1384
|
|
|
|
|
2067
|
my $len = ($$intvl[1]-$$intvl[0]+1); |
|
1137
|
1384
|
100
|
|
|
|
2040
|
my $ncover = ($action eq 'max') ? 1 : scalar @$hsps; |
|
1138
|
1384
|
|
|
|
|
1251
|
my ($acc_i, $acc_c) = (0,0); |
|
1139
|
1384
|
|
|
|
|
1353
|
foreach my $hsp (@$hsps) { |
|
1140
|
1764
|
|
|
|
|
1570
|
for ($action) { |
|
1141
|
1764
|
100
|
|
|
|
2579
|
($_ eq 'est') && do { |
|
1142
|
788
|
|
|
|
|
2549
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
|
1143
|
|
|
|
|
|
|
-type => $type, |
|
1144
|
|
|
|
|
|
|
-action => 'searchutils', |
|
1145
|
|
|
|
|
|
|
); |
|
1146
|
788
|
|
|
|
|
1734
|
my $frac = $len/$hsp->length($type); |
|
1147
|
788
|
|
|
|
|
881
|
$acc_i += $inc_i * $frac; |
|
1148
|
788
|
|
|
|
|
573
|
$acc_c += $inc_c * $frac; |
|
1149
|
788
|
|
|
|
|
1202
|
last; |
|
1150
|
|
|
|
|
|
|
}; |
|
1151
|
976
|
100
|
|
|
|
1326
|
($_ eq 'max') && do { |
|
1152
|
483
|
|
|
|
|
1587
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
|
1153
|
|
|
|
|
|
|
-type => $type, |
|
1154
|
|
|
|
|
|
|
-action => 'searchutils', |
|
1155
|
|
|
|
|
|
|
-start => $$intvl[0], |
|
1156
|
|
|
|
|
|
|
-end => $$intvl[1] |
|
1157
|
|
|
|
|
|
|
); |
|
1158
|
483
|
100
|
|
|
|
1003
|
$acc_i = ($acc_i > $inc_i) ? $acc_i : $inc_i; |
|
1159
|
483
|
100
|
|
|
|
552
|
$acc_c = ($acc_c > $inc_c) ? $acc_c : $inc_c; |
|
1160
|
483
|
|
|
|
|
730
|
last; |
|
1161
|
|
|
|
|
|
|
}; |
|
1162
|
493
|
50
|
33
|
|
|
1453
|
(!$_ || ($_ eq 'exact')) && do { |
|
1163
|
493
|
|
|
|
|
1705
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
|
1164
|
|
|
|
|
|
|
-type => $type, |
|
1165
|
|
|
|
|
|
|
-action => 'searchutils', |
|
1166
|
|
|
|
|
|
|
-start => $$intvl[0], |
|
1167
|
|
|
|
|
|
|
-end => $$intvl[1] |
|
1168
|
|
|
|
|
|
|
); |
|
1169
|
493
|
|
|
|
|
611
|
$acc_i += $inc_i; |
|
1170
|
493
|
|
|
|
|
297
|
$acc_c += $inc_c; |
|
1171
|
493
|
|
|
|
|
714
|
last; |
|
1172
|
|
|
|
|
|
|
}; |
|
1173
|
|
|
|
|
|
|
} |
|
1174
|
|
|
|
|
|
|
} |
|
1175
|
1384
|
|
|
|
|
1695
|
$ident += ($acc_i/$ncover); |
|
1176
|
1384
|
|
|
|
|
1003
|
$cons += ($acc_c/$ncover); |
|
1177
|
1384
|
|
|
|
|
1596
|
$length += $len; |
|
1178
|
|
|
|
|
|
|
} |
|
1179
|
|
|
|
|
|
|
}; |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
532
|
|
|
|
|
1700
|
$self->{"identities_${type}_${action}_${context}"} = $ident; |
|
1182
|
532
|
|
|
|
|
1152
|
$self->{"conserved_${type}_${action}_${context}"} = $cons; |
|
1183
|
532
|
|
|
|
|
1105
|
$self->{"length_${type}_${action}_${context}"} = $length; |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
532
|
|
|
|
|
758
|
return 1; |
|
1186
|
|
|
|
|
|
|
} |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 Tiling Helper Methods |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=cut |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
# coverage_map is of the form |
|
1193
|
|
|
|
|
|
|
# ( [ $interval, \@containing_hsps ], ... ) |
|
1194
|
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
# so, for each interval, pick one of the containing hsps, |
|
1196
|
|
|
|
|
|
|
# and return the union of all the picks. |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
# use the combinatorial generating iterator, with |
|
1199
|
|
|
|
|
|
|
# the urns containing the @containing_hsps for each |
|
1200
|
|
|
|
|
|
|
# interval |
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
=head2 _make_tiling_iterator |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
Title : _make_tiling_iterator |
|
1205
|
|
|
|
|
|
|
Usage : $self->_make_tiling_iterator($type) |
|
1206
|
|
|
|
|
|
|
Function: Create an iterator code ref that will step through all |
|
1207
|
|
|
|
|
|
|
minimal combinations of HSPs that produce complete coverage |
|
1208
|
|
|
|
|
|
|
of the $type ('hit', 'subject', 'query') sequence, |
|
1209
|
|
|
|
|
|
|
and set the correct iterator property of the invocant |
|
1210
|
|
|
|
|
|
|
Example : |
|
1211
|
|
|
|
|
|
|
Returns : The iterator |
|
1212
|
|
|
|
|
|
|
Args : scalar $type, one of 'hit', 'subject', 'query'; |
|
1213
|
|
|
|
|
|
|
default is 'query' |
|
1214
|
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=cut |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
sub _make_tiling_iterator { |
|
1218
|
|
|
|
|
|
|
### create the urns |
|
1219
|
3
|
|
|
3
|
|
7
|
my $self = shift; |
|
1220
|
3
|
|
|
|
|
3
|
my ($type, $context) = @_; |
|
1221
|
3
|
|
|
|
|
7
|
$self->_check_type_arg(\$type); |
|
1222
|
3
|
|
|
|
|
6
|
$self->_check_context_arg($type, \$context); |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
# initialize the urns |
|
1225
|
3
|
|
|
|
|
13
|
my @urns = map { [0, $$_[1]] } $self->coverage_map($type, $context); |
|
|
43
|
|
|
|
|
49
|
|
|
1226
|
|
|
|
|
|
|
|
|
1227
|
3
|
|
|
|
|
6
|
my $FINISHED = 0; |
|
1228
|
|
|
|
|
|
|
my $iter = sub { |
|
1229
|
|
|
|
|
|
|
# rewind? |
|
1230
|
271
|
100
|
|
271
|
|
333
|
if (my $rewind = shift) { |
|
1231
|
|
|
|
|
|
|
# reinitialize urn indices |
|
1232
|
2
|
|
|
|
|
9
|
$$_[0] = 0 for (@urns); |
|
1233
|
2
|
|
|
|
|
4
|
$FINISHED = 0; |
|
1234
|
2
|
|
|
|
|
5
|
return 1; |
|
1235
|
|
|
|
|
|
|
} |
|
1236
|
|
|
|
|
|
|
# check if done... |
|
1237
|
269
|
100
|
|
|
|
295
|
return if $FINISHED; |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
266
|
|
|
|
|
183
|
my $finished_incrementing = 0; |
|
1240
|
|
|
|
|
|
|
# @ret is the collector of urn choices |
|
1241
|
266
|
|
|
|
|
166
|
my @ret; |
|
1242
|
|
|
|
|
|
|
|
|
1243
|
266
|
|
|
|
|
258
|
for my $urn (@urns) { |
|
1244
|
4482
|
|
|
|
|
3133
|
my ($n, $hsps) = @$urn; |
|
1245
|
4482
|
|
|
|
|
3042
|
push @ret, $$hsps[$n]; |
|
1246
|
4482
|
100
|
|
|
|
4914
|
unless ($finished_incrementing) { |
|
1247
|
1086
|
100
|
|
|
|
994
|
if ($n == $#$hsps) { $$urn[0] = 0; } |
|
|
823
|
|
|
|
|
708
|
|
|
1248
|
263
|
|
|
|
|
193
|
else { ($$urn[0])++; $finished_incrementing = 1 } |
|
|
263
|
|
|
|
|
245
|
|
|
1249
|
|
|
|
|
|
|
} |
|
1250
|
|
|
|
|
|
|
} |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
# backstop... |
|
1253
|
266
|
100
|
|
|
|
312
|
$FINISHED = 1 unless $finished_incrementing; |
|
1254
|
|
|
|
|
|
|
# uniquify @ret |
|
1255
|
|
|
|
|
|
|
# $hsp->rank is a unique identifier for an hsp in a hit. |
|
1256
|
|
|
|
|
|
|
# preserve order in @ret |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
266
|
|
|
|
|
137
|
my (%order, %uniq); |
|
1259
|
266
|
|
|
|
|
1369
|
@order{(0..$#ret)} = @ret; |
|
1260
|
266
|
|
|
|
|
990
|
$uniq{$order{$_}->rank} = $_ for (0..$#ret); |
|
1261
|
266
|
|
|
|
|
534
|
@ret = @order{ sort {$a<=>$b} values %uniq }; |
|
|
5818
|
|
|
|
|
3883
|
|
|
1262
|
|
|
|
|
|
|
|
|
1263
|
266
|
|
|
|
|
818
|
return @ret; |
|
1264
|
3
|
|
|
|
|
21
|
}; |
|
1265
|
3
|
|
|
|
|
14
|
return $iter; |
|
1266
|
|
|
|
|
|
|
} |
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=head2 _tiling_iterator |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
Title : _tiling_iterator |
|
1271
|
|
|
|
|
|
|
Usage : $tiling->_tiling_iterator($type,$context) |
|
1272
|
|
|
|
|
|
|
Function: Retrieve the tiling iterator coderef for the requested |
|
1273
|
|
|
|
|
|
|
$type ('hit', 'subject', 'query') |
|
1274
|
|
|
|
|
|
|
Example : |
|
1275
|
|
|
|
|
|
|
Returns : coderef to the desired iterator |
|
1276
|
|
|
|
|
|
|
Args : scalar $type, one of 'hit', 'subject', 'query' |
|
1277
|
|
|
|
|
|
|
default is 'query' |
|
1278
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
|
1279
|
|
|
|
|
|
|
Note : getter only |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=cut |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
sub _tiling_iterator { |
|
1284
|
270
|
|
|
270
|
|
175
|
my $self = shift; |
|
1285
|
270
|
|
|
|
|
216
|
my ($type, $context) = @_; |
|
1286
|
270
|
|
|
|
|
252
|
$self->_check_type_arg(\$type); |
|
1287
|
270
|
|
|
|
|
330
|
$self->_check_context_arg($type, \$context); |
|
1288
|
|
|
|
|
|
|
|
|
1289
|
270
|
100
|
|
|
|
516
|
if (!defined $self->{"_tiling_iterator_${type}_${context}"}) { |
|
1290
|
2
|
|
|
|
|
9
|
$self->{"_tiling_iterator_${type}_${context}"} = |
|
1291
|
|
|
|
|
|
|
$self->_make_tiling_iterator($type,$context); |
|
1292
|
|
|
|
|
|
|
} |
|
1293
|
270
|
|
|
|
|
438
|
return $self->{"_tiling_iterator_${type}_${context}"}; |
|
1294
|
|
|
|
|
|
|
} |
|
1295
|
|
|
|
|
|
|
=head2 Construction Helper Methods |
|
1296
|
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
See also L. |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
=cut |
|
1300
|
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
sub _check_type_arg { |
|
1302
|
8297
|
|
|
8297
|
|
5338
|
my $self = shift; |
|
1303
|
8297
|
|
|
|
|
5781
|
my $typeref = shift; |
|
1304
|
8297
|
|
100
|
|
|
10661
|
$$typeref ||= 'query'; |
|
1305
|
8297
|
50
|
|
|
|
43148
|
$self->throw("Unknown type '$$typeref'") unless grep(/^$$typeref$/, qw( hit query subject )); |
|
1306
|
8297
|
100
|
|
|
|
10233
|
$$typeref = 'hit' if $$typeref eq 'subject'; |
|
1307
|
8297
|
|
|
|
|
6088
|
return 1; |
|
1308
|
|
|
|
|
|
|
} |
|
1309
|
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
sub _check_action_arg { |
|
1311
|
3721
|
|
|
3721
|
|
2965
|
my $self = shift; |
|
1312
|
3721
|
|
|
|
|
2532
|
my $actionref = shift; |
|
1313
|
3721
|
100
|
|
|
|
4361
|
if (!$$actionref) { |
|
1314
|
17
|
100
|
|
|
|
44
|
$$actionref = ($self->_has_sequence_data ? 'exact' : 'est'); |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
|
|
|
|
|
|
else { |
|
1317
|
3704
|
50
|
|
|
|
23527
|
$self->throw("Calc action '$$actionref' unrecognized") unless grep /^$$actionref$/, qw( est exact fast max ); |
|
1318
|
3704
|
100
|
100
|
|
|
7246
|
if ($$actionref ne 'est' and !$self->_has_sequence_data) { |
|
1319
|
12
|
|
|
|
|
47
|
$self->warn("Blast file did not possess sequence data; defaulting to 'est' action"); |
|
1320
|
12
|
|
|
|
|
14
|
$$actionref = 'est'; |
|
1321
|
|
|
|
|
|
|
} |
|
1322
|
|
|
|
|
|
|
} |
|
1323
|
3721
|
|
|
|
|
3682
|
return 1; |
|
1324
|
|
|
|
|
|
|
} |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
sub _check_context_arg { |
|
1327
|
4613
|
|
|
4613
|
|
3776
|
my $self = shift; |
|
1328
|
4613
|
|
|
|
|
3792
|
my ($type, $contextref) = @_; |
|
1329
|
4613
|
100
|
|
|
|
5619
|
if (!$$contextref) { |
|
1330
|
298
|
50
|
|
|
|
320
|
$self->throw("Type '$type' requires strand/frame context for algorithm ".$self->algorithm) unless ($self->mapping($type) == 1); |
|
1331
|
|
|
|
|
|
|
# set default according to default_context attrib |
|
1332
|
298
|
|
|
|
|
339
|
$$contextref = $self->default_context($type); |
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
|
|
|
|
|
|
else { |
|
1335
|
4315
|
50
|
|
|
|
7345
|
($$contextref =~ /^[mp]$/) && do { $$contextref .= '_' }; |
|
|
0
|
|
|
|
|
0
|
|
|
1336
|
4315
|
50
|
|
|
|
8204
|
$self->throw("Context '$$contextref' unrecognized") unless |
|
1337
|
|
|
|
|
|
|
$$contextref =~ /all|[mp][0-2_]/; |
|
1338
|
|
|
|
|
|
|
} |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
} |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=head2 _make_context_key |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
Title : _make_context_key |
|
1345
|
|
|
|
|
|
|
Alias : _context |
|
1346
|
|
|
|
|
|
|
Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame) |
|
1347
|
|
|
|
|
|
|
Function: create a string indicating strand/frame context; serves as |
|
1348
|
|
|
|
|
|
|
component of memoizing hash keys |
|
1349
|
|
|
|
|
|
|
Returns : scalar string |
|
1350
|
|
|
|
|
|
|
Args : -type => one of ('query', 'hit', 'subject') |
|
1351
|
|
|
|
|
|
|
-strand => one of (1,0,-1) |
|
1352
|
|
|
|
|
|
|
-frame => one of (-2, 1, 0, 1, -2) |
|
1353
|
|
|
|
|
|
|
called w/o args: returns 'all' |
|
1354
|
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=cut |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
sub _make_context_key { |
|
1358
|
546
|
|
|
546
|
|
389
|
my $self = shift; |
|
1359
|
546
|
|
|
|
|
1065
|
my @args = @_; |
|
1360
|
546
|
|
|
|
|
1762
|
my ($type, $strand, $frame) = $self->_rearrange([qw(TYPE STRAND FRAME)], @args); |
|
1361
|
546
|
|
|
|
|
1286
|
_check_type_arg(\$type); |
|
1362
|
546
|
50
|
33
|
|
|
941
|
return 'all' unless (defined $strand or defined $frame); |
|
1363
|
546
|
100
|
66
|
|
|
1508
|
if ( defined $strand && $self->_has_strand($type) ) { |
|
1364
|
211
|
100
|
66
|
|
|
572
|
if (defined $frame && $self->_has_frame($type)) { |
|
1365
|
94
|
100
|
|
|
|
365
|
return ($strand >= 0 ? 'p' : 'm').abs($frame); |
|
1366
|
|
|
|
|
|
|
} |
|
1367
|
|
|
|
|
|
|
else { |
|
1368
|
117
|
100
|
|
|
|
379
|
return ($strand >= 0 ? 'p_' : 'm_'); |
|
1369
|
|
|
|
|
|
|
} |
|
1370
|
|
|
|
|
|
|
} |
|
1371
|
|
|
|
|
|
|
else { |
|
1372
|
335
|
50
|
33
|
|
|
827
|
if (defined $frame && $self->_has_frame($type)) { |
|
1373
|
0
|
|
|
|
|
0
|
$self->warn("Frame defined without strand; punting with plus strand"); |
|
1374
|
0
|
|
|
|
|
0
|
return 'p'.abs($frame); |
|
1375
|
|
|
|
|
|
|
} |
|
1376
|
|
|
|
|
|
|
else { |
|
1377
|
335
|
|
|
|
|
683
|
return 'all'; |
|
1378
|
|
|
|
|
|
|
} |
|
1379
|
|
|
|
|
|
|
} |
|
1380
|
|
|
|
|
|
|
} |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=head2 _context |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
Title : _context |
|
1385
|
|
|
|
|
|
|
Alias : _make_context_key |
|
1386
|
|
|
|
|
|
|
Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame) |
|
1387
|
|
|
|
|
|
|
Function: create a string indicating strand/frame context; serves as |
|
1388
|
|
|
|
|
|
|
component of memoizing hash keys |
|
1389
|
|
|
|
|
|
|
Returns : scalar string |
|
1390
|
|
|
|
|
|
|
Args : -type => one of ('query', 'hit', 'subject') |
|
1391
|
|
|
|
|
|
|
-strand => one of (1,0,-1) |
|
1392
|
|
|
|
|
|
|
-frame => one of (-2, 1, 0, 1, -2) |
|
1393
|
|
|
|
|
|
|
called w/o args: returns 'all' |
|
1394
|
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=cut |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
546
|
|
|
546
|
|
959
|
sub _context { shift->_make_context_key(@_) } |
|
1398
|
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
=head2 Predicates |
|
1400
|
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
Most based on a reading of the algorithm name with a configuration lookup. |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
=over |
|
1404
|
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
=item _has_sequence_data() |
|
1406
|
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=cut |
|
1408
|
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
sub _has_sequence_data { |
|
1410
|
2253
|
|
|
2253
|
|
1629
|
my $self = shift; |
|
1411
|
2253
|
50
|
|
|
|
2835
|
$self->throw("Hit attribute not yet set") unless defined $self->hit; |
|
1412
|
2253
|
100
|
|
|
|
2746
|
return (($self->hit->hsps)[0]->seq_str('match') ? 1 : 0); |
|
1413
|
|
|
|
|
|
|
} |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=item _has_logical_length() |
|
1416
|
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
=cut |
|
1418
|
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
sub _has_logical_length { |
|
1420
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
1421
|
0
|
|
|
|
|
0
|
my $type = shift; |
|
1422
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
|
1423
|
|
|
|
|
|
|
# based on mapping coeff |
|
1424
|
0
|
0
|
|
|
|
0
|
$self->throw("Mapping coefficients not yet set") unless defined $self->mapping($type); |
|
1425
|
0
|
|
|
|
|
0
|
return ($self->mapping($type) > 1); |
|
1426
|
|
|
|
|
|
|
} |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=item _has_strand() |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=cut |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
sub _has_strand { |
|
1433
|
546
|
|
|
546
|
|
539
|
my $self = shift; |
|
1434
|
546
|
|
|
|
|
491
|
my $type = shift; |
|
1435
|
546
|
|
|
|
|
795
|
$self->_check_type_arg(\$type); |
|
1436
|
546
|
|
|
|
|
1834
|
return $self->{"_has_strand_${type}"}; |
|
1437
|
|
|
|
|
|
|
} |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=item _has_frame() |
|
1440
|
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
=cut |
|
1442
|
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
sub _has_frame { |
|
1444
|
546
|
|
|
546
|
|
633
|
my $self = shift; |
|
1445
|
546
|
|
|
|
|
519
|
my $type = shift; |
|
1446
|
546
|
|
|
|
|
677
|
$self->_check_type_arg(\$type); |
|
1447
|
546
|
|
|
|
|
1671
|
return $self->{"_has_frame_${type}"}; |
|
1448
|
|
|
|
|
|
|
} |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=back |
|
1451
|
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=head1 Private Accessors |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head2 _contig_intersection |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Title : _contig_intersection |
|
1457
|
|
|
|
|
|
|
Usage : $tiling->_contig_intersection($type) |
|
1458
|
|
|
|
|
|
|
Function: Return the minimal set of $type coordinate intervals |
|
1459
|
|
|
|
|
|
|
covered by the invocant's HSPs |
|
1460
|
|
|
|
|
|
|
Returns : array of intervals (2-member arrayrefs; see MapTileUtils) |
|
1461
|
|
|
|
|
|
|
Args : scalar $type: one of 'query', 'hit', 'subject' |
|
1462
|
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
=cut |
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
sub _contig_intersection { |
|
1466
|
8
|
|
|
8
|
|
11
|
my $self = shift; |
|
1467
|
8
|
|
|
|
|
11
|
my ($type, $context) = @_; |
|
1468
|
8
|
|
|
|
|
11
|
$self->_check_type_arg(\$type); |
|
1469
|
8
|
|
|
|
|
17
|
$self->_check_context_arg($type, \$context); |
|
1470
|
8
|
100
|
|
|
|
27
|
if (!defined $self->{"_contig_intersection_${type}_${context}"}) { |
|
1471
|
4
|
|
|
|
|
15
|
$self->_calc_coverage_map($type); |
|
1472
|
|
|
|
|
|
|
} |
|
1473
|
8
|
|
|
|
|
11
|
return @{$self->{"_contig_intersection_${type}_${context}"}}; |
|
|
8
|
|
|
|
|
27
|
|
|
1474
|
|
|
|
|
|
|
} |
|
1475
|
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=head2 _reported_length |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
Title : _reported_length |
|
1479
|
|
|
|
|
|
|
Usage : $tiling->_reported_length($type) |
|
1480
|
|
|
|
|
|
|
Function: Get the total length of the seq $type |
|
1481
|
|
|
|
|
|
|
for the invocant's hit object, as reported |
|
1482
|
|
|
|
|
|
|
by (not calculated from) the input data file |
|
1483
|
|
|
|
|
|
|
Returns : scalar int |
|
1484
|
|
|
|
|
|
|
Args : scalar $type: one of 'query', 'hit', 'subject' |
|
1485
|
|
|
|
|
|
|
Note : This is kludgy; the hit object does not currently |
|
1486
|
|
|
|
|
|
|
maintain accessors for these values, but the |
|
1487
|
|
|
|
|
|
|
hsps possess these attributes. This is a wrapper |
|
1488
|
|
|
|
|
|
|
that allows a consistent access method in the |
|
1489
|
|
|
|
|
|
|
MapTiling code. |
|
1490
|
|
|
|
|
|
|
Note : Since this number is based on a reported length, |
|
1491
|
|
|
|
|
|
|
it is already a "logical length". |
|
1492
|
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
=cut |
|
1494
|
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
sub _reported_length { |
|
1496
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
|
1497
|
4
|
|
|
|
|
7
|
my $type = shift; |
|
1498
|
4
|
|
|
|
|
12
|
$self->_check_type_arg(\$type); |
|
1499
|
4
|
|
|
|
|
9
|
my $key = uc( $type."_LENGTH" ); |
|
1500
|
4
|
|
|
|
|
13
|
return ($self->hsps)[0]->{$key}; |
|
1501
|
|
|
|
|
|
|
} |
|
1502
|
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
1; |
|
1504
|
|
|
|
|
|
|
|