line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Search::Tiling::MapTiling |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Mark A. Jensen |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Mark A. Jensen |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Search::Tiling::MapTiling - An implementation of an HSP tiling |
17
|
|
|
|
|
|
|
algorithm, with methods to obtain frequently-requested statistics |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# get a BLAST $hit from somewhere, then |
22
|
|
|
|
|
|
|
$tiling = Bio::Search::Tiling::MapTiling->new($hit); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# stats |
25
|
|
|
|
|
|
|
$numID = $tiling->identities(); |
26
|
|
|
|
|
|
|
$numCons = $tiling->conserved(); |
27
|
|
|
|
|
|
|
$query_length = $tiling->length('query'); |
28
|
|
|
|
|
|
|
$subject_length = $tiling->length('subject'); # or... |
29
|
|
|
|
|
|
|
$subject_length = $tiling->length('hit'); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# get a visual on the coverage map |
32
|
|
|
|
|
|
|
print $tiling->coverage_map_as_text('query',$context,'LEGEND'); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# tilings |
35
|
|
|
|
|
|
|
$context = $tiling->_context( -type => 'subject', -strand=> 1, -frame=>1); |
36
|
|
|
|
|
|
|
@covering_hsps_for_subject = $tiling->next_tiling('subject',$context); |
37
|
|
|
|
|
|
|
$context = $tiling->_context( -type => 'query', -strand=> -1, -frame=>0); |
38
|
|
|
|
|
|
|
@covering_hsps_for_query = $tiling->next_tiling('query', $context); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Frequently, users want to use a set of high-scoring pairs (HSPs) |
43
|
|
|
|
|
|
|
obtained from a BLAST or other search to assess the overall level of |
44
|
|
|
|
|
|
|
identity, conservation, or coverage represented by matches between a |
45
|
|
|
|
|
|
|
subject and a query sequence. Because a set of HSPs frequently |
46
|
|
|
|
|
|
|
describes multiple overlapping sequence fragments, a simple summation of |
47
|
|
|
|
|
|
|
statistics over the HSPs will generally overestimate those |
48
|
|
|
|
|
|
|
statistics. To obtain an accurate estimate of global hit statistics, a |
49
|
|
|
|
|
|
|
'tiling' of HSPs onto either the subject or the query sequence must be |
50
|
|
|
|
|
|
|
performed, in order to properly correct for this. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This module will execute a tiling algorithm on a given hit based on an |
53
|
|
|
|
|
|
|
interval decomposition I'm calling the "coverage map". Internal object |
54
|
|
|
|
|
|
|
methods compute the various statistics, which are then stored in |
55
|
|
|
|
|
|
|
appropriately-named public object attributes. See |
56
|
|
|
|
|
|
|
L for more info on the algorithm. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 STRAND/FRAME CONTEXTS |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
In BLASTX, TBLASTN, and TBLASTX reports, strand and frame information |
61
|
|
|
|
|
|
|
are reported for the query, subject, or query and subject, |
62
|
|
|
|
|
|
|
respectively, for each HSP. Tilings for these sequence types are only |
63
|
|
|
|
|
|
|
meaningful when they include HSPs in the same strand and frame, or |
64
|
|
|
|
|
|
|
"context". So, in these situations, the context must be specified |
65
|
|
|
|
|
|
|
in the method calls or the methods will throw. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Contexts are specified as strings: C<[ 'all' | [m|p][_|0|1|2] ]>, where |
68
|
|
|
|
|
|
|
C = all HSPs (will throw if context must be specified), C = minus |
69
|
|
|
|
|
|
|
strand, C = plus strand, and C<_> = no frame info, C<0,1,2> = respective |
70
|
|
|
|
|
|
|
(absolute) frame. The L method will convert a (strand, |
71
|
|
|
|
|
|
|
frame) specification to a context string, e.g.: |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
$context = $self->_context(-type=>'query', -strand=>-1, -frame=>-2); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
returns C. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The contexts present among the HSPs in a hit are identified and stored |
78
|
|
|
|
|
|
|
for convenience upon object construction. These are accessed off the |
79
|
|
|
|
|
|
|
object with the L method. If contexts don't apply for the |
80
|
|
|
|
|
|
|
given report, this returns C<('all')>. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 TILED ALIGNMENTS |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The experimental method L will use a tiling |
85
|
|
|
|
|
|
|
to concatenate tiled hsps into a series of L |
86
|
|
|
|
|
|
|
objects: |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
@alns = $tiling->get_tiled_alns($type, $context); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Each alignment contains two sequences with ids 'query' and 'subject', |
91
|
|
|
|
|
|
|
and consists of a concatenation of tiling HSPs which overlap or are |
92
|
|
|
|
|
|
|
directly adjacent. The alignment are returned in C<$type> sequence |
93
|
|
|
|
|
|
|
order. When HSPs overlap, the alignment sequence is taken from the HSP |
94
|
|
|
|
|
|
|
which comes first in the coverage map array. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
The sequences in each alignment contain features (even though they are |
97
|
|
|
|
|
|
|
L objects) which map the original query/subject |
98
|
|
|
|
|
|
|
coordinates to the new alignment sequence coordinates. You can |
99
|
|
|
|
|
|
|
determine the original BLAST fragments this way: |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$aln = ($tiling->get_tiled_alns)[0]; |
102
|
|
|
|
|
|
|
$qseq = $aln->get_seq_by_id('query'); |
103
|
|
|
|
|
|
|
$hseq = $aln->get_seq_by_id('subject'); |
104
|
|
|
|
|
|
|
foreach my $feat ($qseq->get_SeqFeatures) { |
105
|
|
|
|
|
|
|
$org_start = ($feat->get_tag_values('query_start'))[0]; |
106
|
|
|
|
|
|
|
$org_end = ($feat->get_tag_values('query_end'))[0]; |
107
|
|
|
|
|
|
|
# original fragment as represented in the tiled alignment: |
108
|
|
|
|
|
|
|
$org_fragment = $feat->seq; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
foreach my $feat ($hseq->get_SeqFeatures) { |
111
|
|
|
|
|
|
|
$org_start = ($feat->get_tag_values('subject_start'))[0]; |
112
|
|
|
|
|
|
|
$org_end = ($feat->get_tag_values('subject_end'))[0]; |
113
|
|
|
|
|
|
|
# original fragment as represented in the tiled alignment: |
114
|
|
|
|
|
|
|
$org_fragment = $feat->seq; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 DESIGN NOTE |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The major calculations are made just-in-time, and then memoized. So, |
120
|
|
|
|
|
|
|
for example, for a given MapTiling object, a coverage map would |
121
|
|
|
|
|
|
|
usually be calculated only once (for the query), and at most twice (if |
122
|
|
|
|
|
|
|
the subject perspective is also desired), and then only when a |
123
|
|
|
|
|
|
|
statistic is first accessed. Afterward, the map and/or any statistic |
124
|
|
|
|
|
|
|
is read from storage. So feel free to call the statistic methods |
125
|
|
|
|
|
|
|
frequently if it suits you. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 FEEDBACK |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 Mailing Lists |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
132
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
133
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
136
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 Support |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
I |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
145
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
146
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
147
|
|
|
|
|
|
|
with code and data examples if at all possible. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 Reporting Bugs |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
152
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
153
|
|
|
|
|
|
|
the web: |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR - Mark A. Jensen |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Email maj -at- fortinbras -dot- us |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 APPENDIX |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
164
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Let the code begin... |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
package Bio::Search::Tiling::MapTiling; |
171
|
1
|
|
|
1
|
|
826
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
172
|
1
|
|
|
1
|
|
2
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
175
|
|
|
|
|
|
|
#use lib '../../..'; |
176
|
|
|
|
|
|
|
|
177
|
1
|
|
|
1
|
|
390
|
use Bio::Root::Root; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
178
|
1
|
|
|
1
|
|
479
|
use Bio::Search::Tiling::TilingI; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
20
|
|
179
|
1
|
|
|
1
|
|
433
|
use Bio::Search::Tiling::MapTileUtils; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
67
|
|
180
|
1
|
|
|
1
|
|
419
|
use Bio::LocatableSeq; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
32
|
|
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI); |
183
|
1
|
|
|
1
|
|
5
|
use base qw(Bio::Root::Root Bio::Search::Tiling::TilingI); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3974
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 new |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Title : new |
190
|
|
|
|
|
|
|
Usage : my $obj = new Bio::Search::Tiling::GenericTiling(); |
191
|
|
|
|
|
|
|
Function: Builds a new Bio::Search::Tiling::GenericTiling object |
192
|
|
|
|
|
|
|
Returns : an instance of Bio::Search::Tiling::GenericTiling |
193
|
|
|
|
|
|
|
Args : -hit => $a_Bio_Search_Hit_HitI_object |
194
|
|
|
|
|
|
|
general filter function: |
195
|
|
|
|
|
|
|
-hsp_filter => sub { my $this_hsp = shift; |
196
|
|
|
|
|
|
|
...; |
197
|
|
|
|
|
|
|
return 1 if $wanted; |
198
|
|
|
|
|
|
|
return 0; } |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub new { |
203
|
519
|
|
|
519
|
1
|
3735
|
my $class = shift; |
204
|
519
|
|
|
|
|
816
|
my @args = @_; |
205
|
519
|
|
|
|
|
1241
|
my $self = $class->SUPER::new(@args); |
206
|
519
|
|
|
|
|
1528
|
my($hit, $filter) = $self->_rearrange( [qw( HIT HSP_FILTER)],@args ); |
207
|
|
|
|
|
|
|
|
208
|
519
|
50
|
|
|
|
1183
|
$self->throw("HitI object required") unless $hit; |
209
|
519
|
50
|
33
|
|
|
2666
|
$self->throw("Argument must be HitI object") unless ( ref $hit && $hit->isa('Bio::Search::Hit::HitI') ); |
210
|
519
|
|
|
|
|
632
|
$self->{hit} = $hit; |
211
|
519
|
|
|
|
|
1214
|
$self->_set_attributes(); |
212
|
519
|
|
|
|
|
806
|
$self->{"_algorithm"} = $hit->algorithm; |
213
|
|
|
|
|
|
|
|
214
|
519
|
|
|
|
|
880
|
my @hsps = $hit->hsps; |
215
|
|
|
|
|
|
|
# apply filter function if requested |
216
|
519
|
50
|
|
|
|
964
|
if ( defined $filter ) { |
217
|
0
|
0
|
|
|
|
0
|
if ( ref($filter) eq 'CODE' ) { |
218
|
0
|
0
|
|
|
|
0
|
@hsps = map { $filter->($_) ? $_ : () } @hsps; |
|
0
|
|
|
|
|
0
|
|
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
else { |
221
|
0
|
|
|
|
|
0
|
$self->warn("-filter is not a coderef; ignoring"); |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# identify available contexts |
226
|
519
|
|
|
|
|
641
|
for my $t (qw( query hit )) { |
227
|
1038
|
|
|
|
|
761
|
my %contexts; |
228
|
1038
|
|
|
|
|
1509
|
for my $i (0..$#hsps) { |
229
|
546
|
|
|
|
|
1666
|
my $ctxt = $self->_context( |
230
|
|
|
|
|
|
|
-type => $t, |
231
|
|
|
|
|
|
|
-strand => $hsps[$i]->strand($t), |
232
|
|
|
|
|
|
|
-frame => $hsps[$i]->frame($t)); |
233
|
546
|
|
100
|
|
|
1617
|
$contexts{$ctxt} ||= []; |
234
|
546
|
|
|
|
|
375
|
push @{$contexts{$ctxt}}, $i; |
|
546
|
|
|
|
|
1102
|
|
235
|
|
|
|
|
|
|
} |
236
|
1038
|
|
|
|
|
2039
|
$self->{"_contexts_${t}"} = \%contexts; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
519
|
100
|
|
|
|
1506
|
$self->warn("No HSPs present in hit after filtering") unless (@hsps); |
240
|
519
|
|
|
|
|
954
|
$self->hsps(\@hsps); |
241
|
519
|
|
|
|
|
1864
|
return $self; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# a tiling is based on the set of hsps contained in a single hit. |
245
|
|
|
|
|
|
|
# check all the boundaries - zero hsps, one hsp, all disjoint hsps |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 TILING ITERATORS |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 next_tiling |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Title : next_tiling |
252
|
|
|
|
|
|
|
Usage : @hsps = $self->next_tiling($type); |
253
|
|
|
|
|
|
|
Function: Obtain a tiling: a minimal set of HSPs covering the $type |
254
|
|
|
|
|
|
|
('hit', 'subject', 'query') sequence |
255
|
|
|
|
|
|
|
Example : |
256
|
|
|
|
|
|
|
Returns : an array of HSPI objects |
257
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query', with |
258
|
|
|
|
|
|
|
'subject' an alias for 'hit' |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=cut |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub next_tiling{ |
263
|
268
|
|
|
268
|
1
|
1962
|
my $self = shift; |
264
|
268
|
|
|
|
|
205
|
my ($type, $context) = @_; |
265
|
268
|
|
|
|
|
285
|
$self->_check_type_arg(\$type); |
266
|
268
|
|
|
|
|
373
|
$self->_check_context_arg($type, \$context); |
267
|
268
|
|
|
|
|
332
|
return $self->_tiling_iterator($type, $context)->(); |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 rewind_tilings |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Title : rewind_tilings |
273
|
|
|
|
|
|
|
Usage : $self->rewind_tilings($type) |
274
|
|
|
|
|
|
|
Function: Reset the next_tilings($type) iterator |
275
|
|
|
|
|
|
|
Example : |
276
|
|
|
|
|
|
|
Returns : True on success |
277
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query'; |
278
|
|
|
|
|
|
|
default is 'query' |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub rewind_tilings{ |
283
|
2
|
|
|
2
|
1
|
314
|
my $self = shift; |
284
|
2
|
|
|
|
|
4
|
my ($type,$context) = @_; |
285
|
2
|
|
|
|
|
6
|
$self->_check_type_arg(\$type); |
286
|
2
|
|
|
|
|
6
|
$self->_check_context_arg($type, \$context); |
287
|
2
|
|
|
|
|
6
|
return $self->_tiling_iterator($type, $context)->('REWIND'); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 ALIGNMENTS |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head2 get_tiled_alns() |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
Title : get_tiled_alns |
295
|
|
|
|
|
|
|
Usage : @alns = $tiling->get_tiled_alns($type, $context) |
296
|
|
|
|
|
|
|
Function: Use a tiling to construct a minimal set of alignment |
297
|
|
|
|
|
|
|
objects covering the region specified by $type/$context |
298
|
|
|
|
|
|
|
by splicing adjacent HSP tiles |
299
|
|
|
|
|
|
|
Returns : an array of Bio::SimpleAlign objects; see Note below |
300
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
301
|
|
|
|
|
|
|
default is 'query' |
302
|
|
|
|
|
|
|
scalar $context: strand/frame context string |
303
|
|
|
|
|
|
|
Following $type and $context, an array of |
304
|
|
|
|
|
|
|
ordered, tiled HSP objects can be specified; this is |
305
|
|
|
|
|
|
|
the tiling that will directly the alignment construction |
306
|
|
|
|
|
|
|
default -- the first tiling provided by a tiling iterator |
307
|
|
|
|
|
|
|
Notes : Each returned alignment is a concatenation of adjacent tiles. |
308
|
|
|
|
|
|
|
The set of alignments will cover all regions described by the |
309
|
|
|
|
|
|
|
$type/$context pair in the hit. The pair of sequences in each |
310
|
|
|
|
|
|
|
alignment have ids 'query' and 'subject', and each sequence |
311
|
|
|
|
|
|
|
possesses SeqFeatures that map the original query or subject |
312
|
|
|
|
|
|
|
coordinates to the sequence coordinates in the tiled alignment. |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub get_tiled_alns { |
317
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
318
|
1
|
|
|
|
|
2
|
my ($type, $context) = @_; |
319
|
1
|
|
|
|
|
3
|
$self->_check_type_arg(\$type); |
320
|
1
|
|
|
|
|
4
|
$self->_check_context_arg($type, \$context); |
321
|
1
|
|
|
|
|
2
|
my $t = shift; # first arg after type/context, arrayref to a tiling |
322
|
1
|
|
|
|
|
2
|
my @tiling; |
323
|
1
|
50
|
33
|
|
|
6
|
if ($t && (ref($t) eq 'ARRAY')) { |
324
|
0
|
|
|
|
|
0
|
@tiling = @$t; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
else { # otherwise, take the first tiling available |
327
|
|
|
|
|
|
|
|
328
|
1
|
|
|
|
|
3
|
@tiling = $self->_make_tiling_iterator($type,$context)->(); |
329
|
|
|
|
|
|
|
} |
330
|
1
|
|
|
|
|
11
|
my @ret; |
331
|
|
|
|
|
|
|
|
332
|
1
|
|
|
|
|
4
|
my @map = $self->coverage_map($type, $context); |
333
|
1
|
|
|
|
|
3
|
my @intervals = map {$_->[0]} @map; # disjoint decomp |
|
13
|
|
|
|
|
11
|
|
334
|
|
|
|
|
|
|
# divide into disjoint covering groups |
335
|
1
|
|
|
|
|
6
|
my @groups = covering_groups(@intervals); |
336
|
|
|
|
|
|
|
|
337
|
1
|
|
|
|
|
1053
|
require Bio::SimpleAlign; |
338
|
1
|
|
|
|
|
9
|
require Bio::SeqFeature::Generic; |
339
|
|
|
|
|
|
|
# cut hsp aligns along the disjoint decomp |
340
|
|
|
|
|
|
|
# look for gaps...or add gaps? |
341
|
1
|
|
|
|
|
2
|
my ($q_start, $h_start, $q_strand, $h_strand); |
342
|
|
|
|
|
|
|
# build seqs |
343
|
1
|
|
|
|
|
4
|
for my $grp (@groups) { |
344
|
6
|
|
|
|
|
18
|
my $taln = Bio::SimpleAlign->new(); |
345
|
6
|
|
|
|
|
9
|
my (@qfeats, @hfeats); |
346
|
6
|
|
|
|
|
7
|
my $query_string = ''; |
347
|
6
|
|
|
|
|
4
|
my $hit_string = ''; |
348
|
6
|
|
|
|
|
8
|
my ($qlen,$hlen) = (0,0); |
349
|
6
|
|
|
|
|
5
|
my ($qinc, $hinc, $qstart, $hstart); |
350
|
6
|
|
|
|
|
11
|
for my $intvl (@$grp) { |
351
|
|
|
|
|
|
|
# following just chooses the first available hsp containing the |
352
|
|
|
|
|
|
|
# interval -- this is arbitrary, could be smarter. |
353
|
12
|
|
|
|
|
37
|
my $aln = ( containing_hsps($intvl, @tiling) )[0]->get_aln; |
354
|
12
|
|
|
|
|
29
|
my $qseq = $aln->get_seq_by_pos(1); |
355
|
12
|
|
|
|
|
22
|
my $hseq = $aln->get_seq_by_pos(2); |
356
|
12
|
|
66
|
|
|
35
|
$qstart ||= $qseq->start; |
357
|
12
|
|
66
|
|
|
30
|
$hstart ||= $hseq->start; |
358
|
|
|
|
|
|
|
# calculate the slice boundaries |
359
|
12
|
|
|
|
|
14
|
my ($beg, $end); |
360
|
12
|
|
|
|
|
18
|
for ($type) { |
361
|
12
|
50
|
|
|
|
30
|
/query/ && do { |
362
|
12
|
|
|
|
|
25
|
$beg = $aln->column_from_residue_number($qseq->id, $intvl->[0]); |
363
|
12
|
|
|
|
|
29
|
$end = $aln->column_from_residue_number($qseq->id, $intvl->[1]); |
364
|
12
|
|
|
|
|
21
|
last; |
365
|
|
|
|
|
|
|
}; |
366
|
0
|
0
|
|
|
|
0
|
/subject|hit/ && do { |
367
|
0
|
|
|
|
|
0
|
$beg = $aln->column_from_residue_number($hseq->id, $intvl->[0]); |
368
|
0
|
|
|
|
|
0
|
$end = $aln->column_from_residue_number($hseq->id, $intvl->[1]); |
369
|
0
|
|
|
|
|
0
|
last; |
370
|
|
|
|
|
|
|
}; |
371
|
|
|
|
|
|
|
} |
372
|
12
|
|
|
|
|
31
|
$aln = $aln->slice($beg, $end); |
373
|
12
|
|
|
|
|
35
|
$qseq = $aln->get_seq_by_pos(1); |
374
|
12
|
|
|
|
|
23
|
$hseq = $aln->get_seq_by_pos(2); |
375
|
12
|
|
|
|
|
20
|
$qinc = $qseq->length - $qseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS); |
376
|
12
|
|
|
|
|
22
|
$hinc = $hseq->length - $hseq->num_gaps($Bio::LocatableSeq::GAP_SYMBOLS); |
377
|
|
|
|
|
|
|
|
378
|
12
|
50
|
33
|
|
|
41
|
push @qfeats, Bio::SeqFeature::Generic->new( |
|
|
50
|
33
|
|
|
|
|
379
|
|
|
|
|
|
|
-start => $qlen+1, |
380
|
|
|
|
|
|
|
-end => $qlen+$qinc, |
381
|
|
|
|
|
|
|
-strand => $qseq->strand, |
382
|
|
|
|
|
|
|
-primary => 'query', |
383
|
|
|
|
|
|
|
-source_tag => 'BLAST', |
384
|
|
|
|
|
|
|
-display_name => 'query coordinates', |
385
|
|
|
|
|
|
|
-tag => { query_id => $qseq->id, |
386
|
|
|
|
|
|
|
query_desc => $qseq->desc, |
387
|
|
|
|
|
|
|
query_start => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*$qlen, |
388
|
|
|
|
|
|
|
query_end => $qstart + (($qseq->strand && $qseq->strand < 0) ? -1 : 1)*($qlen+$qinc-1), |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
); |
391
|
12
|
50
|
33
|
|
|
58
|
push @hfeats, Bio::SeqFeature::Generic->new( |
|
|
50
|
33
|
|
|
|
|
392
|
|
|
|
|
|
|
-start => $hlen+1, |
393
|
|
|
|
|
|
|
-end => $hlen+$hinc, |
394
|
|
|
|
|
|
|
-strand => $hseq->strand, |
395
|
|
|
|
|
|
|
-primary => 'subject/hit', |
396
|
|
|
|
|
|
|
-source_tag => 'BLAST', |
397
|
|
|
|
|
|
|
-display_name => 'subject/hit coordinates', |
398
|
|
|
|
|
|
|
-tag => { subject_id => $hseq->id, |
399
|
|
|
|
|
|
|
subject_desc => $hseq->desc, |
400
|
|
|
|
|
|
|
subject_start => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*$hlen, |
401
|
|
|
|
|
|
|
subject_end => $hstart + (($hseq->strand && $hseq->strand < 0) ? -1 : 1)*($hlen+$hinc-1) |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
); |
404
|
12
|
|
|
|
|
45
|
$query_string .= $qseq->seq; |
405
|
12
|
|
|
|
|
23
|
$hit_string .= $hseq->seq; |
406
|
12
|
|
|
|
|
14
|
$qlen += $qinc; |
407
|
12
|
|
|
|
|
35
|
$hlen += $hinc; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
# create the LocatableSeqs and add the features to each |
410
|
|
|
|
|
|
|
# then add the seqs to the new aln |
411
|
|
|
|
|
|
|
# note in MapTileUtils, Bio::FeatureHolderI methods have been |
412
|
|
|
|
|
|
|
# mixed into Bio::LocatableSeq |
413
|
6
|
|
|
|
|
25
|
my $qseq = Bio::LocatableSeq->new( -id => 'query', |
414
|
|
|
|
|
|
|
-seq => $query_string); |
415
|
6
|
|
|
|
|
24
|
$qseq->add_SeqFeature(@qfeats); |
416
|
6
|
|
|
|
|
23
|
my $hseq = Bio::LocatableSeq->new( -id => 'subject', |
417
|
|
|
|
|
|
|
-seq => $hit_string ); |
418
|
6
|
|
|
|
|
17
|
$hseq->add_SeqFeature(@hfeats); |
419
|
6
|
|
|
|
|
19
|
$taln->add_seq($qseq); |
420
|
6
|
|
|
|
|
12
|
$taln->add_seq($hseq); |
421
|
6
|
|
|
|
|
17
|
push @ret, $taln; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
1
|
|
|
|
|
20
|
return @ret; |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=head1 STATISTICS |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 identities |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Title : identities |
432
|
|
|
|
|
|
|
Usage : $tiling->identities($type, $action, $context) |
433
|
|
|
|
|
|
|
Function: Retrieve the calculated number of identities for the invocant |
434
|
|
|
|
|
|
|
Example : |
435
|
|
|
|
|
|
|
Returns : value of identities (a scalar) |
436
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
437
|
|
|
|
|
|
|
default is 'query' |
438
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
439
|
|
|
|
|
|
|
default is 'exact' |
440
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
441
|
|
|
|
|
|
|
Note : getter only |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=cut |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
sub identities{ |
446
|
538
|
|
|
538
|
1
|
1037
|
my $self = shift; |
447
|
538
|
|
|
|
|
691
|
my ($type, $action, $context) = @_; |
448
|
538
|
|
|
|
|
732
|
$self->_check_type_arg(\$type); |
449
|
538
|
|
|
|
|
780
|
$self->_check_action_arg(\$action); |
450
|
538
|
|
|
|
|
939
|
$self->_check_context_arg($type, \$context); |
451
|
538
|
100
|
|
|
|
1609
|
if (!defined $self->{"identities_${type}_${action}_${context}"}) { |
452
|
531
|
|
|
|
|
932
|
$self->_calc_stats($type, $action, $context); |
453
|
|
|
|
|
|
|
} |
454
|
538
|
|
|
|
|
1179
|
return $self->{"identities_${type}_${action}_${context}"}; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 conserved |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Title : conserved |
460
|
|
|
|
|
|
|
Usage : $tiling->conserved($type, $action) |
461
|
|
|
|
|
|
|
Function: Retrieve the calculated number of conserved sites for the invocant |
462
|
|
|
|
|
|
|
Example : |
463
|
|
|
|
|
|
|
Returns : value of conserved (a scalar) |
464
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
465
|
|
|
|
|
|
|
default is 'query' |
466
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
467
|
|
|
|
|
|
|
default is 'exact' |
468
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
469
|
|
|
|
|
|
|
Note : getter only |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub conserved{ |
474
|
538
|
|
|
538
|
1
|
1234
|
my $self = shift; |
475
|
538
|
|
|
|
|
615
|
my ($type, $action, $context) = @_; |
476
|
538
|
|
|
|
|
794
|
$self->_check_type_arg(\$type); |
477
|
538
|
|
|
|
|
912
|
$self->_check_action_arg(\$action); |
478
|
538
|
|
|
|
|
835
|
$self->_check_context_arg($type, \$context); |
479
|
538
|
50
|
|
|
|
1591
|
if (!defined $self->{"conserved_${type}_${action}_${context}"}) { |
480
|
0
|
|
|
|
|
0
|
$self->_calc_stats($type, $action, $context); |
481
|
|
|
|
|
|
|
} |
482
|
538
|
|
|
|
|
1007
|
return $self->{"conserved_${type}_${action}_${context}"}; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head2 length |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Title : length |
488
|
|
|
|
|
|
|
Usage : $tiling->length($type, $action, $context) |
489
|
|
|
|
|
|
|
Function: Retrieve the total length of aligned residues for |
490
|
|
|
|
|
|
|
the seq $type |
491
|
|
|
|
|
|
|
Example : |
492
|
|
|
|
|
|
|
Returns : value of length (a scalar) |
493
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
494
|
|
|
|
|
|
|
default is 'query' |
495
|
|
|
|
|
|
|
option scalar $action: one of 'exact', 'est', 'fast', 'max' |
496
|
|
|
|
|
|
|
default is 'exact' |
497
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
498
|
|
|
|
|
|
|
Note : getter only |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub length{ |
503
|
1051
|
|
|
1051
|
1
|
1162
|
my $self = shift; |
504
|
1051
|
|
|
|
|
1170
|
my ($type,$action,$context) = @_; |
505
|
1051
|
|
|
|
|
1507
|
$self->_check_type_arg(\$type); |
506
|
1051
|
|
|
|
|
1432
|
$self->_check_action_arg(\$action); |
507
|
1051
|
|
|
|
|
1710
|
$self->_check_context_arg($type, \$context); |
508
|
1051
|
100
|
|
|
|
2436
|
if (!defined $self->{"length_${type}_${action}_${context}"}) { |
509
|
1
|
|
|
|
|
5
|
$self->_calc_stats($type, $action, $context); |
510
|
|
|
|
|
|
|
} |
511
|
1051
|
|
|
|
|
3608
|
return $self->{"length_${type}_${action}_${context}"}; |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 frac |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Title : frac |
517
|
|
|
|
|
|
|
Usage : $tiling->frac($type, $denom, $action, $context, $method) |
518
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
519
|
|
|
|
|
|
|
of desired kinds of pairs (given by $method), |
520
|
|
|
|
|
|
|
with respect to $denom |
521
|
|
|
|
|
|
|
Returns : scalar float |
522
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
523
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
524
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
525
|
|
|
|
|
|
|
-context => strand/frame context string |
526
|
|
|
|
|
|
|
-method => one of 'identical', 'conserved' |
527
|
|
|
|
|
|
|
Note : $denom == 'aligned', return desired_stat/num_aligned |
528
|
|
|
|
|
|
|
$denom == 'total', return desired_stat/_reported_length |
529
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
530
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
531
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
532
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
533
|
|
|
|
|
|
|
amino acid coordinates. |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub frac { |
538
|
1058
|
|
|
1058
|
1
|
151968
|
my $self = shift; |
539
|
1058
|
|
|
|
|
2014
|
my @args = @_; |
540
|
1058
|
|
|
|
|
3454
|
my ($type, $denom, $action, $context, $method) = $self->_rearrange([qw(TYPE DENOM ACTION CONTEXT METHOD)],@args); |
541
|
1058
|
|
|
|
|
2600
|
$self->_check_type_arg(\$type); |
542
|
1058
|
|
|
|
|
1733
|
$self->_check_action_arg(\$action); |
543
|
1058
|
|
|
|
|
2016
|
$self->_check_context_arg($type, \$context); |
544
|
1058
|
50
|
33
|
|
|
9097
|
unless ($method and grep(/^$method$/, qw( identical conserved ))) { |
545
|
0
|
|
|
|
|
0
|
$self->throw("-method must specified; one of ('identical', 'conserved')"); |
546
|
|
|
|
|
|
|
} |
547
|
1058
|
|
50
|
|
|
1441
|
$denom ||= 'total'; |
548
|
1058
|
50
|
|
|
|
3635
|
unless (grep /^$denom/, qw( total aligned )) { |
549
|
0
|
|
|
|
|
0
|
$self->throw("Denominator selection must be one of ('total', 'aligned'), not '$denom'"); |
550
|
|
|
|
|
|
|
} |
551
|
1058
|
|
|
|
|
2213
|
my $key = "frac_${method}_${type}_${denom}_${action}_${context}"; |
552
|
1058
|
|
|
|
|
826
|
my $stat; |
553
|
1058
|
|
|
|
|
1385
|
for ($method) { |
554
|
1058
|
100
|
|
|
|
1761
|
$_ eq 'identical' && do { |
555
|
529
|
|
|
|
|
1003
|
$stat = $self->identities($type, $action, $context); |
556
|
529
|
|
|
|
|
685
|
last; |
557
|
|
|
|
|
|
|
}; |
558
|
529
|
50
|
|
|
|
858
|
$_ eq 'conserved' && do { |
559
|
529
|
|
|
|
|
879
|
$stat = $self->conserved($type, $action, $context); |
560
|
529
|
|
|
|
|
647
|
last; |
561
|
|
|
|
|
|
|
}; |
562
|
0
|
|
|
|
|
0
|
do { |
563
|
0
|
|
|
|
|
0
|
$self->throw("What are YOU doing here?"); |
564
|
|
|
|
|
|
|
}; |
565
|
|
|
|
|
|
|
} |
566
|
1058
|
100
|
|
|
|
2162
|
if (!defined $self->{$key}) { |
567
|
1046
|
|
|
|
|
1395
|
for ($denom) { |
568
|
1046
|
50
|
|
|
|
2176
|
/total/ && do { |
569
|
0
|
|
|
|
|
0
|
$self->{$key} = |
570
|
|
|
|
|
|
|
$stat/$self->_reported_length($type); # need fudge fac?? |
571
|
0
|
|
|
|
|
0
|
last; |
572
|
|
|
|
|
|
|
}; |
573
|
1046
|
50
|
|
|
|
1980
|
/aligned/ && do { |
574
|
1046
|
|
|
|
|
1890
|
$self->{$key} = |
575
|
|
|
|
|
|
|
$stat/$self->length($type,$action,$context); |
576
|
1046
|
|
|
|
|
1151
|
last; |
577
|
|
|
|
|
|
|
}; |
578
|
0
|
|
|
|
|
0
|
do { |
579
|
0
|
|
|
|
|
0
|
$self->throw("What are YOU doing here?"); |
580
|
|
|
|
|
|
|
}; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
} |
583
|
1058
|
|
|
|
|
3297
|
return $self->{$key}; |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=head2 frac_identical |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Title : frac_identical |
589
|
|
|
|
|
|
|
Usage : $tiling->frac_identical($type, $denom, $action, $context) |
590
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
591
|
|
|
|
|
|
|
of identical pairs, with respect to $denom |
592
|
|
|
|
|
|
|
Returns : scalar float |
593
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
594
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
595
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
596
|
|
|
|
|
|
|
-context => strand/frame context string |
597
|
|
|
|
|
|
|
Note : $denom == 'aligned', return conserved/num_aligned |
598
|
|
|
|
|
|
|
$denom == 'total', return conserved/_reported_length |
599
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
600
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
601
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
602
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
603
|
|
|
|
|
|
|
amino acid coordinates. |
604
|
|
|
|
|
|
|
Note : This an alias that calls frac() |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=cut |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
sub frac_identical{ |
609
|
3
|
|
|
3
|
1
|
4
|
my $self = shift; |
610
|
3
|
|
|
|
|
8
|
my @args = @_; |
611
|
3
|
|
|
|
|
14
|
my ($type, $denom, $action,$context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args ); |
612
|
3
|
|
|
|
|
38
|
$self->frac( -type=>$type, -denom=>$denom, -action=>$action, -method=>'identical', -context=>$context); |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=head2 frac_conserved |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
Title : frac_conserved |
618
|
|
|
|
|
|
|
Usage : $tiling->frac_conserved($type, $denom, $action, $context) |
619
|
|
|
|
|
|
|
Function: Return the fraction of sequence length consisting |
620
|
|
|
|
|
|
|
of conserved pairs, with respect to $denom |
621
|
|
|
|
|
|
|
Returns : scalar float |
622
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
623
|
|
|
|
|
|
|
-denom => one of 'total', 'aligned' |
624
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
625
|
|
|
|
|
|
|
-context => strand/frame context string |
626
|
|
|
|
|
|
|
Note : $denom == 'aligned', return conserved/num_aligned |
627
|
|
|
|
|
|
|
$denom == 'total', return conserved/_reported_length |
628
|
|
|
|
|
|
|
(i.e., length of the original input sequences) |
629
|
|
|
|
|
|
|
Note : In keeping with the spirit of Bio::Search::HSP::HSPI, |
630
|
|
|
|
|
|
|
reported lengths of translated dna are reduced by |
631
|
|
|
|
|
|
|
a factor of 3, to provide fractions relative to |
632
|
|
|
|
|
|
|
amino acid coordinates. |
633
|
|
|
|
|
|
|
Note : This an alias that calls frac() |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=cut |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
sub frac_conserved{ |
638
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
639
|
3
|
|
|
|
|
7
|
my @args = @_; |
640
|
3
|
|
|
|
|
13
|
my ($type, $denom, $action, $context) = $self->_rearrange( [qw[ TYPE DENOM ACTION CONTEXT]],@args ); |
641
|
3
|
|
|
|
|
17
|
$self->frac( -type=>$type, -denom=>$denom, -action=>$action, -context=>$context, -method=>'conserved'); |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=head2 frac_aligned |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Title : frac_aligned |
647
|
|
|
|
|
|
|
Aliases : frac_aligned_query - frac_aligned(-type=>'query',...) |
648
|
|
|
|
|
|
|
frac_aligned_hit - frac_aligned(-type=>'hit',...) |
649
|
|
|
|
|
|
|
Usage : $tiling->frac_aligned(-type=>$type, |
650
|
|
|
|
|
|
|
-action=>$action, |
651
|
|
|
|
|
|
|
-context=>$context) |
652
|
|
|
|
|
|
|
Function: Return the fraction of input sequence length |
653
|
|
|
|
|
|
|
that was aligned by the algorithm |
654
|
|
|
|
|
|
|
Returns : scalar float |
655
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
656
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
657
|
|
|
|
|
|
|
-context => strand/frame context string |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=cut |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
sub frac_aligned{ |
662
|
4
|
|
|
4
|
1
|
12
|
my ($self, @args) = @_; |
663
|
4
|
|
|
|
|
18
|
my ($type, $action, $context) = $self->_rearrange([qw(TYPE ACTION CONTEXT)],@args); |
664
|
4
|
|
|
|
|
16
|
$self->_check_type_arg(\$type); |
665
|
4
|
|
|
|
|
9
|
$self->_check_action_arg(\$action); |
666
|
4
|
|
|
|
|
12
|
$self->_check_context_arg($type, \$context); |
667
|
4
|
50
|
|
|
|
14
|
if (!$self->{"frac_aligned_${type}_${action}_${context}"}) { |
668
|
4
|
|
|
|
|
9
|
$self->{"frac_aligned_${type}_${action}_${context}"} = $self->num_aligned($type,$action,$context)/$self->_reported_length($type); |
669
|
|
|
|
|
|
|
} |
670
|
4
|
|
|
|
|
74
|
return $self->{"frac_aligned_${type}_${action}_${context}"}; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
2
|
|
|
2
|
0
|
10
|
sub frac_aligned_query { shift->frac_aligned(-type=>'query', @_) } |
674
|
2
|
|
|
2
|
0
|
24
|
sub frac_aligned_hit { shift->frac_aligned(-type=>'hit', @_) } |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head2 num_aligned |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Title : num_aligned |
680
|
|
|
|
|
|
|
Usage : $tiling->num_aligned(-type=>$type) |
681
|
|
|
|
|
|
|
Function: Return the number of residues of sequence $type |
682
|
|
|
|
|
|
|
that were aligned by the algorithm |
683
|
|
|
|
|
|
|
Returns : scalar int |
684
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
685
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
686
|
|
|
|
|
|
|
-context => strand/frame context string |
687
|
|
|
|
|
|
|
Note : Since this is calculated from reported coordinates, |
688
|
|
|
|
|
|
|
not symbol string counts, it is already in terms of |
689
|
|
|
|
|
|
|
"logical length" |
690
|
|
|
|
|
|
|
Note : Aliases length() |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=cut |
693
|
|
|
|
|
|
|
|
694
|
4
|
|
|
4
|
1
|
12
|
sub num_aligned { shift->length( @_ ) }; |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=head2 num_unaligned |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Title : num_unaligned |
699
|
|
|
|
|
|
|
Usage : $tiling->num_unaligned(-type=>$type) |
700
|
|
|
|
|
|
|
Function: Return the number of residues of sequence $type |
701
|
|
|
|
|
|
|
that were left unaligned by the algorithm |
702
|
|
|
|
|
|
|
Returns : scalar int |
703
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
704
|
|
|
|
|
|
|
-action => one of 'exact', 'est', 'fast', 'max' |
705
|
|
|
|
|
|
|
-context => strand/frame context string |
706
|
|
|
|
|
|
|
Note : Since this is calculated from reported coordinates, |
707
|
|
|
|
|
|
|
not symbol string counts, it is already in terms of |
708
|
|
|
|
|
|
|
"logical length" |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=cut |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub num_unaligned { |
713
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
714
|
0
|
|
|
|
|
0
|
my ($type,$action,$context) = @_; |
715
|
0
|
|
|
|
|
0
|
my $ret; |
716
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
717
|
0
|
|
|
|
|
0
|
$self->_check_action_arg(\$action); |
718
|
0
|
|
|
|
|
0
|
$self->_check_context_arg($type, \$context); |
719
|
0
|
0
|
|
|
|
0
|
if (!defined $self->{"num_unaligned_${type}_${action}_${context}"}) { |
720
|
0
|
|
|
|
|
0
|
$self->{"num_unaligned_${type}_${action}_${context}"} = $self->_reported_length($type)-$self->num_aligned($type,$action,$context); |
721
|
|
|
|
|
|
|
} |
722
|
0
|
|
|
|
|
0
|
return $self->{"num_unaligned_${type}_${action}_${context}"}; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=head2 range |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
Title : range |
729
|
|
|
|
|
|
|
Usage : $tiling->range(-type=>$type) |
730
|
|
|
|
|
|
|
Function: Returns the extent of the longest tiling |
731
|
|
|
|
|
|
|
as ($min_coord, $max_coord) |
732
|
|
|
|
|
|
|
Returns : array of two scalar integers |
733
|
|
|
|
|
|
|
Args : -type => one of 'hit', 'subject', 'query' |
734
|
|
|
|
|
|
|
-context => strand/frame context string |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=cut |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
sub range { |
739
|
8
|
|
|
8
|
1
|
1785
|
my ($self, $type, $context) = @_; |
740
|
8
|
|
|
|
|
20
|
$self->_check_type_arg(\$type); |
741
|
8
|
|
|
|
|
19
|
$self->_check_context_arg($type, \$context); |
742
|
8
|
|
|
|
|
22
|
my @a = $self->_contig_intersection($type,$context); |
743
|
8
|
|
|
|
|
48
|
return ($a[0][0], $a[-1][1]); |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=head1 ACCESSORS |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head2 coverage_map |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Title : coverage_map |
753
|
|
|
|
|
|
|
Usage : $map = $tiling->coverage_map($type) |
754
|
|
|
|
|
|
|
Function: Property to contain the coverage map calculated |
755
|
|
|
|
|
|
|
by _calc_coverage_map() - see that for |
756
|
|
|
|
|
|
|
details |
757
|
|
|
|
|
|
|
Example : |
758
|
|
|
|
|
|
|
Returns : value of coverage_map_$type as an array |
759
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
760
|
|
|
|
|
|
|
default is 'query' |
761
|
|
|
|
|
|
|
Note : getter |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=cut |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub coverage_map{ |
766
|
332
|
|
|
332
|
1
|
278
|
my $self = shift; |
767
|
332
|
|
|
|
|
423
|
my ($type, $context) = @_; |
768
|
332
|
|
|
|
|
557
|
$self->_check_type_arg(\$type); |
769
|
332
|
|
|
|
|
563
|
$self->_check_context_arg($type, \$context); |
770
|
|
|
|
|
|
|
|
771
|
332
|
100
|
|
|
|
921
|
if (!defined $self->{"coverage_map_${type}_${context}"}) { |
772
|
|
|
|
|
|
|
# following calculates coverage maps in all strands/frames |
773
|
|
|
|
|
|
|
# if necessary |
774
|
203
|
|
|
|
|
372
|
$self->_calc_coverage_map($type, $context); |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
# if undef is returned, then there were no hsps for given strand/frame |
777
|
332
|
50
|
|
|
|
964
|
if (!defined $self->{"coverage_map_${type}_${context}"}) { |
778
|
0
|
|
|
|
|
0
|
$self->warn("No HSPS present for type '$type' in context '$context' for this hit"); |
779
|
0
|
|
|
|
|
0
|
return undef; |
780
|
|
|
|
|
|
|
} |
781
|
332
|
|
|
|
|
273
|
return @{$self->{"coverage_map_${type}_${context}"}}; |
|
332
|
|
|
|
|
943
|
|
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=head2 coverage_map_as_text |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
Title : coverage_map_as_text |
787
|
|
|
|
|
|
|
Usage : $tiling->coverage_map_as_text($type, $legend_flag) |
788
|
|
|
|
|
|
|
Function: Format a text-graphic representation of the |
789
|
|
|
|
|
|
|
coverage map |
790
|
|
|
|
|
|
|
Returns : an array of scalar strings, suitable for printing |
791
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
792
|
|
|
|
|
|
|
$context: strand/frame context string |
793
|
|
|
|
|
|
|
$legend_flag: boolean; add a legend indicating |
794
|
|
|
|
|
|
|
the actual interval coordinates for each component |
795
|
|
|
|
|
|
|
interval and hsp (in the $type sequence context) |
796
|
|
|
|
|
|
|
Example : print $tiling->coverage_map_as_text('query',1); |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub coverage_map_as_text{ |
801
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
802
|
0
|
|
|
|
|
0
|
my ($type, $context, $legend_q) = @_; |
803
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
804
|
0
|
|
|
|
|
0
|
$self->_check_context_arg($type, \$context); |
805
|
|
|
|
|
|
|
|
806
|
0
|
|
|
|
|
0
|
my @map = $self->coverage_map($type, $context); |
807
|
0
|
|
|
|
|
0
|
my @ret; |
808
|
0
|
|
|
|
|
0
|
my @hsps = $self->hit->hsps; |
809
|
0
|
|
|
|
|
0
|
my %hsps_i; |
810
|
0
|
|
|
|
|
0
|
require Tie::RefHash; |
811
|
0
|
|
|
|
|
0
|
tie %hsps_i, 'Tie::RefHash'; |
812
|
0
|
|
|
|
|
0
|
@hsps_i{@hsps} = (0..$#hsps); |
813
|
0
|
|
|
|
|
0
|
my @mx; |
814
|
0
|
|
|
|
|
0
|
foreach (0..$#map) { |
815
|
0
|
|
|
|
|
0
|
my @hspx = ('') x @hsps; |
816
|
0
|
|
|
|
|
0
|
my @these_hsps = @{$map[$_]->[1]}; |
|
0
|
|
|
|
|
0
|
|
817
|
0
|
|
|
|
|
0
|
@hspx[@hsps_i{@these_hsps}] = ('*') x @these_hsps; |
818
|
0
|
|
|
|
|
0
|
$mx[$_] = \@hspx; |
819
|
|
|
|
|
|
|
} |
820
|
0
|
|
|
|
|
0
|
untie %hsps_i; |
821
|
|
|
|
|
|
|
|
822
|
0
|
|
|
|
|
0
|
push @ret, "\tIntvl\n"; |
823
|
0
|
|
|
|
|
0
|
push @ret, "HSPS\t", join ("\t", (0..$#map)), "\n"; |
824
|
0
|
|
|
|
|
0
|
foreach my $h (0..$#hsps) { |
825
|
0
|
|
|
|
|
0
|
push @ret, join("\t", $h, map { $mx[$_][$h] } (0..$#map) ),"\n"; |
|
0
|
|
|
|
|
0
|
|
826
|
|
|
|
|
|
|
} |
827
|
0
|
0
|
|
|
|
0
|
if ($legend_q) { |
828
|
0
|
|
|
|
|
0
|
push @ret, "Interval legend\n"; |
829
|
0
|
|
|
|
|
0
|
foreach (0..$#map) { |
830
|
0
|
|
|
|
|
0
|
push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$map[$_][0]}); |
|
0
|
|
|
|
|
0
|
|
831
|
|
|
|
|
|
|
} |
832
|
0
|
|
|
|
|
0
|
push @ret, "HSP legend\n"; |
833
|
0
|
|
|
|
|
0
|
my @ints = get_intervals_from_hsps($type,@hsps); |
834
|
0
|
|
|
|
|
0
|
foreach (0..$#hsps) { |
835
|
0
|
|
|
|
|
0
|
push @ret, sprintf("%d\t[%d, %d]\n", $_, @{$ints[$_]}); |
|
0
|
|
|
|
|
0
|
|
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
} |
838
|
0
|
|
|
|
|
0
|
return @ret; |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
=head2 hit |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
Title : hit |
844
|
|
|
|
|
|
|
Usage : $tiling->hit |
845
|
|
|
|
|
|
|
Function: |
846
|
|
|
|
|
|
|
Example : |
847
|
|
|
|
|
|
|
Returns : The HitI object associated with the invocant |
848
|
|
|
|
|
|
|
Args : none |
849
|
|
|
|
|
|
|
Note : getter only |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=cut |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub hit{ |
854
|
5229
|
|
|
5229
|
1
|
3321
|
my $self = shift; |
855
|
5229
|
50
|
|
|
|
6209
|
$self->warn("Getter only") if @_; |
856
|
5229
|
|
|
|
|
10182
|
return $self->{'hit'}; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=head2 hsps |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
Title : hsps |
862
|
|
|
|
|
|
|
Usage : $tiling->hsps() |
863
|
|
|
|
|
|
|
Function: Container for the HSP objects associated with invocant |
864
|
|
|
|
|
|
|
Example : |
865
|
|
|
|
|
|
|
Returns : an array of hsps associated with the hit |
866
|
|
|
|
|
|
|
Args : on set, new value (an arrayref or undef, optional) |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=cut |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
sub hsps{ |
871
|
1768
|
|
|
1768
|
1
|
1302
|
my $self = shift; |
872
|
1768
|
100
|
|
|
|
2953
|
return $self->{'hsps'} = shift if @_; |
873
|
1249
|
|
|
|
|
894
|
return @{$self->{'hsps'}}; |
|
1249
|
|
|
|
|
3225
|
|
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
=head2 contexts |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
Title : contexts |
879
|
|
|
|
|
|
|
Usage : @contexts = $tiling->context($type) or |
880
|
|
|
|
|
|
|
@indices = $tiling->context($type, $context) |
881
|
|
|
|
|
|
|
Function: Retrieve the set of available contexts in the hit, |
882
|
|
|
|
|
|
|
or the indices of hsps having the given context |
883
|
|
|
|
|
|
|
(integer indices for the array returned by $self->hsps) |
884
|
|
|
|
|
|
|
Returns : array of scalar context strings or |
885
|
|
|
|
|
|
|
array of scalar positive integers |
886
|
|
|
|
|
|
|
undef if no hsps in given context |
887
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
888
|
|
|
|
|
|
|
optional $context: context string |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=cut |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub contexts{ |
893
|
1032
|
|
|
1032
|
1
|
64327
|
my $self = shift; |
894
|
1032
|
|
|
|
|
962
|
my ($type, $context) = @_; |
895
|
1032
|
|
|
|
|
1305
|
$self->_check_type_arg(\$type); |
896
|
1032
|
100
|
|
|
|
1413
|
return keys %{$self->{"_contexts_$type"}} unless defined $context; |
|
403
|
|
|
|
|
1368
|
|
897
|
629
|
50
|
|
|
|
1387
|
return undef unless $self->{"_contexts_$type"}{$context}; |
898
|
629
|
|
|
|
|
421
|
return @{$self->{"_contexts_$type"}{$context}}; |
|
629
|
|
|
|
|
1540
|
|
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=head2 mapping |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
Title : mapping |
904
|
|
|
|
|
|
|
Usage : $tiling->mapping($type) |
905
|
|
|
|
|
|
|
Function: Retrieve the mapping coefficient for the sequence type |
906
|
|
|
|
|
|
|
based on the underlying algorithm |
907
|
|
|
|
|
|
|
Returns : scalar integer (mapping coefficient) |
908
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
909
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
sub mapping{ |
914
|
505
|
|
|
505
|
1
|
381
|
my $self = shift; |
915
|
505
|
|
|
|
|
378
|
my $type = shift; |
916
|
505
|
|
|
|
|
653
|
$self->_check_type_arg(\$type); |
917
|
505
|
|
|
|
|
1020
|
return $self->{"_mapping_${type}"}; |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head2 default_context |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
Title : default_context |
923
|
|
|
|
|
|
|
Usage : $tiling->default_context($type) |
924
|
|
|
|
|
|
|
Function: Retrieve the default strand/frame context string |
925
|
|
|
|
|
|
|
for the sequence type based on the underlying algorithm |
926
|
|
|
|
|
|
|
Returns : scalar string (context string) |
927
|
|
|
|
|
|
|
Args : $type: one of 'query', 'hit', 'subject' |
928
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=cut |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
sub default_context{ |
933
|
298
|
|
|
298
|
1
|
194
|
my $self = shift; |
934
|
298
|
|
|
|
|
186
|
my $type = shift; |
935
|
298
|
|
|
|
|
327
|
$self->_check_type_arg(\$type); |
936
|
298
|
|
|
|
|
477
|
return $self->{"_def_context_${type}"}; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head2 algorithm |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
Title : algorithm |
942
|
|
|
|
|
|
|
Usage : $tiling->algorithm |
943
|
|
|
|
|
|
|
Function: Retrieve the algorithm name associated with the |
944
|
|
|
|
|
|
|
invocant's hit object |
945
|
|
|
|
|
|
|
Returns : scalar string |
946
|
|
|
|
|
|
|
Args : none |
947
|
|
|
|
|
|
|
Note : getter only (set in constructor) |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
=cut |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sub algorithm{ |
952
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
953
|
0
|
0
|
|
|
|
0
|
$self->warn("Getter only") if @_; |
954
|
0
|
|
|
|
|
0
|
return $self->{"_algorithm"}; |
955
|
|
|
|
|
|
|
} |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=head1 "PRIVATE" METHODS |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 Calculators |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
See L for lower level |
962
|
|
|
|
|
|
|
calculation methods. |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=head2 _calc_coverage_map |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Title : _calc_coverage_map |
967
|
|
|
|
|
|
|
Usage : $tiling->_calc_coverage_map($type) |
968
|
|
|
|
|
|
|
Function: Calculates the coverage map for the object's associated |
969
|
|
|
|
|
|
|
hit from the perspective of the desired $type (see Args:) |
970
|
|
|
|
|
|
|
and sets the coverage_map() property |
971
|
|
|
|
|
|
|
Returns : True on success |
972
|
|
|
|
|
|
|
Args : optional scalar $type: one of 'hit'|'subject'|'query' |
973
|
|
|
|
|
|
|
default is 'query' |
974
|
|
|
|
|
|
|
Note : The "coverage map" is an array with the following format: |
975
|
|
|
|
|
|
|
( [ $component_interval => [ @containing_hsps ] ], ... ), |
976
|
|
|
|
|
|
|
where $component_interval is a closed interval (see |
977
|
|
|
|
|
|
|
DESCRIPTION) of the form [$a0, $a1] with $a0 <= $a1, and |
978
|
|
|
|
|
|
|
@containing_hsps is an array of all HspI objects in the hit |
979
|
|
|
|
|
|
|
which completely contain the $component_interval. |
980
|
|
|
|
|
|
|
The set of $component_interval's is a disjoint decomposition |
981
|
|
|
|
|
|
|
of the minimum set of minimal intervals that completely |
982
|
|
|
|
|
|
|
cover the hit's HSPs (from the perspective of the $type) |
983
|
|
|
|
|
|
|
Note : This calculates the map for all strand/frame contexts available |
984
|
|
|
|
|
|
|
in the hit |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
=cut |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
sub _calc_coverage_map { |
989
|
207
|
|
|
207
|
|
192
|
my $self = shift; |
990
|
207
|
|
|
|
|
271
|
my ($type) = @_; |
991
|
207
|
|
|
|
|
338
|
$self->_check_type_arg(\$type); |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
# obtain the [start, end] intervals for all hsps in the hit (relative |
994
|
|
|
|
|
|
|
# to the type) |
995
|
207
|
50
|
|
|
|
447
|
unless ($self->{'hsps'}) { |
996
|
0
|
|
|
|
|
0
|
$self->warn("No HSPs for this hit"); |
997
|
0
|
|
|
|
|
0
|
return; |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
207
|
|
|
|
|
182
|
my (@map, @hsps, %filters, @intervals); |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
# conversion here? |
1004
|
207
|
|
|
|
|
362
|
my $c = $self->mapping($type); |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
# create the possible maps |
1007
|
207
|
|
|
|
|
339
|
for my $context ($self->contexts($type)) { |
1008
|
219
|
|
|
|
|
307
|
@map = (); |
1009
|
219
|
|
|
|
|
354
|
@hsps = ($self->hsps)[$self->contexts($type, $context)]; |
1010
|
219
|
|
|
|
|
846
|
@intervals = get_intervals_from_hsps( $type, @hsps ); |
1011
|
|
|
|
|
|
|
# the "frame" |
1012
|
219
|
|
|
|
|
502
|
my $f = ($intervals[0]->[0] - 1) % $c; |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
# convert interval endpoints... |
1015
|
219
|
|
|
|
|
352
|
for (@intervals) { |
1016
|
536
|
|
|
|
|
675
|
$$_[0] = ($$_[0] - $f + $c - 1)/$c; |
1017
|
536
|
|
|
|
|
667
|
$$_[1] = ($$_[1] - $f)/$c; |
1018
|
|
|
|
|
|
|
} |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
# determine the minimal set of disjoint intervals that cover the |
1021
|
|
|
|
|
|
|
# set of hsp intervals |
1022
|
219
|
|
|
|
|
592
|
my @dj_set = interval_tiling(\@intervals); |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
# decompose each disjoint interval into another set of disjoint |
1025
|
|
|
|
|
|
|
# intervals, each of which is completely contained within the |
1026
|
|
|
|
|
|
|
# original hsp intervals with which it overlaps |
1027
|
219
|
|
|
|
|
198
|
my $i=0; |
1028
|
219
|
|
|
|
|
186
|
my @decomp; |
1029
|
219
|
|
|
|
|
285
|
for my $dj_elt (@dj_set) { |
1030
|
432
|
|
|
|
|
527
|
my ($covering, $indices) = @$dj_elt; |
1031
|
432
|
|
|
|
|
702
|
my @covering_hsps = @hsps[@$indices]; |
1032
|
432
|
|
|
|
|
491
|
my @coverers = @intervals[@$indices]; |
1033
|
432
|
|
|
|
|
779
|
@decomp = decompose_interval( \@coverers ); |
1034
|
432
|
|
|
|
|
564
|
for (@decomp) { |
1035
|
638
|
|
|
|
|
442
|
my ($component, $container_indices) = @{$_}; |
|
638
|
|
|
|
|
703
|
|
1036
|
638
|
|
|
|
|
1286
|
push @map, [ $component, |
1037
|
|
|
|
|
|
|
[@covering_hsps[@$container_indices]] ]; |
1038
|
|
|
|
|
|
|
} |
1039
|
432
|
|
|
|
|
535
|
1; |
1040
|
|
|
|
|
|
|
} |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
# unconvert the components: |
1043
|
|
|
|
|
|
|
##### |
1044
|
219
|
|
|
|
|
335
|
foreach (@map) { |
1045
|
638
|
|
|
|
|
904
|
$$_[0][0] = $c*$$_[0][0] - $c + 1 + $f; |
1046
|
638
|
|
|
|
|
798
|
$$_[0][1] = $c*$$_[0][1] + $f; |
1047
|
|
|
|
|
|
|
} |
1048
|
219
|
|
|
|
|
327
|
foreach (@dj_set) { |
1049
|
432
|
|
|
|
|
590
|
$$_[0][0] = $c*$$_[0][0] - $c + 1 + $f; |
1050
|
432
|
|
|
|
|
497
|
$$_[0][1] = $c*$$_[0][1] + $f; |
1051
|
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
# sort the map on the interval left-ends |
1054
|
219
|
|
|
|
|
401
|
@map = sort { $a->[0][0]<=>$b->[0][0] } @map; |
|
980
|
|
|
|
|
696
|
|
1055
|
219
|
|
|
|
|
694
|
$self->{"coverage_map_${type}_${context}"} = [@map]; |
1056
|
|
|
|
|
|
|
# set the _contig_intersection attribute here (side effect) |
1057
|
219
|
|
|
|
|
313
|
$self->{"_contig_intersection_${type}_${context}"} = [map { $$_[0] } @map]; |
|
638
|
|
|
|
|
1507
|
|
1058
|
|
|
|
|
|
|
} |
1059
|
|
|
|
|
|
|
|
1060
|
207
|
|
|
|
|
550
|
return 1; # success |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=head2 _calc_stats |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
Title : _calc_stats |
1066
|
|
|
|
|
|
|
Usage : $tiling->_calc_stats($type, $action, $context) |
1067
|
|
|
|
|
|
|
Function: Calculates [estimated] tiling statistics (identities, conserved sites |
1068
|
|
|
|
|
|
|
length) and sets the public accessors |
1069
|
|
|
|
|
|
|
Returns : True on success |
1070
|
|
|
|
|
|
|
Args : scalar $type: one of 'hit', 'subject', 'query' |
1071
|
|
|
|
|
|
|
default is 'query' |
1072
|
|
|
|
|
|
|
optional scalar $action: requests calculation method |
1073
|
|
|
|
|
|
|
currently one of 'exact', 'est', 'fast', 'max' |
1074
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
1075
|
|
|
|
|
|
|
Note : Action: The statistics are calculated by summing quantities |
1076
|
|
|
|
|
|
|
over the disjoint component intervals, taking into account |
1077
|
|
|
|
|
|
|
coverage of those intervals by multiple HSPs. The action |
1078
|
|
|
|
|
|
|
tells the algorithm how to obtain those quantities-- |
1079
|
|
|
|
|
|
|
'exact' will use Bio::Search::HSP::HSPI::matches |
1080
|
|
|
|
|
|
|
to count the appropriate segment of the homology string; |
1081
|
|
|
|
|
|
|
'est' will estimate the statistics by multiplying the |
1082
|
|
|
|
|
|
|
fraction of the HSP overlapped by the component interval |
1083
|
|
|
|
|
|
|
(see MapTileUtils) by the BLAST-reported identities/postives |
1084
|
|
|
|
|
|
|
(this may be convenient for BLAST summary report formats) |
1085
|
|
|
|
|
|
|
* Both exact and est take the average over the number of HSPs |
1086
|
|
|
|
|
|
|
that overlap the component interval. |
1087
|
|
|
|
|
|
|
'max' uses the exact method to calculate the statistics, |
1088
|
|
|
|
|
|
|
and returns only the maximum identites/positives over |
1089
|
|
|
|
|
|
|
overlapping HSP for the component interval. No averaging |
1090
|
|
|
|
|
|
|
is involved here. |
1091
|
|
|
|
|
|
|
'fast' doesn't involve tiling at all (hence the name), |
1092
|
|
|
|
|
|
|
but it seems like a very good estimate, and uses only |
1093
|
|
|
|
|
|
|
reported values, and so does not require sequence data. It |
1094
|
|
|
|
|
|
|
calculates an average of reported identities, conserved |
1095
|
|
|
|
|
|
|
sites, and lengths, over unmodified hsps in the hit, |
1096
|
|
|
|
|
|
|
weighted by the length of the hsps. |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=cut |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
sub _calc_stats { |
1101
|
532
|
|
|
532
|
|
423
|
my $self = shift; |
1102
|
532
|
|
|
|
|
595
|
my ($type, $action, $context) = @_; |
1103
|
|
|
|
|
|
|
# need to check args here, in case method is called internally. |
1104
|
532
|
|
|
|
|
665
|
$self->_check_type_arg(\$type); |
1105
|
532
|
|
|
|
|
727
|
$self->_check_action_arg(\$action); |
1106
|
532
|
|
|
|
|
724
|
$self->_check_context_arg($type, \$context); |
1107
|
532
|
|
|
|
|
630
|
my ($ident, $cons, $length) = (0,0,0); |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# fast : avoid coverage map altogether, get a pretty damn |
1110
|
|
|
|
|
|
|
# good estimate with a weighted average of reported hsp |
1111
|
|
|
|
|
|
|
# statistics |
1112
|
|
|
|
|
|
|
|
1113
|
532
|
100
|
|
|
|
941
|
($action eq 'fast') && do { |
1114
|
204
|
|
|
|
|
385
|
my @hsps = $self->hit->hsps; |
1115
|
204
|
|
|
|
|
442
|
@hsps = @hsps[$self->contexts($type, $context)]; |
1116
|
|
|
|
|
|
|
# weights for averages |
1117
|
204
|
|
|
|
|
352
|
my @wt = map {$_->length($type)} @hsps; |
|
450
|
|
|
|
|
829
|
|
1118
|
204
|
|
|
|
|
11969
|
my $sum = eval( join('+',@wt) ); |
1119
|
204
|
|
|
|
|
962
|
$_ /= $sum for (@wt); |
1120
|
204
|
|
|
|
|
279
|
for (@hsps) { |
1121
|
450
|
|
|
|
|
566
|
my $wt = shift @wt; |
1122
|
450
|
|
|
|
|
1164
|
$ident += $wt*$_->matches_MT($type,'identities'); |
1123
|
450
|
|
|
|
|
921
|
$cons += $wt*$_->matches_MT($type,'conserved'); |
1124
|
450
|
|
|
|
|
955
|
$length += $wt*$_->length($type); |
1125
|
|
|
|
|
|
|
} |
1126
|
|
|
|
|
|
|
}; |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
# or, do tiling |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# calculate identities/conserved sites in tiling |
1131
|
|
|
|
|
|
|
# estimate based on the fraction of the component interval covered |
1132
|
|
|
|
|
|
|
# and ident/cons reported by the HSPs |
1133
|
532
|
100
|
|
|
|
839
|
($action ne 'fast') && do { |
1134
|
328
|
|
|
|
|
682
|
foreach ($self->coverage_map($type, $context)) { |
1135
|
1384
|
|
|
|
|
1189
|
my ($intvl, $hsps) = @{$_}; |
|
1384
|
|
|
|
|
1958
|
|
1136
|
1384
|
|
|
|
|
2067
|
my $len = ($$intvl[1]-$$intvl[0]+1); |
1137
|
1384
|
100
|
|
|
|
2040
|
my $ncover = ($action eq 'max') ? 1 : scalar @$hsps; |
1138
|
1384
|
|
|
|
|
1251
|
my ($acc_i, $acc_c) = (0,0); |
1139
|
1384
|
|
|
|
|
1353
|
foreach my $hsp (@$hsps) { |
1140
|
1764
|
|
|
|
|
1570
|
for ($action) { |
1141
|
1764
|
100
|
|
|
|
2579
|
($_ eq 'est') && do { |
1142
|
788
|
|
|
|
|
2549
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
1143
|
|
|
|
|
|
|
-type => $type, |
1144
|
|
|
|
|
|
|
-action => 'searchutils', |
1145
|
|
|
|
|
|
|
); |
1146
|
788
|
|
|
|
|
1734
|
my $frac = $len/$hsp->length($type); |
1147
|
788
|
|
|
|
|
881
|
$acc_i += $inc_i * $frac; |
1148
|
788
|
|
|
|
|
573
|
$acc_c += $inc_c * $frac; |
1149
|
788
|
|
|
|
|
1202
|
last; |
1150
|
|
|
|
|
|
|
}; |
1151
|
976
|
100
|
|
|
|
1326
|
($_ eq 'max') && do { |
1152
|
483
|
|
|
|
|
1587
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
1153
|
|
|
|
|
|
|
-type => $type, |
1154
|
|
|
|
|
|
|
-action => 'searchutils', |
1155
|
|
|
|
|
|
|
-start => $$intvl[0], |
1156
|
|
|
|
|
|
|
-end => $$intvl[1] |
1157
|
|
|
|
|
|
|
); |
1158
|
483
|
100
|
|
|
|
1003
|
$acc_i = ($acc_i > $inc_i) ? $acc_i : $inc_i; |
1159
|
483
|
100
|
|
|
|
552
|
$acc_c = ($acc_c > $inc_c) ? $acc_c : $inc_c; |
1160
|
483
|
|
|
|
|
730
|
last; |
1161
|
|
|
|
|
|
|
}; |
1162
|
493
|
50
|
33
|
|
|
1453
|
(!$_ || ($_ eq 'exact')) && do { |
1163
|
493
|
|
|
|
|
1705
|
my ($inc_i, $inc_c) = $hsp->matches_MT( |
1164
|
|
|
|
|
|
|
-type => $type, |
1165
|
|
|
|
|
|
|
-action => 'searchutils', |
1166
|
|
|
|
|
|
|
-start => $$intvl[0], |
1167
|
|
|
|
|
|
|
-end => $$intvl[1] |
1168
|
|
|
|
|
|
|
); |
1169
|
493
|
|
|
|
|
611
|
$acc_i += $inc_i; |
1170
|
493
|
|
|
|
|
297
|
$acc_c += $inc_c; |
1171
|
493
|
|
|
|
|
714
|
last; |
1172
|
|
|
|
|
|
|
}; |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
} |
1175
|
1384
|
|
|
|
|
1695
|
$ident += ($acc_i/$ncover); |
1176
|
1384
|
|
|
|
|
1003
|
$cons += ($acc_c/$ncover); |
1177
|
1384
|
|
|
|
|
1596
|
$length += $len; |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
}; |
1180
|
|
|
|
|
|
|
|
1181
|
532
|
|
|
|
|
1700
|
$self->{"identities_${type}_${action}_${context}"} = $ident; |
1182
|
532
|
|
|
|
|
1152
|
$self->{"conserved_${type}_${action}_${context}"} = $cons; |
1183
|
532
|
|
|
|
|
1105
|
$self->{"length_${type}_${action}_${context}"} = $length; |
1184
|
|
|
|
|
|
|
|
1185
|
532
|
|
|
|
|
758
|
return 1; |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 Tiling Helper Methods |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=cut |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
# coverage_map is of the form |
1193
|
|
|
|
|
|
|
# ( [ $interval, \@containing_hsps ], ... ) |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
# so, for each interval, pick one of the containing hsps, |
1196
|
|
|
|
|
|
|
# and return the union of all the picks. |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
# use the combinatorial generating iterator, with |
1199
|
|
|
|
|
|
|
# the urns containing the @containing_hsps for each |
1200
|
|
|
|
|
|
|
# interval |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
=head2 _make_tiling_iterator |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
Title : _make_tiling_iterator |
1205
|
|
|
|
|
|
|
Usage : $self->_make_tiling_iterator($type) |
1206
|
|
|
|
|
|
|
Function: Create an iterator code ref that will step through all |
1207
|
|
|
|
|
|
|
minimal combinations of HSPs that produce complete coverage |
1208
|
|
|
|
|
|
|
of the $type ('hit', 'subject', 'query') sequence, |
1209
|
|
|
|
|
|
|
and set the correct iterator property of the invocant |
1210
|
|
|
|
|
|
|
Example : |
1211
|
|
|
|
|
|
|
Returns : The iterator |
1212
|
|
|
|
|
|
|
Args : scalar $type, one of 'hit', 'subject', 'query'; |
1213
|
|
|
|
|
|
|
default is 'query' |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=cut |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
sub _make_tiling_iterator { |
1218
|
|
|
|
|
|
|
### create the urns |
1219
|
3
|
|
|
3
|
|
7
|
my $self = shift; |
1220
|
3
|
|
|
|
|
3
|
my ($type, $context) = @_; |
1221
|
3
|
|
|
|
|
7
|
$self->_check_type_arg(\$type); |
1222
|
3
|
|
|
|
|
6
|
$self->_check_context_arg($type, \$context); |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
# initialize the urns |
1225
|
3
|
|
|
|
|
13
|
my @urns = map { [0, $$_[1]] } $self->coverage_map($type, $context); |
|
43
|
|
|
|
|
49
|
|
1226
|
|
|
|
|
|
|
|
1227
|
3
|
|
|
|
|
6
|
my $FINISHED = 0; |
1228
|
|
|
|
|
|
|
my $iter = sub { |
1229
|
|
|
|
|
|
|
# rewind? |
1230
|
271
|
100
|
|
271
|
|
333
|
if (my $rewind = shift) { |
1231
|
|
|
|
|
|
|
# reinitialize urn indices |
1232
|
2
|
|
|
|
|
9
|
$$_[0] = 0 for (@urns); |
1233
|
2
|
|
|
|
|
4
|
$FINISHED = 0; |
1234
|
2
|
|
|
|
|
5
|
return 1; |
1235
|
|
|
|
|
|
|
} |
1236
|
|
|
|
|
|
|
# check if done... |
1237
|
269
|
100
|
|
|
|
295
|
return if $FINISHED; |
1238
|
|
|
|
|
|
|
|
1239
|
266
|
|
|
|
|
183
|
my $finished_incrementing = 0; |
1240
|
|
|
|
|
|
|
# @ret is the collector of urn choices |
1241
|
266
|
|
|
|
|
166
|
my @ret; |
1242
|
|
|
|
|
|
|
|
1243
|
266
|
|
|
|
|
258
|
for my $urn (@urns) { |
1244
|
4482
|
|
|
|
|
3133
|
my ($n, $hsps) = @$urn; |
1245
|
4482
|
|
|
|
|
3042
|
push @ret, $$hsps[$n]; |
1246
|
4482
|
100
|
|
|
|
4914
|
unless ($finished_incrementing) { |
1247
|
1086
|
100
|
|
|
|
994
|
if ($n == $#$hsps) { $$urn[0] = 0; } |
|
823
|
|
|
|
|
708
|
|
1248
|
263
|
|
|
|
|
193
|
else { ($$urn[0])++; $finished_incrementing = 1 } |
|
263
|
|
|
|
|
245
|
|
1249
|
|
|
|
|
|
|
} |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
# backstop... |
1253
|
266
|
100
|
|
|
|
312
|
$FINISHED = 1 unless $finished_incrementing; |
1254
|
|
|
|
|
|
|
# uniquify @ret |
1255
|
|
|
|
|
|
|
# $hsp->rank is a unique identifier for an hsp in a hit. |
1256
|
|
|
|
|
|
|
# preserve order in @ret |
1257
|
|
|
|
|
|
|
|
1258
|
266
|
|
|
|
|
137
|
my (%order, %uniq); |
1259
|
266
|
|
|
|
|
1369
|
@order{(0..$#ret)} = @ret; |
1260
|
266
|
|
|
|
|
990
|
$uniq{$order{$_}->rank} = $_ for (0..$#ret); |
1261
|
266
|
|
|
|
|
534
|
@ret = @order{ sort {$a<=>$b} values %uniq }; |
|
5818
|
|
|
|
|
3883
|
|
1262
|
|
|
|
|
|
|
|
1263
|
266
|
|
|
|
|
818
|
return @ret; |
1264
|
3
|
|
|
|
|
21
|
}; |
1265
|
3
|
|
|
|
|
14
|
return $iter; |
1266
|
|
|
|
|
|
|
} |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=head2 _tiling_iterator |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
Title : _tiling_iterator |
1271
|
|
|
|
|
|
|
Usage : $tiling->_tiling_iterator($type,$context) |
1272
|
|
|
|
|
|
|
Function: Retrieve the tiling iterator coderef for the requested |
1273
|
|
|
|
|
|
|
$type ('hit', 'subject', 'query') |
1274
|
|
|
|
|
|
|
Example : |
1275
|
|
|
|
|
|
|
Returns : coderef to the desired iterator |
1276
|
|
|
|
|
|
|
Args : scalar $type, one of 'hit', 'subject', 'query' |
1277
|
|
|
|
|
|
|
default is 'query' |
1278
|
|
|
|
|
|
|
option scalar $context: strand/frame context string |
1279
|
|
|
|
|
|
|
Note : getter only |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=cut |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
sub _tiling_iterator { |
1284
|
270
|
|
|
270
|
|
175
|
my $self = shift; |
1285
|
270
|
|
|
|
|
216
|
my ($type, $context) = @_; |
1286
|
270
|
|
|
|
|
252
|
$self->_check_type_arg(\$type); |
1287
|
270
|
|
|
|
|
330
|
$self->_check_context_arg($type, \$context); |
1288
|
|
|
|
|
|
|
|
1289
|
270
|
100
|
|
|
|
516
|
if (!defined $self->{"_tiling_iterator_${type}_${context}"}) { |
1290
|
2
|
|
|
|
|
9
|
$self->{"_tiling_iterator_${type}_${context}"} = |
1291
|
|
|
|
|
|
|
$self->_make_tiling_iterator($type,$context); |
1292
|
|
|
|
|
|
|
} |
1293
|
270
|
|
|
|
|
438
|
return $self->{"_tiling_iterator_${type}_${context}"}; |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
=head2 Construction Helper Methods |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
See also L. |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
=cut |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
sub _check_type_arg { |
1302
|
8297
|
|
|
8297
|
|
5338
|
my $self = shift; |
1303
|
8297
|
|
|
|
|
5781
|
my $typeref = shift; |
1304
|
8297
|
|
100
|
|
|
10661
|
$$typeref ||= 'query'; |
1305
|
8297
|
50
|
|
|
|
43148
|
$self->throw("Unknown type '$$typeref'") unless grep(/^$$typeref$/, qw( hit query subject )); |
1306
|
8297
|
100
|
|
|
|
10233
|
$$typeref = 'hit' if $$typeref eq 'subject'; |
1307
|
8297
|
|
|
|
|
6088
|
return 1; |
1308
|
|
|
|
|
|
|
} |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
sub _check_action_arg { |
1311
|
3721
|
|
|
3721
|
|
2965
|
my $self = shift; |
1312
|
3721
|
|
|
|
|
2532
|
my $actionref = shift; |
1313
|
3721
|
100
|
|
|
|
4361
|
if (!$$actionref) { |
1314
|
17
|
100
|
|
|
|
44
|
$$actionref = ($self->_has_sequence_data ? 'exact' : 'est'); |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
else { |
1317
|
3704
|
50
|
|
|
|
23527
|
$self->throw("Calc action '$$actionref' unrecognized") unless grep /^$$actionref$/, qw( est exact fast max ); |
1318
|
3704
|
100
|
100
|
|
|
7246
|
if ($$actionref ne 'est' and !$self->_has_sequence_data) { |
1319
|
12
|
|
|
|
|
47
|
$self->warn("Blast file did not possess sequence data; defaulting to 'est' action"); |
1320
|
12
|
|
|
|
|
14
|
$$actionref = 'est'; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
} |
1323
|
3721
|
|
|
|
|
3682
|
return 1; |
1324
|
|
|
|
|
|
|
} |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
sub _check_context_arg { |
1327
|
4613
|
|
|
4613
|
|
3776
|
my $self = shift; |
1328
|
4613
|
|
|
|
|
3792
|
my ($type, $contextref) = @_; |
1329
|
4613
|
100
|
|
|
|
5619
|
if (!$$contextref) { |
1330
|
298
|
50
|
|
|
|
320
|
$self->throw("Type '$type' requires strand/frame context for algorithm ".$self->algorithm) unless ($self->mapping($type) == 1); |
1331
|
|
|
|
|
|
|
# set default according to default_context attrib |
1332
|
298
|
|
|
|
|
339
|
$$contextref = $self->default_context($type); |
1333
|
|
|
|
|
|
|
} |
1334
|
|
|
|
|
|
|
else { |
1335
|
4315
|
50
|
|
|
|
7345
|
($$contextref =~ /^[mp]$/) && do { $$contextref .= '_' }; |
|
0
|
|
|
|
|
0
|
|
1336
|
4315
|
50
|
|
|
|
8204
|
$self->throw("Context '$$contextref' unrecognized") unless |
1337
|
|
|
|
|
|
|
$$contextref =~ /all|[mp][0-2_]/; |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
} |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=head2 _make_context_key |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
Title : _make_context_key |
1345
|
|
|
|
|
|
|
Alias : _context |
1346
|
|
|
|
|
|
|
Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame) |
1347
|
|
|
|
|
|
|
Function: create a string indicating strand/frame context; serves as |
1348
|
|
|
|
|
|
|
component of memoizing hash keys |
1349
|
|
|
|
|
|
|
Returns : scalar string |
1350
|
|
|
|
|
|
|
Args : -type => one of ('query', 'hit', 'subject') |
1351
|
|
|
|
|
|
|
-strand => one of (1,0,-1) |
1352
|
|
|
|
|
|
|
-frame => one of (-2, 1, 0, 1, -2) |
1353
|
|
|
|
|
|
|
called w/o args: returns 'all' |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=cut |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
sub _make_context_key { |
1358
|
546
|
|
|
546
|
|
389
|
my $self = shift; |
1359
|
546
|
|
|
|
|
1065
|
my @args = @_; |
1360
|
546
|
|
|
|
|
1762
|
my ($type, $strand, $frame) = $self->_rearrange([qw(TYPE STRAND FRAME)], @args); |
1361
|
546
|
|
|
|
|
1286
|
_check_type_arg(\$type); |
1362
|
546
|
50
|
33
|
|
|
941
|
return 'all' unless (defined $strand or defined $frame); |
1363
|
546
|
100
|
66
|
|
|
1508
|
if ( defined $strand && $self->_has_strand($type) ) { |
1364
|
211
|
100
|
66
|
|
|
572
|
if (defined $frame && $self->_has_frame($type)) { |
1365
|
94
|
100
|
|
|
|
365
|
return ($strand >= 0 ? 'p' : 'm').abs($frame); |
1366
|
|
|
|
|
|
|
} |
1367
|
|
|
|
|
|
|
else { |
1368
|
117
|
100
|
|
|
|
379
|
return ($strand >= 0 ? 'p_' : 'm_'); |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
} |
1371
|
|
|
|
|
|
|
else { |
1372
|
335
|
50
|
33
|
|
|
827
|
if (defined $frame && $self->_has_frame($type)) { |
1373
|
0
|
|
|
|
|
0
|
$self->warn("Frame defined without strand; punting with plus strand"); |
1374
|
0
|
|
|
|
|
0
|
return 'p'.abs($frame); |
1375
|
|
|
|
|
|
|
} |
1376
|
|
|
|
|
|
|
else { |
1377
|
335
|
|
|
|
|
683
|
return 'all'; |
1378
|
|
|
|
|
|
|
} |
1379
|
|
|
|
|
|
|
} |
1380
|
|
|
|
|
|
|
} |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=head2 _context |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
Title : _context |
1385
|
|
|
|
|
|
|
Alias : _make_context_key |
1386
|
|
|
|
|
|
|
Usage : $tiling->_make_context_key(-strand => $strand, -frame => $frame) |
1387
|
|
|
|
|
|
|
Function: create a string indicating strand/frame context; serves as |
1388
|
|
|
|
|
|
|
component of memoizing hash keys |
1389
|
|
|
|
|
|
|
Returns : scalar string |
1390
|
|
|
|
|
|
|
Args : -type => one of ('query', 'hit', 'subject') |
1391
|
|
|
|
|
|
|
-strand => one of (1,0,-1) |
1392
|
|
|
|
|
|
|
-frame => one of (-2, 1, 0, 1, -2) |
1393
|
|
|
|
|
|
|
called w/o args: returns 'all' |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=cut |
1396
|
|
|
|
|
|
|
|
1397
|
546
|
|
|
546
|
|
959
|
sub _context { shift->_make_context_key(@_) } |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
=head2 Predicates |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
Most based on a reading of the algorithm name with a configuration lookup. |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
=over |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
=item _has_sequence_data() |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=cut |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
sub _has_sequence_data { |
1410
|
2253
|
|
|
2253
|
|
1629
|
my $self = shift; |
1411
|
2253
|
50
|
|
|
|
2835
|
$self->throw("Hit attribute not yet set") unless defined $self->hit; |
1412
|
2253
|
100
|
|
|
|
2746
|
return (($self->hit->hsps)[0]->seq_str('match') ? 1 : 0); |
1413
|
|
|
|
|
|
|
} |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
=item _has_logical_length() |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
=cut |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
sub _has_logical_length { |
1420
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
1421
|
0
|
|
|
|
|
0
|
my $type = shift; |
1422
|
0
|
|
|
|
|
0
|
$self->_check_type_arg(\$type); |
1423
|
|
|
|
|
|
|
# based on mapping coeff |
1424
|
0
|
0
|
|
|
|
0
|
$self->throw("Mapping coefficients not yet set") unless defined $self->mapping($type); |
1425
|
0
|
|
|
|
|
0
|
return ($self->mapping($type) > 1); |
1426
|
|
|
|
|
|
|
} |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=item _has_strand() |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=cut |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
sub _has_strand { |
1433
|
546
|
|
|
546
|
|
539
|
my $self = shift; |
1434
|
546
|
|
|
|
|
491
|
my $type = shift; |
1435
|
546
|
|
|
|
|
795
|
$self->_check_type_arg(\$type); |
1436
|
546
|
|
|
|
|
1834
|
return $self->{"_has_strand_${type}"}; |
1437
|
|
|
|
|
|
|
} |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=item _has_frame() |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
=cut |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
sub _has_frame { |
1444
|
546
|
|
|
546
|
|
633
|
my $self = shift; |
1445
|
546
|
|
|
|
|
519
|
my $type = shift; |
1446
|
546
|
|
|
|
|
677
|
$self->_check_type_arg(\$type); |
1447
|
546
|
|
|
|
|
1671
|
return $self->{"_has_frame_${type}"}; |
1448
|
|
|
|
|
|
|
} |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=back |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=head1 Private Accessors |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head2 _contig_intersection |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Title : _contig_intersection |
1457
|
|
|
|
|
|
|
Usage : $tiling->_contig_intersection($type) |
1458
|
|
|
|
|
|
|
Function: Return the minimal set of $type coordinate intervals |
1459
|
|
|
|
|
|
|
covered by the invocant's HSPs |
1460
|
|
|
|
|
|
|
Returns : array of intervals (2-member arrayrefs; see MapTileUtils) |
1461
|
|
|
|
|
|
|
Args : scalar $type: one of 'query', 'hit', 'subject' |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
=cut |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
sub _contig_intersection { |
1466
|
8
|
|
|
8
|
|
11
|
my $self = shift; |
1467
|
8
|
|
|
|
|
11
|
my ($type, $context) = @_; |
1468
|
8
|
|
|
|
|
11
|
$self->_check_type_arg(\$type); |
1469
|
8
|
|
|
|
|
17
|
$self->_check_context_arg($type, \$context); |
1470
|
8
|
100
|
|
|
|
27
|
if (!defined $self->{"_contig_intersection_${type}_${context}"}) { |
1471
|
4
|
|
|
|
|
15
|
$self->_calc_coverage_map($type); |
1472
|
|
|
|
|
|
|
} |
1473
|
8
|
|
|
|
|
11
|
return @{$self->{"_contig_intersection_${type}_${context}"}}; |
|
8
|
|
|
|
|
27
|
|
1474
|
|
|
|
|
|
|
} |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=head2 _reported_length |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
Title : _reported_length |
1479
|
|
|
|
|
|
|
Usage : $tiling->_reported_length($type) |
1480
|
|
|
|
|
|
|
Function: Get the total length of the seq $type |
1481
|
|
|
|
|
|
|
for the invocant's hit object, as reported |
1482
|
|
|
|
|
|
|
by (not calculated from) the input data file |
1483
|
|
|
|
|
|
|
Returns : scalar int |
1484
|
|
|
|
|
|
|
Args : scalar $type: one of 'query', 'hit', 'subject' |
1485
|
|
|
|
|
|
|
Note : This is kludgy; the hit object does not currently |
1486
|
|
|
|
|
|
|
maintain accessors for these values, but the |
1487
|
|
|
|
|
|
|
hsps possess these attributes. This is a wrapper |
1488
|
|
|
|
|
|
|
that allows a consistent access method in the |
1489
|
|
|
|
|
|
|
MapTiling code. |
1490
|
|
|
|
|
|
|
Note : Since this number is based on a reported length, |
1491
|
|
|
|
|
|
|
it is already a "logical length". |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
=cut |
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
sub _reported_length { |
1496
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
1497
|
4
|
|
|
|
|
7
|
my $type = shift; |
1498
|
4
|
|
|
|
|
12
|
$self->_check_type_arg(\$type); |
1499
|
4
|
|
|
|
|
9
|
my $key = uc( $type."_LENGTH" ); |
1500
|
4
|
|
|
|
|
13
|
return ($self->hsps)[0]->{$key}; |
1501
|
|
|
|
|
|
|
} |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
1; |
1504
|
|
|
|
|
|
|
|