line
stmt
bran
cond
sub
pod
time
code
1
#
2
# BioPerl module for Bio::Search::Tiling::TilingI
3
#
4
# Please direct questions and support issues to <bioperl-l@bioperl.org>
5
#
6
# Cared for by Mark A. Jensen
7
#
8
# Copyright Mark A. Jensen
9
#
10
# You may distribute this module under the same terms as perl itself
11
12
# POD documentation - main docs before the code
13
14
=head1 NAME
15
16
Bio::Search::Tiling::TilingI - Abstract interface for an HSP tiling module
17
18
=head1 SYNOPSIS
19
20
Not used directly. Useful POD here for developers, however.
21
22
The interface is designed to make the following code conversion as
23
simple as possible:
24
25
From:
26
27
# Bio::Search::SearchUtils-based
28
while ( local $_ = $result->next_hit ) {
29
printf( "E-value: %g; Fraction aligned: %f; Number identical: %d\n",
30
$_->significance, $_->frac_aligned_query, $_->num_identical);
31
}
32
33
To:
34
35
# TilingI-based
36
while ( local $_ = $result->next_hit ) {
37
my $tiling = Bio::Search::Tiling::MyTiling->new($_);
38
printf( "E-value: %g; Fraction aligned: %f; Number identical: %d\n",
39
$_->significance, $tiling->frac_aligned_query, $tiling->num_identical);
40
}
41
42
43
44
=head1 DESCRIPTION
45
46
This module provides strong suggestions for any intended HSP tiling
47
object implementation. An object subclassing TilingI should override
48
the methods defined here according to their descriptions below.
49
50
See the section STATISTICS METHODS for hints on implementing methods
51
that are valid across different algorithms and report types.
52
53
=head1 FEEDBACK
54
55
=head2 Mailing Lists
56
57
User feedback is an integral part of the evolution of this and other
58
Bioperl modules. Send your comments and suggestions preferably to
59
the Bioperl mailing list. Your participation is much appreciated.
60
61
bioperl-l@bioperl.org - General discussion
62
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63
64
=head2 Support
65
66
Please direct usage questions or support issues to the mailing list:
67
68
I<bioperl-l@bioperl.org>
69
70
rather than to the module maintainer directly. Many experienced and
71
responsive experts will be able to look at the problem and quickly
72
address it. Please include a thorough description of the problem
73
with code and data examples if at all possible.
74
75
=head2 Reporting Bugs
76
77
Report bugs to the Bioperl bug tracking system to help us keep track
78
of the bugs and their resolution. Bug reports can be submitted via
79
the web:
80
81
https://github.com/bioperl/bioperl-live/issues
82
83
=head1 AUTHOR - Mark A. Jensen
84
85
Email maj@fortinbras.us
86
87
=head1 APPENDIX
88
89
The rest of the documentation details each of the object methods.
90
Internal methods are usually preceded with a _
91
92
=cut
93
94
# Let the code begin...
95
96
package Bio::Search::Tiling::TilingI;
97
1
1
6
use strict;
1
2
1
28
98
1
1
4
use warnings;
1
2
1
25
99
100
# Object preamble - inherits from Bio::Root::Root
101
102
1
1
4
use Bio::Root::Root;
1
5
1
24
103
104
1
1
3
use base qw(Bio::Root::Root);
1
2
1
429
105
106
=head2 STATISTICS METHODS
107
108
The tiling statistics can be thought of as global counterparts to
109
similar statistics defined for the individual HSPs. We therefore
110
prescribe definitions for many of the synonymous methods defined in
111
L<Bio::Search::HSP::HSPI>.
112
113
The tiling statistics must be able to keep track of the coordinate
114
systems in which both the query and subject sequences exist; i.e.,
115
either nucleotide or amino acid. This information is typically
116
inferred from the name of the algorithm used to perform the original
117
search (contained in C<$hit_object-E<gt>algorithm>). Here is a table
118
of algorithm information that may be useful (if you trust us).
119
120
algorithm query on hit coordinates(q/h)
121
--------- ------------ ---------------
122
blastn dna on dna dna/dna
123
blastp aa on aa aa/aa
124
blastx xna on aa dna/aa
125
tblastn aa on xna aa/dna
126
tblastx xna on xna dna/dna
127
fasta dna on dna dna/dna
128
fasta aa on aa aa/aa
129
fastx xna on aa dna/aa
130
fasty xna on aa dna/aa
131
tfasta aa on xna aa/dna
132
tfasty aa on xna aa/dna
133
megablast dna on dna dna/dna
134
135
xna: translated nucleotide data
136
137
Statistics methods must also be aware of differences in reporting
138
among the algorithms. Hit attributes are not necessarily normalized
139
over all algorithms. Devs, please feel free to add examples to the
140
list below.
141
142
=over
143
144
=item NCBI BLAST vs WU-BLAST (AB-BLAST) lengths
145
146
The total length of the alignment is reported differently between these two flavors. C<$hit_object-E<gt>length()> will contain the number in the denominator of the stats line; i.e., 120 in
147
148
Identical = 34/120 Positives = 67/120
149
150
NCBI BLAST uses the total length of the query sequence as input by the user (a.k.a. "with gaps"). WU-BLAST uses the length of the query sequence actually aligned by the algorithm (a.k.a. "without gaps").
151
152
=back
153
154
Finally, developers should remember that sequence data may or may not
155
be associated with the HSPs contained in the hit object. This will
156
typically depend on whether a full report (e.g., C<blastall -m0>) or a
157
summary (e.g., C<blastall -m8>) was parsed. Statistics methods that
158
depend directly on the sequence data will need to check that
159
that data is present.
160
161
=head2 identities
162
163
Title : identities
164
Alias : num_identical
165
Usage : $num_identities = $tiling->identities()
166
Function: Return the estimated or exact number of identities in the
167
tiling, accounting for overlapping HSPs
168
Example :
169
Returns : number of identical residue pairs
170
Args :
171
172
=cut
173
174
# Interface stub: unpacks the invocant and any extra arguments (which are
# not used here), then calls throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub identities{
175
0
0
1
0
my ($self,@args) = @_;
176
0
0
$self->throw_not_implemented;
177
}
178
179
#HSPI synonym
180
0
0
0
0
sub num_identical {
    # Forward to identities(), handing over the caller's remaining
    # arguments unchanged.
    my $self = shift;
    return $self->identities(@_);
}
181
182
=head2 conserved
183
184
Title : conserved
185
Alias : num_conserved
186
Usage : $num_conserved = $tiling->conserved()
187
Function: Return the estimated or exact number of conserved sites in the
188
tiling, accounting for overlapping HSPs
189
Example :
190
Returns : number of conserved residue pairs
191
Args :
192
193
=cut
194
195
# Interface stub: unpacks the invocant and any extra arguments (which are
# not used here), then calls throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub conserved{
196
0
0
1
0
my ($self,@args) = @_;
197
0
0
$self->throw_not_implemented;
198
}
199
200
#HSPI synonym
201
0
0
0
0
sub num_conserved {
    # Forward to conserved(), handing over the caller's remaining
    # arguments unchanged.
    my $self = shift;
    return $self->conserved(@_);
}
202
203
=head2 length
204
205
Title : length
206
Usage : $max_length = $tiling->length($type)
207
Function: Return the total number of residues of the subject or query
208
sequence covered by the tiling
209
Returns : number of "logical" residues covered
210
Args : scalar $type, one of 'hit', 'subject', 'query'
211
212
=cut
213
214
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub length{
215
0
0
1
0
my ($self, $type, @args) = @_;
216
0
0
$self->throw_not_implemented;
217
}
218
219
=head2 frac_identical
220
221
Title : frac_identical
222
Usage : $tiling->frac_identical($type)
223
Function: Return the fraction of sequence length consisting
224
of identical pairs
225
Returns : scalar float
226
Args : scalar $type, one of 'hit', 'subject', 'query'
227
Note : This method must take account of the $type coordinate
228
system and the length reporting method (see STATISTICS
229
METHODS above)
230
231
=cut
232
233
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub frac_identical {
234
0
0
1
0
my ($self, $type, @args) = @_;
235
0
0
$self->throw_not_implemented;
236
}
237
238
=head2 percent_identity
239
240
Title : percent_identity
241
Usage : $tiling->percent_identity($type)
242
Function: Return the fraction of sequence length consisting
243
of identical pairs as a percentage
244
Returns : scalar float
245
Args : scalar $type, one of 'hit', 'subject', 'query'
246
247
=cut
248
249
# Concrete helper: calls frac_identical($type, @args) on the invocant and
# returns that value multiplied by 100. It relies on frac_identical,
# which in this interface only calls throw_not_implemented.
sub percent_identity {
250
0
0
1
0
my ($self, $type, @args) = @_;
251
0
0
return $self->frac_identical($type, @args) * 100;
252
}
253
254
=head2 frac_conserved
255
256
Title : frac_conserved
257
Usage : $tiling->frac_conserved($type)
258
Function: Return the fraction of sequence length consisting
259
of conserved pairs
260
Returns : scalar float
261
Args : scalar $type, one of 'hit', 'subject', 'query'
262
Note : This method must take account of the $type coordinate
263
system and the length reporting method (see STATISTICS
264
METHODS above)
265
266
=cut
267
268
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub frac_conserved{
269
0
0
1
0
my ($self, $type, @args) = @_;
270
0
0
$self->throw_not_implemented;
271
}
272
273
=head2 percent_conserved
274
275
Title : percent_conserved
276
Usage : $tiling->percent_conserved($type)
277
Function: Return the fraction of sequence length consisting
278
of conserved pairs as a percentage
279
Returns : scalar float
280
Args : scalar $type, one of 'hit', 'subject', 'query'
281
282
=cut
283
284
# Concrete helper: calls frac_conserved($type, @args) on the invocant and
# returns that value multiplied by 100. It relies on frac_conserved,
# which in this interface only calls throw_not_implemented.
sub percent_conserved {
285
0
0
1
0
my ($self, $type, @args) = @_;
286
0
0
return $self->frac_conserved($type, @args) * 100;
287
}
288
289
=head2 frac_aligned
290
291
Title : frac_aligned
292
Usage : $tiling->frac_aligned($type)
293
Function: Return the fraction of B<input> sequence length
294
that was aligned by the algorithm
295
Returns : scalar float
296
Args : scalar $type, one of 'hit', 'subject', 'query'
297
Note : This method must take account of the $type coordinate
298
system and the length reporting method (see STATISTICS
299
METHODS above)
300
301
=cut
302
303
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub frac_aligned{
304
0
0
1
0
my ($self, $type, @args) = @_;
305
0
0
$self->throw_not_implemented;
306
}
307
308
# aliases for back compat
309
0
0
0
0
sub frac_aligned_query {
    # Back-compat alias: frac_aligned() with the type fixed to 'query',
    # followed by whatever else the caller passed.
    my $self = shift;
    return $self->frac_aligned('query', @_);
}
310
0
0
0
0
sub frac_aligned_hit {
    # Back-compat alias: frac_aligned() with the type fixed to 'hit',
    # followed by whatever else the caller passed.
    my $self = shift;
    return $self->frac_aligned('hit', @_);
}
311
312
=head2 range
313
314
Title : range
315
Usage : $tiling->range($type)
316
Function: Returns the extent of the longest tiling
317
as ($min_coord, $max_coord)
318
Returns : array of two scalar integers
319
Args : scalar $type, one of 'hit', 'subject', 'query'
320
321
=cut
322
323
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub range {
324
0
0
1
0
my ($self, $type, @args) = @_;
325
0
0
$self->throw_not_implemented;
326
}
327
328
=head1 TILING ITERATORS
329
330
=head2 next_tiling
331
332
Title : next_tiling
333
Usage : @hsps = $self->next_tiling($type);
334
Function: Obtain a tiling of HSPs over the $type ('hit', 'subject',
335
'query') sequence
336
Example :
337
Returns : an array of HSPI objects
338
Args : scalar $type: one of 'hit', 'subject', 'query', with
339
'subject' an alias for 'hit'
340
341
=cut
342
343
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub next_tiling{
344
0
0
1
0
my ($self,$type,@args) = @_;
345
0
0
$self->throw_not_implemented;
346
}
347
348
=head2 rewind_tilings
349
350
Title : rewind_tilings
351
Usage : $self->rewind_tilings($type)
352
Function: Reset the next_tiling($type) iterator
353
Example :
354
Returns : True on success
355
Args : scalar $type: one of 'hit', 'subject', 'query', with
356
'subject' an alias for 'hit'
357
358
=cut
359
360
# Interface stub: unpacks the invocant, $type and any extra arguments
# (none of the arguments are used here), then calls
# throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub rewind_tilings{
361
0
0
1
0
my ($self, $type, @args) = @_;
362
0
0
$self->throw_not_implemented;
363
}
364
365
#alias
366
1
1
0
1444
sub rewind {
    # Short alias: hand every remaining argument to rewind_tilings().
    my $self = shift;
    return $self->rewind_tilings(@_);
}
367
368
=head1 INFORMATIONAL ACCESSORS
369
370
=head2 algorithm
371
372
Title : algorithm
373
Usage : $tiling->algorithm
374
Function: Retrieve the algorithm name associated with the
375
invocant's hit object
376
Returns : scalar string
377
Args :
378
379
=cut
380
381
# Interface stub: unpacks the invocant and any extra arguments (which are
# not used here), then calls throw_not_implemented on the invocant.
# Per the module DESCRIPTION, subclasses are expected to override this.
sub algorithm{
382
0
0
1
my ($self, @args) = @_;
383
0
$self->throw_not_implemented;
384
}
385
386
1;