| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
=head1 NAME |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
See L. |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Bio::DB::GFF::RelSegment is a stretch of sequence that can handle |
|
12
|
|
|
|
|
|
|
relative coordinate addressing. It inherits from |
|
13
|
|
|
|
|
|
|
Bio::DB::GFF::Segment, and is the base class for |
|
14
|
|
|
|
|
|
|
Bio::DB::GFF::Feature. |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
In addition to the source sequence, a relative segment has a |
|
17
|
|
|
|
|
|
|
"reference sequence", which is used as the basis for its coordinate |
|
18
|
|
|
|
|
|
|
system. The reference sequence can be changed at will, allowing you |
|
19
|
|
|
|
|
|
|
freedom to change the "frame of reference" for features contained |
|
20
|
|
|
|
|
|
|
within the segment. For example, by setting a segment's reference |
|
21
|
|
|
|
|
|
|
sequence to the beginning of a gene, you can view all other features |
|
22
|
|
|
|
|
|
|
in gene-relative coordinates. |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
The reference sequence and the source sequence must be on the same |
|
25
|
|
|
|
|
|
|
physical stretch of DNA, naturally. However, they do not have to be |
|
26
|
|
|
|
|
|
|
on the same strand. The strandedness of the reference sequence |
|
27
|
|
|
|
|
|
|
determines whether coordinates increase to the right or the left. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment |
|
30
|
|
|
|
|
|
|
objects directly, but use those that are returned by the Bio::DB::GFF |
|
31
|
|
|
|
|
|
|
module. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 An Example |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
To understand how relative coordinates work, consider the following |
|
36
|
|
|
|
|
|
|
example from the C. elegans database. First we create the appropriate |
|
37
|
|
|
|
|
|
|
GFF accessor object (the factory): |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans', |
|
40
|
|
|
|
|
|
|
-adaptor=>'dbi:mysqlopt'); |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Now we fetch out a segment based on cosmid clone ZK909: |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $seg = $db->segment('ZK909'); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
If we call the segment's refseq() method, we see that the base of the |
|
47
|
|
|
|
|
|
|
coordinate system is the sequence "ZK154", and that its start and |
|
48
|
|
|
|
|
|
|
stop positions are 1 and the length of the cosmid: |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
print $seg->refseq; |
|
51
|
|
|
|
|
|
|
=> ZK909 |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
print $seg->start,' - ',$seg->stop; |
|
54
|
|
|
|
|
|
|
=> 1 - 33782 |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
As a convenience, the "" operator is overloaded in this class, to give |
|
57
|
|
|
|
|
|
|
the reference sequence, and start and stop positions: |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
print $seg; |
|
60
|
|
|
|
|
|
|
=> ZK909:1,33782 |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Internally, Bio::DB::GFF::RelSegment has looked up the absolute |
|
63
|
|
|
|
|
|
|
coordinates of this segment and maintains the source sequence and the |
|
64
|
|
|
|
|
|
|
absolute coordinates relative to the source sequence. We can see this |
|
65
|
|
|
|
|
|
|
information using sourceseq() (inherited from Bio::DB::GFF::Segment) |
|
66
|
|
|
|
|
|
|
and the abs_start() and abs_end() methods: |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
print $seg->sourceseq; |
|
69
|
|
|
|
|
|
|
=> CHROMOSOME_I |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
print $seg->abs_start,' - ',$seg->abs_end; |
|
72
|
|
|
|
|
|
|
=> 14839545 - 14873326 |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
We can also put the segment into absolute mode, so that it behaves |
|
75
|
|
|
|
|
|
|
like Bio::DB::Segment, and always represents coordinates on the source |
|
76
|
|
|
|
|
|
|
sequence. This is done by passing a true value to the absolute() |
|
77
|
|
|
|
|
|
|
method: |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
$seq->absolute(1); |
|
80
|
|
|
|
|
|
|
print $seg; |
|
81
|
|
|
|
|
|
|
=> CHROMOSOME_I:14839545,14873326 |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
We can change the reference sequence at any time. One way is to call |
|
84
|
|
|
|
|
|
|
the segment's ref() method, giving it the ID (and optionally the |
|
85
|
|
|
|
|
|
|
class) of another landmark on the genome. For example, if we know |
|
86
|
|
|
|
|
|
|
that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in |
|
87
|
|
|
|
|
|
|
ZK337-relative coordinates: |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
$seg->refseq('ZK337'); |
|
90
|
|
|
|
|
|
|
print $seg; |
|
91
|
|
|
|
|
|
|
=> ZK337:-33670,111 |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
We can call the segment's features() method in order to get the list |
|
94
|
|
|
|
|
|
|
of contigs that overlap this segment (in the C. elegans database, |
|
95
|
|
|
|
|
|
|
contigs have feature type "Sequence:Link"): |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
@links = $seg->features('Sequence:Link'); |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
We can now set the reference sequence to the first of these contigs like so: |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$seg->refseq($links[0]); |
|
102
|
|
|
|
|
|
|
print $seg; |
|
103
|
|
|
|
|
|
|
=> Sequence:Link(LINK_Y95D11A):3997326,4031107 |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
package Bio::DB::GFF::RelSegment; |
|
108
|
|
|
|
|
|
|
|
|
109
|
3
|
|
|
3
|
|
6
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
249
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
3
|
|
|
3
|
|
1020
|
use Bio::DB::GFF::Feature; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
72
|
|
|
112
|
3
|
|
|
3
|
|
15
|
use Bio::DB::GFF::Util::Rearrange; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
117
|
|
|
113
|
3
|
|
|
3
|
|
9
|
use Bio::RangeI; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
48
|
|
|
114
|
|
|
|
|
|
|
|
|
115
|
3
|
|
|
3
|
|
9
|
use base qw(Bio::DB::GFF::Segment); |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
294
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
use overload '""' => 'asString', |
|
118
|
1749
|
|
|
1749
|
|
6406
|
'bool' => sub { overload::StrVal(shift) }, |
|
119
|
3
|
|
|
3
|
|
12
|
fallback=>1; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
21
|
|
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 API |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The remainder of this document describes the API for |
|
124
|
|
|
|
|
|
|
Bio::DB::GFF::Segment. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 new |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Title : new |
|
131
|
|
|
|
|
|
|
Usage : $s = Bio::DB::GFF::RelSegment->new(@args) |
|
132
|
|
|
|
|
|
|
Function: create a new relative segment |
|
133
|
|
|
|
|
|
|
Returns : a new Bio::DB::GFF::RelSegment object |
|
134
|
|
|
|
|
|
|
Args : see below |
|
135
|
|
|
|
|
|
|
Status : Public |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
This method creates a new Bio::DB::GFF::RelSegment object. Generally |
|
138
|
|
|
|
|
|
|
this is called automatically by the Bio::DB::GFF module and |
|
139
|
|
|
|
|
|
|
derivatives. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
This function uses a named-argument style: |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
-factory a Bio::DB::GFF::Adaptor to use for database access |
|
144
|
|
|
|
|
|
|
-seq ID of the source sequence |
|
145
|
|
|
|
|
|
|
-class class of the source sequence |
|
146
|
|
|
|
|
|
|
-start start of the desired segment relative to source sequence |
|
147
|
|
|
|
|
|
|
-stop stop of the desired segment relative to source sequence |
|
148
|
|
|
|
|
|
|
-ref ID of the reference sequence |
|
149
|
|
|
|
|
|
|
-refclass class of the reference sequence |
|
150
|
|
|
|
|
|
|
-offset 0-based offset from source sequence to start of segment |
|
151
|
|
|
|
|
|
|
-length length of desired segment |
|
152
|
|
|
|
|
|
|
-absolute, -force_absolute |
|
153
|
|
|
|
|
|
|
use absolute coordinates, rather than coordinates relative |
|
154
|
|
|
|
|
|
|
to the start of self or the reference sequence |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The -seq argument accepts the ID of any landmark in the database. The |
|
157
|
|
|
|
|
|
|
stored source sequence becomes whatever the GFF file indicates is the |
|
158
|
|
|
|
|
|
|
proper sequence for this landmark. A class of "Sequence" is assumed |
|
159
|
|
|
|
|
|
|
unless otherwise specified in the -class argument. |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
If the argument to -seq is a Bio::GFF::Featname object (such as |
|
162
|
|
|
|
|
|
|
returned by the group() method), then the class is taken from that. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
The optional -start and -stop arguments specify the end points for the |
|
165
|
|
|
|
|
|
|
retrieved segment. For those who do not like 1-based indexing, |
|
166
|
|
|
|
|
|
|
-offset and -length are provided. If both -start/-stop and |
|
167
|
|
|
|
|
|
|
-offset/-length are provided, the latter overrides the former. |
|
168
|
|
|
|
|
|
|
Generally it is not a good idea to mix metaphors. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
-ref and -refclass together indicate a sequence to be used for |
|
171
|
|
|
|
|
|
|
relative coordinates. If not provided, the source sequence indicated |
|
172
|
|
|
|
|
|
|
by -seq is used as the reference sequence. If the argument to -ref is |
|
173
|
|
|
|
|
|
|
a Bio::GFF::Featname object (such as returned by the group() method), |
|
174
|
|
|
|
|
|
|
then the class is taken from that. |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
-force_absolute should be used if you wish to skip the lookup of the |
|
177
|
|
|
|
|
|
|
absolute position of the source sequence that ordinarily occurs when |
|
178
|
|
|
|
|
|
|
you create a relative segment. In this case, the source sequence must |
|
179
|
|
|
|
|
|
|
be a sequence that has been specified as the "source" in the GFF file. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Create a new Bio::DB::GFF::RelSegment Object |
|
184
|
|
|
|
|
|
|
# arguments are: |
|
185
|
|
|
|
|
|
|
# -factory => factory and DBI interface |
|
186
|
|
|
|
|
|
|
# -seq => $sequence_name |
|
187
|
|
|
|
|
|
|
# -start => $start_relative_to_sequence |
|
188
|
|
|
|
|
|
|
# -stop => $stop_relative_to_sequence |
|
189
|
|
|
|
|
|
|
# -ref => $sequence which establishes coordinate system |
|
190
|
|
|
|
|
|
|
# -offset => 0-based offset relative to sequence |
|
191
|
|
|
|
|
|
|
# -length => length of segment |
|
192
|
|
|
|
|
|
|
# -nocheck => turn off checking, force segment to be constructed |
|
193
|
|
|
|
|
|
|
# -absolute => use absolute coordinate addressing |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub new { |
|
196
|
203
|
|
|
203
|
1
|
177
|
my $package = shift; |
|
197
|
203
|
|
|
|
|
1207
|
my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) = |
|
198
|
|
|
|
|
|
|
rearrange([ |
|
199
|
|
|
|
|
|
|
'FACTORY', |
|
200
|
|
|
|
|
|
|
[qw(NAME SEQ SEQUENCE SOURCESEQ)], |
|
201
|
|
|
|
|
|
|
[qw(START BEGIN)], |
|
202
|
|
|
|
|
|
|
[qw(STOP END)], |
|
203
|
|
|
|
|
|
|
[qw(REFSEQ REF REFNAME)], |
|
204
|
|
|
|
|
|
|
[qw(CLASS SEQCLASS)], |
|
205
|
|
|
|
|
|
|
qw(REFCLASS), |
|
206
|
|
|
|
|
|
|
[qw(OFFSET OFF)], |
|
207
|
|
|
|
|
|
|
[qw(LENGTH LEN)], |
|
208
|
|
|
|
|
|
|
[qw(ABSOLUTE)], |
|
209
|
|
|
|
|
|
|
[qw(NOCHECK FORCE)], |
|
210
|
|
|
|
|
|
|
],@_); |
|
211
|
|
|
|
|
|
|
|
|
212
|
203
|
50
|
|
|
|
682
|
$package = ref $package if ref $package; |
|
213
|
203
|
50
|
|
|
|
324
|
$factory or $package->throw("new(): provide a -factory argument"); |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# to allow people to use segments as sources |
|
216
|
203
|
100
|
66
|
|
|
481
|
if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) { |
|
217
|
35
|
100
|
|
|
|
106
|
$start = 1 unless defined $start; |
|
218
|
35
|
100
|
|
|
|
66
|
$stop = $name->length unless defined $stop; |
|
219
|
35
|
|
|
|
|
91
|
return $name->subseq($start,$stop); |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
168
|
|
|
|
|
119
|
my @object_results; |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# support for Featname objects |
|
225
|
168
|
50
|
33
|
|
|
271
|
if (ref($name) && $name->can('class')) { |
|
226
|
0
|
|
|
|
|
0
|
$class = $name->class; |
|
227
|
0
|
|
|
|
|
0
|
$name = $name->name; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# if the class of the landmark is not specified then default to 'Sequence' |
|
231
|
168
|
|
50
|
|
|
299
|
$class ||= eval{$factory->default_class} || 'Sequence'; |
|
|
|
|
66
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# confirm that indicated sequence is actually in the database! |
|
234
|
168
|
|
|
|
|
121
|
my @abscoords; |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# abscoords() will now return an array ref, each element of which is |
|
237
|
|
|
|
|
|
|
# ($absref,$absclass,$absstart,$absstop,$absstrand) |
|
238
|
|
|
|
|
|
|
|
|
239
|
168
|
50
|
|
|
|
230
|
if ($nocheck) { |
|
240
|
0
|
|
|
|
|
0
|
$force_absolute++; |
|
241
|
0
|
|
|
|
|
0
|
$start = 1; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# if ($force_absolute && defined($start)) { # absolute position is given to us |
|
245
|
|
|
|
|
|
|
# @abscoords = ([$name,$class,$start,$stop,'+']); |
|
246
|
|
|
|
|
|
|
# } else { |
|
247
|
168
|
50
|
|
|
|
467
|
my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return; |
|
|
|
100
|
|
|
|
|
|
|
248
|
160
|
|
|
|
|
199
|
@abscoords = @$result; |
|
249
|
|
|
|
|
|
|
# } |
|
250
|
|
|
|
|
|
|
|
|
251
|
160
|
|
|
|
|
205
|
foreach (@abscoords) { |
|
252
|
160
|
|
|
|
|
349
|
my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_; |
|
253
|
160
|
50
|
|
|
|
298
|
$sname = $name unless defined $sname; |
|
254
|
160
|
|
|
|
|
158
|
my ($this_start,$this_stop,$this_length) = ($start,$stop,$length); |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# partially fill in object |
|
257
|
160
|
|
|
|
|
297
|
my $self = bless { factory => $factory },$package; |
|
258
|
|
|
|
|
|
|
|
|
259
|
160
|
|
100
|
|
|
341
|
$absstrand ||= '+'; |
|
260
|
|
|
|
|
|
|
|
|
261
|
160
|
50
|
|
|
|
312
|
if ($absstart > $absstop) { # AAARGH! DATA FORMAT ERROR! FIX. |
|
262
|
0
|
|
|
|
|
0
|
($absstart,$absstop) = ($absstop,$absstart); |
|
263
|
0
|
0
|
|
|
|
0
|
$absstrand = $absstrand eq '+' ? '-' : '+'; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# an explicit length overrides start and stop |
|
267
|
160
|
50
|
|
|
|
199
|
if (defined $offset) { |
|
268
|
0
|
0
|
|
|
|
0
|
warn "new(): bad idea to call new() with both a start and an offset" |
|
269
|
|
|
|
|
|
|
if defined $this_start; |
|
270
|
0
|
|
|
|
|
0
|
$this_start = $offset+1; |
|
271
|
|
|
|
|
|
|
} |
|
272
|
160
|
50
|
|
|
|
243
|
if (defined $this_length) { |
|
273
|
0
|
0
|
|
|
|
0
|
warn "new(): bad idea to call new() with both a stop and a length" |
|
274
|
|
|
|
|
|
|
if defined $this_stop; |
|
275
|
0
|
|
|
|
|
0
|
$this_stop = $this_start + $length - 1; |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# this allows a SQL optimization way down deep |
|
279
|
160
|
100
|
100
|
|
|
867
|
$self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop); |
|
|
|
|
66
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
|
281
|
160
|
100
|
|
|
|
238
|
$this_start = 1 if !defined $this_start; |
|
282
|
160
|
100
|
|
|
|
254
|
$this_stop = $absstop-$absstart+1 if !defined $this_stop; |
|
283
|
160
|
|
|
|
|
159
|
$this_length = $this_stop - $this_start + 1; |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# now offset to correct subsegment based on desired start and stop |
|
286
|
160
|
50
|
|
|
|
292
|
if ($force_absolute) { |
|
|
|
100
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
# ($this_start,$this_stop) = ($absstart,$absstop); |
|
288
|
0
|
|
|
|
|
0
|
$self->absolute(1); |
|
289
|
|
|
|
|
|
|
} elsif ($absstrand eq '+') { |
|
290
|
140
|
|
|
|
|
112
|
$this_start = $absstart + $this_start - 1; |
|
291
|
140
|
|
|
|
|
130
|
$this_stop = $this_start + $this_length - 1; |
|
292
|
|
|
|
|
|
|
} else { |
|
293
|
20
|
|
|
|
|
30
|
$this_start = $absstop - ($this_start - 1); |
|
294
|
20
|
|
|
|
|
25
|
$this_stop = $absstop - ($this_stop - 1); |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# handle truncation in either direction |
|
298
|
|
|
|
|
|
|
# This only happens if the segment runs off the end of |
|
299
|
|
|
|
|
|
|
# the reference sequence |
|
300
|
160
|
100
|
66
|
|
|
337
|
if ($factory->strict_bounds_checking && |
|
|
|
|
66
|
|
|
|
|
|
301
|
|
|
|
|
|
|
(($this_start < $absstart) || ($this_stop > $absstop))) { |
|
302
|
|
|
|
|
|
|
# return empty if we are completely off the end of the ref se |
|
303
|
5
|
50
|
33
|
|
|
29
|
next unless $this_start<=$absstop && $this_stop>=$absstart; |
|
304
|
5
|
50
|
|
|
|
10
|
if (my $a = $factory->abscoords($absref,'Sequence')) { |
|
305
|
5
|
|
|
|
|
8
|
my $refstart = $a->[0][2]; |
|
306
|
5
|
|
|
|
|
7
|
my $refstop = $a->[0][3]; |
|
307
|
5
|
50
|
|
|
|
15
|
if ($this_start < $refstart) { |
|
308
|
0
|
|
|
|
|
0
|
$this_start = $refstart; |
|
309
|
0
|
|
|
|
|
0
|
$self->{truncated}{start}++; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
5
|
50
|
|
|
|
13
|
if ($this_stop > $refstop) { |
|
312
|
5
|
|
|
|
|
8
|
$this_stop = $absstop; |
|
313
|
5
|
|
|
|
|
19
|
$self->{truncated}{stop}++; |
|
314
|
|
|
|
|
|
|
} |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
160
|
|
|
|
|
178
|
@{$self}{qw(sourceseq start stop strand class)} |
|
|
160
|
|
|
|
|
498
|
|
|
319
|
|
|
|
|
|
|
= ($absref,$this_start,$this_stop,$absstrand,$absclass); |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# handle reference sequence |
|
322
|
160
|
50
|
|
|
|
252
|
if (defined $refseq) { |
|
323
|
0
|
0
|
|
|
|
0
|
$refclass = $refseq->class if $refseq->can('class'); |
|
324
|
0
|
|
0
|
|
|
0
|
$refclass ||= 'Sequence'; |
|
325
|
0
|
|
|
|
|
0
|
my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass); |
|
326
|
0
|
0
|
|
|
|
0
|
unless ($refref eq $absref) { |
|
327
|
0
|
|
|
|
|
0
|
$self->error("reference sequence is on $refref but source sequence is on $absref"); |
|
328
|
0
|
|
|
|
|
0
|
return; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
0
|
0
|
|
|
|
0
|
$refstart = $refstop if $refstrand eq '-'; |
|
331
|
0
|
|
|
|
|
0
|
@{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand); |
|
|
0
|
|
|
|
|
0
|
|
|
332
|
|
|
|
|
|
|
} else { |
|
333
|
160
|
100
|
|
|
|
211
|
$absstart = $absstop if $absstrand eq '-'; |
|
334
|
160
|
|
|
|
|
166
|
@{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand); |
|
|
160
|
|
|
|
|
415
|
|
|
335
|
|
|
|
|
|
|
} |
|
336
|
160
|
|
|
|
|
302
|
push @object_results,$self; |
|
337
|
|
|
|
|
|
|
} |
|
338
|
|
|
|
|
|
|
|
|
339
|
160
|
50
|
|
|
|
559
|
return wantarray ? @object_results : $object_results[0]; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# overridden methods |
|
343
|
|
|
|
|
|
|
# start, stop, length |
|
344
|
|
|
|
|
|
|
sub start { |
|
345
|
1591
|
|
|
1591
|
1
|
5329
|
my $self = shift; |
|
346
|
1591
|
50
|
|
|
|
1674
|
return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute; |
|
|
|
100
|
|
|
|
|
|
|
347
|
1586
|
|
|
|
|
1979
|
$self->_abs2rel($self->{start}); |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
sub end { |
|
350
|
697
|
|
|
697
|
1
|
562
|
my $self = shift; |
|
351
|
697
|
0
|
|
|
|
717
|
return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute; |
|
|
|
50
|
|
|
|
|
|
|
352
|
697
|
|
|
|
|
859
|
$self->_abs2rel($self->{stop}); |
|
353
|
|
|
|
|
|
|
} |
|
354
|
|
|
|
|
|
|
*stop = \&end; |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub length { |
|
357
|
35
|
|
|
35
|
1
|
3605
|
my $self = shift; |
|
358
|
35
|
50
|
|
|
|
96
|
return unless defined $self->abs_end; |
|
359
|
35
|
|
|
|
|
50
|
abs($self->abs_end - $self->abs_start) + 1; |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
sub abs_start { |
|
363
|
120
|
|
|
120
|
1
|
107
|
my $self = shift; |
|
364
|
120
|
50
|
|
|
|
151
|
if ($self->absolute) { |
|
365
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); |
|
366
|
0
|
0
|
|
|
|
0
|
return ($a<$b) ? $a : $b; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
else { |
|
369
|
120
|
|
|
|
|
216
|
return $self->SUPER::abs_start(@_); |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
} |
|
372
|
|
|
|
|
|
|
sub abs_end { |
|
373
|
140
|
|
|
140
|
1
|
102
|
my $self = shift; |
|
374
|
140
|
50
|
|
|
|
195
|
if ($self->absolute) { |
|
375
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); |
|
376
|
0
|
0
|
|
|
|
0
|
return ($a>$b) ? $a : $b; |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
else { |
|
380
|
140
|
|
|
|
|
262
|
return $self->SUPER::abs_end(@_); |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
*abs_stop = \&abs_end; |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 refseq |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Title : refseq |
|
389
|
|
|
|
|
|
|
Usage : $ref = $s->refseq([$newseq] [,$newseqclass]) |
|
390
|
|
|
|
|
|
|
Function: get/set reference sequence |
|
391
|
|
|
|
|
|
|
Returns : current reference sequence |
|
392
|
|
|
|
|
|
|
Args : new reference sequence and class (optional) |
|
393
|
|
|
|
|
|
|
Status : Public |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
This method will get or set the reference sequence. Called with no |
|
396
|
|
|
|
|
|
|
arguments, it returns the current reference sequence. Called with |
|
397
|
|
|
|
|
|
|
either a sequence ID and class, a Bio::DB::GFF::Segment object (or |
|
398
|
|
|
|
|
|
|
subclass) or a Bio::DB::GFF::Featname object, it will set the current |
|
399
|
|
|
|
|
|
|
reference sequence and return the previous one. |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
The method will generate an exception if you attempt to set the |
|
402
|
|
|
|
|
|
|
reference sequence to a sequence that isn't contained in the database, |
|
403
|
|
|
|
|
|
|
or one that has a different source sequence from the segment. |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=cut |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#' |
|
408
|
|
|
|
|
|
|
sub refseq { |
|
409
|
372
|
|
|
372
|
1
|
284
|
my $self = shift; |
|
410
|
372
|
|
|
|
|
394
|
my $g = $self->{ref}; |
|
411
|
372
|
100
|
|
|
|
490
|
if (@_) { |
|
412
|
50
|
|
|
|
|
85
|
my ($newref,$newclass); |
|
413
|
50
|
100
|
|
|
|
78
|
if (@_ == 2) { |
|
414
|
10
|
|
|
|
|
15
|
$newclass = shift; |
|
415
|
10
|
|
|
|
|
11
|
$newref = shift; |
|
416
|
|
|
|
|
|
|
} else { |
|
417
|
40
|
|
|
|
|
36
|
$newref = shift; |
|
418
|
40
|
|
|
|
|
42
|
$newclass = 'Sequence'; |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
50
|
50
|
|
|
|
81
|
defined $newref or $self->throw('refseq() called with an undef reference sequence'); |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# support for Featname objects |
|
424
|
50
|
100
|
66
|
|
|
287
|
$newclass = $newref->class if ref($newref) && $newref->can('class'); |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# $self->throw("Cannot define a segment's reference sequence in terms of itself!") |
|
427
|
|
|
|
|
|
|
# if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self); |
|
428
|
|
|
|
|
|
|
|
|
429
|
50
|
|
|
|
|
51
|
my ($refsource,undef,$refstart,$refstop,$refstrand); |
|
430
|
50
|
100
|
|
|
|
183
|
if ($newref->isa('Bio::DB::GFF::RelSegment')) { |
|
431
|
30
|
100
|
|
|
|
51
|
($refsource,undef,$refstart,$refstop,$refstrand) = |
|
432
|
|
|
|
|
|
|
($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-'); |
|
433
|
|
|
|
|
|
|
} else { |
|
434
|
20
|
|
|
|
|
43
|
my $coords = $self->factory->abscoords($newref,$newclass); |
|
435
|
20
|
|
|
|
|
32
|
foreach (@$coords) { # find the appropriate one |
|
436
|
20
|
|
|
|
|
43
|
($refsource,undef,$refstart,$refstop,$refstrand) = @$_; |
|
437
|
20
|
100
|
|
|
|
68
|
last if $refsource eq $self->{sourceseq}; |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
} |
|
441
|
|
|
|
|
|
|
$self->throw("can't set reference sequence: $newref and $self are on different sequence segments") |
|
442
|
50
|
100
|
|
|
|
137
|
unless $refsource eq $self->{sourceseq}; |
|
443
|
|
|
|
|
|
|
|
|
444
|
45
|
|
|
|
|
50
|
@{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand); |
|
|
45
|
|
|
|
|
103
|
|
|
445
|
45
|
|
|
|
|
74
|
$self->absolute(0); |
|
446
|
|
|
|
|
|
|
} |
|
447
|
367
|
100
|
|
|
|
479
|
return $self->absolute ? $self->sourceseq : $g; |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head2 abs_low |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Title : abs_low |
|
454
|
|
|
|
|
|
|
Usage : $s->abs_low |
|
455
|
|
|
|
|
|
|
Function: the absolute lowest coordinate of the segment |
|
456
|
|
|
|
|
|
|
Returns : an integer |
|
457
|
|
|
|
|
|
|
Args : none |
|
458
|
|
|
|
|
|
|
Status : Public |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
This is for GadFly compatibility, and returns the low coordinate in |
|
461
|
|
|
|
|
|
|
absolute coordinates; |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=cut |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub abs_low { |
|
466
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
467
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->abs_start,$self->abs_end); |
|
468
|
0
|
0
|
|
|
|
0
|
return ($a<$b) ? $a : $b; |
|
469
|
|
|
|
|
|
|
} |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head2 abs_high |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
Title : abs_high |
|
474
|
|
|
|
|
|
|
Usage : $s->abs_high |
|
475
|
|
|
|
|
|
|
Function: the absolute highest coordinate of the segment |
|
476
|
|
|
|
|
|
|
Returns : an integer |
|
477
|
|
|
|
|
|
|
Args : none |
|
478
|
|
|
|
|
|
|
Status : Public |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
This is for GadFly compatibility, and returns the high coordinate in |
|
481
|
|
|
|
|
|
|
absolute coordinates; |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=cut |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
sub abs_high { |
|
486
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
487
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->abs_start,$self->abs_end); |
|
488
|
0
|
0
|
|
|
|
0
|
return ($a>$b) ? $a : $b; |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 asString |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Title : asString |
|
495
|
|
|
|
|
|
|
Usage : $s->asString |
|
496
|
|
|
|
|
|
|
Function: human-readable representation of the segment |
|
497
|
|
|
|
|
|
|
Returns : a string |
|
498
|
|
|
|
|
|
|
Args : none |
|
499
|
|
|
|
|
|
|
Status : Public |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
This method will return a human-readable representation of the |
|
502
|
|
|
|
|
|
|
segment. It is the overloaded method call for the "" operator. |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Currently the format is: |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
refseq:start,stop |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=cut |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub asString { |
|
511
|
10
|
|
|
10
|
1
|
10
|
my $self = shift; |
|
512
|
10
|
50
|
|
|
|
18
|
return $self->SUPER::asString if $self->absolute; |
|
513
|
10
|
|
|
|
|
13
|
my $label = $self->{ref}; |
|
514
|
10
|
|
50
|
|
|
18
|
my $start = $self->start || ''; |
|
515
|
10
|
|
50
|
|
|
24
|
my $stop = $self->stop || ''; |
|
516
|
10
|
100
|
66
|
|
|
62
|
if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) { |
|
517
|
5
|
|
|
|
|
42
|
$label = $self->abs_ref; |
|
518
|
5
|
|
|
|
|
10
|
$start = $self->abs_start; |
|
519
|
5
|
|
|
|
|
15
|
$stop = $self->abs_end; |
|
520
|
|
|
|
|
|
|
} |
|
521
|
10
|
|
|
|
|
85
|
return "$label:$start,$stop"; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=head2 name |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Title : name |
|
527
|
|
|
|
|
|
|
Usage : Alias for asString() |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=cut |
|
530
|
|
|
|
|
|
|
|
|
531
|
0
|
|
|
0
|
1
|
0
|
sub name { shift->asString } |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head2 absolute |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Title : absolute |
|
536
|
|
|
|
|
|
|
Usage : $abs = $s->absolute([$abs]) |
|
537
|
|
|
|
|
|
|
Function: get/set absolute coordinates |
|
538
|
|
|
|
|
|
|
Returns : a boolean flag |
|
539
|
|
|
|
|
|
|
Args : new setting for flag (optional) |
|
540
|
|
|
|
|
|
|
Status : Public |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Called with a boolean flag, this method controls whether to display |
|
543
|
|
|
|
|
|
|
relative coordinates (relative to the reference sequence) or absolute |
|
544
|
|
|
|
|
|
|
coordinates (relative to the source sequence). It will return the |
|
545
|
|
|
|
|
|
|
previous value of the setting. |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=cut |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub absolute { |
|
550
|
6467
|
|
|
6467
|
1
|
4033
|
my $self = shift; |
|
551
|
6467
|
|
|
|
|
4347
|
my $g = $self->{absolute}; |
|
552
|
6467
|
100
|
|
|
|
7222
|
$self->{absolute} = shift if @_; |
|
553
|
6467
|
|
|
|
|
7751
|
$g; |
|
554
|
|
|
|
|
|
|
} |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=head2 features |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Title : features |
|
559
|
|
|
|
|
|
|
Usage : @features = $s->features(@args) |
|
560
|
|
|
|
|
|
|
Function: get features that overlap this segment |
|
561
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
|
562
|
|
|
|
|
|
|
Args : see below |
|
563
|
|
|
|
|
|
|
Status : Public |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
This method will find all features that overlap the segment and return |
|
566
|
|
|
|
|
|
|
a list of Bio::DB::GFF::Feature objects. The features will use |
|
567
|
|
|
|
|
|
|
coordinates relative to the reference sequence in effect at the time |
|
568
|
|
|
|
|
|
|
that features() was called. |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
The returned list can be limited to certain types of feature by |
|
571
|
|
|
|
|
|
|
filtering on their method and/or source. In addition, it is possible |
|
572
|
|
|
|
|
|
|
to obtain an iterator that will step through a large number of |
|
573
|
|
|
|
|
|
|
features sequentially. |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Arguments can be provided positionally or using the named arguments |
|
576
|
|
|
|
|
|
|
format. In the former case, the arguments are a list of feature types |
|
577
|
|
|
|
|
|
|
in the format "method:source". Either method or source can be |
|
578
|
|
|
|
|
|
|
omitted, in which case the missing component is treated as a wildcard. |
|
579
|
|
|
|
|
|
|
If no colon is present, then the type is treated as a method name. |
|
580
|
|
|
|
|
|
|
Multiple arguments are ORed together. |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Examples: |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
@f = $s->features('exon:curated'); # all curated exons |
|
585
|
|
|
|
|
|
|
@f = $s->features('exon:curated','intron'); # curated exons and all introns |
|
586
|
|
|
|
|
|
|
@f = $s->features('similarity:.*EST.*'); # all similarities |
|
587
|
|
|
|
|
|
|
# having something to do |
|
588
|
|
|
|
|
|
|
# with ESTs |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
The named parameter form gives you control over a few options: |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
-types an array reference to type names in the format |
|
593
|
|
|
|
|
|
|
"method:source" |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
-merge Whether to apply aggregators to the generated features (default yes) |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
-rare Turn on an optimization suitable for a relatively rare feature type, |
|
598
|
|
|
|
|
|
|
where it will be faster to filter by feature type first |
|
599
|
|
|
|
|
|
|
and then by position, rather than vice versa. |
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
-attributes a hashref containing a set of attributes to match |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
-range_type One of 'overlapping', 'contains', or 'contained_in' |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
-iterator Whether to return an iterator across the features. |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
-binsize A true value will create a set of artificial features whose |
|
608
|
|
|
|
|
|
|
start and stop positions indicate bins of the given size, and |
|
609
|
|
|
|
|
|
|
whose scores are the number of features in the bin. The |
|
610
|
|
|
|
|
|
|
class and method of the feature will be set to "bin", |
|
611
|
|
|
|
|
|
|
its source to "method:source", and its group to "bin:method:source". |
|
612
|
|
|
|
|
|
|
This is a handy way of generating histograms of feature density. |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
-merge is a boolean flag that controls whether the adaptor's |
|
615
|
|
|
|
|
|
|
aggregators wll be applied to the features returned by this method. |
|
616
|
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
If -iterator is true, then the method returns a single scalar value |
|
618
|
|
|
|
|
|
|
consisting of a Bio::SeqIO object. You can call next_seq() repeatedly |
|
619
|
|
|
|
|
|
|
on this object to fetch each of the features in turn. If iterator is |
|
620
|
|
|
|
|
|
|
false or absent, then all the features are returned as a list. |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
The -attributes argument is a hashref containing one or more |
|
623
|
|
|
|
|
|
|
attributes to match against: |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
-attributes => { Gene => 'abc-1', |
|
626
|
|
|
|
|
|
|
Note => 'confirmed' } |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Attribute matching is simple string matching, and multiple attributes |
|
629
|
|
|
|
|
|
|
are ANDed together. |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=cut |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#' |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# return all features that overlap with this segment; |
|
636
|
|
|
|
|
|
|
# optionally modified by a list of types to filter on |
|
637
|
|
|
|
|
|
|
sub features { |
|
638
|
85
|
|
|
85
|
1
|
3725
|
my $self = shift; |
|
639
|
85
|
|
|
|
|
195
|
my @args = $self->_process_feature_args(@_); |
|
640
|
85
|
|
|
|
|
234
|
return $self->factory->overlapping_features(@args); |
|
641
|
|
|
|
|
|
|
} |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=head2 get_SeqFeatures |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Title : get_SeqFeatures |
|
646
|
|
|
|
|
|
|
Usage : |
|
647
|
|
|
|
|
|
|
Function: returns the top level sequence features |
|
648
|
|
|
|
|
|
|
Returns : L objects |
|
649
|
|
|
|
|
|
|
Args : none |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Segments do not ordinarily return any subfeatures. |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=cut |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# A SEGMENT DOES NOT HAVE SUBFEATURES! |
|
656
|
0
|
|
|
0
|
1
|
0
|
sub get_SeqFeatures { return } |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head2 feature_count |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Title : feature_count |
|
661
|
|
|
|
|
|
|
Usage : $seq->feature_count() |
|
662
|
|
|
|
|
|
|
Function: Return the number of SeqFeatures attached to a sequence |
|
663
|
|
|
|
|
|
|
Returns : integer representing the number of SeqFeatures |
|
664
|
|
|
|
|
|
|
Args : none |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
This method comes through extension of Bio::FeatureHolderI. See |
|
667
|
|
|
|
|
|
|
L for more information. |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub feature_count { |
|
672
|
5
|
|
|
5
|
1
|
2106
|
my $self = shift; |
|
673
|
5
|
|
|
|
|
10
|
my $ct = 0; |
|
674
|
5
|
|
|
|
|
25
|
my %type_counts = $self->types(-enumerate=>1); |
|
675
|
5
|
|
|
|
|
17
|
map { $ct += $_ } values %type_counts; |
|
|
30
|
|
|
|
|
28
|
|
|
676
|
5
|
|
|
|
|
18
|
$ct; |
|
677
|
|
|
|
|
|
|
} |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head2 get_feature_stream |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Title : features |
|
682
|
|
|
|
|
|
|
Usage : $stream = $s->get_feature_stream(@args) |
|
683
|
|
|
|
|
|
|
Function: get a stream of features that overlap this segment |
|
684
|
|
|
|
|
|
|
Returns : a Bio::SeqIO::Stream-compliant stream |
|
685
|
|
|
|
|
|
|
Args : see below |
|
686
|
|
|
|
|
|
|
Status : Public |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
This is the same as features(), but returns a stream. Use like this: |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
$stream = $s->get_feature_stream('exon'); |
|
691
|
|
|
|
|
|
|
while (my $exon = $stream->next_seq) { |
|
692
|
|
|
|
|
|
|
print $exon->start,"\n"; |
|
693
|
|
|
|
|
|
|
} |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=cut |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub get_feature_stream { |
|
698
|
5
|
|
|
5
|
1
|
2459
|
my $self = shift; |
|
699
|
5
|
50
|
33
|
|
|
60
|
my @args = defined($_[0]) && $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1); |
|
700
|
5
|
|
|
|
|
10
|
$self->features(@args); |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=head2 get_seq_stream |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Title : get_seq_stream |
|
706
|
|
|
|
|
|
|
Usage : $stream = $s->get_seq_stream(@args) |
|
707
|
|
|
|
|
|
|
Function: get a stream of features that overlap this segment |
|
708
|
|
|
|
|
|
|
Returns : a Bio::SeqIO::Stream-compliant stream |
|
709
|
|
|
|
|
|
|
Args : see below |
|
710
|
|
|
|
|
|
|
Status : Public |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
This is the same as feature_stream(), and is provided for Bioperl |
|
713
|
|
|
|
|
|
|
compatibility. Use like this: |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
$stream = $s->get_seq_stream('exon'); |
|
716
|
|
|
|
|
|
|
while (my $exon = $stream->next_seq) { |
|
717
|
|
|
|
|
|
|
print $exon->start,"\n"; |
|
718
|
|
|
|
|
|
|
} |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
*get_seq_stream = \&get_feature_stream; |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 overlapping_features |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Title : overlapping_features |
|
728
|
|
|
|
|
|
|
Usage : @features = $s->overlapping_features(@args) |
|
729
|
|
|
|
|
|
|
Function: get features that overlap this segment |
|
730
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
|
731
|
|
|
|
|
|
|
Args : see features() |
|
732
|
|
|
|
|
|
|
Status : Public |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
This is an alias for the features() method, and takes the same |
|
735
|
|
|
|
|
|
|
arguments. |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=cut |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
*overlapping_features = \&features; |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head2 contained_features |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Title : contained_features |
|
744
|
|
|
|
|
|
|
Usage : @features = $s->contained_features(@args) |
|
745
|
|
|
|
|
|
|
Function: get features that are contained by this segment |
|
746
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
|
747
|
|
|
|
|
|
|
Args : see features() |
|
748
|
|
|
|
|
|
|
Status : Public |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
This is identical in behavior to features() except that it returns |
|
751
|
|
|
|
|
|
|
only those features that are completely contained within the segment, |
|
752
|
|
|
|
|
|
|
rather than any that overlap. |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=cut |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# return all features completely contained within this segment |
|
757
|
|
|
|
|
|
|
sub contained_features { |
|
758
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
759
|
0
|
|
|
|
|
0
|
local $self->{whole} = 0; |
|
760
|
0
|
|
|
|
|
0
|
my @args = $self->_process_feature_args(@_); |
|
761
|
0
|
|
|
|
|
0
|
return $self->factory->contained_features(@args); |
|
762
|
|
|
|
|
|
|
} |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
# *contains = \&contained_features; |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=head2 contained_in |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Title : contained_in |
|
769
|
|
|
|
|
|
|
Usage : @features = $s->contained_in(@args) |
|
770
|
|
|
|
|
|
|
Function: get features that contain this segment |
|
771
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
|
772
|
|
|
|
|
|
|
Args : see features() |
|
773
|
|
|
|
|
|
|
Status : Public |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
This is identical in behavior to features() except that it returns |
|
776
|
|
|
|
|
|
|
only those features that completely contain the segment. |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=cut |
|
779
|
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
# return all features completely contained within this segment |
|
781
|
|
|
|
|
|
|
sub contained_in { |
|
782
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
783
|
0
|
|
|
|
|
0
|
local $self->{whole} = 0; |
|
784
|
0
|
|
|
|
|
0
|
my @args = $self->_process_feature_args(@_); |
|
785
|
0
|
|
|
|
|
0
|
return $self->factory->contained_in(@args); |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=head2 delete |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
Title : delete |
|
791
|
|
|
|
|
|
|
Usage : $db->delete(@args) |
|
792
|
|
|
|
|
|
|
Function: delete features |
|
793
|
|
|
|
|
|
|
Returns : count of features deleted -- if available |
|
794
|
|
|
|
|
|
|
Args : numerous, see below |
|
795
|
|
|
|
|
|
|
Status : public |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
This method deletes all features that overlap the specified region or |
|
798
|
|
|
|
|
|
|
are of a particular type. If no arguments are provided and the -force |
|
799
|
|
|
|
|
|
|
argument is true, then deletes ALL features. |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Arguments: |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
-type,-types Either a single scalar type to be deleted, or an |
|
804
|
|
|
|
|
|
|
reference to an array of types. |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
-range_type Control the range type of the deletion. One of "overlaps" (default) |
|
807
|
|
|
|
|
|
|
"contains" or "contained_in" |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
Examples: |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
$segment->delete(-type=>['intron','repeat:repeatMasker']); # remove all introns & repeats |
|
812
|
|
|
|
|
|
|
$segment->delete(-type=>['intron','repeat:repeatMasker'] |
|
813
|
|
|
|
|
|
|
-range_type => 'contains'); # remove all introns & repeats |
|
814
|
|
|
|
|
|
|
# strictly contained in segment |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
IMPORTANT NOTE: This method only deletes features. It does *NOT* |
|
817
|
|
|
|
|
|
|
delete the names of groups that contain the deleted features. Group |
|
818
|
|
|
|
|
|
|
IDs will be reused if you later load a feature with the same group |
|
819
|
|
|
|
|
|
|
name as one that was previously deleted. |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the |
|
822
|
|
|
|
|
|
|
result code from the SQL DELETE operation. Some dbd drivers return the |
|
823
|
|
|
|
|
|
|
count of rows deleted, while others return 0E0. Caveat emptor. |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=cut |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# return all features completely contained within this segment |
|
828
|
|
|
|
|
|
|
sub delete { |
|
829
|
5
|
|
|
5
|
1
|
50
|
my $self = shift; |
|
830
|
5
|
|
|
|
|
42
|
my ($type,$range_type) = |
|
831
|
|
|
|
|
|
|
rearrange([[qw(TYPE TYPES)],'RANGE_TYPE'],@_); |
|
832
|
5
|
|
|
|
|
21
|
my $types = $self->factory->parse_types($type); # parse out list of types |
|
833
|
5
|
|
50
|
|
|
13
|
$range_type ||= 'overlaps'; |
|
834
|
5
|
|
|
|
|
10
|
return $self->factory->_delete({ |
|
835
|
|
|
|
|
|
|
segments => [$self], |
|
836
|
|
|
|
|
|
|
types => $types, |
|
837
|
|
|
|
|
|
|
range_type => $range_type |
|
838
|
|
|
|
|
|
|
}); |
|
839
|
|
|
|
|
|
|
} |
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
=head2 _process_feature_args |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
Title : _process_feature_args |
|
844
|
|
|
|
|
|
|
Usage : @args = $s->_process_feature_args(@args) |
|
845
|
|
|
|
|
|
|
Function: preprocess arguments passed to features, |
|
846
|
|
|
|
|
|
|
contained_features, and overlapping_features |
|
847
|
|
|
|
|
|
|
Returns : a list of parsed arguents |
|
848
|
|
|
|
|
|
|
Args : see feature() |
|
849
|
|
|
|
|
|
|
Status : Internal |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
This is an internal method that is used to check and format the |
|
852
|
|
|
|
|
|
|
arguments to features() before passing them on to the adaptor. |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=cut |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
sub _process_feature_args { |
|
857
|
85
|
|
|
85
|
|
62
|
my $self = shift; |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
my ($ref,$class,$start,$stop,$strand,$whole) |
|
860
|
85
|
|
|
|
|
92
|
= @{$self}{qw(sourceseq class start stop strand whole)}; |
|
|
85
|
|
|
|
|
180
|
|
|
861
|
|
|
|
|
|
|
|
|
862
|
85
|
100
|
66
|
|
|
378
|
($start,$stop) = ($stop,$start) if defined $strand && $strand eq '-'; |
|
863
|
|
|
|
|
|
|
|
|
864
|
85
|
|
|
|
|
226
|
my @args = (-ref=>$ref,-class=>$class); |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
# indicating that we are fetching the whole segment allows certain |
|
867
|
|
|
|
|
|
|
# SQL optimizations. |
|
868
|
85
|
100
|
|
|
|
187
|
push @args,(-start=>$start,-stop=>$stop) unless $whole; |
|
869
|
|
|
|
|
|
|
|
|
870
|
85
|
100
|
|
|
|
146
|
if (@_) { |
|
871
|
70
|
100
|
|
|
|
207
|
if ($_[0] =~ /^-/) { |
|
872
|
45
|
|
|
|
|
69
|
push @args,@_; |
|
873
|
|
|
|
|
|
|
} else { |
|
874
|
25
|
|
|
|
|
37
|
my @types = @_; |
|
875
|
25
|
|
|
|
|
75
|
push @args,-types=>\@types; |
|
876
|
|
|
|
|
|
|
} |
|
877
|
|
|
|
|
|
|
} |
|
878
|
85
|
|
|
|
|
117
|
push @args,-parent=>$self; |
|
879
|
85
|
|
|
|
|
279
|
@args; |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=head2 types |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Title : types |
|
885
|
|
|
|
|
|
|
Usage : @types = $s->types([-enumerate=>1]) |
|
886
|
|
|
|
|
|
|
Function: list feature types that overlap this segment |
|
887
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Typename objects or a hash |
|
888
|
|
|
|
|
|
|
Args : see below |
|
889
|
|
|
|
|
|
|
Status : Public |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
The types() method will return a list of Bio::DB::GFF::Typename |
|
892
|
|
|
|
|
|
|
objects, each corresponding to a feature that overlaps the segment. |
|
893
|
|
|
|
|
|
|
If the optional -enumerate parameter is set to a true value, then the |
|
894
|
|
|
|
|
|
|
method will return a hash in which the keys are the type names and the |
|
895
|
|
|
|
|
|
|
values are the number of times a feature of that type is present on |
|
896
|
|
|
|
|
|
|
the segment. For example: |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
%count = $s->types(-enumerate=>1); |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=cut |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
# wrapper for lower-level types() call. |
|
903
|
|
|
|
|
|
|
sub types { |
|
904
|
15
|
|
|
15
|
1
|
1367
|
my $self = shift; |
|
905
|
15
|
|
|
|
|
18
|
my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)}; |
|
|
15
|
|
|
|
|
35
|
|
|
906
|
15
|
50
|
|
|
|
40
|
($start,$stop) = ($stop,$start) if $strand eq '-'; |
|
907
|
|
|
|
|
|
|
|
|
908
|
15
|
|
|
|
|
18
|
my @args; |
|
909
|
15
|
50
|
66
|
|
|
92
|
if (@_ && $_[0] !~ /^-/) { |
|
910
|
0
|
|
|
|
|
0
|
@args = (-type => \@_) |
|
911
|
|
|
|
|
|
|
} else { |
|
912
|
15
|
|
|
|
|
32
|
@args = @_; |
|
913
|
|
|
|
|
|
|
} |
|
914
|
15
|
|
|
|
|
31
|
$self->factory->types(-ref => $ref, |
|
915
|
|
|
|
|
|
|
-start=> $start, |
|
916
|
|
|
|
|
|
|
-stop => $stop, |
|
917
|
|
|
|
|
|
|
@args); |
|
918
|
|
|
|
|
|
|
} |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head1 Internal Methods |
|
921
|
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
The following are internal methods and should not be called directly. |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=head2 new_from_segment |
|
925
|
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
Title : new_from_segment |
|
927
|
|
|
|
|
|
|
Usage : $s = $segment->new_from_segment(@args) |
|
928
|
|
|
|
|
|
|
Function: create a new relative segment |
|
929
|
|
|
|
|
|
|
Returns : a new Bio::DB::GFF::RelSegment object |
|
930
|
|
|
|
|
|
|
Args : see below |
|
931
|
|
|
|
|
|
|
Status : Internal |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
This constructor is used internally by the subseq() method. It forces |
|
934
|
|
|
|
|
|
|
the new segment into the Bio::DB::GFF::RelSegment package, regardless |
|
935
|
|
|
|
|
|
|
of the package that it is called from. This causes subclass-specfic |
|
936
|
|
|
|
|
|
|
information, such as feature types, to be dropped when a subsequence |
|
937
|
|
|
|
|
|
|
is created. |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=cut |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
sub new_from_segment { |
|
942
|
35
|
|
|
35
|
1
|
37
|
my $package = shift; |
|
943
|
35
|
50
|
|
|
|
69
|
$package = ref $package if ref $package; |
|
944
|
35
|
|
|
|
|
34
|
my $segment = shift; |
|
945
|
35
|
|
|
|
|
39
|
my $new = {}; |
|
946
|
35
|
|
|
|
|
163
|
@{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)} |
|
947
|
35
|
|
|
|
|
31
|
= @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)}; |
|
|
35
|
|
|
|
|
66
|
|
|
948
|
35
|
|
|
|
|
76
|
return bless $new,__PACKAGE__; |
|
949
|
|
|
|
|
|
|
} |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=head2 _abs2rel |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
Title : _abs2rel |
|
954
|
|
|
|
|
|
|
Usage : @coords = $s->_abs2rel(@coords) |
|
955
|
|
|
|
|
|
|
Function: convert absolute coordinates into relative coordinates |
|
956
|
|
|
|
|
|
|
Returns : a list of relative coordinates |
|
957
|
|
|
|
|
|
|
Args : a list of absolute coordinates |
|
958
|
|
|
|
|
|
|
Status : Internal |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
This is used internally to map from absolute to relative |
|
961
|
|
|
|
|
|
|
coordinates. It does not take the offset of the reference sequence |
|
962
|
|
|
|
|
|
|
into account, so please use abs2rel() instead. |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=cut |
|
965
|
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
sub _abs2rel { |
|
967
|
2283
|
|
|
2283
|
|
1447
|
my $self = shift; |
|
968
|
2283
|
|
|
|
|
1351
|
my @result; |
|
969
|
2283
|
50
|
|
|
|
2649
|
return unless defined $_[0]; |
|
970
|
|
|
|
|
|
|
|
|
971
|
2283
|
50
|
|
|
|
2352
|
if ($self->absolute) { |
|
972
|
0
|
|
|
|
|
0
|
@result = @_; |
|
973
|
|
|
|
|
|
|
} else { |
|
974
|
2283
|
|
|
|
|
1398
|
my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)}; |
|
|
2283
|
|
|
|
|
2802
|
|
|
975
|
161
|
|
|
|
|
301
|
@result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_ |
|
976
|
2283
|
100
|
100
|
|
|
6613
|
: map { $_ - $refstart + 1 } @_; |
|
|
2122
|
|
|
|
|
3386
|
|
|
977
|
|
|
|
|
|
|
} |
|
978
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
|
979
|
|
|
|
|
|
|
# not the size of the returned array! |
|
980
|
2283
|
100
|
66
|
|
|
7981
|
return $result[0] if @result == 1 and !wantarray; |
|
981
|
80
|
|
|
|
|
1013
|
@result; |
|
982
|
|
|
|
|
|
|
} |
|
983
|
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
=head2 rel2abs |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
Title : rel2abs |
|
987
|
|
|
|
|
|
|
Usage : @coords = $s->rel2abs(@coords) |
|
988
|
|
|
|
|
|
|
Function: convert relative coordinates into absolute coordinates |
|
989
|
|
|
|
|
|
|
Returns : a list of absolute coordinates |
|
990
|
|
|
|
|
|
|
Args : a list of relative coordinates |
|
991
|
|
|
|
|
|
|
Status : Public |
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
This function takes a list of positions in relative coordinates to the |
|
994
|
|
|
|
|
|
|
segment, and converts them into absolute coordinates. |
|
995
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
=cut |
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
sub rel2abs { |
|
999
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
1000
|
0
|
|
|
|
|
0
|
my @result; |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
0
|
0
|
|
|
|
0
|
if ($self->absolute) { |
|
1003
|
0
|
|
|
|
|
0
|
@result = @_; |
|
1004
|
|
|
|
|
|
|
} else { |
|
1005
|
0
|
|
|
|
|
0
|
my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); |
|
1006
|
0
|
|
|
|
|
0
|
@result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ |
|
1007
|
0
|
0
|
|
|
|
0
|
: map { $_ + $abs_start - 1 } @_; |
|
|
0
|
|
|
|
|
0
|
|
|
1008
|
|
|
|
|
|
|
} |
|
1009
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
|
1010
|
|
|
|
|
|
|
# not the size of the returned array! |
|
1011
|
0
|
0
|
0
|
|
|
0
|
return $result[0] if @result == 1 and !wantarray; |
|
1012
|
0
|
|
|
|
|
0
|
@result; |
|
1013
|
|
|
|
|
|
|
} |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
=head2 abs2rel |
|
1016
|
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Title : abs2rel |
|
1018
|
|
|
|
|
|
|
Usage : @rel_coords = $s->abs2rel(@abs_coords) |
|
1019
|
|
|
|
|
|
|
Function: convert absolute coordinates into relative coordinates |
|
1020
|
|
|
|
|
|
|
Returns : a list of relative coordinates |
|
1021
|
|
|
|
|
|
|
Args : a list of absolute coordinates |
|
1022
|
|
|
|
|
|
|
Status : Public |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
This function takes a list of positions in absolute coordinates |
|
1025
|
|
|
|
|
|
|
and returns a list expressed in relative coordinates. |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=cut |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
sub abs2rel { |
|
1030
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
1031
|
0
|
|
|
|
|
0
|
my @result; |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
0
|
0
|
|
|
|
0
|
if ($self->absolute) { |
|
1034
|
0
|
|
|
|
|
0
|
@result = @_; |
|
1035
|
|
|
|
|
|
|
} else { |
|
1036
|
0
|
|
|
|
|
0
|
my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); |
|
1037
|
0
|
|
|
|
|
0
|
@result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ |
|
1038
|
0
|
0
|
|
|
|
0
|
: map { $_ - $abs_start + 1 } @_; |
|
|
0
|
|
|
|
|
0
|
|
|
1039
|
|
|
|
|
|
|
} |
|
1040
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
|
1041
|
|
|
|
|
|
|
# not the size of the returned array! |
|
1042
|
0
|
0
|
0
|
|
|
0
|
return $result[0] if @result == 1 and !wantarray; |
|
1043
|
0
|
|
|
|
|
0
|
@result; |
|
1044
|
|
|
|
|
|
|
} |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
sub subseq { |
|
1047
|
35
|
|
|
35
|
1
|
30
|
my $self = shift; |
|
1048
|
35
|
|
|
|
|
90
|
my $obj = $self->SUPER::subseq(@_); |
|
1049
|
35
|
|
|
|
|
94
|
bless $obj,__PACKAGE__; # always bless into the generic RelSegment package |
|
1050
|
|
|
|
|
|
|
} |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
sub strand { |
|
1053
|
627
|
|
|
627
|
1
|
516
|
my $self = shift; |
|
1054
|
627
|
100
|
|
|
|
670
|
if ($self->absolute) { |
|
1055
|
10
|
|
|
|
|
25
|
return _to_strand($self->{strand}); |
|
1056
|
|
|
|
|
|
|
} |
|
1057
|
617
|
|
|
|
|
773
|
my $start = $self->start; |
|
1058
|
617
|
|
|
|
|
754
|
my $stop = $self->stop; |
|
1059
|
617
|
50
|
33
|
|
|
1770
|
return 0 unless defined $start and defined $stop; |
|
1060
|
617
|
|
|
|
|
2148
|
return $stop <=> $start; |
|
1061
|
|
|
|
|
|
|
} |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
sub _to_strand { |
|
1064
|
10
|
|
|
10
|
|
10
|
my $s = shift; |
|
1065
|
10
|
100
|
|
|
|
38
|
return -1 if $s eq '-'; |
|
1066
|
5
|
50
|
|
|
|
28
|
return +1 if $s eq '+'; |
|
1067
|
0
|
|
|
|
|
|
return 0; |
|
1068
|
|
|
|
|
|
|
} |
|
1069
|
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
=head2 Bio::RangeI Methods |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
The following Bio::RangeI methods are supported: |
|
1073
|
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
overlaps(), contains(), equals(),intersection(),union(),overlap_extent() |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
=cut |
|
1077
|
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
sub intersection { |
|
1079
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
1080
|
0
|
|
|
|
|
|
my (@ranges) = @_; |
|
1081
|
0
|
0
|
|
|
|
|
unshift @ranges,$self if ref $self; |
|
1082
|
0
|
0
|
|
|
|
|
$ranges[0]->isa('Bio::DB::GFF::RelSegment') |
|
1083
|
|
|
|
|
|
|
or return $self->SUPER::intersection(@_); |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
0
|
|
|
|
|
|
my $ref = $ranges[0]->abs_ref; |
|
1086
|
0
|
|
|
|
|
|
my ($low,$high); |
|
1087
|
0
|
|
|
|
|
|
foreach (@ranges) { |
|
1088
|
0
|
0
|
|
|
|
|
return unless $_->can('abs_ref'); |
|
1089
|
0
|
0
|
|
|
|
|
$ref eq $_->abs_ref or return; |
|
1090
|
0
|
0
|
0
|
|
|
|
$low = $_->abs_low if !defined($low) or $low < $_->abs_low; |
|
1091
|
0
|
0
|
0
|
|
|
|
$high = $_->abs_high if !defined($high) or $high > $_->abs_high; |
|
1092
|
|
|
|
|
|
|
} |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
0
|
0
|
|
|
|
|
return unless $low < $high; |
|
1095
|
0
|
|
|
|
|
|
return Bio::DB::GFF::RelSegment->new(-factory => $self->factory, |
|
1096
|
|
|
|
|
|
|
-seq => $ref, |
|
1097
|
|
|
|
|
|
|
-start => $low, |
|
1098
|
|
|
|
|
|
|
-stop => $high, |
|
1099
|
|
|
|
|
|
|
); |
|
1100
|
|
|
|
|
|
|
} |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
sub overlaps { |
|
1103
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
1104
|
0
|
|
|
|
|
|
my($other,$so) = @_; |
|
1105
|
0
|
0
|
|
|
|
|
return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); |
|
1106
|
0
|
0
|
|
|
|
|
return if $self->abs_ref ne $other->abs_ref; |
|
1107
|
0
|
0
|
|
|
|
|
return if $self->abs_low > $other->abs_high; |
|
1108
|
0
|
0
|
|
|
|
|
return if $self->abs_high < $other->abs_low; |
|
1109
|
0
|
|
|
|
|
|
1; |
|
1110
|
|
|
|
|
|
|
} |
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
sub contains { |
|
1113
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
1114
|
0
|
|
|
|
|
|
my($other,$so) = @_; |
|
1115
|
0
|
0
|
|
|
|
|
return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); |
|
1116
|
0
|
0
|
|
|
|
|
return if $self->abs_ref ne $other->abs_ref; |
|
1117
|
0
|
0
|
|
|
|
|
return unless $self->abs_low <= $other->abs_low; |
|
1118
|
0
|
0
|
|
|
|
|
return unless $self->abs_high >= $other->abs_high; |
|
1119
|
0
|
|
|
|
|
|
1; |
|
1120
|
|
|
|
|
|
|
} |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub union { |
|
1123
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
1124
|
0
|
|
|
|
|
|
my (@ranges) = @_; |
|
1125
|
0
|
0
|
|
|
|
|
unshift @ranges,$self if ref $self; |
|
1126
|
0
|
0
|
|
|
|
|
$ranges[0]->isa('Bio::DB::GFF::RelSegment') |
|
1127
|
|
|
|
|
|
|
or return $self->SUPER::union(@_); |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
0
|
|
|
|
|
|
my $ref = $ranges[0]->abs_ref; |
|
1130
|
0
|
|
|
|
|
|
my ($low,$high); |
|
1131
|
0
|
|
|
|
|
|
foreach (@ranges) { |
|
1132
|
0
|
0
|
|
|
|
|
return unless $_->can('abs_ref'); |
|
1133
|
0
|
0
|
|
|
|
|
$ref eq $_->abs_ref or return; |
|
1134
|
0
|
0
|
0
|
|
|
|
$low = $_->abs_low if !defined($low) or $low > $_->abs_low; |
|
1135
|
0
|
0
|
0
|
|
|
|
$high = $_->abs_high if !defined($high) or $high < $_->abs_high; |
|
1136
|
|
|
|
|
|
|
} |
|
1137
|
0
|
|
|
|
|
|
$self->new(-factory=> $self->factory, |
|
1138
|
|
|
|
|
|
|
-seq => $ref, |
|
1139
|
|
|
|
|
|
|
-start => $low, |
|
1140
|
|
|
|
|
|
|
-stop => $high); |
|
1141
|
|
|
|
|
|
|
} |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
0
|
|
|
0
|
0
|
|
sub version { 0 } |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
1; |
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
__END__ |