line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
See L. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 DESCRIPTION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Bio::DB::GFF::RelSegment is a stretch of sequence that can handle |
12
|
|
|
|
|
|
|
relative coordinate addressing. It inherits from |
13
|
|
|
|
|
|
|
Bio::DB::GFF::Segment, and is the base class for |
14
|
|
|
|
|
|
|
Bio::DB::GFF::Feature. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
In addition to the source sequence, a relative segment has a |
17
|
|
|
|
|
|
|
"reference sequence", which is used as the basis for its coordinate |
18
|
|
|
|
|
|
|
system. The reference sequence can be changed at will, allowing you |
19
|
|
|
|
|
|
|
freedom to change the "frame of reference" for features contained |
20
|
|
|
|
|
|
|
within the segment. For example, by setting a segment's reference |
21
|
|
|
|
|
|
|
sequence to the beginning of a gene, you can view all other features |
22
|
|
|
|
|
|
|
in gene-relative coordinates. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
The reference sequence and the source sequence must be on the same |
25
|
|
|
|
|
|
|
physical stretch of DNA, naturally. However, they do not have to be |
26
|
|
|
|
|
|
|
on the same strand. The strandedness of the reference sequence |
27
|
|
|
|
|
|
|
determines whether coordinates increase to the right or the left. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment |
30
|
|
|
|
|
|
|
objects directly, but use those that are returned by the Bio::DB::GFF |
31
|
|
|
|
|
|
|
module. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 An Example |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
To understand how relative coordinates work, consider the following |
36
|
|
|
|
|
|
|
example from the C. elegans database. First we create the appropriate |
37
|
|
|
|
|
|
|
GFF accessor object (the factory): |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans', |
40
|
|
|
|
|
|
|
-adaptor=>'dbi:mysqlopt'); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Now we fetch out a segment based on cosmid clone ZK909: |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $seg = $db->segment('ZK909'); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
If we call the segment's refseq() method, we see that the base of the |
47
|
|
|
|
|
|
|
coordinate system is the sequence "ZK154", and that its start and |
48
|
|
|
|
|
|
|
stop positions are 1 and the length of the cosmid: |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
print $seg->refseq; |
51
|
|
|
|
|
|
|
=> ZK909 |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
print $seg->start,' - ',$seg->stop; |
54
|
|
|
|
|
|
|
=> 1 - 33782 |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
As a convenience, the "" operator is overloaded in this class, to give |
57
|
|
|
|
|
|
|
the reference sequence, and start and stop positions: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
print $seg; |
60
|
|
|
|
|
|
|
=> ZK909:1,33782 |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Internally, Bio::DB::GFF::RelSegment has looked up the absolute |
63
|
|
|
|
|
|
|
coordinates of this segment and maintains the source sequence and the |
64
|
|
|
|
|
|
|
absolute coordinates relative to the source sequence. We can see this |
65
|
|
|
|
|
|
|
information using sourceseq() (inherited from Bio::DB::GFF::Segment) |
66
|
|
|
|
|
|
|
and the abs_start() and abs_end() methods: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
print $seg->sourceseq; |
69
|
|
|
|
|
|
|
=> CHROMOSOME_I |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
print $seg->abs_start,' - ',$seg->abs_end; |
72
|
|
|
|
|
|
|
=> 14839545 - 14873326 |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
We can also put the segment into absolute mode, so that it behaves |
75
|
|
|
|
|
|
|
like Bio::DB::Segment, and always represents coordinates on the source |
76
|
|
|
|
|
|
|
sequence. This is done by passing a true value to the absolute() |
77
|
|
|
|
|
|
|
method: |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
$seq->absolute(1); |
80
|
|
|
|
|
|
|
print $seg; |
81
|
|
|
|
|
|
|
=> CHROMOSOME_I:14839545,14873326 |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
We can change the reference sequence at any time. One way is to call |
84
|
|
|
|
|
|
|
the segment's ref() method, giving it the ID (and optionally the |
85
|
|
|
|
|
|
|
class) of another landmark on the genome. For example, if we know |
86
|
|
|
|
|
|
|
that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in |
87
|
|
|
|
|
|
|
ZK337-relative coordinates: |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
$seg->refseq('ZK337'); |
90
|
|
|
|
|
|
|
print $seg; |
91
|
|
|
|
|
|
|
=> ZK337:-33670,111 |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
We can call the segment's features() method in order to get the list |
94
|
|
|
|
|
|
|
of contigs that overlap this segment (in the C. elegans database, |
95
|
|
|
|
|
|
|
contigs have feature type "Sequence:Link"): |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
@links = $seg->features('Sequence:Link'); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
We can now set the reference sequence to the first of these contigs like so: |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$seg->refseq($links[0]); |
102
|
|
|
|
|
|
|
print $seg; |
103
|
|
|
|
|
|
|
=> Sequence:Link(LINK_Y95D11A):3997326,4031107 |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
package Bio::DB::GFF::RelSegment; |
108
|
|
|
|
|
|
|
|
109
|
3
|
|
|
3
|
|
6
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
249
|
|
110
|
|
|
|
|
|
|
|
111
|
3
|
|
|
3
|
|
1020
|
use Bio::DB::GFF::Feature; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
72
|
|
112
|
3
|
|
|
3
|
|
15
|
use Bio::DB::GFF::Util::Rearrange; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
117
|
|
113
|
3
|
|
|
3
|
|
9
|
use Bio::RangeI; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
48
|
|
114
|
|
|
|
|
|
|
|
115
|
3
|
|
|
3
|
|
9
|
use base qw(Bio::DB::GFF::Segment); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
294
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
use overload '""' => 'asString', |
118
|
1749
|
|
|
1749
|
|
6406
|
'bool' => sub { overload::StrVal(shift) }, |
119
|
3
|
|
|
3
|
|
12
|
fallback=>1; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
21
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 API |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The remainder of this document describes the API for |
124
|
|
|
|
|
|
|
Bio::DB::GFF::Segment. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 new |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Title : new |
131
|
|
|
|
|
|
|
Usage : $s = Bio::DB::GFF::RelSegment->new(@args) |
132
|
|
|
|
|
|
|
Function: create a new relative segment |
133
|
|
|
|
|
|
|
Returns : a new Bio::DB::GFF::RelSegment object |
134
|
|
|
|
|
|
|
Args : see below |
135
|
|
|
|
|
|
|
Status : Public |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
This method creates a new Bio::DB::GFF::RelSegment object. Generally |
138
|
|
|
|
|
|
|
this is called automatically by the Bio::DB::GFF module and |
139
|
|
|
|
|
|
|
derivatives. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
This function uses a named-argument style: |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
-factory a Bio::DB::GFF::Adaptor to use for database access |
144
|
|
|
|
|
|
|
-seq ID of the source sequence |
145
|
|
|
|
|
|
|
-class class of the source sequence |
146
|
|
|
|
|
|
|
-start start of the desired segment relative to source sequence |
147
|
|
|
|
|
|
|
-stop stop of the desired segment relative to source sequence |
148
|
|
|
|
|
|
|
-ref ID of the reference sequence |
149
|
|
|
|
|
|
|
-refclass class of the reference sequence |
150
|
|
|
|
|
|
|
-offset 0-based offset from source sequence to start of segment |
151
|
|
|
|
|
|
|
-length length of desired segment |
152
|
|
|
|
|
|
|
-absolute, -force_absolute |
153
|
|
|
|
|
|
|
use absolute coordinates, rather than coordinates relative |
154
|
|
|
|
|
|
|
to the start of self or the reference sequence |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The -seq argument accepts the ID of any landmark in the database. The |
157
|
|
|
|
|
|
|
stored source sequence becomes whatever the GFF file indicates is the |
158
|
|
|
|
|
|
|
proper sequence for this landmark. A class of "Sequence" is assumed |
159
|
|
|
|
|
|
|
unless otherwise specified in the -class argument. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
If the argument to -seq is a Bio::GFF::Featname object (such as |
162
|
|
|
|
|
|
|
returned by the group() method), then the class is taken from that. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
The optional -start and -stop arguments specify the end points for the |
165
|
|
|
|
|
|
|
retrieved segment. For those who do not like 1-based indexing, |
166
|
|
|
|
|
|
|
-offset and -length are provided. If both -start/-stop and |
167
|
|
|
|
|
|
|
-offset/-length are provided, the latter overrides the former. |
168
|
|
|
|
|
|
|
Generally it is not a good idea to mix metaphors. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
-ref and -refclass together indicate a sequence to be used for |
171
|
|
|
|
|
|
|
relative coordinates. If not provided, the source sequence indicated |
172
|
|
|
|
|
|
|
by -seq is used as the reference sequence. If the argument to -ref is |
173
|
|
|
|
|
|
|
a Bio::GFF::Featname object (such as returned by the group() method), |
174
|
|
|
|
|
|
|
then the class is taken from that. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
-force_absolute should be used if you wish to skip the lookup of the |
177
|
|
|
|
|
|
|
absolute position of the source sequence that ordinarily occurs when |
178
|
|
|
|
|
|
|
you create a relative segment. In this case, the source sequence must |
179
|
|
|
|
|
|
|
be a sequence that has been specified as the "source" in the GFF file. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=cut |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Create a new Bio::DB::GFF::RelSegment Object |
184
|
|
|
|
|
|
|
# arguments are: |
185
|
|
|
|
|
|
|
# -factory => factory and DBI interface |
186
|
|
|
|
|
|
|
# -seq => $sequence_name |
187
|
|
|
|
|
|
|
# -start => $start_relative_to_sequence |
188
|
|
|
|
|
|
|
# -stop => $stop_relative_to_sequence |
189
|
|
|
|
|
|
|
# -ref => $sequence which establishes coordinate system |
190
|
|
|
|
|
|
|
# -offset => 0-based offset relative to sequence |
191
|
|
|
|
|
|
|
# -length => length of segment |
192
|
|
|
|
|
|
|
# -nocheck => turn off checking, force segment to be constructed |
193
|
|
|
|
|
|
|
# -absolute => use absolute coordinate addressing |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub new { |
196
|
203
|
|
|
203
|
1
|
177
|
my $package = shift; |
197
|
203
|
|
|
|
|
1207
|
my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) = |
198
|
|
|
|
|
|
|
rearrange([ |
199
|
|
|
|
|
|
|
'FACTORY', |
200
|
|
|
|
|
|
|
[qw(NAME SEQ SEQUENCE SOURCESEQ)], |
201
|
|
|
|
|
|
|
[qw(START BEGIN)], |
202
|
|
|
|
|
|
|
[qw(STOP END)], |
203
|
|
|
|
|
|
|
[qw(REFSEQ REF REFNAME)], |
204
|
|
|
|
|
|
|
[qw(CLASS SEQCLASS)], |
205
|
|
|
|
|
|
|
qw(REFCLASS), |
206
|
|
|
|
|
|
|
[qw(OFFSET OFF)], |
207
|
|
|
|
|
|
|
[qw(LENGTH LEN)], |
208
|
|
|
|
|
|
|
[qw(ABSOLUTE)], |
209
|
|
|
|
|
|
|
[qw(NOCHECK FORCE)], |
210
|
|
|
|
|
|
|
],@_); |
211
|
|
|
|
|
|
|
|
212
|
203
|
50
|
|
|
|
682
|
$package = ref $package if ref $package; |
213
|
203
|
50
|
|
|
|
324
|
$factory or $package->throw("new(): provide a -factory argument"); |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# to allow people to use segments as sources |
216
|
203
|
100
|
66
|
|
|
481
|
if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) { |
217
|
35
|
100
|
|
|
|
106
|
$start = 1 unless defined $start; |
218
|
35
|
100
|
|
|
|
66
|
$stop = $name->length unless defined $stop; |
219
|
35
|
|
|
|
|
91
|
return $name->subseq($start,$stop); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
168
|
|
|
|
|
119
|
my @object_results; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# support for Featname objects |
225
|
168
|
50
|
33
|
|
|
271
|
if (ref($name) && $name->can('class')) { |
226
|
0
|
|
|
|
|
0
|
$class = $name->class; |
227
|
0
|
|
|
|
|
0
|
$name = $name->name; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# if the class of the landmark is not specified then default to 'Sequence' |
231
|
168
|
|
50
|
|
|
299
|
$class ||= eval{$factory->default_class} || 'Sequence'; |
|
|
|
66
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# confirm that indicated sequence is actually in the database! |
234
|
168
|
|
|
|
|
121
|
my @abscoords; |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# abscoords() will now return an array ref, each element of which is |
237
|
|
|
|
|
|
|
# ($absref,$absclass,$absstart,$absstop,$absstrand) |
238
|
|
|
|
|
|
|
|
239
|
168
|
50
|
|
|
|
230
|
if ($nocheck) { |
240
|
0
|
|
|
|
|
0
|
$force_absolute++; |
241
|
0
|
|
|
|
|
0
|
$start = 1; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# if ($force_absolute && defined($start)) { # absolute position is given to us |
245
|
|
|
|
|
|
|
# @abscoords = ([$name,$class,$start,$stop,'+']); |
246
|
|
|
|
|
|
|
# } else { |
247
|
168
|
50
|
|
|
|
467
|
my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return; |
|
|
100
|
|
|
|
|
|
248
|
160
|
|
|
|
|
199
|
@abscoords = @$result; |
249
|
|
|
|
|
|
|
# } |
250
|
|
|
|
|
|
|
|
251
|
160
|
|
|
|
|
205
|
foreach (@abscoords) { |
252
|
160
|
|
|
|
|
349
|
my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_; |
253
|
160
|
50
|
|
|
|
298
|
$sname = $name unless defined $sname; |
254
|
160
|
|
|
|
|
158
|
my ($this_start,$this_stop,$this_length) = ($start,$stop,$length); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# partially fill in object |
257
|
160
|
|
|
|
|
297
|
my $self = bless { factory => $factory },$package; |
258
|
|
|
|
|
|
|
|
259
|
160
|
|
100
|
|
|
341
|
$absstrand ||= '+'; |
260
|
|
|
|
|
|
|
|
261
|
160
|
50
|
|
|
|
312
|
if ($absstart > $absstop) { # AAARGH! DATA FORMAT ERROR! FIX. |
262
|
0
|
|
|
|
|
0
|
($absstart,$absstop) = ($absstop,$absstart); |
263
|
0
|
0
|
|
|
|
0
|
$absstrand = $absstrand eq '+' ? '-' : '+'; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# an explicit length overrides start and stop |
267
|
160
|
50
|
|
|
|
199
|
if (defined $offset) { |
268
|
0
|
0
|
|
|
|
0
|
warn "new(): bad idea to call new() with both a start and an offset" |
269
|
|
|
|
|
|
|
if defined $this_start; |
270
|
0
|
|
|
|
|
0
|
$this_start = $offset+1; |
271
|
|
|
|
|
|
|
} |
272
|
160
|
50
|
|
|
|
243
|
if (defined $this_length) { |
273
|
0
|
0
|
|
|
|
0
|
warn "new(): bad idea to call new() with both a stop and a length" |
274
|
|
|
|
|
|
|
if defined $this_stop; |
275
|
0
|
|
|
|
|
0
|
$this_stop = $this_start + $length - 1; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# this allows a SQL optimization way down deep |
279
|
160
|
100
|
100
|
|
|
867
|
$self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop); |
|
|
|
66
|
|
|
|
|
280
|
|
|
|
|
|
|
|
281
|
160
|
100
|
|
|
|
238
|
$this_start = 1 if !defined $this_start; |
282
|
160
|
100
|
|
|
|
254
|
$this_stop = $absstop-$absstart+1 if !defined $this_stop; |
283
|
160
|
|
|
|
|
159
|
$this_length = $this_stop - $this_start + 1; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# now offset to correct subsegment based on desired start and stop |
286
|
160
|
50
|
|
|
|
292
|
if ($force_absolute) { |
|
|
100
|
|
|
|
|
|
287
|
|
|
|
|
|
|
# ($this_start,$this_stop) = ($absstart,$absstop); |
288
|
0
|
|
|
|
|
0
|
$self->absolute(1); |
289
|
|
|
|
|
|
|
} elsif ($absstrand eq '+') { |
290
|
140
|
|
|
|
|
112
|
$this_start = $absstart + $this_start - 1; |
291
|
140
|
|
|
|
|
130
|
$this_stop = $this_start + $this_length - 1; |
292
|
|
|
|
|
|
|
} else { |
293
|
20
|
|
|
|
|
30
|
$this_start = $absstop - ($this_start - 1); |
294
|
20
|
|
|
|
|
25
|
$this_stop = $absstop - ($this_stop - 1); |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# handle truncation in either direction |
298
|
|
|
|
|
|
|
# This only happens if the segment runs off the end of |
299
|
|
|
|
|
|
|
# the reference sequence |
300
|
160
|
100
|
66
|
|
|
337
|
if ($factory->strict_bounds_checking && |
|
|
|
66
|
|
|
|
|
301
|
|
|
|
|
|
|
(($this_start < $absstart) || ($this_stop > $absstop))) { |
302
|
|
|
|
|
|
|
# return empty if we are completely off the end of the ref se |
303
|
5
|
50
|
33
|
|
|
29
|
next unless $this_start<=$absstop && $this_stop>=$absstart; |
304
|
5
|
50
|
|
|
|
10
|
if (my $a = $factory->abscoords($absref,'Sequence')) { |
305
|
5
|
|
|
|
|
8
|
my $refstart = $a->[0][2]; |
306
|
5
|
|
|
|
|
7
|
my $refstop = $a->[0][3]; |
307
|
5
|
50
|
|
|
|
15
|
if ($this_start < $refstart) { |
308
|
0
|
|
|
|
|
0
|
$this_start = $refstart; |
309
|
0
|
|
|
|
|
0
|
$self->{truncated}{start}++; |
310
|
|
|
|
|
|
|
} |
311
|
5
|
50
|
|
|
|
13
|
if ($this_stop > $refstop) { |
312
|
5
|
|
|
|
|
8
|
$this_stop = $absstop; |
313
|
5
|
|
|
|
|
19
|
$self->{truncated}{stop}++; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
160
|
|
|
|
|
178
|
@{$self}{qw(sourceseq start stop strand class)} |
|
160
|
|
|
|
|
498
|
|
319
|
|
|
|
|
|
|
= ($absref,$this_start,$this_stop,$absstrand,$absclass); |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# handle reference sequence |
322
|
160
|
50
|
|
|
|
252
|
if (defined $refseq) { |
323
|
0
|
0
|
|
|
|
0
|
$refclass = $refseq->class if $refseq->can('class'); |
324
|
0
|
|
0
|
|
|
0
|
$refclass ||= 'Sequence'; |
325
|
0
|
|
|
|
|
0
|
my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass); |
326
|
0
|
0
|
|
|
|
0
|
unless ($refref eq $absref) { |
327
|
0
|
|
|
|
|
0
|
$self->error("reference sequence is on $refref but source sequence is on $absref"); |
328
|
0
|
|
|
|
|
0
|
return; |
329
|
|
|
|
|
|
|
} |
330
|
0
|
0
|
|
|
|
0
|
$refstart = $refstop if $refstrand eq '-'; |
331
|
0
|
|
|
|
|
0
|
@{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand); |
|
0
|
|
|
|
|
0
|
|
332
|
|
|
|
|
|
|
} else { |
333
|
160
|
100
|
|
|
|
211
|
$absstart = $absstop if $absstrand eq '-'; |
334
|
160
|
|
|
|
|
166
|
@{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand); |
|
160
|
|
|
|
|
415
|
|
335
|
|
|
|
|
|
|
} |
336
|
160
|
|
|
|
|
302
|
push @object_results,$self; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
160
|
50
|
|
|
|
559
|
return wantarray ? @object_results : $object_results[0]; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# overridden methods |
343
|
|
|
|
|
|
|
# start, stop, length |
344
|
|
|
|
|
|
|
sub start { |
345
|
1591
|
|
|
1591
|
1
|
5329
|
my $self = shift; |
346
|
1591
|
50
|
|
|
|
1674
|
return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute; |
|
|
100
|
|
|
|
|
|
347
|
1586
|
|
|
|
|
1979
|
$self->_abs2rel($self->{start}); |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
sub end { |
350
|
697
|
|
|
697
|
1
|
562
|
my $self = shift; |
351
|
697
|
0
|
|
|
|
717
|
return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute; |
|
|
50
|
|
|
|
|
|
352
|
697
|
|
|
|
|
859
|
$self->_abs2rel($self->{stop}); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
*stop = \&end; |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub length { |
357
|
35
|
|
|
35
|
1
|
3605
|
my $self = shift; |
358
|
35
|
50
|
|
|
|
96
|
return unless defined $self->abs_end; |
359
|
35
|
|
|
|
|
50
|
abs($self->abs_end - $self->abs_start) + 1; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
sub abs_start { |
363
|
120
|
|
|
120
|
1
|
107
|
my $self = shift; |
364
|
120
|
50
|
|
|
|
151
|
if ($self->absolute) { |
365
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); |
366
|
0
|
0
|
|
|
|
0
|
return ($a<$b) ? $a : $b; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
else { |
369
|
120
|
|
|
|
|
216
|
return $self->SUPER::abs_start(@_); |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
sub abs_end { |
373
|
140
|
|
|
140
|
1
|
102
|
my $self = shift; |
374
|
140
|
50
|
|
|
|
195
|
if ($self->absolute) { |
375
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end); |
376
|
0
|
0
|
|
|
|
0
|
return ($a>$b) ? $a : $b; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
else { |
380
|
140
|
|
|
|
|
262
|
return $self->SUPER::abs_end(@_); |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
*abs_stop = \&abs_end; |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 refseq |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Title : refseq |
389
|
|
|
|
|
|
|
Usage : $ref = $s->refseq([$newseq] [,$newseqclass]) |
390
|
|
|
|
|
|
|
Function: get/set reference sequence |
391
|
|
|
|
|
|
|
Returns : current reference sequence |
392
|
|
|
|
|
|
|
Args : new reference sequence and class (optional) |
393
|
|
|
|
|
|
|
Status : Public |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
This method will get or set the reference sequence. Called with no |
396
|
|
|
|
|
|
|
arguments, it returns the current reference sequence. Called with |
397
|
|
|
|
|
|
|
either a sequence ID and class, a Bio::DB::GFF::Segment object (or |
398
|
|
|
|
|
|
|
subclass) or a Bio::DB::GFF::Featname object, it will set the current |
399
|
|
|
|
|
|
|
reference sequence and return the previous one. |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
The method will generate an exception if you attempt to set the |
402
|
|
|
|
|
|
|
reference sequence to a sequence that isn't contained in the database, |
403
|
|
|
|
|
|
|
or one that has a different source sequence from the segment. |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=cut |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#' |
408
|
|
|
|
|
|
|
sub refseq { |
409
|
372
|
|
|
372
|
1
|
284
|
my $self = shift; |
410
|
372
|
|
|
|
|
394
|
my $g = $self->{ref}; |
411
|
372
|
100
|
|
|
|
490
|
if (@_) { |
412
|
50
|
|
|
|
|
85
|
my ($newref,$newclass); |
413
|
50
|
100
|
|
|
|
78
|
if (@_ == 2) { |
414
|
10
|
|
|
|
|
15
|
$newclass = shift; |
415
|
10
|
|
|
|
|
11
|
$newref = shift; |
416
|
|
|
|
|
|
|
} else { |
417
|
40
|
|
|
|
|
36
|
$newref = shift; |
418
|
40
|
|
|
|
|
42
|
$newclass = 'Sequence'; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
50
|
50
|
|
|
|
81
|
defined $newref or $self->throw('refseq() called with an undef reference sequence'); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# support for Featname objects |
424
|
50
|
100
|
66
|
|
|
287
|
$newclass = $newref->class if ref($newref) && $newref->can('class'); |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# $self->throw("Cannot define a segment's reference sequence in terms of itself!") |
427
|
|
|
|
|
|
|
# if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self); |
428
|
|
|
|
|
|
|
|
429
|
50
|
|
|
|
|
51
|
my ($refsource,undef,$refstart,$refstop,$refstrand); |
430
|
50
|
100
|
|
|
|
183
|
if ($newref->isa('Bio::DB::GFF::RelSegment')) { |
431
|
30
|
100
|
|
|
|
51
|
($refsource,undef,$refstart,$refstop,$refstrand) = |
432
|
|
|
|
|
|
|
($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-'); |
433
|
|
|
|
|
|
|
} else { |
434
|
20
|
|
|
|
|
43
|
my $coords = $self->factory->abscoords($newref,$newclass); |
435
|
20
|
|
|
|
|
32
|
foreach (@$coords) { # find the appropriate one |
436
|
20
|
|
|
|
|
43
|
($refsource,undef,$refstart,$refstop,$refstrand) = @$_; |
437
|
20
|
100
|
|
|
|
68
|
last if $refsource eq $self->{sourceseq}; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
$self->throw("can't set reference sequence: $newref and $self are on different sequence segments") |
442
|
50
|
100
|
|
|
|
137
|
unless $refsource eq $self->{sourceseq}; |
443
|
|
|
|
|
|
|
|
444
|
45
|
|
|
|
|
50
|
@{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand); |
|
45
|
|
|
|
|
103
|
|
445
|
45
|
|
|
|
|
74
|
$self->absolute(0); |
446
|
|
|
|
|
|
|
} |
447
|
367
|
100
|
|
|
|
479
|
return $self->absolute ? $self->sourceseq : $g; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head2 abs_low |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Title : abs_low |
454
|
|
|
|
|
|
|
Usage : $s->abs_low |
455
|
|
|
|
|
|
|
Function: the absolute lowest coordinate of the segment |
456
|
|
|
|
|
|
|
Returns : an integer |
457
|
|
|
|
|
|
|
Args : none |
458
|
|
|
|
|
|
|
Status : Public |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
This is for GadFly compatibility, and returns the low coordinate in |
461
|
|
|
|
|
|
|
absolute coordinates; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=cut |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub abs_low { |
466
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
467
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->abs_start,$self->abs_end); |
468
|
0
|
0
|
|
|
|
0
|
return ($a<$b) ? $a : $b; |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head2 abs_high |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
Title : abs_high |
474
|
|
|
|
|
|
|
Usage : $s->abs_high |
475
|
|
|
|
|
|
|
Function: the absolute highest coordinate of the segment |
476
|
|
|
|
|
|
|
Returns : an integer |
477
|
|
|
|
|
|
|
Args : none |
478
|
|
|
|
|
|
|
Status : Public |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
This is for GadFly compatibility, and returns the high coordinate in |
481
|
|
|
|
|
|
|
absolute coordinates; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=cut |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
sub abs_high { |
486
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
487
|
0
|
|
|
|
|
0
|
my ($a,$b) = ($self->abs_start,$self->abs_end); |
488
|
0
|
0
|
|
|
|
0
|
return ($a>$b) ? $a : $b; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 asString |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Title : asString |
495
|
|
|
|
|
|
|
Usage : $s->asString |
496
|
|
|
|
|
|
|
Function: human-readable representation of the segment |
497
|
|
|
|
|
|
|
Returns : a string |
498
|
|
|
|
|
|
|
Args : none |
499
|
|
|
|
|
|
|
Status : Public |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
This method will return a human-readable representation of the |
502
|
|
|
|
|
|
|
segment. It is the overloaded method call for the "" operator. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Currently the format is: |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
refseq:start,stop |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=cut |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub asString { |
511
|
10
|
|
|
10
|
1
|
10
|
my $self = shift; |
512
|
10
|
50
|
|
|
|
18
|
return $self->SUPER::asString if $self->absolute; |
513
|
10
|
|
|
|
|
13
|
my $label = $self->{ref}; |
514
|
10
|
|
50
|
|
|
18
|
my $start = $self->start || ''; |
515
|
10
|
|
50
|
|
|
24
|
my $stop = $self->stop || ''; |
516
|
10
|
100
|
66
|
|
|
62
|
if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) { |
517
|
5
|
|
|
|
|
42
|
$label = $self->abs_ref; |
518
|
5
|
|
|
|
|
10
|
$start = $self->abs_start; |
519
|
5
|
|
|
|
|
15
|
$stop = $self->abs_end; |
520
|
|
|
|
|
|
|
} |
521
|
10
|
|
|
|
|
85
|
return "$label:$start,$stop"; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=head2 name |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Title : name |
527
|
|
|
|
|
|
|
Usage : Alias for asString() |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=cut |
530
|
|
|
|
|
|
|
|
531
|
0
|
|
|
0
|
1
|
0
|
sub name { shift->asString } |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head2 absolute |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Title : absolute |
536
|
|
|
|
|
|
|
Usage : $abs = $s->absolute([$abs]) |
537
|
|
|
|
|
|
|
Function: get/set absolute coordinates |
538
|
|
|
|
|
|
|
Returns : a boolean flag |
539
|
|
|
|
|
|
|
Args : new setting for flag (optional) |
540
|
|
|
|
|
|
|
Status : Public |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Called with a boolean flag, this method controls whether to display |
543
|
|
|
|
|
|
|
relative coordinates (relative to the reference sequence) or absolute |
544
|
|
|
|
|
|
|
coordinates (relative to the source sequence). It will return the |
545
|
|
|
|
|
|
|
previous value of the setting. |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=cut |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub absolute { |
550
|
6467
|
|
|
6467
|
1
|
4033
|
my $self = shift; |
551
|
6467
|
|
|
|
|
4347
|
my $g = $self->{absolute}; |
552
|
6467
|
100
|
|
|
|
7222
|
$self->{absolute} = shift if @_; |
553
|
6467
|
|
|
|
|
7751
|
$g; |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=head2 features |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Title : features |
559
|
|
|
|
|
|
|
Usage : @features = $s->features(@args) |
560
|
|
|
|
|
|
|
Function: get features that overlap this segment |
561
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
562
|
|
|
|
|
|
|
Args : see below |
563
|
|
|
|
|
|
|
Status : Public |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
This method will find all features that overlap the segment and return |
566
|
|
|
|
|
|
|
a list of Bio::DB::GFF::Feature objects. The features will use |
567
|
|
|
|
|
|
|
coordinates relative to the reference sequence in effect at the time |
568
|
|
|
|
|
|
|
that features() was called. |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
The returned list can be limited to certain types of feature by |
571
|
|
|
|
|
|
|
filtering on their method and/or source. In addition, it is possible |
572
|
|
|
|
|
|
|
to obtain an iterator that will step through a large number of |
573
|
|
|
|
|
|
|
features sequentially. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Arguments can be provided positionally or using the named arguments |
576
|
|
|
|
|
|
|
format. In the former case, the arguments are a list of feature types |
577
|
|
|
|
|
|
|
in the format "method:source". Either method or source can be |
578
|
|
|
|
|
|
|
omitted, in which case the missing component is treated as a wildcard. |
579
|
|
|
|
|
|
|
If no colon is present, then the type is treated as a method name. |
580
|
|
|
|
|
|
|
Multiple arguments are ORed together. |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Examples: |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
@f = $s->features('exon:curated'); # all curated exons |
585
|
|
|
|
|
|
|
@f = $s->features('exon:curated','intron'); # curated exons and all introns |
586
|
|
|
|
|
|
|
@f = $s->features('similarity:.*EST.*'); # all similarities |
587
|
|
|
|
|
|
|
# having something to do |
588
|
|
|
|
|
|
|
# with ESTs |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
The named parameter form gives you control over a few options: |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
-types an array reference to type names in the format |
593
|
|
|
|
|
|
|
"method:source" |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
-merge Whether to apply aggregators to the generated features (default yes) |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
-rare Turn on an optimization suitable for a relatively rare feature type, |
598
|
|
|
|
|
|
|
where it will be faster to filter by feature type first |
599
|
|
|
|
|
|
|
and then by position, rather than vice versa. |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
-attributes a hashref containing a set of attributes to match |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
-range_type One of 'overlapping', 'contains', or 'contained_in' |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
-iterator Whether to return an iterator across the features. |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
-binsize A true value will create a set of artificial features whose |
608
|
|
|
|
|
|
|
start and stop positions indicate bins of the given size, and |
609
|
|
|
|
|
|
|
whose scores are the number of features in the bin. The |
610
|
|
|
|
|
|
|
class and method of the feature will be set to "bin", |
611
|
|
|
|
|
|
|
its source to "method:source", and its group to "bin:method:source". |
612
|
|
|
|
|
|
|
This is a handy way of generating histograms of feature density. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
-merge is a boolean flag that controls whether the adaptor's |
615
|
|
|
|
|
|
|
aggregators wll be applied to the features returned by this method. |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
If -iterator is true, then the method returns a single scalar value |
618
|
|
|
|
|
|
|
consisting of a Bio::SeqIO object. You can call next_seq() repeatedly |
619
|
|
|
|
|
|
|
on this object to fetch each of the features in turn. If iterator is |
620
|
|
|
|
|
|
|
false or absent, then all the features are returned as a list. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
The -attributes argument is a hashref containing one or more |
623
|
|
|
|
|
|
|
attributes to match against: |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
-attributes => { Gene => 'abc-1', |
626
|
|
|
|
|
|
|
Note => 'confirmed' } |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Attribute matching is simple string matching, and multiple attributes |
629
|
|
|
|
|
|
|
are ANDed together. |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=cut |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#' |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# return all features that overlap with this segment; |
636
|
|
|
|
|
|
|
# optionally modified by a list of types to filter on |
637
|
|
|
|
|
|
|
sub features { |
638
|
85
|
|
|
85
|
1
|
3725
|
my $self = shift; |
639
|
85
|
|
|
|
|
195
|
my @args = $self->_process_feature_args(@_); |
640
|
85
|
|
|
|
|
234
|
return $self->factory->overlapping_features(@args); |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=head2 get_SeqFeatures |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Title : get_SeqFeatures |
646
|
|
|
|
|
|
|
Usage : |
647
|
|
|
|
|
|
|
Function: returns the top level sequence features |
648
|
|
|
|
|
|
|
Returns : L objects |
649
|
|
|
|
|
|
|
Args : none |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Segments do not ordinarily return any subfeatures. |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=cut |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# A SEGMENT DOES NOT HAVE SUBFEATURES! |
656
|
0
|
|
|
0
|
1
|
0
|
sub get_SeqFeatures { return } |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head2 feature_count |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Title : feature_count |
661
|
|
|
|
|
|
|
Usage : $seq->feature_count() |
662
|
|
|
|
|
|
|
Function: Return the number of SeqFeatures attached to a sequence |
663
|
|
|
|
|
|
|
Returns : integer representing the number of SeqFeatures |
664
|
|
|
|
|
|
|
Args : none |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
This method comes through extension of Bio::FeatureHolderI. See |
667
|
|
|
|
|
|
|
L for more information. |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub feature_count { |
672
|
5
|
|
|
5
|
1
|
2106
|
my $self = shift; |
673
|
5
|
|
|
|
|
10
|
my $ct = 0; |
674
|
5
|
|
|
|
|
25
|
my %type_counts = $self->types(-enumerate=>1); |
675
|
5
|
|
|
|
|
17
|
map { $ct += $_ } values %type_counts; |
|
30
|
|
|
|
|
28
|
|
676
|
5
|
|
|
|
|
18
|
$ct; |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head2 get_feature_stream |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Title : features |
682
|
|
|
|
|
|
|
Usage : $stream = $s->get_feature_stream(@args) |
683
|
|
|
|
|
|
|
Function: get a stream of features that overlap this segment |
684
|
|
|
|
|
|
|
Returns : a Bio::SeqIO::Stream-compliant stream |
685
|
|
|
|
|
|
|
Args : see below |
686
|
|
|
|
|
|
|
Status : Public |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
This is the same as features(), but returns a stream. Use like this: |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
$stream = $s->get_feature_stream('exon'); |
691
|
|
|
|
|
|
|
while (my $exon = $stream->next_seq) { |
692
|
|
|
|
|
|
|
print $exon->start,"\n"; |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=cut |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub get_feature_stream { |
698
|
5
|
|
|
5
|
1
|
2459
|
my $self = shift; |
699
|
5
|
50
|
33
|
|
|
60
|
my @args = defined($_[0]) && $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1); |
700
|
5
|
|
|
|
|
10
|
$self->features(@args); |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=head2 get_seq_stream |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Title : get_seq_stream |
706
|
|
|
|
|
|
|
Usage : $stream = $s->get_seq_stream(@args) |
707
|
|
|
|
|
|
|
Function: get a stream of features that overlap this segment |
708
|
|
|
|
|
|
|
Returns : a Bio::SeqIO::Stream-compliant stream |
709
|
|
|
|
|
|
|
Args : see below |
710
|
|
|
|
|
|
|
Status : Public |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
This is the same as feature_stream(), and is provided for Bioperl |
713
|
|
|
|
|
|
|
compatibility. Use like this: |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
$stream = $s->get_seq_stream('exon'); |
716
|
|
|
|
|
|
|
while (my $exon = $stream->next_seq) { |
717
|
|
|
|
|
|
|
print $exon->start,"\n"; |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
*get_seq_stream = \&get_feature_stream; |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 overlapping_features |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Title : overlapping_features |
728
|
|
|
|
|
|
|
Usage : @features = $s->overlapping_features(@args) |
729
|
|
|
|
|
|
|
Function: get features that overlap this segment |
730
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
731
|
|
|
|
|
|
|
Args : see features() |
732
|
|
|
|
|
|
|
Status : Public |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
This is an alias for the features() method, and takes the same |
735
|
|
|
|
|
|
|
arguments. |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=cut |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
*overlapping_features = \&features; |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head2 contained_features |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Title : contained_features |
744
|
|
|
|
|
|
|
Usage : @features = $s->contained_features(@args) |
745
|
|
|
|
|
|
|
Function: get features that are contained by this segment |
746
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
747
|
|
|
|
|
|
|
Args : see features() |
748
|
|
|
|
|
|
|
Status : Public |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
This is identical in behavior to features() except that it returns |
751
|
|
|
|
|
|
|
only those features that are completely contained within the segment, |
752
|
|
|
|
|
|
|
rather than any that overlap. |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=cut |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# return all features completely contained within this segment |
757
|
|
|
|
|
|
|
sub contained_features { |
758
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
759
|
0
|
|
|
|
|
0
|
local $self->{whole} = 0; |
760
|
0
|
|
|
|
|
0
|
my @args = $self->_process_feature_args(@_); |
761
|
0
|
|
|
|
|
0
|
return $self->factory->contained_features(@args); |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
# *contains = \&contained_features; |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=head2 contained_in |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Title : contained_in |
769
|
|
|
|
|
|
|
Usage : @features = $s->contained_in(@args) |
770
|
|
|
|
|
|
|
Function: get features that contain this segment |
771
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Feature objects |
772
|
|
|
|
|
|
|
Args : see features() |
773
|
|
|
|
|
|
|
Status : Public |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
This is identical in behavior to features() except that it returns |
776
|
|
|
|
|
|
|
only those features that completely contain the segment. |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=cut |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
# return all features completely contained within this segment |
781
|
|
|
|
|
|
|
sub contained_in { |
782
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
783
|
0
|
|
|
|
|
0
|
local $self->{whole} = 0; |
784
|
0
|
|
|
|
|
0
|
my @args = $self->_process_feature_args(@_); |
785
|
0
|
|
|
|
|
0
|
return $self->factory->contained_in(@args); |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=head2 delete |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
Title : delete |
791
|
|
|
|
|
|
|
Usage : $db->delete(@args) |
792
|
|
|
|
|
|
|
Function: delete features |
793
|
|
|
|
|
|
|
Returns : count of features deleted -- if available |
794
|
|
|
|
|
|
|
Args : numerous, see below |
795
|
|
|
|
|
|
|
Status : public |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
This method deletes all features that overlap the specified region or |
798
|
|
|
|
|
|
|
are of a particular type. If no arguments are provided and the -force |
799
|
|
|
|
|
|
|
argument is true, then deletes ALL features. |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Arguments: |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
-type,-types Either a single scalar type to be deleted, or an |
804
|
|
|
|
|
|
|
reference to an array of types. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
-range_type Control the range type of the deletion. One of "overlaps" (default) |
807
|
|
|
|
|
|
|
"contains" or "contained_in" |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
Examples: |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
$segment->delete(-type=>['intron','repeat:repeatMasker']); # remove all introns & repeats |
812
|
|
|
|
|
|
|
$segment->delete(-type=>['intron','repeat:repeatMasker'] |
813
|
|
|
|
|
|
|
-range_type => 'contains'); # remove all introns & repeats |
814
|
|
|
|
|
|
|
# strictly contained in segment |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
IMPORTANT NOTE: This method only deletes features. It does *NOT* |
817
|
|
|
|
|
|
|
delete the names of groups that contain the deleted features. Group |
818
|
|
|
|
|
|
|
IDs will be reused if you later load a feature with the same group |
819
|
|
|
|
|
|
|
name as one that was previously deleted. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the |
822
|
|
|
|
|
|
|
result code from the SQL DELETE operation. Some dbd drivers return the |
823
|
|
|
|
|
|
|
count of rows deleted, while others return 0E0. Caveat emptor. |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=cut |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# return all features completely contained within this segment |
828
|
|
|
|
|
|
|
sub delete { |
829
|
5
|
|
|
5
|
1
|
50
|
my $self = shift; |
830
|
5
|
|
|
|
|
42
|
my ($type,$range_type) = |
831
|
|
|
|
|
|
|
rearrange([[qw(TYPE TYPES)],'RANGE_TYPE'],@_); |
832
|
5
|
|
|
|
|
21
|
my $types = $self->factory->parse_types($type); # parse out list of types |
833
|
5
|
|
50
|
|
|
13
|
$range_type ||= 'overlaps'; |
834
|
5
|
|
|
|
|
10
|
return $self->factory->_delete({ |
835
|
|
|
|
|
|
|
segments => [$self], |
836
|
|
|
|
|
|
|
types => $types, |
837
|
|
|
|
|
|
|
range_type => $range_type |
838
|
|
|
|
|
|
|
}); |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
=head2 _process_feature_args |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
Title : _process_feature_args |
844
|
|
|
|
|
|
|
Usage : @args = $s->_process_feature_args(@args) |
845
|
|
|
|
|
|
|
Function: preprocess arguments passed to features, |
846
|
|
|
|
|
|
|
contained_features, and overlapping_features |
847
|
|
|
|
|
|
|
Returns : a list of parsed arguents |
848
|
|
|
|
|
|
|
Args : see feature() |
849
|
|
|
|
|
|
|
Status : Internal |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
This is an internal method that is used to check and format the |
852
|
|
|
|
|
|
|
arguments to features() before passing them on to the adaptor. |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=cut |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
sub _process_feature_args { |
857
|
85
|
|
|
85
|
|
62
|
my $self = shift; |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
my ($ref,$class,$start,$stop,$strand,$whole) |
860
|
85
|
|
|
|
|
92
|
= @{$self}{qw(sourceseq class start stop strand whole)}; |
|
85
|
|
|
|
|
180
|
|
861
|
|
|
|
|
|
|
|
862
|
85
|
100
|
66
|
|
|
378
|
($start,$stop) = ($stop,$start) if defined $strand && $strand eq '-'; |
863
|
|
|
|
|
|
|
|
864
|
85
|
|
|
|
|
226
|
my @args = (-ref=>$ref,-class=>$class); |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
# indicating that we are fetching the whole segment allows certain |
867
|
|
|
|
|
|
|
# SQL optimizations. |
868
|
85
|
100
|
|
|
|
187
|
push @args,(-start=>$start,-stop=>$stop) unless $whole; |
869
|
|
|
|
|
|
|
|
870
|
85
|
100
|
|
|
|
146
|
if (@_) { |
871
|
70
|
100
|
|
|
|
207
|
if ($_[0] =~ /^-/) { |
872
|
45
|
|
|
|
|
69
|
push @args,@_; |
873
|
|
|
|
|
|
|
} else { |
874
|
25
|
|
|
|
|
37
|
my @types = @_; |
875
|
25
|
|
|
|
|
75
|
push @args,-types=>\@types; |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
} |
878
|
85
|
|
|
|
|
117
|
push @args,-parent=>$self; |
879
|
85
|
|
|
|
|
279
|
@args; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=head2 types |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Title : types |
885
|
|
|
|
|
|
|
Usage : @types = $s->types([-enumerate=>1]) |
886
|
|
|
|
|
|
|
Function: list feature types that overlap this segment |
887
|
|
|
|
|
|
|
Returns : a list of Bio::DB::GFF::Typename objects or a hash |
888
|
|
|
|
|
|
|
Args : see below |
889
|
|
|
|
|
|
|
Status : Public |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
The types() method will return a list of Bio::DB::GFF::Typename |
892
|
|
|
|
|
|
|
objects, each corresponding to a feature that overlaps the segment. |
893
|
|
|
|
|
|
|
If the optional -enumerate parameter is set to a true value, then the |
894
|
|
|
|
|
|
|
method will return a hash in which the keys are the type names and the |
895
|
|
|
|
|
|
|
values are the number of times a feature of that type is present on |
896
|
|
|
|
|
|
|
the segment. For example: |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
%count = $s->types(-enumerate=>1); |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=cut |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
# wrapper for lower-level types() call. |
903
|
|
|
|
|
|
|
sub types { |
904
|
15
|
|
|
15
|
1
|
1367
|
my $self = shift; |
905
|
15
|
|
|
|
|
18
|
my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)}; |
|
15
|
|
|
|
|
35
|
|
906
|
15
|
50
|
|
|
|
40
|
($start,$stop) = ($stop,$start) if $strand eq '-'; |
907
|
|
|
|
|
|
|
|
908
|
15
|
|
|
|
|
18
|
my @args; |
909
|
15
|
50
|
66
|
|
|
92
|
if (@_ && $_[0] !~ /^-/) { |
910
|
0
|
|
|
|
|
0
|
@args = (-type => \@_) |
911
|
|
|
|
|
|
|
} else { |
912
|
15
|
|
|
|
|
32
|
@args = @_; |
913
|
|
|
|
|
|
|
} |
914
|
15
|
|
|
|
|
31
|
$self->factory->types(-ref => $ref, |
915
|
|
|
|
|
|
|
-start=> $start, |
916
|
|
|
|
|
|
|
-stop => $stop, |
917
|
|
|
|
|
|
|
@args); |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head1 Internal Methods |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
The following are internal methods and should not be called directly. |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=head2 new_from_segment |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
Title : new_from_segment |
927
|
|
|
|
|
|
|
Usage : $s = $segment->new_from_segment(@args) |
928
|
|
|
|
|
|
|
Function: create a new relative segment |
929
|
|
|
|
|
|
|
Returns : a new Bio::DB::GFF::RelSegment object |
930
|
|
|
|
|
|
|
Args : see below |
931
|
|
|
|
|
|
|
Status : Internal |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
This constructor is used internally by the subseq() method. It forces |
934
|
|
|
|
|
|
|
the new segment into the Bio::DB::GFF::RelSegment package, regardless |
935
|
|
|
|
|
|
|
of the package that it is called from. This causes subclass-specfic |
936
|
|
|
|
|
|
|
information, such as feature types, to be dropped when a subsequence |
937
|
|
|
|
|
|
|
is created. |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=cut |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
sub new_from_segment { |
942
|
35
|
|
|
35
|
1
|
37
|
my $package = shift; |
943
|
35
|
50
|
|
|
|
69
|
$package = ref $package if ref $package; |
944
|
35
|
|
|
|
|
34
|
my $segment = shift; |
945
|
35
|
|
|
|
|
39
|
my $new = {}; |
946
|
35
|
|
|
|
|
163
|
@{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)} |
947
|
35
|
|
|
|
|
31
|
= @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)}; |
|
35
|
|
|
|
|
66
|
|
948
|
35
|
|
|
|
|
76
|
return bless $new,__PACKAGE__; |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=head2 _abs2rel |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
Title : _abs2rel |
954
|
|
|
|
|
|
|
Usage : @coords = $s->_abs2rel(@coords) |
955
|
|
|
|
|
|
|
Function: convert absolute coordinates into relative coordinates |
956
|
|
|
|
|
|
|
Returns : a list of relative coordinates |
957
|
|
|
|
|
|
|
Args : a list of absolute coordinates |
958
|
|
|
|
|
|
|
Status : Internal |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
This is used internally to map from absolute to relative |
961
|
|
|
|
|
|
|
coordinates. It does not take the offset of the reference sequence |
962
|
|
|
|
|
|
|
into account, so please use abs2rel() instead. |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=cut |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
sub _abs2rel { |
967
|
2283
|
|
|
2283
|
|
1447
|
my $self = shift; |
968
|
2283
|
|
|
|
|
1351
|
my @result; |
969
|
2283
|
50
|
|
|
|
2649
|
return unless defined $_[0]; |
970
|
|
|
|
|
|
|
|
971
|
2283
|
50
|
|
|
|
2352
|
if ($self->absolute) { |
972
|
0
|
|
|
|
|
0
|
@result = @_; |
973
|
|
|
|
|
|
|
} else { |
974
|
2283
|
|
|
|
|
1398
|
my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)}; |
|
2283
|
|
|
|
|
2802
|
|
975
|
161
|
|
|
|
|
301
|
@result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_ |
976
|
2283
|
100
|
100
|
|
|
6613
|
: map { $_ - $refstart + 1 } @_; |
|
2122
|
|
|
|
|
3386
|
|
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
979
|
|
|
|
|
|
|
# not the size of the returned array! |
980
|
2283
|
100
|
66
|
|
|
7981
|
return $result[0] if @result == 1 and !wantarray; |
981
|
80
|
|
|
|
|
1013
|
@result; |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
=head2 rel2abs |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
Title : rel2abs |
987
|
|
|
|
|
|
|
Usage : @coords = $s->rel2abs(@coords) |
988
|
|
|
|
|
|
|
Function: convert relative coordinates into absolute coordinates |
989
|
|
|
|
|
|
|
Returns : a list of absolute coordinates |
990
|
|
|
|
|
|
|
Args : a list of relative coordinates |
991
|
|
|
|
|
|
|
Status : Public |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
This function takes a list of positions in relative coordinates to the |
994
|
|
|
|
|
|
|
segment, and converts them into absolute coordinates. |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
=cut |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
sub rel2abs { |
999
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1000
|
0
|
|
|
|
|
0
|
my @result; |
1001
|
|
|
|
|
|
|
|
1002
|
0
|
0
|
|
|
|
0
|
if ($self->absolute) { |
1003
|
0
|
|
|
|
|
0
|
@result = @_; |
1004
|
|
|
|
|
|
|
} else { |
1005
|
0
|
|
|
|
|
0
|
my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); |
1006
|
0
|
|
|
|
|
0
|
@result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ |
1007
|
0
|
0
|
|
|
|
0
|
: map { $_ + $abs_start - 1 } @_; |
|
0
|
|
|
|
|
0
|
|
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
1010
|
|
|
|
|
|
|
# not the size of the returned array! |
1011
|
0
|
0
|
0
|
|
|
0
|
return $result[0] if @result == 1 and !wantarray; |
1012
|
0
|
|
|
|
|
0
|
@result; |
1013
|
|
|
|
|
|
|
} |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
=head2 abs2rel |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Title : abs2rel |
1018
|
|
|
|
|
|
|
Usage : @rel_coords = $s->abs2rel(@abs_coords) |
1019
|
|
|
|
|
|
|
Function: convert absolute coordinates into relative coordinates |
1020
|
|
|
|
|
|
|
Returns : a list of relative coordinates |
1021
|
|
|
|
|
|
|
Args : a list of absolute coordinates |
1022
|
|
|
|
|
|
|
Status : Public |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
This function takes a list of positions in absolute coordinates |
1025
|
|
|
|
|
|
|
and returns a list expressed in relative coordinates. |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=cut |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
sub abs2rel { |
1030
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1031
|
0
|
|
|
|
|
0
|
my @result; |
1032
|
|
|
|
|
|
|
|
1033
|
0
|
0
|
|
|
|
0
|
if ($self->absolute) { |
1034
|
0
|
|
|
|
|
0
|
@result = @_; |
1035
|
|
|
|
|
|
|
} else { |
1036
|
0
|
|
|
|
|
0
|
my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand); |
1037
|
0
|
|
|
|
|
0
|
@result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_ |
1038
|
0
|
0
|
|
|
|
0
|
: map { $_ - $abs_start + 1 } @_; |
|
0
|
|
|
|
|
0
|
|
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
# if called with a single argument, caller will expect a single scalar reply |
1041
|
|
|
|
|
|
|
# not the size of the returned array! |
1042
|
0
|
0
|
0
|
|
|
0
|
return $result[0] if @result == 1 and !wantarray; |
1043
|
0
|
|
|
|
|
0
|
@result; |
1044
|
|
|
|
|
|
|
} |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
sub subseq { |
1047
|
35
|
|
|
35
|
1
|
30
|
my $self = shift; |
1048
|
35
|
|
|
|
|
90
|
my $obj = $self->SUPER::subseq(@_); |
1049
|
35
|
|
|
|
|
94
|
bless $obj,__PACKAGE__; # always bless into the generic RelSegment package |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
sub strand { |
1053
|
627
|
|
|
627
|
1
|
516
|
my $self = shift; |
1054
|
627
|
100
|
|
|
|
670
|
if ($self->absolute) { |
1055
|
10
|
|
|
|
|
25
|
return _to_strand($self->{strand}); |
1056
|
|
|
|
|
|
|
} |
1057
|
617
|
|
|
|
|
773
|
my $start = $self->start; |
1058
|
617
|
|
|
|
|
754
|
my $stop = $self->stop; |
1059
|
617
|
50
|
33
|
|
|
1770
|
return 0 unless defined $start and defined $stop; |
1060
|
617
|
|
|
|
|
2148
|
return $stop <=> $start; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
sub _to_strand { |
1064
|
10
|
|
|
10
|
|
10
|
my $s = shift; |
1065
|
10
|
100
|
|
|
|
38
|
return -1 if $s eq '-'; |
1066
|
5
|
50
|
|
|
|
28
|
return +1 if $s eq '+'; |
1067
|
0
|
|
|
|
|
|
return 0; |
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
=head2 Bio::RangeI Methods |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
The following Bio::RangeI methods are supported: |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
overlaps(), contains(), equals(),intersection(),union(),overlap_extent() |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
=cut |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
sub intersection { |
1079
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
1080
|
0
|
|
|
|
|
|
my (@ranges) = @_; |
1081
|
0
|
0
|
|
|
|
|
unshift @ranges,$self if ref $self; |
1082
|
0
|
0
|
|
|
|
|
$ranges[0]->isa('Bio::DB::GFF::RelSegment') |
1083
|
|
|
|
|
|
|
or return $self->SUPER::intersection(@_); |
1084
|
|
|
|
|
|
|
|
1085
|
0
|
|
|
|
|
|
my $ref = $ranges[0]->abs_ref; |
1086
|
0
|
|
|
|
|
|
my ($low,$high); |
1087
|
0
|
|
|
|
|
|
foreach (@ranges) { |
1088
|
0
|
0
|
|
|
|
|
return unless $_->can('abs_ref'); |
1089
|
0
|
0
|
|
|
|
|
$ref eq $_->abs_ref or return; |
1090
|
0
|
0
|
0
|
|
|
|
$low = $_->abs_low if !defined($low) or $low < $_->abs_low; |
1091
|
0
|
0
|
0
|
|
|
|
$high = $_->abs_high if !defined($high) or $high > $_->abs_high; |
1092
|
|
|
|
|
|
|
} |
1093
|
|
|
|
|
|
|
|
1094
|
0
|
0
|
|
|
|
|
return unless $low < $high; |
1095
|
0
|
|
|
|
|
|
return Bio::DB::GFF::RelSegment->new(-factory => $self->factory, |
1096
|
|
|
|
|
|
|
-seq => $ref, |
1097
|
|
|
|
|
|
|
-start => $low, |
1098
|
|
|
|
|
|
|
-stop => $high, |
1099
|
|
|
|
|
|
|
); |
1100
|
|
|
|
|
|
|
} |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
sub overlaps { |
1103
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
1104
|
0
|
|
|
|
|
|
my($other,$so) = @_; |
1105
|
0
|
0
|
|
|
|
|
return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); |
1106
|
0
|
0
|
|
|
|
|
return if $self->abs_ref ne $other->abs_ref; |
1107
|
0
|
0
|
|
|
|
|
return if $self->abs_low > $other->abs_high; |
1108
|
0
|
0
|
|
|
|
|
return if $self->abs_high < $other->abs_low; |
1109
|
0
|
|
|
|
|
|
1; |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
sub contains { |
1113
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
1114
|
0
|
|
|
|
|
|
my($other,$so) = @_; |
1115
|
0
|
0
|
|
|
|
|
return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment'); |
1116
|
0
|
0
|
|
|
|
|
return if $self->abs_ref ne $other->abs_ref; |
1117
|
0
|
0
|
|
|
|
|
return unless $self->abs_low <= $other->abs_low; |
1118
|
0
|
0
|
|
|
|
|
return unless $self->abs_high >= $other->abs_high; |
1119
|
0
|
|
|
|
|
|
1; |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub union { |
1123
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
1124
|
0
|
|
|
|
|
|
my (@ranges) = @_; |
1125
|
0
|
0
|
|
|
|
|
unshift @ranges,$self if ref $self; |
1126
|
0
|
0
|
|
|
|
|
$ranges[0]->isa('Bio::DB::GFF::RelSegment') |
1127
|
|
|
|
|
|
|
or return $self->SUPER::union(@_); |
1128
|
|
|
|
|
|
|
|
1129
|
0
|
|
|
|
|
|
my $ref = $ranges[0]->abs_ref; |
1130
|
0
|
|
|
|
|
|
my ($low,$high); |
1131
|
0
|
|
|
|
|
|
foreach (@ranges) { |
1132
|
0
|
0
|
|
|
|
|
return unless $_->can('abs_ref'); |
1133
|
0
|
0
|
|
|
|
|
$ref eq $_->abs_ref or return; |
1134
|
0
|
0
|
0
|
|
|
|
$low = $_->abs_low if !defined($low) or $low > $_->abs_low; |
1135
|
0
|
0
|
0
|
|
|
|
$high = $_->abs_high if !defined($high) or $high < $_->abs_high; |
1136
|
|
|
|
|
|
|
} |
1137
|
0
|
|
|
|
|
|
$self->new(-factory=> $self->factory, |
1138
|
|
|
|
|
|
|
-seq => $ref, |
1139
|
|
|
|
|
|
|
-start => $low, |
1140
|
|
|
|
|
|
|
-stop => $high); |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
|
1143
|
0
|
|
|
0
|
0
|
|
sub version { 0 } |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
1; |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
__END__ |