line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Location::Split |
3
|
|
|
|
|
|
|
# Please direct questions and support issues to |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright Jason Stajich |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
10
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Bio::Location::Split - Implementation of a Location on a Sequence |
15
|
|
|
|
|
|
|
which has multiple locations (start/end points) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Bio::Location::Split; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $splitlocation = Bio::Location::Split->new(); |
22
|
|
|
|
|
|
|
$splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>1, |
23
|
|
|
|
|
|
|
-end=>30, |
24
|
|
|
|
|
|
|
-strand=>1)); |
25
|
|
|
|
|
|
|
$splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>50, |
26
|
|
|
|
|
|
|
-end=>61, |
27
|
|
|
|
|
|
|
-strand=>1)); |
28
|
|
|
|
|
|
|
my @sublocs = $splitlocation->sub_Location(); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $count = 1; |
31
|
|
|
|
|
|
|
# print the start/end points of the sub locations |
32
|
|
|
|
|
|
|
foreach my $location ( sort { $a->start <=> $b->start } |
33
|
|
|
|
|
|
|
@sublocs ) { |
34
|
|
|
|
|
|
|
printf "sub feature %d [%d..%d]\n", |
35
|
|
|
|
|
|
|
$count, $location->start,$location->end; |
36
|
|
|
|
|
|
|
$count++; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
This implementation handles locations which span more than one |
42
|
|
|
|
|
|
|
start/end location, and/or lie on different sequences, and can |
43
|
|
|
|
|
|
|
work with split locations that depend on the specific order of the |
44
|
|
|
|
|
|
|
sublocations ('join') or don't have a specific order but represent |
45
|
|
|
|
|
|
|
a feature spanning noncontiguous sublocations ('order', 'bond'). |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Note that the order in which sublocations are added may be very important, |
48
|
|
|
|
|
|
|
depending on the specific split location type. For instance, a 'join' |
49
|
|
|
|
|
|
|
must have the sublocations added in the order that one expects to |
50
|
|
|
|
|
|
|
join the sublocations, whereas all other types are sorted based on the |
51
|
|
|
|
|
|
|
sequence location. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 FEEDBACK |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
56
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
57
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
60
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 Support |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
69
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
70
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
71
|
|
|
|
|
|
|
with code and data examples if at all possible. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 Reporting Bugs |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
76
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
77
|
|
|
|
|
|
|
web: |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Email jason-AT-bioperl_DOT_org |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 APPENDIX |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
88
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Let the code begin... |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
package Bio::Location::Split; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# as defined by BSANE 0.03 |
97
|
|
|
|
|
|
|
our @CORBALOCATIONOPERATOR = ('NONE','JOIN', undef, 'ORDER');; |
98
|
|
|
|
|
|
|
|
99
|
184
|
|
|
184
|
|
2738
|
use Bio::Root::Root; |
|
184
|
|
|
|
|
221
|
|
|
184
|
|
|
|
|
5333
|
|
100
|
|
|
|
|
|
|
|
101
|
184
|
|
|
184
|
|
592
|
use base qw(Bio::Location::Atomic Bio::Location::SplitLocationI); |
|
184
|
|
|
|
|
205
|
|
|
184
|
|
|
|
|
59288
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub new { |
104
|
3696
|
|
|
3696
|
1
|
6485
|
my ($class, @args) = @_; |
105
|
3696
|
|
|
|
|
8789
|
my $self = $class->SUPER::new(@args); |
106
|
|
|
|
|
|
|
# initialize |
107
|
3696
|
|
|
|
|
5218
|
$self->{'_sublocations'} = []; |
108
|
3696
|
|
|
|
|
8884
|
my ( $type, $seqid, $locations ) = |
109
|
|
|
|
|
|
|
$self->_rearrange([qw(SPLITTYPE |
110
|
|
|
|
|
|
|
SEQ_ID |
111
|
|
|
|
|
|
|
LOCATIONS |
112
|
|
|
|
|
|
|
)], @args); |
113
|
3696
|
50
|
33
|
|
|
8980
|
if( defined $locations && ref($locations) =~ /array/i ) { |
114
|
0
|
|
|
|
|
0
|
$self->add_sub_Location(@$locations); |
115
|
|
|
|
|
|
|
} |
116
|
3696
|
100
|
|
|
|
5050
|
$seqid && $self->seq_id($seqid); |
117
|
3696
|
|
100
|
|
|
5070
|
$type ||= 'JOIN'; |
118
|
3696
|
|
|
|
|
3609
|
$type = lc ($type); |
119
|
3696
|
|
|
|
|
5157
|
$self->splittype($type); |
120
|
3696
|
|
|
|
|
6337
|
return $self; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head2 each_Location |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Title : each_Location |
126
|
|
|
|
|
|
|
Usage : @locations = $locObject->each_Location($order); |
127
|
|
|
|
|
|
|
Function: Conserved function call across Location:: modules - will |
128
|
|
|
|
|
|
|
return an array containing the component Location(s) in |
129
|
|
|
|
|
|
|
that object, regardless of whether the calling object is itself a |
130
|
|
|
|
|
|
|
single location or one containing sublocations. |
131
|
|
|
|
|
|
|
Returns : an array of Bio::LocationI implementing objects |
132
|
|
|
|
|
|
|
Args : Optional sort order to be passed to sub_Location() |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub each_Location { |
137
|
6405
|
|
|
6405
|
1
|
5006
|
my ($self, $order) = @_; |
138
|
6405
|
|
|
|
|
6451
|
my @locs = (); |
139
|
6405
|
|
|
|
|
8076
|
foreach my $subloc ($self->sub_Location($order)) { |
140
|
|
|
|
|
|
|
# Recursively check to get hierarchical split locations: |
141
|
69493
|
|
|
|
|
77405
|
push @locs, $subloc->each_Location($order); |
142
|
|
|
|
|
|
|
} |
143
|
6405
|
|
|
|
|
19357
|
return @locs; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 sub_Location |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Title : sub_Location |
149
|
|
|
|
|
|
|
Usage : @sublocs = $splitloc->sub_Location(); |
150
|
|
|
|
|
|
|
Function: Returns the array of sublocations making up this compound (split) |
151
|
|
|
|
|
|
|
location. Those sublocations referring to the same sequence as |
152
|
|
|
|
|
|
|
the root split location will be sorted by start position (forward |
153
|
|
|
|
|
|
|
sort) or end position (reverse sort) and come first (before |
154
|
|
|
|
|
|
|
those on other sequences). |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The sort order can be optionally specified or suppressed by the |
157
|
|
|
|
|
|
|
value of the first argument. The default is no sort. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Returns : an array of Bio::LocationI implementing objects |
160
|
|
|
|
|
|
|
Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse |
161
|
|
|
|
|
|
|
sort order |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub sub_Location { |
166
|
104531
|
|
|
104531
|
1
|
75261
|
my ($self, $order) = @_; |
167
|
104531
|
100
|
|
|
|
134083
|
$order = 0 unless defined $order; |
168
|
104531
|
50
|
33
|
|
|
425891
|
if( defined($order) && ($order !~ /^-?\d+$/) ) { |
169
|
0
|
|
|
|
|
0
|
$self->throw("value $order passed in to sub_Location is $order, an invalid value"); |
170
|
|
|
|
|
|
|
} |
171
|
104531
|
50
|
|
|
|
126155
|
$order = 1 if($order > 1); |
172
|
104531
|
50
|
|
|
|
123878
|
$order = -1 if($order < -1); |
173
|
104531
|
100
|
|
|
|
131652
|
my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : (); |
|
97138
|
|
|
|
|
179690
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# return the array if no ordering requested |
176
|
104531
|
100
|
66
|
|
|
298905
|
return @sublocs if( ($order == 0) || (! @sublocs) ); |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# sort those locations that are on the same sequence as the top (`master') |
179
|
|
|
|
|
|
|
# if the top seq is undefined, we take the first defined in a sublocation |
180
|
2
|
|
|
|
|
4
|
my $seqid = $self->seq_id(); |
181
|
2
|
|
|
|
|
3
|
my $i = 0; |
182
|
2
|
|
66
|
|
|
8
|
while((! defined($seqid)) && ($i <= $#sublocs)) { |
183
|
10
|
|
|
|
|
14
|
$seqid = $sublocs[$i++]->seq_id(); |
184
|
|
|
|
|
|
|
} |
185
|
2
|
50
|
33
|
|
|
3
|
if((! $self->seq_id()) && $seqid) { |
186
|
0
|
|
|
|
|
0
|
$self->warn("sorted sublocation array requested but ". |
187
|
|
|
|
|
|
|
"root location doesn't define seq_id ". |
188
|
|
|
|
|
|
|
"(at least one sublocation does!)"); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
my @locs = ($seqid ? |
191
|
2
|
50
|
|
|
|
7
|
grep { $_->seq_id() eq $seqid; } @sublocs : |
|
0
|
|
|
|
|
0
|
|
192
|
|
|
|
|
|
|
@sublocs); |
193
|
2
|
50
|
|
|
|
5
|
if(@locs) { |
194
|
2
|
100
|
|
|
|
4
|
if($order == 1) { |
195
|
|
|
|
|
|
|
# Schwartzian transforms for performance boost |
196
|
5
|
|
|
|
|
6
|
@locs = map { $_->[0] } |
197
|
|
|
|
|
|
|
sort { |
198
|
9
|
0
|
33
|
|
|
24
|
(defined $a && defined $b) ? $a->[1] <=> $b->[1] : |
|
|
50
|
|
|
|
|
|
199
|
|
|
|
|
|
|
$a ? -1 : 1 |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
map { |
202
|
1
|
50
|
|
|
|
2
|
[$_, (defined $_->start ? $_->start : $_->end)] |
|
5
|
|
|
|
|
8
|
|
203
|
|
|
|
|
|
|
} @locs;; |
204
|
|
|
|
|
|
|
} else { # $order == -1 |
205
|
5
|
|
|
|
|
6
|
@locs = map { $_->[0]} |
206
|
|
|
|
|
|
|
sort { |
207
|
6
|
0
|
33
|
|
|
18
|
(defined $a && defined $b) ? $b->[1] <=> $a->[1] : |
|
|
50
|
|
|
|
|
|
208
|
|
|
|
|
|
|
$a ? -1 : 1 |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
map { |
211
|
1
|
50
|
|
|
|
1
|
[$_, (defined $_->end ? $_->end : $_->start)] |
|
5
|
|
|
|
|
7
|
|
212
|
|
|
|
|
|
|
} @locs; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
# push the rest unsorted |
216
|
2
|
50
|
|
|
|
6
|
if($seqid) { |
217
|
0
|
|
|
|
|
0
|
push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs); |
|
0
|
|
|
|
|
0
|
|
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
# done! |
220
|
|
|
|
|
|
|
|
221
|
2
|
|
|
|
|
5
|
return @locs; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head2 add_sub_Location |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Title : add_sub_Location |
227
|
|
|
|
|
|
|
Usage : $splitloc->add_sub_Location(@locationIobjs); |
228
|
|
|
|
|
|
|
Function: add an additional sublocation |
229
|
|
|
|
|
|
|
Returns : number of current sub locations |
230
|
|
|
|
|
|
|
Args : list of Bio::LocationI implementing object(s) to add |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=cut |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub add_sub_Location { |
235
|
35743
|
|
|
35743
|
1
|
30575
|
my ($self,@args) = @_; |
236
|
35743
|
|
|
|
|
21171
|
my @locs; |
237
|
35743
|
|
|
|
|
25825
|
foreach my $loc ( @args ) { |
238
|
35750
|
50
|
33
|
|
|
84340
|
if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) { |
239
|
0
|
|
|
|
|
0
|
$self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!"); |
240
|
0
|
|
|
|
|
0
|
next; |
241
|
|
|
|
|
|
|
} |
242
|
35750
|
|
|
|
|
22253
|
push @{$self->{'_sublocations'}}, $loc; |
|
35750
|
|
|
|
|
44818
|
|
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
35743
|
|
|
|
|
20102
|
return scalar @{$self->{'_sublocations'}}; |
|
35743
|
|
|
|
|
53864
|
|
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 splittype |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Title : splittype |
251
|
|
|
|
|
|
|
Usage : $splittype = $location->splittype(); |
252
|
|
|
|
|
|
|
Function: get/set the type of the split location |
253
|
|
|
|
|
|
|
Returns : the splittype of split feature (join, order) |
254
|
|
|
|
|
|
|
Args : splittype to set |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub splittype { |
259
|
3917
|
|
|
3917
|
1
|
3488
|
my ($self, $value) = @_; |
260
|
3917
|
100
|
66
|
|
|
6811
|
if( defined $value || ! defined $self->{'_splittype'} ) { |
261
|
3703
|
50
|
|
|
|
5098
|
$value = 'JOIN' unless( defined $value ); |
262
|
3703
|
|
|
|
|
5301
|
$self->{'_splittype'} = uc ($value); |
263
|
|
|
|
|
|
|
} |
264
|
3917
|
|
|
|
|
4032
|
return $self->{'_splittype'}; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 is_single_sequence |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Title : is_single_sequence |
270
|
|
|
|
|
|
|
Usage : if($splitloc->is_single_sequence()) { |
271
|
|
|
|
|
|
|
print "Location object $splitloc is split ". |
272
|
|
|
|
|
|
|
"but only across a single sequence\n"; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
Function: Determine whether this location is split across a single or |
275
|
|
|
|
|
|
|
multiple sequences. |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This implementation ignores (sub-)locations that do not define |
278
|
|
|
|
|
|
|
seq_id(). The same holds true for the root location. |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Returns : TRUE if all sublocations lie on the same sequence as the root |
281
|
|
|
|
|
|
|
location (feature), and FALSE otherwise. |
282
|
|
|
|
|
|
|
Args : none |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub is_single_sequence { |
287
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
288
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
0
|
my $seqid = $self->seq_id(); |
290
|
0
|
|
|
|
|
0
|
foreach my $loc ($self->sub_Location(0)) { |
291
|
0
|
0
|
|
|
|
0
|
$seqid = $loc->seq_id() if(! $seqid); |
292
|
0
|
0
|
0
|
|
|
0
|
if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) { |
293
|
0
|
|
|
|
|
0
|
return 0; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
0
|
|
|
|
|
0
|
return 1; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head2 guide_strand |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Title : guide_strand |
302
|
|
|
|
|
|
|
Usage : $str = $loc->guide_strand(); |
303
|
|
|
|
|
|
|
Function: Get/Set the guide strand. Of use only if the split type is |
304
|
|
|
|
|
|
|
a 'join' (this helps determine the order of sublocation |
305
|
|
|
|
|
|
|
retrieval) |
306
|
|
|
|
|
|
|
Returns : value of guide strand (1, -1, or undef) |
307
|
|
|
|
|
|
|
Args : new value (-1 or 1, optional) |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub guide_strand { |
312
|
9265
|
|
|
9265
|
1
|
6954
|
my $self = shift; |
313
|
9265
|
100
|
|
|
|
16942
|
return $self->{'strand'} = shift if @_; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Sublocations strand values consistency check to set Guide Strand |
316
|
5531
|
|
|
|
|
4303
|
my @subloc_strands; |
317
|
5531
|
|
|
|
|
6713
|
foreach my $loc ($self->sub_Location(0)) { |
318
|
53464
|
|
100
|
|
|
58439
|
push @subloc_strands, $loc->strand || 1; |
319
|
|
|
|
|
|
|
} |
320
|
5531
|
50
|
|
|
|
13274
|
if ($self->isa('Bio::Location::SplitLocationI')) { |
321
|
5531
|
|
|
|
|
3780
|
my $identical = 0; |
322
|
5531
|
|
|
|
|
4426
|
my $first_value = $subloc_strands[0]; |
323
|
5531
|
|
|
|
|
4874
|
foreach my $strand (@subloc_strands) { |
324
|
53464
|
100
|
|
|
|
62079
|
$identical++ if ($strand == $first_value); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
5531
|
100
|
|
|
|
6428
|
if ($identical == scalar @subloc_strands) { |
328
|
5483
|
|
|
|
|
6812
|
$self->{'strand'} = $first_value; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
else { |
331
|
48
|
|
|
|
|
56
|
$self->{'strand'} = undef; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
} |
334
|
5531
|
|
|
|
|
15523
|
return $self->{'strand'}; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head1 LocationI methods |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 strand |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Title : strand |
342
|
|
|
|
|
|
|
Usage : $obj->strand($newval) |
343
|
|
|
|
|
|
|
Function: For SplitLocations, setting the strand of the container |
344
|
|
|
|
|
|
|
(this object) is a short-cut for setting the strand of all |
345
|
|
|
|
|
|
|
sublocations. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
In get-mode, checks if no sub-location is remote, and if |
348
|
|
|
|
|
|
|
all have the same strand. If so, it returns that shared |
349
|
|
|
|
|
|
|
strand value. Otherwise it returns undef. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Example : |
352
|
|
|
|
|
|
|
Returns : on get, value of strand if identical between sublocations |
353
|
|
|
|
|
|
|
(-1, 1, or undef) |
354
|
|
|
|
|
|
|
Args : new value (-1 or 1, optional) |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=cut |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub strand{ |
360
|
14257
|
|
|
14257
|
1
|
14283
|
my ($self,$value) = @_; |
361
|
14257
|
100
|
|
|
|
17285
|
if( defined $value) { |
362
|
1663
|
|
|
|
|
1606
|
$self->{'strand'} = $value; |
363
|
|
|
|
|
|
|
# propagate to all sublocs |
364
|
1663
|
|
|
|
|
2877
|
foreach my $loc ($self->sub_Location(0)) { |
365
|
16770
|
|
|
|
|
18845
|
$loc->strand($value); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
else { |
369
|
12594
|
|
|
|
|
10944
|
my ($strand, $lstrand); |
370
|
12594
|
|
|
|
|
17718
|
foreach my $loc ($self->sub_Location(0)) { |
371
|
|
|
|
|
|
|
# we give up upon any location that doesn't have |
372
|
|
|
|
|
|
|
# the strand specified, or has a differing one set than |
373
|
|
|
|
|
|
|
# previously seen. |
374
|
|
|
|
|
|
|
# calling strand() is potentially expensive if the subloc |
375
|
|
|
|
|
|
|
# is also a split location, so we cache it |
376
|
124817
|
|
|
|
|
145807
|
$lstrand = $loc->strand(); |
377
|
124817
|
100
|
100
|
|
|
440030
|
if ( ! $lstrand |
|
|
100
|
66
|
|
|
|
|
378
|
|
|
|
|
|
|
or ($strand and ($strand != $lstrand)) |
379
|
|
|
|
|
|
|
) { |
380
|
41
|
|
|
|
|
99
|
$strand = undef; |
381
|
41
|
|
|
|
|
55
|
last; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
elsif (! $strand) { |
384
|
12586
|
|
|
|
|
12588
|
$strand = $lstrand; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
} |
387
|
12594
|
|
|
|
|
32740
|
return $strand; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 flip_strand |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Title : flip_strand |
394
|
|
|
|
|
|
|
Usage : $location->flip_strand(); |
395
|
|
|
|
|
|
|
Function: Flip-flop a strand to the opposite. Also sets Split strand |
396
|
|
|
|
|
|
|
to be consistent with the sublocation strands |
397
|
|
|
|
|
|
|
(1, -1 or undef for mixed strand values) |
398
|
|
|
|
|
|
|
Returns : None |
399
|
|
|
|
|
|
|
Args : None |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub flip_strand { |
404
|
107
|
|
|
107
|
1
|
106
|
my $self = shift; |
405
|
107
|
|
|
|
|
85
|
my @sublocs; |
406
|
|
|
|
|
|
|
my @subloc_strands; |
407
|
|
|
|
|
|
|
|
408
|
107
|
|
|
|
|
167
|
for my $loc ( $self->sub_Location(0) ) { |
409
|
|
|
|
|
|
|
# Atomic "flip_strand" now initializes the strand if necessary |
410
|
344
|
|
|
|
|
518
|
my $new_strand = $loc->flip_strand; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# Store strand values for later consistency check |
413
|
344
|
|
|
|
|
351
|
push @sublocs, $loc; |
414
|
344
|
|
|
|
|
356
|
push @subloc_strands, $new_strand; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# Sublocations strand values consistency check to set Guide Strand |
418
|
107
|
50
|
|
|
|
323
|
if ($self->isa('Bio::Location::SplitLocationI')) { |
419
|
107
|
|
|
|
|
101
|
my $identical = 0; |
420
|
107
|
|
|
|
|
111
|
my $first_value = $subloc_strands[0]; |
421
|
107
|
|
|
|
|
116
|
foreach my $strand (@subloc_strands) { |
422
|
344
|
100
|
|
|
|
771
|
$identical++ if ($strand == $first_value); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
107
|
100
|
|
|
|
147
|
if ($identical == scalar @subloc_strands) { |
426
|
95
|
|
|
|
|
148
|
$self->guide_strand($first_value); |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
else { |
429
|
|
|
|
|
|
|
# Mixed strand values, must reverse the sublocations order |
430
|
12
|
|
|
|
|
17
|
$self->guide_strand(undef); |
431
|
12
|
|
|
|
|
8
|
@{ $self->{_sublocations} } = reverse @sublocs; |
|
12
|
|
|
|
|
35
|
|
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head2 start |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Title : start |
439
|
|
|
|
|
|
|
Usage : $start = $location->start(); |
440
|
|
|
|
|
|
|
Function: get the starting point of the first (sorted) sublocation |
441
|
|
|
|
|
|
|
Returns : integer |
442
|
|
|
|
|
|
|
Args : none |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub start { |
447
|
36525
|
|
|
36525
|
1
|
30531
|
my ($self,$value) = @_; |
448
|
36525
|
50
|
|
|
|
44339
|
if( defined $value ) { |
449
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the starting point of a split location, " |
450
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
451
|
|
|
|
|
|
|
} |
452
|
36525
|
|
|
|
|
62460
|
return $self->SUPER::start(); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 end |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Title : end |
458
|
|
|
|
|
|
|
Usage : $end = $location->end(); |
459
|
|
|
|
|
|
|
Function: get the ending point of the last (sorted) sublocation |
460
|
|
|
|
|
|
|
Returns : integer |
461
|
|
|
|
|
|
|
Args : none |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=cut |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub end { |
466
|
32829
|
|
|
32829
|
1
|
31218
|
my ($self,$value) = @_; |
467
|
32829
|
50
|
|
|
|
50197
|
if( defined $value ) { |
468
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the ending point of a split location, " |
469
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
470
|
|
|
|
|
|
|
} |
471
|
32829
|
|
|
|
|
51387
|
return $self->SUPER::end(); |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head2 min_start |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Title : min_start |
477
|
|
|
|
|
|
|
Usage : $min_start = $location->min_start(); |
478
|
|
|
|
|
|
|
Function: get the minimum starting point |
479
|
|
|
|
|
|
|
Returns : the minimum starting point from the contained sublocations |
480
|
|
|
|
|
|
|
Args : none |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub min_start { |
485
|
36530
|
|
|
36530
|
1
|
27573
|
my ($self, $value) = @_; |
486
|
|
|
|
|
|
|
|
487
|
36530
|
50
|
|
|
|
48264
|
if( defined $value ) { |
488
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the minimum starting point of a split " |
489
|
|
|
|
|
|
|
. "location, that is not possible, try manipulating the sub Locations"); |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
492
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
493
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
494
|
36530
|
|
|
|
|
41864
|
my @locs = $self->sub_Location(0); |
495
|
36530
|
100
|
|
|
|
91677
|
return ( @locs ) ? $locs[0]->min_start : undef; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 max_start |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Title : max_start |
501
|
|
|
|
|
|
|
Usage : my $maxstart = $location->max_start(); |
502
|
|
|
|
|
|
|
Function: Get maximum starting location of feature startpoint |
503
|
|
|
|
|
|
|
Returns : integer or undef if no maximum starting point. |
504
|
|
|
|
|
|
|
Args : none |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=cut |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub max_start { |
509
|
4591
|
|
|
4591
|
1
|
4545
|
my ($self,$value) = @_; |
510
|
|
|
|
|
|
|
|
511
|
4591
|
50
|
|
|
|
6634
|
if( defined $value ) { |
512
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the maximum starting point of a split " |
513
|
|
|
|
|
|
|
. "location, that is not possible, try manipulating the sub Locations"); |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
516
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
517
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
518
|
4591
|
|
|
|
|
5321
|
my @locs = $self->sub_Location(0); |
519
|
4591
|
100
|
|
|
|
8441
|
return ( @locs ) ? $locs[0]->max_start : undef; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 start_pos_type |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Title : start_pos_type |
525
|
|
|
|
|
|
|
Usage : my $start_pos_type = $location->start_pos_type(); |
526
|
|
|
|
|
|
|
Function: Get start position type (ie <,>, ^) |
527
|
|
|
|
|
|
|
Returns : type of position coded as text |
528
|
|
|
|
|
|
|
('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
529
|
|
|
|
|
|
|
Args : none |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=cut |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
sub start_pos_type { |
534
|
6
|
|
|
6
|
1
|
8
|
my ($self,$value) = @_; |
535
|
|
|
|
|
|
|
|
536
|
6
|
50
|
|
|
|
15
|
if( defined $value ) { |
537
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the start_pos_type of a split location, " |
538
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
541
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
542
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
543
|
6
|
|
|
|
|
12
|
my @locs = $self->sub_Location(0); |
544
|
6
|
50
|
|
|
|
22
|
return ( @locs ) ? $locs[0]->start_pos_type : undef; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head2 min_end |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Title : min_end |
550
|
|
|
|
|
|
|
Usage : my $minend = $location->min_end(); |
551
|
|
|
|
|
|
|
Function: Get minimum ending location of feature endpoint |
552
|
|
|
|
|
|
|
Returns : integer or undef if no minimum ending point. |
553
|
|
|
|
|
|
|
Args : none |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
=cut |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
sub min_end { |
558
|
510
|
|
|
510
|
1
|
392
|
my ($self,$value) = @_; |
559
|
|
|
|
|
|
|
|
560
|
510
|
50
|
|
|
|
765
|
if( defined $value ) { |
561
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the minimum end point of a split location, " |
562
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
565
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
566
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
567
|
510
|
|
|
|
|
606
|
my @locs = $self->sub_Location(0); |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
570
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
571
|
|
|
|
|
|
|
# Example: for "join(1..100,J00194.1:100..202)", End would be 100 |
572
|
510
|
|
|
|
|
654
|
my $seqid = $self->seq_id; |
573
|
510
|
|
|
|
|
456
|
my $i = 0; |
574
|
510
|
|
100
|
|
|
917
|
while (not defined $seqid and $i <= $#locs) { |
575
|
6
|
|
|
|
|
13
|
$seqid = $locs[$i++]->seq_id; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
510
|
100
|
|
|
|
639
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
2197
|
|
|
|
|
2514
|
|
579
|
|
|
|
|
|
|
: @locs); |
580
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
581
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
582
|
|
|
|
|
|
|
# and if that fails return the last segment value |
583
|
510
|
100
|
66
|
|
|
1438
|
if (@locs and not @same_id_locs) { |
584
|
1
|
|
|
|
|
2
|
my $first_id; |
585
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
586
|
1
|
|
|
|
|
4
|
$first_id = $locs[$i++]->seq_id; |
587
|
|
|
|
|
|
|
} |
588
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
2
|
|
|
|
|
3
|
|
589
|
|
|
|
|
|
|
: @locs); |
590
|
|
|
|
|
|
|
} |
591
|
510
|
50
|
|
|
|
1139
|
return ( @same_id_locs ) ? $same_id_locs[-1]->min_end : undef; |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head2 max_end |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Title : max_end |
597
|
|
|
|
|
|
|
Usage : my $maxend = $location->max_end(); |
598
|
|
|
|
|
|
|
Function: Get maximum ending location of feature endpoint |
599
|
|
|
|
|
|
|
Returns : integer or undef if no maximum ending point. |
600
|
|
|
|
|
|
|
Args : none |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=cut |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub max_end { |
605
|
32834
|
|
|
32834
|
1
|
24170
|
my ($self,$value) = @_; |
606
|
|
|
|
|
|
|
|
607
|
32834
|
50
|
|
|
|
45929
|
if( defined $value ) { |
608
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the maximum end point of a split location, " |
609
|
|
|
|
|
|
|
."that is not possible, try manipulating the sub Locations"); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
612
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
613
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
614
|
32834
|
|
|
|
|
38689
|
my @locs = $self->sub_Location(0); |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
617
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
618
|
|
|
|
|
|
|
# Example: for "join(1..100,J00194.1:100..202)", End would be 100 |
619
|
32834
|
|
|
|
|
50769
|
my $seqid = $self->seq_id; |
620
|
32834
|
|
|
|
|
29219
|
my $i = 0; |
621
|
32834
|
|
100
|
|
|
67000
|
while (not defined $seqid and $i <= $#locs) { |
622
|
573
|
|
|
|
|
753
|
$seqid = $locs[$i++]->seq_id; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
32834
|
100
|
|
|
|
49556
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
338856
|
|
|
|
|
370360
|
|
626
|
|
|
|
|
|
|
: @locs); |
627
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
628
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
629
|
|
|
|
|
|
|
# and if that fails return the last segment value |
630
|
32834
|
100
|
66
|
|
|
119649
|
if (@locs and not @same_id_locs) { |
631
|
1
|
|
|
|
|
2
|
my $first_id; |
632
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
633
|
1
|
|
|
|
|
2
|
$first_id = $locs[$i++]->seq_id; |
634
|
|
|
|
|
|
|
} |
635
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
2
|
|
|
|
|
3
|
|
636
|
|
|
|
|
|
|
: @locs); |
637
|
|
|
|
|
|
|
} |
638
|
32834
|
50
|
|
|
|
73794
|
return ( @same_id_locs ) ? $same_id_locs[-1]->max_end : undef; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=head2 end_pos_type |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
Title : end_pos_type |
644
|
|
|
|
|
|
|
Usage : my $end_pos_type = $location->end_pos_type(); |
645
|
|
|
|
|
|
|
Function: Get end position type (i.e. <, >, ^) |
646
|
|
|
|
|
|
|
Returns : type of position coded as text |
647
|
|
|
|
|
|
|
('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
648
|
|
|
|
|
|
|
Args : none |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub end_pos_type { |
653
|
6
|
|
|
6
|
1
|
9
|
my ($self,$value) = @_; |
654
|
|
|
|
|
|
|
|
655
|
6
|
50
|
|
|
|
15
|
if( defined $value ) { |
656
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set end_pos_type of a split location, " |
657
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
660
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is the user's responsibility. |
661
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
662
|
6
|
|
|
|
|
11
|
my @locs = $self->sub_Location(0); |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
665
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
666
|
|
|
|
|
|
|
# Example: for "join(1..>100,J00194.1:100..202)", End pos type would be 'AFTER' |
667
|
6
|
|
|
|
|
13
|
my $seqid = $self->seq_id; |
668
|
6
|
|
|
|
|
6
|
my $i = 0; |
669
|
6
|
|
100
|
|
|
24
|
while (not defined $seqid and $i <= $#locs) { |
670
|
8
|
|
|
|
|
14
|
$seqid = $locs[$i++]->seq_id; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
6
|
100
|
|
|
|
14
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
4
|
|
|
|
|
7
|
|
674
|
|
|
|
|
|
|
: @locs); |
675
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
676
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
677
|
|
|
|
|
|
|
# and if that fails return the last segment value |
678
|
6
|
100
|
66
|
|
|
22
|
if (@locs and not @same_id_locs) { |
679
|
1
|
|
|
|
|
1
|
my $first_id; |
680
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
681
|
1
|
|
|
|
|
4
|
$first_id = $locs[$i++]->seq_id; |
682
|
|
|
|
|
|
|
} |
683
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
2
|
|
|
|
|
4
|
|
684
|
|
|
|
|
|
|
: @locs); |
685
|
|
|
|
|
|
|
} |
686
|
6
|
50
|
|
|
|
23
|
return ( @same_id_locs ) ? $same_id_locs[-1]->end_pos_type : undef; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=head2 length |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Title : length |
692
|
|
|
|
|
|
|
Usage : $len = $loc->length(); |
693
|
|
|
|
|
|
|
Function: get the length in the coordinate space this location spans |
694
|
|
|
|
|
|
|
Example : |
695
|
|
|
|
|
|
|
Returns : an integer |
696
|
|
|
|
|
|
|
Args : none |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub length { |
701
|
22
|
|
|
22
|
1
|
30
|
my ($self) = @_; |
702
|
22
|
|
|
|
|
22
|
my $length = 0; |
703
|
|
|
|
|
|
|
# Mixed strand values mean trans-splicing (where exons can even |
704
|
|
|
|
|
|
|
# be in different chromosomes), so in that case only give the sum |
705
|
|
|
|
|
|
|
# of the lengths of the individual segments |
706
|
22
|
100
|
|
|
|
35
|
if (! defined $self->guide_strand) { |
707
|
13
|
|
|
|
|
17
|
for my $loc ( $self->sub_Location(0) ) { |
708
|
34
|
|
|
|
|
53
|
$length += abs($loc->end - $loc->start) + 1; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
else { |
712
|
9
|
|
|
|
|
12
|
my @sublocs = $self->sub_Location(0); |
713
|
9
|
|
|
|
|
19
|
my $start = $sublocs[0]->start; |
714
|
9
|
|
|
|
|
14
|
my $end = $sublocs[-1]->end; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# If Start > End, it's a possible case of cut by origin |
717
|
|
|
|
|
|
|
# location in circular sequences (e.g "join(16..20,1..2)") |
718
|
9
|
100
|
|
|
|
13
|
if ($start > $end) { |
719
|
|
|
|
|
|
|
# Figure out which segments are located before |
720
|
|
|
|
|
|
|
# and which are located after coordinate 1 |
721
|
|
|
|
|
|
|
# (END_SEQ - 1 - START_SEQ) |
722
|
6
|
|
|
|
|
5
|
my @end_seq_segments; |
723
|
|
|
|
|
|
|
my @start_seq_segments; |
724
|
6
|
|
|
|
|
6
|
my $switch = 0; |
725
|
6
|
|
|
|
|
8
|
foreach my $subloc (@sublocs) { |
726
|
16
|
100
|
|
|
|
16
|
if ($switch == 0) { |
727
|
14
|
100
|
|
|
|
17
|
if ($subloc->start == 1) { |
728
|
6
|
|
|
|
|
4
|
$switch = 1; |
729
|
6
|
|
|
|
|
8
|
push @start_seq_segments, $subloc; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
else { |
732
|
8
|
|
|
|
|
21
|
push @end_seq_segments, $subloc; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
else { |
736
|
2
|
|
|
|
|
3
|
push @start_seq_segments, $subloc; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
# If it's a cut by origin location, sum the whole length of each group |
741
|
6
|
50
|
33
|
|
|
19
|
if (scalar @end_seq_segments > 0 and @start_seq_segments > 0) { |
742
|
6
|
|
|
|
|
11
|
my $end_segments_length = abs( $end_seq_segments[0]->start |
743
|
|
|
|
|
|
|
- $end_seq_segments[-1]->end) |
744
|
|
|
|
|
|
|
+ 1; |
745
|
6
|
|
|
|
|
10
|
my $start_segments_length = abs( $start_seq_segments[0]->start |
746
|
|
|
|
|
|
|
- $start_seq_segments[-1]->end) |
747
|
|
|
|
|
|
|
+ 1; |
748
|
6
|
|
|
|
|
11
|
$length = $end_segments_length + $start_segments_length; |
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
else { |
752
|
3
|
|
|
|
|
6
|
$length = $end - $start + 1; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# If for some reason nothing worked, fall back to previous behaviour |
757
|
22
|
50
|
|
|
|
39
|
if ($length == 0) { |
758
|
0
|
|
|
|
|
0
|
$length = abs($self->end - $self->start) + 1 |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
|
761
|
22
|
|
|
|
|
126
|
return $length; |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=head2 seq_id |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
Title : seq_id |
767
|
|
|
|
|
|
|
Usage : my $seqid = $location->seq_id(); |
768
|
|
|
|
|
|
|
Function: Get/Set seq_id that location refers to |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
We override this here in order to propagate to all sublocations |
771
|
|
|
|
|
|
|
which are not remote (provided this root is not remote either) |
772
|
|
|
|
|
|
|
Returns : seq_id |
773
|
|
|
|
|
|
|
Args : [optional] seq_id value to set |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub seq_id { |
779
|
37345
|
|
|
37345
|
1
|
28759
|
my $self = shift; |
780
|
|
|
|
|
|
|
|
781
|
37345
|
100
|
66
|
|
|
64730
|
if(@_ && !$self->is_remote()) { |
782
|
3588
|
|
|
|
|
4664
|
foreach my $subloc ($self->sub_Location(0)) { |
783
|
35365
|
100
|
|
|
|
38720
|
$subloc->seq_id(@_) if !$subloc->is_remote(); |
784
|
|
|
|
|
|
|
} |
785
|
|
|
|
|
|
|
} |
786
|
37345
|
|
|
|
|
57248
|
return $self->SUPER::seq_id(@_); |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
=head2 coordinate_policy |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
Title : coordinate_policy |
792
|
|
|
|
|
|
|
Usage : $policy = $location->coordinate_policy(); |
793
|
|
|
|
|
|
|
$location->coordinate_policy($mypolicy); # set may not be possible |
794
|
|
|
|
|
|
|
Function: Get the coordinate computing policy employed by this object. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
See Bio::Location::CoordinatePolicyI for documentation about |
797
|
|
|
|
|
|
|
the policy object and its use. |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
The interface *does not* require implementing classes to accept |
800
|
|
|
|
|
|
|
setting of a different policy. The implementation provided here |
801
|
|
|
|
|
|
|
does, however, allow doing so. |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Implementors of this interface are expected to initialize every |
804
|
|
|
|
|
|
|
new instance with a CoordinatePolicyI object. The implementation |
805
|
|
|
|
|
|
|
provided here will return a default policy object if none has |
806
|
|
|
|
|
|
|
been set yet. To change this default policy object call this |
807
|
|
|
|
|
|
|
method as a class method with an appropriate argument. Note that |
808
|
|
|
|
|
|
|
in this case only subsequently created Location objects will be |
809
|
|
|
|
|
|
|
affected. |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
Returns : A Bio::Location::CoordinatePolicyI implementing object. |
812
|
|
|
|
|
|
|
Args : On set, a Bio::Location::CoordinatePolicyI implementing object. |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=head2 to_FTstring |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Title : to_FTstring |
817
|
|
|
|
|
|
|
Usage : my $locstr = $location->to_FTstring() |
818
|
|
|
|
|
|
|
Function: returns the FeatureTable string of this location |
819
|
|
|
|
|
|
|
Returns : string |
820
|
|
|
|
|
|
|
Args : none |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
sub to_FTstring { |
825
|
110
|
|
|
110
|
1
|
1680
|
my ($self) = @_; |
826
|
110
|
|
|
|
|
100
|
my @strs; |
827
|
110
|
|
100
|
|
|
210
|
my $strand = $self->strand() || 0; |
828
|
110
|
|
|
|
|
174
|
my $stype = lc($self->splittype()); |
829
|
|
|
|
|
|
|
|
830
|
110
|
100
|
|
|
|
197
|
if( $strand < 0 ) { |
831
|
42
|
|
|
|
|
87
|
$self->flip_strand; # this will recursively set the strand |
832
|
|
|
|
|
|
|
# to +1 for all the sub locations |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
|
835
|
110
|
|
|
|
|
213
|
foreach my $loc ( $self->sub_Location(0) ) { |
836
|
355
|
|
|
|
|
616
|
$loc->verbose($self->verbose); |
837
|
355
|
|
|
|
|
548
|
my $str = $loc->to_FTstring(); |
838
|
|
|
|
|
|
|
# we only append the remote seq_id if it hasn't been done already |
839
|
|
|
|
|
|
|
# by the sub-location (which it should if it knows it's remote) |
840
|
|
|
|
|
|
|
# (and of course only if it's necessary) |
841
|
355
|
50
|
100
|
|
|
467
|
if( (! $loc->is_remote) && |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
842
|
|
|
|
|
|
|
defined($self->seq_id) && defined($loc->seq_id) && |
843
|
|
|
|
|
|
|
($loc->seq_id ne $self->seq_id) ) { |
844
|
0
|
|
|
|
|
0
|
$str = sprintf("%s:%s", $loc->seq_id, $str); |
845
|
|
|
|
|
|
|
} |
846
|
355
|
|
|
|
|
521
|
push @strs, $str; |
847
|
|
|
|
|
|
|
} |
848
|
110
|
100
|
|
|
|
249
|
$self->flip_strand if $strand < 0; |
849
|
110
|
|
|
|
|
87
|
my $str; |
850
|
110
|
100
|
|
|
|
218
|
if( @strs == 1 ) { |
|
|
50
|
|
|
|
|
|
851
|
6
|
|
|
|
|
17
|
($str) = @strs; |
852
|
|
|
|
|
|
|
} elsif( @strs == 0 ) { |
853
|
0
|
|
|
|
|
0
|
$self->warn("no Sublocations for this splitloc, so not returning anything\n"); |
854
|
|
|
|
|
|
|
} else { |
855
|
104
|
|
|
|
|
171
|
$str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs)); |
856
|
|
|
|
|
|
|
} |
857
|
110
|
100
|
|
|
|
278
|
if( $strand < 0 ) { # wrap this in a complement if it was unrolled |
858
|
42
|
|
|
|
|
111
|
$str = sprintf("%s(%s)",'complement',$str); |
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
|
861
|
110
|
|
|
|
|
308
|
return $str; |
862
|
|
|
|
|
|
|
} |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head2 valid_Location |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Title : valid_Location |
867
|
|
|
|
|
|
|
Usage : if ($location->valid_Location) {...}; |
868
|
|
|
|
|
|
|
Function: boolean method to determine whether location is considered valid |
869
|
|
|
|
|
|
|
(has minimum requirements for Simple implementation) |
870
|
|
|
|
|
|
|
Returns : Boolean value: true if location is valid, false otherwise |
871
|
|
|
|
|
|
|
Args : none |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=cut |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# we'll probably need to override the RangeI methods since our locations will |
876
|
|
|
|
|
|
|
# not be contiguous. |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
1; |