| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Location::Split |
|
3
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Copyright Jason Stajich |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
10
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Bio::Location::Split - Implementation of a Location on a Sequence |
|
15
|
|
|
|
|
|
|
which has multiple locations (start/end points) |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Bio::Location::Split; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $splitlocation = Bio::Location::Split->new(); |
|
22
|
|
|
|
|
|
|
$splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>1, |
|
23
|
|
|
|
|
|
|
-end=>30, |
|
24
|
|
|
|
|
|
|
-strand=>1)); |
|
25
|
|
|
|
|
|
|
$splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>50, |
|
26
|
|
|
|
|
|
|
-end=>61, |
|
27
|
|
|
|
|
|
|
-strand=>1)); |
|
28
|
|
|
|
|
|
|
my @sublocs = $splitlocation->sub_Location(); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $count = 1; |
|
31
|
|
|
|
|
|
|
# print the start/end points of the sub locations |
|
32
|
|
|
|
|
|
|
foreach my $location ( sort { $a->start <=> $b->start } |
|
33
|
|
|
|
|
|
|
@sublocs ) { |
|
34
|
|
|
|
|
|
|
printf "sub feature %d [%d..%d]\n", |
|
35
|
|
|
|
|
|
|
$count, $location->start,$location->end, "\n"; |
|
36
|
|
|
|
|
|
|
$count++; |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
This implementation handles locations which span more than one |
|
42
|
|
|
|
|
|
|
start/end location, or and/or lie on different sequences, and can |
|
43
|
|
|
|
|
|
|
work with split locations that depend on the specific order of the |
|
44
|
|
|
|
|
|
|
sublocations ('join') or don't have a specific order but represent |
|
45
|
|
|
|
|
|
|
a feature spanning noncontiguous sublocations ('order', 'bond'). |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Note that the order in which sublocations are added may be very important, |
|
48
|
|
|
|
|
|
|
depending on the specific split location type. For instance, a 'join' |
|
49
|
|
|
|
|
|
|
must have the sublocations added in the order that one expects to |
|
50
|
|
|
|
|
|
|
join the sublocations, whereas all other types are sorted based on the |
|
51
|
|
|
|
|
|
|
sequence location. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
56
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
57
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
60
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 Support |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
I |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
69
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
70
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
71
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
76
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
77
|
|
|
|
|
|
|
web: |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Email jason-AT-bioperl_DOT_org |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 APPENDIX |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
88
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Let the code begin... |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
package Bio::Location::Split; |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# as defined by BSANE 0.03 |
|
97
|
|
|
|
|
|
|
our @CORBALOCATIONOPERATOR = ('NONE','JOIN', undef, 'ORDER');; |
|
98
|
|
|
|
|
|
|
|
|
99
|
184
|
|
|
184
|
|
2738
|
use Bio::Root::Root; |
|
|
184
|
|
|
|
|
221
|
|
|
|
184
|
|
|
|
|
5333
|
|
|
100
|
|
|
|
|
|
|
|
|
101
|
184
|
|
|
184
|
|
592
|
use base qw(Bio::Location::Atomic Bio::Location::SplitLocationI); |
|
|
184
|
|
|
|
|
205
|
|
|
|
184
|
|
|
|
|
59288
|
|
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub new { |
|
104
|
3696
|
|
|
3696
|
1
|
6485
|
my ($class, @args) = @_; |
|
105
|
3696
|
|
|
|
|
8789
|
my $self = $class->SUPER::new(@args); |
|
106
|
|
|
|
|
|
|
# initialize |
|
107
|
3696
|
|
|
|
|
5218
|
$self->{'_sublocations'} = []; |
|
108
|
3696
|
|
|
|
|
8884
|
my ( $type, $seqid, $locations ) = |
|
109
|
|
|
|
|
|
|
$self->_rearrange([qw(SPLITTYPE |
|
110
|
|
|
|
|
|
|
SEQ_ID |
|
111
|
|
|
|
|
|
|
LOCATIONS |
|
112
|
|
|
|
|
|
|
)], @args); |
|
113
|
3696
|
50
|
33
|
|
|
8980
|
if( defined $locations && ref($locations) =~ /array/i ) { |
|
114
|
0
|
|
|
|
|
0
|
$self->add_sub_Location(@$locations); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
3696
|
100
|
|
|
|
5050
|
$seqid && $self->seq_id($seqid); |
|
117
|
3696
|
|
100
|
|
|
5070
|
$type ||= 'JOIN'; |
|
118
|
3696
|
|
|
|
|
3609
|
$type = lc ($type); |
|
119
|
3696
|
|
|
|
|
5157
|
$self->splittype($type); |
|
120
|
3696
|
|
|
|
|
6337
|
return $self; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head2 each_Location |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Title : each_Location |
|
126
|
|
|
|
|
|
|
Usage : @locations = $locObject->each_Location($order); |
|
127
|
|
|
|
|
|
|
Function: Conserved function call across Location:: modules - will |
|
128
|
|
|
|
|
|
|
return an array containing the component Location(s) in |
|
129
|
|
|
|
|
|
|
that object, regardless if the calling object is itself a |
|
130
|
|
|
|
|
|
|
single location or one containing sublocations. |
|
131
|
|
|
|
|
|
|
Returns : an array of Bio::LocationI implementing objects |
|
132
|
|
|
|
|
|
|
Args : Optional sort order to be passed to sub_Location() |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub each_Location { |
|
137
|
6405
|
|
|
6405
|
1
|
5006
|
my ($self, $order) = @_; |
|
138
|
6405
|
|
|
|
|
6451
|
my @locs = (); |
|
139
|
6405
|
|
|
|
|
8076
|
foreach my $subloc ($self->sub_Location($order)) { |
|
140
|
|
|
|
|
|
|
# Recursively check to get hierarchical split locations: |
|
141
|
69493
|
|
|
|
|
77405
|
push @locs, $subloc->each_Location($order); |
|
142
|
|
|
|
|
|
|
} |
|
143
|
6405
|
|
|
|
|
19357
|
return @locs; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 sub_Location |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Title : sub_Location |
|
149
|
|
|
|
|
|
|
Usage : @sublocs = $splitloc->sub_Location(); |
|
150
|
|
|
|
|
|
|
Function: Returns the array of sublocations making up this compound (split) |
|
151
|
|
|
|
|
|
|
location. Those sublocations referring to the same sequence as |
|
152
|
|
|
|
|
|
|
the root split location will be sorted by start position (forward |
|
153
|
|
|
|
|
|
|
sort) or end position (reverse sort) and come first (before |
|
154
|
|
|
|
|
|
|
those on other sequences). |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The sort order can be optionally specified or suppressed by the |
|
157
|
|
|
|
|
|
|
value of the first argument. The default is no sort. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Returns : an array of Bio::LocationI implementing objects |
|
160
|
|
|
|
|
|
|
Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse |
|
161
|
|
|
|
|
|
|
sort order |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub sub_Location { |
|
166
|
104531
|
|
|
104531
|
1
|
75261
|
my ($self, $order) = @_; |
|
167
|
104531
|
100
|
|
|
|
134083
|
$order = 0 unless defined $order; |
|
168
|
104531
|
50
|
33
|
|
|
425891
|
if( defined($order) && ($order !~ /^-?\d+$/) ) { |
|
169
|
0
|
|
|
|
|
0
|
$self->throw("value $order passed in to sub_Location is $order, an invalid value"); |
|
170
|
|
|
|
|
|
|
} |
|
171
|
104531
|
50
|
|
|
|
126155
|
$order = 1 if($order > 1); |
|
172
|
104531
|
50
|
|
|
|
123878
|
$order = -1 if($order < -1); |
|
173
|
104531
|
100
|
|
|
|
131652
|
my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : (); |
|
|
97138
|
|
|
|
|
179690
|
|
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# return the array if no ordering requested |
|
176
|
104531
|
100
|
66
|
|
|
298905
|
return @sublocs if( ($order == 0) || (! @sublocs) ); |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# sort those locations that are on the same sequence as the top (`master') |
|
179
|
|
|
|
|
|
|
# if the top seq is undefined, we take the first defined in a sublocation |
|
180
|
2
|
|
|
|
|
4
|
my $seqid = $self->seq_id(); |
|
181
|
2
|
|
|
|
|
3
|
my $i = 0; |
|
182
|
2
|
|
66
|
|
|
8
|
while((! defined($seqid)) && ($i <= $#sublocs)) { |
|
183
|
10
|
|
|
|
|
14
|
$seqid = $sublocs[$i++]->seq_id(); |
|
184
|
|
|
|
|
|
|
} |
|
185
|
2
|
50
|
33
|
|
|
3
|
if((! $self->seq_id()) && $seqid) { |
|
186
|
0
|
|
|
|
|
0
|
$self->warn("sorted sublocation array requested but ". |
|
187
|
|
|
|
|
|
|
"root location doesn't define seq_id ". |
|
188
|
|
|
|
|
|
|
"(at least one sublocation does!)"); |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
my @locs = ($seqid ? |
|
191
|
2
|
50
|
|
|
|
7
|
grep { $_->seq_id() eq $seqid; } @sublocs : |
|
|
0
|
|
|
|
|
0
|
|
|
192
|
|
|
|
|
|
|
@sublocs); |
|
193
|
2
|
50
|
|
|
|
5
|
if(@locs) { |
|
194
|
2
|
100
|
|
|
|
4
|
if($order == 1) { |
|
195
|
|
|
|
|
|
|
# Schwartzian transforms for performance boost |
|
196
|
5
|
|
|
|
|
6
|
@locs = map { $_->[0] } |
|
197
|
|
|
|
|
|
|
sort { |
|
198
|
9
|
0
|
33
|
|
|
24
|
(defined $a && defined $b) ? $a->[1] <=> $b->[1] : |
|
|
|
50
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
$a ? -1 : 1 |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
map { |
|
202
|
1
|
50
|
|
|
|
2
|
[$_, (defined $_->start ? $_->start : $_->end)] |
|
|
5
|
|
|
|
|
8
|
|
|
203
|
|
|
|
|
|
|
} @locs;; |
|
204
|
|
|
|
|
|
|
} else { # $order == -1 |
|
205
|
5
|
|
|
|
|
6
|
@locs = map { $_->[0]} |
|
206
|
|
|
|
|
|
|
sort { |
|
207
|
6
|
0
|
33
|
|
|
18
|
(defined $a && defined $b) ? $b->[1] <=> $a->[1] : |
|
|
|
50
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
$a ? -1 : 1 |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
map { |
|
211
|
1
|
50
|
|
|
|
1
|
[$_, (defined $_->end ? $_->end : $_->start)] |
|
|
5
|
|
|
|
|
7
|
|
|
212
|
|
|
|
|
|
|
} @locs; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
# push the rest unsorted |
|
216
|
2
|
50
|
|
|
|
6
|
if($seqid) { |
|
217
|
0
|
|
|
|
|
0
|
push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs); |
|
|
0
|
|
|
|
|
0
|
|
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
# done! |
|
220
|
|
|
|
|
|
|
|
|
221
|
2
|
|
|
|
|
5
|
return @locs; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head2 add_sub_Location |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Title : add_sub_Location |
|
227
|
|
|
|
|
|
|
Usage : $splitloc->add_sub_Location(@locationIobjs); |
|
228
|
|
|
|
|
|
|
Function: add an additional sublocation |
|
229
|
|
|
|
|
|
|
Returns : number of current sub locations |
|
230
|
|
|
|
|
|
|
Args : list of Bio::LocationI implementing object(s) to add |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=cut |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub add_sub_Location { |
|
235
|
35743
|
|
|
35743
|
1
|
30575
|
my ($self,@args) = @_; |
|
236
|
35743
|
|
|
|
|
21171
|
my @locs; |
|
237
|
35743
|
|
|
|
|
25825
|
foreach my $loc ( @args ) { |
|
238
|
35750
|
50
|
33
|
|
|
84340
|
if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) { |
|
239
|
0
|
|
|
|
|
0
|
$self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!"); |
|
240
|
0
|
|
|
|
|
0
|
next; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
35750
|
|
|
|
|
22253
|
push @{$self->{'_sublocations'}}, $loc; |
|
|
35750
|
|
|
|
|
44818
|
|
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
35743
|
|
|
|
|
20102
|
return scalar @{$self->{'_sublocations'}}; |
|
|
35743
|
|
|
|
|
53864
|
|
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 splittype |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Title : splittype |
|
251
|
|
|
|
|
|
|
Usage : $splittype = $location->splittype(); |
|
252
|
|
|
|
|
|
|
Function: get/set the split splittype |
|
253
|
|
|
|
|
|
|
Returns : the splittype of split feature (join, order) |
|
254
|
|
|
|
|
|
|
Args : splittype to set |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub splittype { |
|
259
|
3917
|
|
|
3917
|
1
|
3488
|
my ($self, $value) = @_; |
|
260
|
3917
|
100
|
66
|
|
|
6811
|
if( defined $value || ! defined $self->{'_splittype'} ) { |
|
261
|
3703
|
50
|
|
|
|
5098
|
$value = 'JOIN' unless( defined $value ); |
|
262
|
3703
|
|
|
|
|
5301
|
$self->{'_splittype'} = uc ($value); |
|
263
|
|
|
|
|
|
|
} |
|
264
|
3917
|
|
|
|
|
4032
|
return $self->{'_splittype'}; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 is_single_sequence |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Title : is_single_sequence |
|
270
|
|
|
|
|
|
|
Usage : if($splitloc->is_single_sequence()) { |
|
271
|
|
|
|
|
|
|
print "Location object $splitloc is split ". |
|
272
|
|
|
|
|
|
|
"but only across a single sequence\n"; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
Function: Determine whether this location is split across a single or |
|
275
|
|
|
|
|
|
|
multiple sequences. |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This implementation ignores (sub-)locations that do not define |
|
278
|
|
|
|
|
|
|
seq_id(). The same holds true for the root location. |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Returns : TRUE if all sublocations lie on the same sequence as the root |
|
281
|
|
|
|
|
|
|
location (feature), and FALSE otherwise. |
|
282
|
|
|
|
|
|
|
Args : none |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub is_single_sequence { |
|
287
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
288
|
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
0
|
my $seqid = $self->seq_id(); |
|
290
|
0
|
|
|
|
|
0
|
foreach my $loc ($self->sub_Location(0)) { |
|
291
|
0
|
0
|
|
|
|
0
|
$seqid = $loc->seq_id() if(! $seqid); |
|
292
|
0
|
0
|
0
|
|
|
0
|
if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) { |
|
293
|
0
|
|
|
|
|
0
|
return 0; |
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
} |
|
296
|
0
|
|
|
|
|
0
|
return 1; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head2 guide_strand |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Title : guide_strand |
|
302
|
|
|
|
|
|
|
Usage : $str = $loc->guide_strand(); |
|
303
|
|
|
|
|
|
|
Function: Get/Set the guide strand. Of use only if the split type is |
|
304
|
|
|
|
|
|
|
a 'join' (this helps determine the order of sublocation |
|
305
|
|
|
|
|
|
|
retrieval) |
|
306
|
|
|
|
|
|
|
Returns : value of guide strand (1, -1, or undef) |
|
307
|
|
|
|
|
|
|
Args : new value (-1 or 1, optional) |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub guide_strand { |
|
312
|
9265
|
|
|
9265
|
1
|
6954
|
my $self = shift; |
|
313
|
9265
|
100
|
|
|
|
16942
|
return $self->{'strand'} = shift if @_; |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Sublocations strand values consistency check to set Guide Strand |
|
316
|
5531
|
|
|
|
|
4303
|
my @subloc_strands; |
|
317
|
5531
|
|
|
|
|
6713
|
foreach my $loc ($self->sub_Location(0)) { |
|
318
|
53464
|
|
100
|
|
|
58439
|
push @subloc_strands, $loc->strand || 1; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
5531
|
50
|
|
|
|
13274
|
if ($self->isa('Bio::Location::SplitLocationI')) { |
|
321
|
5531
|
|
|
|
|
3780
|
my $identical = 0; |
|
322
|
5531
|
|
|
|
|
4426
|
my $first_value = $subloc_strands[0]; |
|
323
|
5531
|
|
|
|
|
4874
|
foreach my $strand (@subloc_strands) { |
|
324
|
53464
|
100
|
|
|
|
62079
|
$identical++ if ($strand == $first_value); |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
|
|
327
|
5531
|
100
|
|
|
|
6428
|
if ($identical == scalar @subloc_strands) { |
|
328
|
5483
|
|
|
|
|
6812
|
$self->{'strand'} = $first_value; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
else { |
|
331
|
48
|
|
|
|
|
56
|
$self->{'strand'} = undef; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
} |
|
334
|
5531
|
|
|
|
|
15523
|
return $self->{'strand'}; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head1 LocationI methods |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 strand |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Title : strand |
|
342
|
|
|
|
|
|
|
Usage : $obj->strand($newval) |
|
343
|
|
|
|
|
|
|
Function: For SplitLocations, setting the strand of the container |
|
344
|
|
|
|
|
|
|
(this object) is a short-cut for setting the strand of all |
|
345
|
|
|
|
|
|
|
sublocations. |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
In get-mode, checks if no sub-location is remote, and if |
|
348
|
|
|
|
|
|
|
all have the same strand. If so, it returns that shared |
|
349
|
|
|
|
|
|
|
strand value. Otherwise it returns undef. |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Example : |
|
352
|
|
|
|
|
|
|
Returns : on get, value of strand if identical between sublocations |
|
353
|
|
|
|
|
|
|
(-1, 1, or undef) |
|
354
|
|
|
|
|
|
|
Args : new value (-1 or 1, optional) |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=cut |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub strand{ |
|
360
|
14257
|
|
|
14257
|
1
|
14283
|
my ($self,$value) = @_; |
|
361
|
14257
|
100
|
|
|
|
17285
|
if( defined $value) { |
|
362
|
1663
|
|
|
|
|
1606
|
$self->{'strand'} = $value; |
|
363
|
|
|
|
|
|
|
# propagate to all sublocs |
|
364
|
1663
|
|
|
|
|
2877
|
foreach my $loc ($self->sub_Location(0)) { |
|
365
|
16770
|
|
|
|
|
18845
|
$loc->strand($value); |
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
else { |
|
369
|
12594
|
|
|
|
|
10944
|
my ($strand, $lstrand); |
|
370
|
12594
|
|
|
|
|
17718
|
foreach my $loc ($self->sub_Location(0)) { |
|
371
|
|
|
|
|
|
|
# we give up upon any location that doesn't have |
|
372
|
|
|
|
|
|
|
# the strand specified, or has a differing one set than |
|
373
|
|
|
|
|
|
|
# previously seen. |
|
374
|
|
|
|
|
|
|
# calling strand() is potentially expensive if the subloc |
|
375
|
|
|
|
|
|
|
# is also a split location, so we cache it |
|
376
|
124817
|
|
|
|
|
145807
|
$lstrand = $loc->strand(); |
|
377
|
124817
|
100
|
100
|
|
|
440030
|
if ( ! $lstrand |
|
|
|
100
|
66
|
|
|
|
|
|
378
|
|
|
|
|
|
|
or ($strand and ($strand != $lstrand)) |
|
379
|
|
|
|
|
|
|
) { |
|
380
|
41
|
|
|
|
|
99
|
$strand = undef; |
|
381
|
41
|
|
|
|
|
55
|
last; |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
elsif (! $strand) { |
|
384
|
12586
|
|
|
|
|
12588
|
$strand = $lstrand; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
} |
|
387
|
12594
|
|
|
|
|
32740
|
return $strand; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 flip_strand |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Title : flip_strand |
|
394
|
|
|
|
|
|
|
Usage : $location->flip_strand(); |
|
395
|
|
|
|
|
|
|
Function: Flip-flop a strand to the opposite. Also sets Split strand |
|
396
|
|
|
|
|
|
|
to be consistent with the sublocation strands |
|
397
|
|
|
|
|
|
|
(1, -1 or undef for mixed strand values) |
|
398
|
|
|
|
|
|
|
Returns : None |
|
399
|
|
|
|
|
|
|
Args : None |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub flip_strand { |
|
404
|
107
|
|
|
107
|
1
|
106
|
my $self = shift; |
|
405
|
107
|
|
|
|
|
85
|
my @sublocs; |
|
406
|
|
|
|
|
|
|
my @subloc_strands; |
|
407
|
|
|
|
|
|
|
|
|
408
|
107
|
|
|
|
|
167
|
for my $loc ( $self->sub_Location(0) ) { |
|
409
|
|
|
|
|
|
|
# Atomic "flip_strand" now initialize strand if necessary |
|
410
|
344
|
|
|
|
|
518
|
my $new_strand = $loc->flip_strand; |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# Store strand values for later consistency check |
|
413
|
344
|
|
|
|
|
351
|
push @sublocs, $loc; |
|
414
|
344
|
|
|
|
|
356
|
push @subloc_strands, $new_strand; |
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# Sublocations strand values consistency check to set Guide Strand |
|
418
|
107
|
50
|
|
|
|
323
|
if ($self->isa('Bio::Location::SplitLocationI')) { |
|
419
|
107
|
|
|
|
|
101
|
my $identical = 0; |
|
420
|
107
|
|
|
|
|
111
|
my $first_value = $subloc_strands[0]; |
|
421
|
107
|
|
|
|
|
116
|
foreach my $strand (@subloc_strands) { |
|
422
|
344
|
100
|
|
|
|
771
|
$identical++ if ($strand == $first_value); |
|
423
|
|
|
|
|
|
|
} |
|
424
|
|
|
|
|
|
|
|
|
425
|
107
|
100
|
|
|
|
147
|
if ($identical == scalar @subloc_strands) { |
|
426
|
95
|
|
|
|
|
148
|
$self->guide_strand($first_value); |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
else { |
|
429
|
|
|
|
|
|
|
# Mixed strand values, must reverse the sublocations order |
|
430
|
12
|
|
|
|
|
17
|
$self->guide_strand(undef); |
|
431
|
12
|
|
|
|
|
8
|
@{ $self->{_sublocations} } = reverse @sublocs; |
|
|
12
|
|
|
|
|
35
|
|
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head2 start |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Title : start |
|
439
|
|
|
|
|
|
|
Usage : $start = $location->start(); |
|
440
|
|
|
|
|
|
|
Function: get the starting point of the first (sorted) sublocation |
|
441
|
|
|
|
|
|
|
Returns : integer |
|
442
|
|
|
|
|
|
|
Args : none |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub start { |
|
447
|
36525
|
|
|
36525
|
1
|
30531
|
my ($self,$value) = @_; |
|
448
|
36525
|
50
|
|
|
|
44339
|
if( defined $value ) { |
|
449
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the starting point of a split location, " |
|
450
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
|
451
|
|
|
|
|
|
|
} |
|
452
|
36525
|
|
|
|
|
62460
|
return $self->SUPER::start(); |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 end |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Title : end |
|
458
|
|
|
|
|
|
|
Usage : $end = $location->end(); |
|
459
|
|
|
|
|
|
|
Function: get the ending point of the last (sorted) sublocation |
|
460
|
|
|
|
|
|
|
Returns : integer |
|
461
|
|
|
|
|
|
|
Args : none |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=cut |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub end { |
|
466
|
32829
|
|
|
32829
|
1
|
31218
|
my ($self,$value) = @_; |
|
467
|
32829
|
50
|
|
|
|
50197
|
if( defined $value ) { |
|
468
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the ending point of a split location, " |
|
469
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
|
470
|
|
|
|
|
|
|
} |
|
471
|
32829
|
|
|
|
|
51387
|
return $self->SUPER::end(); |
|
472
|
|
|
|
|
|
|
} |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head2 min_start |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Title : min_start |
|
477
|
|
|
|
|
|
|
Usage : $min_start = $location->min_start(); |
|
478
|
|
|
|
|
|
|
Function: get the minimum starting point |
|
479
|
|
|
|
|
|
|
Returns : the minimum starting point from the contained sublocations |
|
480
|
|
|
|
|
|
|
Args : none |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub min_start { |
|
485
|
36530
|
|
|
36530
|
1
|
27573
|
my ($self, $value) = @_; |
|
486
|
|
|
|
|
|
|
|
|
487
|
36530
|
50
|
|
|
|
48264
|
if( defined $value ) { |
|
488
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the minimum starting point of a split " |
|
489
|
|
|
|
|
|
|
. "location, that is not possible, try manipulating the sub Locations"); |
|
490
|
|
|
|
|
|
|
} |
|
491
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
492
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
493
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
494
|
36530
|
|
|
|
|
41864
|
my @locs = $self->sub_Location(0); |
|
495
|
36530
|
100
|
|
|
|
91677
|
return ( @locs ) ? $locs[0]->min_start : undef; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 max_start |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Title : max_start |
|
501
|
|
|
|
|
|
|
Usage : my $maxstart = $location->max_start(); |
|
502
|
|
|
|
|
|
|
Function: Get maximum starting location of feature startpoint |
|
503
|
|
|
|
|
|
|
Returns : integer or undef if no maximum starting point. |
|
504
|
|
|
|
|
|
|
Args : none |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=cut |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub max_start { |
|
509
|
4591
|
|
|
4591
|
1
|
4545
|
my ($self,$value) = @_; |
|
510
|
|
|
|
|
|
|
|
|
511
|
4591
|
50
|
|
|
|
6634
|
if( defined $value ) { |
|
512
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the maximum starting point of a split " |
|
513
|
|
|
|
|
|
|
. "location, that is not possible, try manipulating the sub Locations"); |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
516
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
517
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
518
|
4591
|
|
|
|
|
5321
|
my @locs = $self->sub_Location(0); |
|
519
|
4591
|
100
|
|
|
|
8441
|
return ( @locs ) ? $locs[0]->max_start : undef; |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 start_pos_type |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Title : start_pos_type |
|
525
|
|
|
|
|
|
|
Usage : my $start_pos_type = $location->start_pos_type(); |
|
526
|
|
|
|
|
|
|
Function: Get start position type (ie <,>, ^) |
|
527
|
|
|
|
|
|
|
Returns : type of position coded as text |
|
528
|
|
|
|
|
|
|
('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
|
529
|
|
|
|
|
|
|
Args : none |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=cut |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
sub start_pos_type { |
|
534
|
6
|
|
|
6
|
1
|
8
|
my ($self,$value) = @_; |
|
535
|
|
|
|
|
|
|
|
|
536
|
6
|
50
|
|
|
|
15
|
if( defined $value ) { |
|
537
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the start_pos_type of a split location, " |
|
538
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
|
539
|
|
|
|
|
|
|
} |
|
540
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
541
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
542
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
543
|
6
|
|
|
|
|
12
|
my @locs = $self->sub_Location(0); |
|
544
|
6
|
50
|
|
|
|
22
|
return ( @locs ) ? $locs[0]->start_pos_type : undef; |
|
545
|
|
|
|
|
|
|
} |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=head2 min_end |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Title : min_end |
|
550
|
|
|
|
|
|
|
Usage : my $minend = $location->min_end(); |
|
551
|
|
|
|
|
|
|
Function: Get minimum ending location of feature endpoint |
|
552
|
|
|
|
|
|
|
Returns : integer or undef if no minimum ending point. |
|
553
|
|
|
|
|
|
|
Args : none |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
=cut |
|
556
|
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
sub min_end { |
|
558
|
510
|
|
|
510
|
1
|
392
|
my ($self,$value) = @_; |
|
559
|
|
|
|
|
|
|
|
|
560
|
510
|
50
|
|
|
|
765
|
if( defined $value ) { |
|
561
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the minimum end point of a split location, " |
|
562
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
|
563
|
|
|
|
|
|
|
} |
|
564
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
565
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
566
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
567
|
510
|
|
|
|
|
606
|
my @locs = $self->sub_Location(0); |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
|
570
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
|
571
|
|
|
|
|
|
|
# Example: for "join(1..100,J00194.1:100..202)", End would be 100 |
|
572
|
510
|
|
|
|
|
654
|
my $seqid = $self->seq_id; |
|
573
|
510
|
|
|
|
|
456
|
my $i = 0; |
|
574
|
510
|
|
100
|
|
|
917
|
while (not defined $seqid and $i <= $#locs) { |
|
575
|
6
|
|
|
|
|
13
|
$seqid = $locs[$i++]->seq_id; |
|
576
|
|
|
|
|
|
|
} |
|
577
|
|
|
|
|
|
|
|
|
578
|
510
|
100
|
|
|
|
639
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
|
2197
|
|
|
|
|
2514
|
|
|
579
|
|
|
|
|
|
|
: @locs); |
|
580
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
|
581
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
|
582
|
|
|
|
|
|
|
# and if that fails return the last segment value |
|
583
|
510
|
100
|
66
|
|
|
1438
|
if (@locs and not @same_id_locs) { |
|
584
|
1
|
|
|
|
|
2
|
my $first_id; |
|
585
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
|
586
|
1
|
|
|
|
|
4
|
$first_id = $locs[$i++]->seq_id; |
|
587
|
|
|
|
|
|
|
} |
|
588
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
|
2
|
|
|
|
|
3
|
|
|
589
|
|
|
|
|
|
|
: @locs); |
|
590
|
|
|
|
|
|
|
} |
|
591
|
510
|
50
|
|
|
|
1139
|
return ( @same_id_locs ) ? $same_id_locs[-1]->min_end : undef; |
|
592
|
|
|
|
|
|
|
} |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head2 max_end |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Title : max_end |
|
597
|
|
|
|
|
|
|
Usage : my $maxend = $location->max_end(); |
|
598
|
|
|
|
|
|
|
Function: Get maximum ending location of feature endpoint |
|
599
|
|
|
|
|
|
|
Returns : integer or undef if no maximum ending point. |
|
600
|
|
|
|
|
|
|
Args : none |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=cut |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub max_end { |
|
605
|
32834
|
|
|
32834
|
1
|
24170
|
my ($self,$value) = @_; |
|
606
|
|
|
|
|
|
|
|
|
607
|
32834
|
50
|
|
|
|
45929
|
if( defined $value ) { |
|
608
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set the maximum end point of a split location, " |
|
609
|
|
|
|
|
|
|
."that is not possible, try manipulating the sub Locations"); |
|
610
|
|
|
|
|
|
|
} |
|
611
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
612
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
613
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
614
|
32834
|
|
|
|
|
38689
|
my @locs = $self->sub_Location(0); |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
|
617
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
|
618
|
|
|
|
|
|
|
# Example: for "join(1..100,J00194.1:100..202)", End would be 100 |
|
619
|
32834
|
|
|
|
|
50769
|
my $seqid = $self->seq_id; |
|
620
|
32834
|
|
|
|
|
29219
|
my $i = 0; |
|
621
|
32834
|
|
100
|
|
|
67000
|
while (not defined $seqid and $i <= $#locs) { |
|
622
|
573
|
|
|
|
|
753
|
$seqid = $locs[$i++]->seq_id; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
|
|
625
|
32834
|
100
|
|
|
|
49556
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
|
338856
|
|
|
|
|
370360
|
|
|
626
|
|
|
|
|
|
|
: @locs); |
|
627
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
|
628
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
|
629
|
|
|
|
|
|
|
# and if that fails return the last segment value |
|
630
|
32834
|
100
|
66
|
|
|
119649
|
if (@locs and not @same_id_locs) { |
|
631
|
1
|
|
|
|
|
2
|
my $first_id; |
|
632
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
|
633
|
1
|
|
|
|
|
2
|
$first_id = $locs[$i++]->seq_id; |
|
634
|
|
|
|
|
|
|
} |
|
635
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
|
2
|
|
|
|
|
3
|
|
|
636
|
|
|
|
|
|
|
: @locs); |
|
637
|
|
|
|
|
|
|
} |
|
638
|
32834
|
50
|
|
|
|
73794
|
return ( @same_id_locs ) ? $same_id_locs[-1]->max_end : undef; |
|
639
|
|
|
|
|
|
|
} |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=head2 end_pos_type |
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
Title : end_pos_type |
|
644
|
|
|
|
|
|
|
Usage : my $end_pos_type = $location->end_pos_type(); |
|
645
|
|
|
|
|
|
|
Function: Get end position type (ie <,>, ^) |
|
646
|
|
|
|
|
|
|
Returns : type of position coded as text |
|
647
|
|
|
|
|
|
|
('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN') |
|
648
|
|
|
|
|
|
|
Args : none |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub end_pos_type { |
|
653
|
6
|
|
|
6
|
1
|
9
|
my ($self,$value) = @_; |
|
654
|
|
|
|
|
|
|
|
|
655
|
6
|
50
|
|
|
|
15
|
if( defined $value ) { |
|
656
|
0
|
|
|
|
|
0
|
$self->throw( "Trying to set end_pos_type of a split location, " |
|
657
|
|
|
|
|
|
|
. "that is not possible, try manipulating the sub Locations"); |
|
658
|
|
|
|
|
|
|
} |
|
659
|
|
|
|
|
|
|
# No sort by default because it breaks circular cut by origin features |
|
660
|
|
|
|
|
|
|
# (like "join(2006035..2007700,1..257)"). Sorting is user responsability. |
|
661
|
|
|
|
|
|
|
# Assume Start to be 1st segment start and End to be last segment End. |
|
662
|
6
|
|
|
|
|
11
|
my @locs = $self->sub_Location(0); |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
# Return the End corresponding to the same sequence as the top ('master') |
|
665
|
|
|
|
|
|
|
# if the top seq is undefined, take the first defined in a sublocation. |
|
666
|
|
|
|
|
|
|
# Example: for "join(1..>100,J00194.1:100..202)", End pos type would be 'AFTER' |
|
667
|
6
|
|
|
|
|
13
|
my $seqid = $self->seq_id; |
|
668
|
6
|
|
|
|
|
6
|
my $i = 0; |
|
669
|
6
|
|
100
|
|
|
24
|
while (not defined $seqid and $i <= $#locs) { |
|
670
|
8
|
|
|
|
|
14
|
$seqid = $locs[$i++]->seq_id; |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
|
|
673
|
6
|
100
|
|
|
|
14
|
my @same_id_locs = ($seqid ? grep { $_->seq_id eq $seqid } @locs |
|
|
4
|
|
|
|
|
7
|
|
|
674
|
|
|
|
|
|
|
: @locs); |
|
675
|
|
|
|
|
|
|
# If there is a $seqid but no sublocations have the same id, |
|
676
|
|
|
|
|
|
|
# try with the first id found in the sublocations instead, |
|
677
|
|
|
|
|
|
|
# and if that fails return the last segment value |
|
678
|
6
|
100
|
66
|
|
|
22
|
if (@locs and not @same_id_locs) { |
|
679
|
1
|
|
|
|
|
1
|
my $first_id; |
|
680
|
1
|
|
66
|
|
|
6
|
while (not defined $first_id and $i <= $#locs) { |
|
681
|
1
|
|
|
|
|
4
|
$first_id = $locs[$i++]->seq_id; |
|
682
|
|
|
|
|
|
|
} |
|
683
|
1
|
50
|
|
|
|
3
|
@same_id_locs = ($first_id ? grep { $_->seq_id eq $first_id } @locs |
|
|
2
|
|
|
|
|
4
|
|
|
684
|
|
|
|
|
|
|
: @locs); |
|
685
|
|
|
|
|
|
|
} |
|
686
|
6
|
50
|
|
|
|
23
|
return ( @same_id_locs ) ? $same_id_locs[-1]->end_pos_type : undef; |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=head2 length |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Title : length |
|
692
|
|
|
|
|
|
|
Usage : $len = $loc->length(); |
|
693
|
|
|
|
|
|
|
Function: get the length in the coordinate space this location spans |
|
694
|
|
|
|
|
|
|
Example : |
|
695
|
|
|
|
|
|
|
Returns : an integer |
|
696
|
|
|
|
|
|
|
Args : none |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub length { |
|
701
|
22
|
|
|
22
|
1
|
30
|
my ($self) = @_; |
|
702
|
22
|
|
|
|
|
22
|
my $length = 0; |
|
703
|
|
|
|
|
|
|
# Mixed strand values means transplicing (where exons can even |
|
704
|
|
|
|
|
|
|
# be in different chromosomes), so in that case only give the sum |
|
705
|
|
|
|
|
|
|
# of the lengths of the individual segments |
|
706
|
22
|
100
|
|
|
|
35
|
if (! defined $self->guide_strand) { |
|
707
|
13
|
|
|
|
|
17
|
for my $loc ( $self->sub_Location(0) ) { |
|
708
|
34
|
|
|
|
|
53
|
$length += abs($loc->end - $loc->start) + 1; |
|
709
|
|
|
|
|
|
|
} |
|
710
|
|
|
|
|
|
|
} |
|
711
|
|
|
|
|
|
|
else { |
|
712
|
9
|
|
|
|
|
12
|
my @sublocs = $self->sub_Location(0); |
|
713
|
9
|
|
|
|
|
19
|
my $start = $sublocs[0]->start; |
|
714
|
9
|
|
|
|
|
14
|
my $end = $sublocs[-1]->end; |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# If Start > ·End, its a possible case of cut by origin |
|
717
|
|
|
|
|
|
|
# location in circular sequences (e.g "join(16..20,1..2)") |
|
718
|
9
|
100
|
|
|
|
13
|
if ($start > $end) { |
|
719
|
|
|
|
|
|
|
# Figure out which segments are located before |
|
720
|
|
|
|
|
|
|
# and which are located after coordinate 1 |
|
721
|
|
|
|
|
|
|
# (END_SEQ - 1 - START_SEQ) |
|
722
|
6
|
|
|
|
|
5
|
my @end_seq_segments; |
|
723
|
|
|
|
|
|
|
my @start_seq_segments; |
|
724
|
6
|
|
|
|
|
6
|
my $switch = 0; |
|
725
|
6
|
|
|
|
|
8
|
foreach my $subloc (@sublocs) { |
|
726
|
16
|
100
|
|
|
|
16
|
if ($switch == 0) { |
|
727
|
14
|
100
|
|
|
|
17
|
if ($subloc->start == 1) { |
|
728
|
6
|
|
|
|
|
4
|
$switch = 1; |
|
729
|
6
|
|
|
|
|
8
|
push @start_seq_segments, $subloc; |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
else { |
|
732
|
8
|
|
|
|
|
21
|
push @end_seq_segments, $subloc; |
|
733
|
|
|
|
|
|
|
} |
|
734
|
|
|
|
|
|
|
} |
|
735
|
|
|
|
|
|
|
else { |
|
736
|
2
|
|
|
|
|
3
|
push @start_seq_segments, $subloc; |
|
737
|
|
|
|
|
|
|
} |
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
# If its a cut by origin location, sum the whole length of each group |
|
741
|
6
|
50
|
33
|
|
|
19
|
if (scalar @end_seq_segments > 0 and @start_seq_segments > 0) { |
|
742
|
6
|
|
|
|
|
11
|
my $end_segments_length = abs( $end_seq_segments[0]->start |
|
743
|
|
|
|
|
|
|
- $end_seq_segments[-1]->end) |
|
744
|
|
|
|
|
|
|
+ 1; |
|
745
|
6
|
|
|
|
|
10
|
my $start_segments_length = abs( $start_seq_segments[0]->start |
|
746
|
|
|
|
|
|
|
- $start_seq_segments[-1]->end) |
|
747
|
|
|
|
|
|
|
+ 1; |
|
748
|
6
|
|
|
|
|
11
|
$length = $end_segments_length + $start_segments_length; |
|
749
|
|
|
|
|
|
|
} |
|
750
|
|
|
|
|
|
|
} |
|
751
|
|
|
|
|
|
|
else { |
|
752
|
3
|
|
|
|
|
6
|
$length = $end - $start + 1; |
|
753
|
|
|
|
|
|
|
} |
|
754
|
|
|
|
|
|
|
} |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# If for some reason nothing worked, fall back to previous behaviour |
|
757
|
22
|
50
|
|
|
|
39
|
if ($length == 0) { |
|
758
|
0
|
|
|
|
|
0
|
$length = abs($self->end - $self->start) + 1 |
|
759
|
|
|
|
|
|
|
} |
|
760
|
|
|
|
|
|
|
|
|
761
|
22
|
|
|
|
|
126
|
return $length; |
|
762
|
|
|
|
|
|
|
} |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=head2 seq_id |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
Title : seq_id |
|
767
|
|
|
|
|
|
|
Usage : my $seqid = $location->seq_id(); |
|
768
|
|
|
|
|
|
|
Function: Get/Set seq_id that location refers to |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
We override this here in order to propagate to all sublocations |
|
771
|
|
|
|
|
|
|
which are not remote (provided this root is not remote either) |
|
772
|
|
|
|
|
|
|
Returns : seq_id |
|
773
|
|
|
|
|
|
|
Args : [optional] seq_id value to set |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub seq_id { |
|
779
|
37345
|
|
|
37345
|
1
|
28759
|
my $self = shift; |
|
780
|
|
|
|
|
|
|
|
|
781
|
37345
|
100
|
66
|
|
|
64730
|
if(@_ && !$self->is_remote()) { |
|
782
|
3588
|
|
|
|
|
4664
|
foreach my $subloc ($self->sub_Location(0)) { |
|
783
|
35365
|
100
|
|
|
|
38720
|
$subloc->seq_id(@_) if !$subloc->is_remote(); |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
} |
|
786
|
37345
|
|
|
|
|
57248
|
return $self->SUPER::seq_id(@_); |
|
787
|
|
|
|
|
|
|
} |
|
788
|
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
=head2 coordinate_policy |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
Title : coordinate_policy |
|
792
|
|
|
|
|
|
|
Usage : $policy = $location->coordinate_policy(); |
|
793
|
|
|
|
|
|
|
$location->coordinate_policy($mypolicy); # set may not be possible |
|
794
|
|
|
|
|
|
|
Function: Get the coordinate computing policy employed by this object. |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
See Bio::Location::CoordinatePolicyI for documentation about |
|
797
|
|
|
|
|
|
|
the policy object and its use. |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
The interface *does not* require implementing classes to accept |
|
800
|
|
|
|
|
|
|
setting of a different policy. The implementation provided here |
|
801
|
|
|
|
|
|
|
does, however, allow to do so. |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Implementors of this interface are expected to initialize every |
|
804
|
|
|
|
|
|
|
new instance with a CoordinatePolicyI object. The implementation |
|
805
|
|
|
|
|
|
|
provided here will return a default policy object if none has |
|
806
|
|
|
|
|
|
|
been set yet. To change this default policy object call this |
|
807
|
|
|
|
|
|
|
method as a class method with an appropriate argument. Note that |
|
808
|
|
|
|
|
|
|
in this case only subsequently created Location objects will be |
|
809
|
|
|
|
|
|
|
affected. |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
Returns : A Bio::Location::CoordinatePolicyI implementing object. |
|
812
|
|
|
|
|
|
|
Args : On set, a Bio::Location::CoordinatePolicyI implementing object. |
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=head2 to_FTstring |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Title : to_FTstring |
|
817
|
|
|
|
|
|
|
Usage : my $locstr = $location->to_FTstring() |
|
818
|
|
|
|
|
|
|
Function: returns the FeatureTable string of this location |
|
819
|
|
|
|
|
|
|
Returns : string |
|
820
|
|
|
|
|
|
|
Args : none |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=cut |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
sub to_FTstring { |
|
825
|
110
|
|
|
110
|
1
|
1680
|
my ($self) = @_; |
|
826
|
110
|
|
|
|
|
100
|
my @strs; |
|
827
|
110
|
|
100
|
|
|
210
|
my $strand = $self->strand() || 0; |
|
828
|
110
|
|
|
|
|
174
|
my $stype = lc($self->splittype()); |
|
829
|
|
|
|
|
|
|
|
|
830
|
110
|
100
|
|
|
|
197
|
if( $strand < 0 ) { |
|
831
|
42
|
|
|
|
|
87
|
$self->flip_strand; # this will recursively set the strand |
|
832
|
|
|
|
|
|
|
# to +1 for all the sub locations |
|
833
|
|
|
|
|
|
|
} |
|
834
|
|
|
|
|
|
|
|
|
835
|
110
|
|
|
|
|
213
|
foreach my $loc ( $self->sub_Location(0) ) { |
|
836
|
355
|
|
|
|
|
616
|
$loc->verbose($self->verbose); |
|
837
|
355
|
|
|
|
|
548
|
my $str = $loc->to_FTstring(); |
|
838
|
|
|
|
|
|
|
# we only append the remote seq_id if it hasn't been done already |
|
839
|
|
|
|
|
|
|
# by the sub-location (which it should if it knows it's remote) |
|
840
|
|
|
|
|
|
|
# (and of course only if it's necessary) |
|
841
|
355
|
50
|
100
|
|
|
467
|
if( (! $loc->is_remote) && |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
842
|
|
|
|
|
|
|
defined($self->seq_id) && defined($loc->seq_id) && |
|
843
|
|
|
|
|
|
|
($loc->seq_id ne $self->seq_id) ) { |
|
844
|
0
|
|
|
|
|
0
|
$str = sprintf("%s:%s", $loc->seq_id, $str); |
|
845
|
|
|
|
|
|
|
} |
|
846
|
355
|
|
|
|
|
521
|
push @strs, $str; |
|
847
|
|
|
|
|
|
|
} |
|
848
|
110
|
100
|
|
|
|
249
|
$self->flip_strand if $strand < 0; |
|
849
|
110
|
|
|
|
|
87
|
my $str; |
|
850
|
110
|
100
|
|
|
|
218
|
if( @strs == 1 ) { |
|
|
|
50
|
|
|
|
|
|
|
851
|
6
|
|
|
|
|
17
|
($str) = @strs; |
|
852
|
|
|
|
|
|
|
} elsif( @strs == 0 ) { |
|
853
|
0
|
|
|
|
|
0
|
$self->warn("no Sublocations for this splitloc, so not returning anything\n"); |
|
854
|
|
|
|
|
|
|
} else { |
|
855
|
104
|
|
|
|
|
171
|
$str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs)); |
|
856
|
|
|
|
|
|
|
} |
|
857
|
110
|
100
|
|
|
|
278
|
if( $strand < 0 ) { # wrap this in a complement if it was unrolled |
|
858
|
42
|
|
|
|
|
111
|
$str = sprintf("%s(%s)",'complement',$str); |
|
859
|
|
|
|
|
|
|
} |
|
860
|
|
|
|
|
|
|
|
|
861
|
110
|
|
|
|
|
308
|
return $str; |
|
862
|
|
|
|
|
|
|
} |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head2 valid_Location |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Title : valid_Location |
|
867
|
|
|
|
|
|
|
Usage : if ($location->valid_location) {...}; |
|
868
|
|
|
|
|
|
|
Function: boolean method to determine whether location is considered valid |
|
869
|
|
|
|
|
|
|
(has minimum requirements for Simple implementation) |
|
870
|
|
|
|
|
|
|
Returns : Boolean value: true if location is valid, false otherwise |
|
871
|
|
|
|
|
|
|
Args : none |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=cut |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# we'll probably need to override the RangeI methods since our locations will |
|
876
|
|
|
|
|
|
|
# not be contiguous. |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
1; |