line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: GeneMap.pm,v 1.17 2006/07/17 14:16:53 sendu Exp $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# BioPerl module for Bio::Map::GeneMap |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Please direct questions and support issues to |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Cared for by Sendu Bala |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Copyright Sendu bala |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::Map::GeneMap - A MapI implementation to represent the area around a gene |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Bio::Map::GeneMap; |
22
|
|
|
|
|
|
|
use Bio::Map::Gene; |
23
|
|
|
|
|
|
|
use Bio::Map::TranscriptionFactor; |
24
|
|
|
|
|
|
|
use Bio::Map::GeneRelative; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# make some maps that will represent an area around a particular gene in |
27
|
|
|
|
|
|
|
# particular species (by default, the map represents the area in the genome |
28
|
|
|
|
|
|
|
# 1000bp upstream of the gene) |
29
|
|
|
|
|
|
|
my $map1 = Bio::Map::GeneMap->get(-gene => 'BRCA2', |
30
|
|
|
|
|
|
|
-species => 'human', |
31
|
|
|
|
|
|
|
-description => 'breast cancer 2, early onset'); |
32
|
|
|
|
|
|
|
my $map2 = Bio::Map::GeneMap->get(-gene => 'BRCA2', |
33
|
|
|
|
|
|
|
-species => 'mouse'); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# model a TF that binds 500bp upstream of the BRCA2 gene in humans and |
36
|
|
|
|
|
|
|
# 250bp upstream of BRCA2 in mice |
37
|
|
|
|
|
|
|
my $rel = Bio::Map::GeneRelative->new(-description => "gene start"); |
38
|
|
|
|
|
|
|
my $tf = Bio::Map::TranscriptionFactor->get(-universal_name => 'tf1'); |
39
|
|
|
|
|
|
|
Bio::Map::Position->new(-map => $map1, |
40
|
|
|
|
|
|
|
-element => $tf, |
41
|
|
|
|
|
|
|
-start => -500, |
42
|
|
|
|
|
|
|
-length => 10, |
43
|
|
|
|
|
|
|
-relative => $rel); |
44
|
|
|
|
|
|
|
Bio::Map::Position->new(-map => $map2, |
45
|
|
|
|
|
|
|
-element => $tf, |
46
|
|
|
|
|
|
|
-start => -250, |
47
|
|
|
|
|
|
|
-length => 10, |
48
|
|
|
|
|
|
|
-relative => $rel); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# find out all the things that map near BRCA2 in all species |
51
|
|
|
|
|
|
|
foreach my $map ($gene->known_maps) { |
52
|
|
|
|
|
|
|
foreach my $thing ($map->get_elements) { |
53
|
|
|
|
|
|
|
next if $thing eq $gene; |
54
|
|
|
|
|
|
|
foreach my $pos ($thing->get_positions($map)) { |
55
|
|
|
|
|
|
|
print "In species ", $map->species, ", ", |
56
|
|
|
|
|
|
|
$thing->universal_name, " maps at ", $pos->value, |
57
|
|
|
|
|
|
|
" relative to ", $pos->relative->description, " of gene ", |
58
|
|
|
|
|
|
|
$gene->universal_name, "\n"; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# a GeneMap isa PrimarySeq and so can have sequence associated with it |
64
|
|
|
|
|
|
|
$map1->seq('ATGC'); |
65
|
|
|
|
|
|
|
my $subseq = $map1->subseq(2,3); # TG |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 DESCRIPTION |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Model the abstract notion of the area around a gene - you don't care exactly |
70
|
|
|
|
|
|
|
where this area is in the genome, you just want to be able to say "something |
71
|
|
|
|
|
|
|
binds upstream of gene X" and "something else binds 20bp upstream of the first |
72
|
|
|
|
|
|
|
something" etc. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
It's useful for modelling transcription factor bindings sites, letting you find |
75
|
|
|
|
|
|
|
out which transcription factors bind near a gene of interest, or which genes |
76
|
|
|
|
|
|
|
are bound by a transcription factor of interest. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
See t/Map/Map.t for more example usage. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 FEEDBACK |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 Mailing Lists |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
85
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
86
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
89
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head2 Support |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
I |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
98
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
99
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
100
|
|
|
|
|
|
|
with code and data examples if at all possible. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 Reporting Bugs |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
105
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
106
|
|
|
|
|
|
|
web: |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 AUTHOR - Sendu Bala |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Email bix@sendu.me.uk |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 APPENDIX |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
117
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Let the code begin... |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
package Bio::Map::GeneMap; |
124
|
1
|
|
|
1
|
|
1301
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
24
|
|
125
|
|
|
|
|
|
|
|
126
|
1
|
|
|
1
|
|
3
|
use Bio::Map::Gene; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
53
|
|
127
|
1
|
|
|
1
|
|
5
|
use Bio::Map::Position; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
45
|
|
128
|
|
|
|
|
|
|
|
129
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Map::SimpleMap Bio::PrimarySeq); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1269
|
|
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
our $GENEMAPS = {}; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 new |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Title : new |
136
|
|
|
|
|
|
|
Usage : my $obj = Bio::Map::GeneMap->new(); |
137
|
|
|
|
|
|
|
Function: Builds a new Bio::Map::GeneMap object (that has placed on it a |
138
|
|
|
|
|
|
|
mappable element (Bio::Map::Gene) representing a gene). |
139
|
|
|
|
|
|
|
Returns : Bio::Map::GeneMap |
140
|
|
|
|
|
|
|
Args : -gene => string name of the gene this map will be for |
141
|
|
|
|
|
|
|
(in a form common to all species that have the gene, |
142
|
|
|
|
|
|
|
but unique amongst non-orthologous genes) or a |
143
|
|
|
|
|
|
|
Bio::Map::Gene object, REQUIRED |
144
|
|
|
|
|
|
|
-species => Bio::Taxon or string representing species, REQUIRED |
145
|
|
|
|
|
|
|
-uid => string, unique identifier for this map (must be |
146
|
|
|
|
|
|
|
unique amongst all gene/species combinations) |
147
|
|
|
|
|
|
|
-description => string, free text description of the gene |
148
|
|
|
|
|
|
|
-upstream => int, the number of bases the map extends before the |
149
|
|
|
|
|
|
|
start of the gene element (default 1000). |
150
|
|
|
|
|
|
|
-downstream => int, the number of bases the map extends beyond the |
151
|
|
|
|
|
|
|
end of the gene element (default 0). |
152
|
|
|
|
|
|
|
-seq => string, the sequence of the map, presumably the |
153
|
|
|
|
|
|
|
genomic sequence -upstream bases of the gene, |
154
|
|
|
|
|
|
|
including the gene, and -downstream bases of the gene |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=cut |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub new { |
159
|
9
|
|
|
9
|
1
|
14
|
my ($class, @args) = @_; |
160
|
9
|
|
|
|
|
26
|
my $self = $class->SUPER::new(@args); |
161
|
|
|
|
|
|
|
|
162
|
9
|
|
|
|
|
22
|
my ($uid, $gene, $species, $desc, $up, $down, $seq) = $self->_rearrange([qw(UID |
163
|
|
|
|
|
|
|
GENE |
164
|
|
|
|
|
|
|
SPECIES |
165
|
|
|
|
|
|
|
DESCRIPTION |
166
|
|
|
|
|
|
|
UPSTREAM |
167
|
|
|
|
|
|
|
DOWNSTREAM |
168
|
|
|
|
|
|
|
SEQ)], @args); |
169
|
|
|
|
|
|
|
|
170
|
9
|
50
|
33
|
|
|
39
|
unless (defined $gene && defined $species) { |
171
|
0
|
|
|
|
|
0
|
$self->throw("You must supply both -species and -gene"); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
9
|
|
|
|
|
26
|
$self->gene(-gene => $gene, -description => $desc, -upstream => $up, -downstream => $down); |
175
|
9
|
100
|
|
|
|
20
|
$self->seq($seq) if $seq; |
176
|
|
|
|
|
|
|
|
177
|
9
|
50
|
|
|
|
14
|
unless (defined($uid)) { |
178
|
|
|
|
|
|
|
# trigger the special behaviour in our unique_id method by supplying it |
179
|
|
|
|
|
|
|
# the unique_id we got from our parent class |
180
|
9
|
|
|
|
|
13
|
$self->unique_id($self->unique_id); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
9
|
|
|
|
|
26
|
return $self; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 get |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Title : get |
189
|
|
|
|
|
|
|
Usage : my $map = Bio::Map::GeneMap->get(); |
190
|
|
|
|
|
|
|
Function: Builds a new Bio::Map::GeneMap object (like new()), or gets a |
191
|
|
|
|
|
|
|
pre-existing one that corresponds to your arguments. |
192
|
|
|
|
|
|
|
Returns : Bio::Map::GeneMap |
193
|
|
|
|
|
|
|
Args : -gene => string name of the gene this map will be for |
194
|
|
|
|
|
|
|
(in a form common to all species that have the gene, |
195
|
|
|
|
|
|
|
but unique amongst non-orthologous genes) or a |
196
|
|
|
|
|
|
|
Bio::Map::Gene object, REQUIRED |
197
|
|
|
|
|
|
|
-species => Bio::Taxon or string representing species, REQUIRED |
198
|
|
|
|
|
|
|
-uid => string, unique identifier for this map (must be |
199
|
|
|
|
|
|
|
unique amongst all gene/species combinations) |
200
|
|
|
|
|
|
|
-description => string, free text description of the gene |
201
|
|
|
|
|
|
|
-upstream => int, the number of bases the map extends before the |
202
|
|
|
|
|
|
|
start of the gene element (default 1000). |
203
|
|
|
|
|
|
|
-downstream => int, the number of bases the map extends beyond the |
204
|
|
|
|
|
|
|
end of the gene element (default 0). |
205
|
|
|
|
|
|
|
-seq => string, the sequence of the map, presumably the |
206
|
|
|
|
|
|
|
genomic sequence -upstream bases of the gene, |
207
|
|
|
|
|
|
|
including the gene, and -downstream bases of the gene |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
If you supply a -uid, and a map had previously been created and |
210
|
|
|
|
|
|
|
given that uid, that same map object will be returned. Otherwise, the |
211
|
|
|
|
|
|
|
combination of -gene and -species will be used to determine |
212
|
|
|
|
|
|
|
if the same map had previously been made. If a corresponding map |
213
|
|
|
|
|
|
|
hadn't previously been made, a new map object will be created and |
214
|
|
|
|
|
|
|
returned. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub get { |
219
|
20
|
|
|
20
|
1
|
543
|
my ($class, @args) = @_; |
220
|
20
|
|
|
|
|
98
|
my ($uid, $gene, $species, $desc, $up, $down, $seq) = Bio::Root::Root->_rearrange([qw(UID |
221
|
|
|
|
|
|
|
GENE |
222
|
|
|
|
|
|
|
SPECIES |
223
|
|
|
|
|
|
|
DESCRIPTION |
224
|
|
|
|
|
|
|
UPSTREAM |
225
|
|
|
|
|
|
|
DOWNSTREAM |
226
|
|
|
|
|
|
|
SEQ)], @args); |
227
|
|
|
|
|
|
|
|
228
|
20
|
|
|
|
|
35
|
my $gene_map; |
229
|
20
|
50
|
33
|
|
|
84
|
if ($uid && defined $GENEMAPS->{by_uid}->{$uid}) { |
|
|
50
|
33
|
|
|
|
|
230
|
0
|
|
|
|
|
0
|
$gene_map = $GENEMAPS->{by_uid}->{$uid}; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
elsif ($gene && $species) { |
233
|
20
|
100
|
|
|
|
31
|
my $name = ref($gene) ? $gene->universal_name : $gene; |
234
|
20
|
100
|
|
|
|
45
|
if (defined $GENEMAPS->{by_ns}->{$name}->{$species}) { |
235
|
11
|
|
|
|
|
13
|
$gene_map = $GENEMAPS->{by_ns}->{$name}->{$species}; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} |
238
|
20
|
100
|
|
|
|
25
|
if ($gene_map) { |
239
|
11
|
50
|
|
|
|
16
|
$gene_map->gene->description($desc) if $desc; |
240
|
11
|
50
|
|
|
|
14
|
$gene_map->upstream($up) if defined($up); |
241
|
11
|
50
|
|
|
|
13
|
$gene_map->downstream($down) if defined($down); |
242
|
11
|
50
|
|
|
|
12
|
$gene_map->seq($seq) if $seq; |
243
|
11
|
|
|
|
|
44
|
return $gene_map; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
9
|
|
|
|
|
17
|
return $class->new(@args); |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 unique_id |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Title : unique_id |
252
|
|
|
|
|
|
|
Usage : my $id = $map->unique_id; |
253
|
|
|
|
|
|
|
Function: Get/set the unique ID for this map |
254
|
|
|
|
|
|
|
Returns : string |
255
|
|
|
|
|
|
|
Args : none to get, OR string to set |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=cut |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub unique_id { |
260
|
76
|
|
|
76
|
1
|
58
|
my ($self, $id) = @_; |
261
|
76
|
100
|
|
|
|
99
|
if (defined $id) { |
262
|
9
|
|
|
|
|
16
|
delete $GENEMAPS->{by_uid}->{$self->{'_uid'}}; |
263
|
9
|
|
|
|
|
8
|
$self->{'_uid'} = $id; |
264
|
9
|
|
|
|
|
13
|
$GENEMAPS->{by_uid}->{$id} = $self; |
265
|
|
|
|
|
|
|
} |
266
|
76
|
|
|
|
|
179
|
return $self->{'_uid'}; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 species |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Title : species |
272
|
|
|
|
|
|
|
Usage : my $species = $map->species; |
273
|
|
|
|
|
|
|
Function: Get/set Species for a map. It is not recommended to change this once |
274
|
|
|
|
|
|
|
set. |
275
|
|
|
|
|
|
|
Returns : Bio::Taxon object or string |
276
|
|
|
|
|
|
|
Args : none to get, OR Bio::Taxon or string to set |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub species { |
281
|
30
|
|
|
30
|
1
|
1636
|
my ($self, $value) = @_; |
282
|
30
|
100
|
|
|
|
44
|
if ($value) { |
283
|
9
|
|
|
|
|
11
|
my $old_species = $self->{_species}; |
284
|
9
|
|
|
|
|
8
|
$self->{'_species'} = $value; |
285
|
9
|
|
50
|
|
|
17
|
my $name = $self->universal_name || return $value; |
286
|
0
|
0
|
|
|
|
0
|
if ($old_species) { |
287
|
0
|
|
|
|
|
0
|
delete $GENEMAPS->{by_ns}->{$name}->{$old_species}; |
288
|
|
|
|
|
|
|
} |
289
|
0
|
|
|
|
|
0
|
$GENEMAPS->{by_ns}->{$name}->{$value} = $self; |
290
|
|
|
|
|
|
|
} |
291
|
21
|
|
|
|
|
42
|
return $self->{'_species'}; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head2 type |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Title : type |
297
|
|
|
|
|
|
|
Usage : my $type = $map->type |
298
|
|
|
|
|
|
|
Function: Get Map type |
299
|
|
|
|
|
|
|
Returns : string 'gene' |
300
|
|
|
|
|
|
|
Args : none |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=cut |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub type { |
305
|
0
|
|
|
0
|
1
|
0
|
return 'gene'; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 gene |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Title : gene |
311
|
|
|
|
|
|
|
Usage : my $gene = $map->gene; |
312
|
|
|
|
|
|
|
$map->gene(-gene => $gene); |
313
|
|
|
|
|
|
|
Function: Get/set the mappable element on this map that represents the gene |
314
|
|
|
|
|
|
|
this map is for. Once set, it is not recommended to re-set the gene |
315
|
|
|
|
|
|
|
to something else. Behaviour in that case is undefined. |
316
|
|
|
|
|
|
|
Returns : Bio::Map::Gene |
317
|
|
|
|
|
|
|
Args : none to get, OR to set: |
318
|
|
|
|
|
|
|
-gene => Bio::Map::Gene or string of the universal name (see |
319
|
|
|
|
|
|
|
Bio::Map::Gene docs), REQUIRED |
320
|
|
|
|
|
|
|
-description => string, applied to the Bio::Map::Gene |
321
|
|
|
|
|
|
|
-upstream => int, the number of bases the map extends before the |
322
|
|
|
|
|
|
|
start of the gene element (default 1000). |
323
|
|
|
|
|
|
|
-downstream => int, the number of bases the map extends beyond the |
324
|
|
|
|
|
|
|
end of the gene element (default 0). |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=cut |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sub gene { |
329
|
487
|
|
|
487
|
1
|
920
|
my ($self, @args) = @_; |
330
|
|
|
|
|
|
|
|
331
|
487
|
100
|
|
|
|
719
|
if (@args > 0) { |
332
|
9
|
|
|
|
|
22
|
my ($gene, $desc, $up, $down) = $self->_rearrange([qw(GENE |
333
|
|
|
|
|
|
|
DESCRIPTION |
334
|
|
|
|
|
|
|
UPSTREAM |
335
|
|
|
|
|
|
|
DOWNSTREAM)], @args); |
336
|
9
|
50
|
|
|
|
20
|
$self->throw("You must supply -gene") unless $gene; |
337
|
|
|
|
|
|
|
|
338
|
9
|
100
|
|
|
|
28
|
my $gene_obj = ref($gene) ? $gene : Bio::Map::Gene->get(-universal_name => $gene, -description => $desc); |
339
|
9
|
50
|
|
|
|
17
|
if (defined $self->{gene}) { |
340
|
0
|
0
|
|
|
|
0
|
if ($self->{gene} ne $gene_obj) { |
341
|
0
|
|
|
|
|
0
|
$self->warn("Changing the gene that this map is for, which could be bad"); |
342
|
0
|
|
|
|
|
0
|
$self->purge_positions($self->{gene}); |
343
|
0
|
|
|
|
|
0
|
delete $GENEMAPS->{by_ns}->{$self->universal_name}->{$self->species}; |
344
|
0
|
|
|
|
|
0
|
$self->{gene} = $gene_obj; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# change the gene's position on us if necessary |
348
|
0
|
0
|
|
|
|
0
|
$self->upstream($up) if defined $up; |
349
|
0
|
0
|
|
|
|
0
|
$self->downstream($down) if defined $down; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
else { |
352
|
|
|
|
|
|
|
# give the gene object a position on us |
353
|
9
|
|
100
|
|
|
17
|
$up ||= 1000; |
354
|
9
|
50
|
|
|
|
15
|
$up >= 0 || $self->throw("-upstream must be a positive integer"); |
355
|
9
|
|
|
|
|
36
|
Bio::Map::Position->new(-map => $self, -start => ($up + 1), -element => $gene_obj); |
356
|
9
|
|
|
|
|
18
|
$self->{gene} = $gene_obj; |
357
|
9
|
|
50
|
|
|
36
|
$self->downstream($down || 0); |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# set other gene positions from db if already user-requested |
360
|
9
|
|
|
|
|
20
|
$gene_obj->_set_from_db($self); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
9
|
|
|
|
|
19
|
$GENEMAPS->{by_ns}->{$self->universal_name}->{$self->species} = $self; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
487
|
|
|
|
|
788
|
return $self->{gene}; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head2 universal_name |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Title : universal_name |
372
|
|
|
|
|
|
|
Usage : my $name = $map->universal_name |
373
|
|
|
|
|
|
|
Function: Get/set the name of Bio::Map::Gene object associated with this map. |
374
|
|
|
|
|
|
|
It is not recommended to change this once set. |
375
|
|
|
|
|
|
|
Returns : string |
376
|
|
|
|
|
|
|
Args : none to get, OR string to set |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=cut |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub universal_name { |
381
|
19
|
|
|
19
|
1
|
9
|
my ($self, $value) = @_; |
382
|
19
|
100
|
|
|
|
28
|
$self->gene || return; |
383
|
10
|
50
|
|
|
|
16
|
if ($value) { |
384
|
0
|
|
|
|
|
0
|
my $species = $self->species; |
385
|
0
|
|
|
|
|
0
|
delete $GENEMAPS->{by_ns}->{$self->gene->universal_name}->{$species}; |
386
|
0
|
|
|
|
|
0
|
$self->gene->universal_name($value); |
387
|
0
|
|
|
|
|
0
|
$GENEMAPS->{by_ns}->{$value}->{$species} = $self; |
388
|
|
|
|
|
|
|
} |
389
|
10
|
|
|
|
|
12
|
return $self->gene->universal_name; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=head2 upstream |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Title : upstream |
395
|
|
|
|
|
|
|
Usage : my $distance = $map->upstream; |
396
|
|
|
|
|
|
|
$map->upstream($distance); |
397
|
|
|
|
|
|
|
Function: Get/set how long the map is before the start of the Bio::Map::Gene |
398
|
|
|
|
|
|
|
object on this map. |
399
|
|
|
|
|
|
|
Returns : int |
400
|
|
|
|
|
|
|
Args : none to get, OR int to set (the number of bases the map extends |
401
|
|
|
|
|
|
|
before the start of the gene) |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=cut |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
sub upstream { |
406
|
13
|
|
|
13
|
1
|
14
|
my ($self, $value) = @_; |
407
|
|
|
|
|
|
|
|
408
|
13
|
|
|
|
|
22
|
my $pos = $self->gene->position($self); |
409
|
13
|
50
|
|
|
|
26
|
if (defined($value)) { |
410
|
0
|
0
|
|
|
|
0
|
$value >= 0 || $self->throw("Supplied value must be a positive integer"); |
411
|
0
|
|
|
|
|
0
|
$pos->start($value + 1); |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
13
|
|
|
|
|
26
|
return $pos->start - 1; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 downstream |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Title : downstream |
420
|
|
|
|
|
|
|
Usage : my $distance = $map->downstream; |
421
|
|
|
|
|
|
|
$map->downstream($distance); |
422
|
|
|
|
|
|
|
Function: Get/set the nominal end of the map relative to the end of the |
423
|
|
|
|
|
|
|
Bio::Map::Gene object on this map. |
424
|
|
|
|
|
|
|
Returns : int |
425
|
|
|
|
|
|
|
Args : none to get, OR int to set (the number of bases the map extends |
426
|
|
|
|
|
|
|
beyond the end of the gene) |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=cut |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub downstream { |
431
|
20
|
|
|
20
|
1
|
16
|
my $self = shift; |
432
|
20
|
100
|
|
|
|
33
|
if (@_) { $self->{_downstream} = shift } |
|
9
|
|
|
|
|
11
|
|
433
|
20
|
|
50
|
|
|
52
|
return $self->{_downstream} || 0; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=head2 length |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Title : length |
439
|
|
|
|
|
|
|
Usage : my $length = $map->length(); |
440
|
|
|
|
|
|
|
Function: Retrieves the length of the map. This is normally the length of the |
441
|
|
|
|
|
|
|
upstream region + length of the gene + length of the downstream |
442
|
|
|
|
|
|
|
region, but may be longer if positions have been placed on the map |
443
|
|
|
|
|
|
|
beyond the end of the nominal downstream region. |
444
|
|
|
|
|
|
|
Returns : int |
445
|
|
|
|
|
|
|
Args : none |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=cut |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
sub length { |
450
|
11
|
|
|
11
|
1
|
11
|
my $self = shift; |
451
|
11
|
|
|
|
|
19
|
my $expected_length = $self->gene->position($self)->length + $self->upstream + $self->downstream; |
452
|
11
|
|
|
|
|
35
|
my $actual_length = $self->SUPER::length; |
453
|
11
|
50
|
|
|
|
36
|
return $actual_length > $expected_length ? $actual_length : $expected_length; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 seq |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
Title : seq |
459
|
|
|
|
|
|
|
Usage : $string = $obj->seq() |
460
|
|
|
|
|
|
|
Function: Get/set the sequence as a string of letters. When getting, If the |
461
|
|
|
|
|
|
|
GeneMap object didn't have sequence attached directly to it for the |
462
|
|
|
|
|
|
|
region requested, the map's gene's database will be asked for the |
463
|
|
|
|
|
|
|
sequence, and failing that, the map's gene's positions will be asked |
464
|
|
|
|
|
|
|
for their sequences. Areas for which no sequence could be found will |
465
|
|
|
|
|
|
|
be filled with Ns, unless no sequence was found anywhere, in which |
466
|
|
|
|
|
|
|
case undef is returned. |
467
|
|
|
|
|
|
|
Returns : string |
468
|
|
|
|
|
|
|
Args : Optionally on set the new value (a string). An optional second |
469
|
|
|
|
|
|
|
argument presets the alphabet (otherwise it will be guessed). |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=cut |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
sub seq { |
474
|
7
|
|
|
7
|
1
|
11
|
my ($self, @args) = @_; |
475
|
7
|
|
|
|
|
23
|
my $seq = $self->SUPER::seq(@args); |
476
|
7
|
|
|
|
|
14
|
my $expected_length = $self->length; |
477
|
7
|
50
|
66
|
|
|
27
|
if (! $seq || CORE::length($seq) < $expected_length) { |
478
|
7
|
|
100
|
|
|
42
|
my @have = split('', $seq || ''); |
479
|
7
|
|
|
|
|
5
|
my @result; |
480
|
7
|
|
|
|
|
18
|
for (0..($expected_length - 1)) { |
481
|
7906
|
|
100
|
|
|
15030
|
$result[$_] = shift(@have) || 'N'; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# build map sequence by asking gene or positions |
485
|
7
|
|
|
|
|
21
|
my @slice_stuff = $self->gene->_get_slice($self); |
486
|
7
|
50
|
|
|
|
13
|
if (@slice_stuff) { |
487
|
0
|
|
|
|
|
0
|
my ($slice_adaptor, $slice, $strand) = @slice_stuff; |
488
|
0
|
|
0
|
|
|
0
|
my ($start, $end, $gene_start) = (CORE::length($seq || '') + 1, $expected_length, $self->upstream + 1); |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# convert map coords to genomic coords |
491
|
0
|
0
|
|
|
|
0
|
my $adjust = $strand == -1 ? $slice->end : $slice->start; |
492
|
0
|
0
|
|
0
|
|
0
|
my $adjustment = sub { return $strand == -1 ? $adjust - shift() : shift() + $adjust; }; |
|
0
|
|
|
|
|
0
|
|
493
|
0
|
|
|
|
|
0
|
my $converted_start = &$adjustment($start - $gene_start); |
494
|
0
|
|
|
|
|
0
|
my $converted_end = &$adjustment($end - $gene_start); |
495
|
0
|
0
|
|
|
|
0
|
($converted_start, $converted_end) = ($converted_end, $converted_start) if $converted_start > $converted_end; |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
# get sequence from a new slice of desired region |
498
|
|
|
|
|
|
|
#*** what happens if desired region starts or ends off end of chromo?... |
499
|
0
|
|
|
|
|
0
|
my $new_slice = $slice_adaptor->fetch_by_region($slice->coord_system_name, $slice->seq_region_name, $converted_start, $converted_end); |
500
|
0
|
0
|
0
|
|
|
0
|
if ($new_slice && (my $seq_str = $new_slice->seq)) { |
501
|
0
|
0
|
|
|
|
0
|
if ($strand == -1) { |
502
|
0
|
|
|
|
|
0
|
$seq_str = $self->_revcom($seq_str); |
503
|
|
|
|
|
|
|
} |
504
|
0
|
|
0
|
|
|
0
|
splice(@result, CORE::length($seq || ''), CORE::length($seq_str), split('', $seq_str)); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
else { |
508
|
7
|
|
|
|
|
20
|
foreach my $pos ($self->get_positions) { |
509
|
18
|
100
|
|
|
|
77
|
next unless $pos->can('seq'); |
510
|
8
|
|
100
|
|
|
20
|
my @pos_seq = split('', $pos->seq(undef, undef, 1) || next); |
511
|
1
|
|
|
|
|
4
|
for my $i ($pos->start($pos->absolute_relative)..$pos->end($pos->absolute_relative)) { |
512
|
3
|
|
|
|
|
3
|
$i--; |
513
|
3
|
|
|
|
|
3
|
my $base = shift(@pos_seq); |
514
|
3
|
50
|
|
|
|
7
|
if ($result[$i] eq 'N') { |
515
|
3
|
|
|
|
|
5
|
$result[$i] = $base; |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
7
|
|
|
|
|
421
|
$seq = join('', @result); |
522
|
|
|
|
|
|
|
} |
523
|
7
|
|
|
|
|
29
|
return $seq; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head2 subseq |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Title : subseq |
529
|
|
|
|
|
|
|
Usage : $substring = $obj->subseq(10, 40); |
530
|
|
|
|
|
|
|
Function: Returns the subseq from start to end, where the first base |
531
|
|
|
|
|
|
|
is 1 and the number is inclusive, ie 1-2 are the first two |
532
|
|
|
|
|
|
|
bases of the sequence. If the GeneMap object didn't have sequence |
533
|
|
|
|
|
|
|
attached directly to it for the region requested, the map's gene's |
534
|
|
|
|
|
|
|
database will be asked for the sequence, and failing that, the map's |
535
|
|
|
|
|
|
|
gene's positions will be asked for their sequences. Areas for which |
536
|
|
|
|
|
|
|
no sequence could be found will be filled with Ns, unless no |
537
|
|
|
|
|
|
|
sequence was found anywhere, in which case undef is returned. subseq |
538
|
|
|
|
|
|
|
requests that extend beyond the end of the map will throw. |
539
|
|
|
|
|
|
|
Returns : string |
540
|
|
|
|
|
|
|
Args : integer for start position AND integer for end position |
541
|
|
|
|
|
|
|
OR |
542
|
|
|
|
|
|
|
Bio::LocationI location for subseq (strand honored) |
543
|
|
|
|
|
|
|
OR |
544
|
|
|
|
|
|
|
Bio::RangeI (eg. a Bio::Map::PositionI) |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=cut |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
sub subseq { |
549
|
4
|
|
|
4
|
1
|
7
|
my ($self, $start, $end) = @_; |
550
|
|
|
|
|
|
|
|
551
|
4
|
100
|
66
|
|
|
23
|
if ($start && ref($start) && $start->isa('Bio::RangeI')) { |
|
|
|
66
|
|
|
|
|
552
|
1
|
|
|
|
|
1
|
my $thing = $start; |
553
|
1
|
50
|
|
|
|
3
|
if ($start->isa('Bio::Map::Position')) { |
554
|
1
|
|
|
|
|
8
|
($start, $end) = ($thing->start($thing->absolute_relative), $thing->end($thing->absolute_relative)); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
else { |
557
|
0
|
|
|
|
|
0
|
($start, $end) = ($thing->start, $thing->end); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# *** this implementation potentially wastefull? Should duplicate code |
562
|
|
|
|
|
|
|
# from seq() to do this just for the desired region?? |
563
|
4
|
|
|
|
|
5
|
my $orig_seq = $self->{seq}; |
564
|
4
|
|
|
|
|
11
|
$self->{seq} = $self->seq(); |
565
|
4
|
50
|
|
|
|
26
|
my $subseq = $self->{seq} ? $self->SUPER::subseq($start, $end) : ''; |
566
|
4
|
|
|
|
|
8
|
$self->{seq} = $orig_seq; |
567
|
|
|
|
|
|
|
|
568
|
4
|
|
|
|
|
17
|
return $subseq; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# quick revcom for strings (silly to create a PrimarySeq just to revcom and then |
572
|
|
|
|
|
|
|
# return a string again) |
573
|
|
|
|
|
|
|
sub _revcom { |
574
|
0
|
|
|
0
|
|
|
my ($self, $seq) = @_; |
575
|
0
|
0
|
|
|
|
|
$seq or return; |
576
|
0
|
|
|
|
|
|
$seq = reverse($seq); |
577
|
0
|
|
|
|
|
|
$seq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; |
578
|
0
|
|
|
|
|
|
return $seq; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
1; |