File Coverage

Bio/Map/Gene.pm
Criterion Covered Total %
statement 165 282 58.5
branch 74 142 52.1
condition 41 94 43.6
subroutine 26 34 76.4
pod 23 23 100.0
total 329 575 57.2


line stmt bran cond sub pod time code
1             # $Id: Gene.pm,v 1.6 2006/07/17 14:16:53 sendu Exp $
2             #
3             # BioPerl module for Bio::Map::Gene
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Sendu Bala
8             #
9             # Copyright Sendu Bala
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Map::Gene - An gene modelled as a mappable element.
18              
19             =head1 SYNOPSIS
20              
21             use Bio::Map::Gene;
22              
23             my $gene = Bio::Map::Gene->get(-universal_name => 'BRCA2',
24             -description => 'breast cancer 2, early onset');
25              
26             # Normally you get Gene objects from GeneMaps
27             use Bio::Map::GeneMap;
28              
29             # Model a gene with its orthologous versions found in different species,
30             # but at abstract locations within each genome
31             my $map1 = Bio::Map::GeneMap->get(-universal_name => 'BRCA2', -species => $human);
32             my $map2 = Bio::Map::GeneMap->get(-universal_name => 'BRCA2', -species => $mouse);
33              
34             $gene = $map1->gene;
35              
36             # Genes can have special kinds of positions (Bio::Map::GenePosition) that
37             # define where various sub-regions of the gene are, relative to one of the
38             # normal Positions the gene has placing it on a map.
39             my $trans = Bio::Map::GenePosition->new(-start => 0, -length => 700,
40             -map => $map1, -type => 'transcript');
41             $gene->add_transcript_position($trans);
42             my $exon = Bio::Map::GenePosition->new(-start => 0, -length => 100,
43             -map => $map1, -type => 'exon');
44             $gene->add_exon_position($exon, 1);
45             # (so now the gene has 1 transcript 700bp long which starts at the beginning
46             # of the gene, and we've defined the first of many exons which starts at the
47             # start of the transcript and is 100bp long)
48              
49             =head1 DESCRIPTION
50              
51             Model a gene as an abstract mappable element. This is for when you don't care
52             exactly where a gene is in a genome, but just want to model other things (like
53             transcription factor binding sites) that are near it so you can answer questions
54             like "what binds near this gene?", or "which genes does this bind near?".
55              
56             See t/Map/Map.t for more example usage.
57              
58             =head1 FEEDBACK
59              
60             =head2 Mailing Lists
61              
62             User feedback is an integral part of the evolution of this and other
63             Bioperl modules. Send your comments and suggestions preferably to the
64             Bioperl mailing list. Your participation is much appreciated.
65              
66             bioperl-l@bioperl.org - General discussion
67             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
68              
69             =head2 Support
70              
71             Please direct usage questions or support issues to the mailing list:
72              
73             I
74              
75             rather than to the module maintainer directly. Many experienced and
76             reponsive experts will be able look at the problem and quickly
77             address it. Please include a thorough description of the problem
78             with code and data examples if at all possible.
79              
80             =head2 Reporting Bugs
81              
82             Report bugs to the Bioperl bug tracking system to help us keep track
83             of the bugs and their resolution. Bug reports can be submitted via the
84             web:
85              
86             https://github.com/bioperl/bioperl-live/issues
87              
88             =head1 AUTHOR - Sendu Bala
89              
90             Email bix@sendu.me.uk
91              
92             =head1 APPENDIX
93              
94             The rest of the documentation details each of the object methods.
95             Internal methods are usually preceded with a _
96              
97             =cut
98              
99             # Let the code begin...
100              
101             package Bio::Map::Gene;
102 1     1   1512 use strict;
  1         1  
  1         33  
103              
104 1     1   666 use Bio::Map::GenePosition;
  1         2  
  1         38  
105              
106 1     1   6 use base qw(Bio::Map::Mappable);
  1         1  
  1         102  
107              
108             our $USE_ENSEMBL;
109             our $GENES = {};
110             our $SET_FROM_DB = 0;
111              
112             BEGIN {
113             # Bio::Tools::Run::Ensembl is in bioperl-run package which may not be
114             # installed, but its functionality is only optional here
115 1     1   2 eval {require Bio::Tools::Run::Ensembl;};
  1         280  
116 1         2227 $USE_ENSEMBL = ! $@;
117             }
118              
119             =head2 new
120              
121             Title : new
122             Usage : my $gene = Bio::Map::Gene->new();
123             Function: Builds a new Bio::Map::Gene object
124             Returns : Bio::Map::Gene
125             Args : -universal_name => string : name of the gene (in a form common to all
126             species that have the gene, but unique
127             amongst non-orthologous genes), REQUIRED
128             -description => string : free text description of the gene
129              
130             =cut
131              
132             sub new {
133 4     4 1 5 my ($class, @args) = @_;
134 4         14 my $self = $class->SUPER::new(@args);
135            
136 4         10 my ($u_name, $desc) = $self->_rearrange([qw(UNIVERSAL_NAME DESCRIPTION)], @args);
137 4 50       8 $u_name || $self->throw("You must supply a -universal_name");
138 4         7 $self->universal_name($u_name);
139            
140 4 100       9 defined $desc && $self->description($desc);
141            
142 4         8 return $self;
143             }
144              
145             =head2 get
146              
147             Title : get
148             Usage : my $gene = Bio::Map::Gene->get();
149             Function: Builds a new Bio::Map::Gene object (like new()), or gets a
150             pre-existing one that shares the same universal_name.
151             Returns : Bio::Map::Gene
152             Args : -universal_name => string, name of the gene (in a form common to all
153             species that have the gene, but unique amongst
154             non-orthologous genes), REQUIRED
155             -description => string, free text description of the gene
156              
157             =cut
158              
159             sub get {
160 10     10 1 353 my ($class, @args) = @_;
161 10         30 my ($u_name, $desc) = Bio::Root::Root->_rearrange([qw(UNIVERSAL_NAME DESCRIPTION)], @args);
162            
163 10 100 66     41 if ($u_name && defined $GENES->{$u_name}) {
164 6 50       10 $GENES->{$u_name}->description($desc) if $desc;
165 6         10 return $GENES->{$u_name};
166             }
167            
168 4         9 return $class->new(@args);
169             }
170              
171             =head2 universal_name
172              
173             Title : universal_name
174             Usage : my $name = $gene->universal_name
175             Function: Get/Set Mappable name, corresponding to the name of the gene in a
176             form shared by orthologous versions of the gene in different species,
177             but otherwise unique.
178             Returns : string
179             Args : none to get, OR string to set
180              
181             =cut
182              
183             sub universal_name {
184 32     32 1 31 my ($self, $value) = @_;
185 32 100       48 if (defined $value) {
186 4 50       7 delete $GENES->{$self->{'_uname'}} if $self->{'_uname'};
187 4         6 $self->{'_uname'} = $value;
188 4         5 $GENES->{$value} = $self;
189             }
190 32         64 return $self->{'_uname'};
191             }
192              
193             =head2 description
194              
195             Title : description
196             Usage : my $description = $gene->description();
197             $gene->description($description);
198             Function: Get/set information relating to the gene, in this case the
199             description (eg. 'full name of gene')
200             Returns : string (empty string if not defined)
201             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
202             version.
203             string to set general version, optionally AND Bio::Map::GeneMap to
204             set map-specific version
205              
206             =cut
207              
208             sub description {
209 2     2 1 3 my $self = shift;
210 2         7 return $self->_gene_data('description', @_);
211             }
212              
213             =head2 display_id
214              
215             Title : display_id
216             Usage : my $display_id = $gene->display_id();
217             $gene->display_id($display_id);
218             Function: Get/set information relating to the gene, in this case the
219             display_id (eg. 'ENSG00000155287')
220             Returns : string (empty string if not defined)
221             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
222             version.
223             string to set general version, optionally AND Bio::Map::GeneMap to
224             set map-specific version
225              
226             =cut
227              
228             sub display_id {
229 0     0 1 0 my $self = shift;
230 0         0 return $self->_gene_data('display_id', @_);
231             }
232              
233             =head2 display_xref
234              
235             Title : display_xref
236             Usage : my $display_xref = $gene->display_xref();
237             $gene->display_xref($display_xref);
238             Function: Get/set information relating to the gene, in this case the
239             display_xref (eg. 'HUGO:23472').
240             Returns : string (empty string if not defined)
241             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
242             version.
243             string to set general version, optionally AND Bio::Map::GeneMap to
244             set map-specific version
245              
246             =cut
247              
248             sub display_xref {
249 0     0 1 0 my $self = shift;
250 0         0 return $self->_gene_data('display_xref', @_);
251             }
252              
253             =head2 external_db
254              
255             Title : external_db
256             Usage : my $external_db = $gene->external_db();
257             $gene->external_db($external_db);
258             Function: Get/set information relating to the gene, in this case the
259             external_db (eg. 'HUGO').
260             Returns : string (empty string if not defined)
261             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
262             version.
263             string to set general version, optionally AND Bio::Map::GeneMap to
264             set map-specific version
265              
266             =cut
267              
268             sub external_db {
269 0     0 1 0 my $self = shift;
270 0         0 return $self->_gene_data('external_db', @_);
271             }
272              
273             =head2 external_name
274              
275             Title : external_name
276             Usage : my $external_name = $gene->external_name();
277             $gene->external_name($external_name);
278             Function: Get/set information relating to the gene, in this case the (eg.
279             'gene_name', probably the same as or similar to what you set
280             universal_name() to, but could be a species-specific alternative).
281             Returns : string (empty string if not defined)
282             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
283             version.
284             string to set general version, optionally AND Bio::Map::GeneMap to
285             set map-specific version
286              
287             =cut
288              
289             sub external_name {
290 0     0 1 0 my $self = shift;
291 0         0 return $self->_gene_data('external_name', @_);
292             }
293              
294             =head2 biotype
295              
296             Title : biotype
297             Usage : my $biotype = $gene->biotype();
298             $gene->biotype($biotype);
299             Function: Get/set information relating to the gene, in this case the biotype
300             (eg. 'protein_coding').
301             Returns : string (empty string if not defined)
302             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
303             version.
304             string to set general version, optionally AND Bio::Map::GeneMap to
305             set map-specific version
306              
307             =cut
308              
309             sub biotype {
310 0     0 1 0 my $self = shift;
311 0         0 return $self->_gene_data('biotype', @_);
312             }
313              
314             =head2 source
315              
316             Title : source
317             Usage : my $source = $gene->source();
318             $gene->source($source);
319             Function: Get/set information relating to the gene, in this case the source
320             (eg. '??').
321             Returns : string (empty string if not defined)
322             Args : none to get general version, OR Bio::Map::GeneMap to get map-specific
323             version.
324             string to set general version, optionally AND Bio::Map::GeneMap to
325             set map-specific version
326              
327             =cut
328              
329             sub source {
330 0     0 1 0 my $self = shift;
331 0         0 return $self->_gene_data('source', @_);
332             }
333              
334             =head2 position
335              
336             Title : position
337             Usage : my $position = $mappable->position($map);
338             Function: Get the main Position of this Mappable on a given map. (A gene may
339             have many positions on a map, but all but one of them are
340             Bio::Map::GenePosition objects that describe sub-regions of the gene
341             which are relative to the 'main' Bio::Map::Position position, which
342             is the only one that is directly relative to the map - this is the
343             Position returned by this method.)
344             Returns : Bio::Map::Position
345             Args : L object.
346              
347             =cut
348              
349             sub position {
350 323     323 1 269 my ($self, $map) = @_;
351 323 50 33     789 ($map && $self->in_map($map)) || return;
352            
353 323         646 foreach my $pos ($self->get_positions($map, 1)) {
354 604 100       1434 next if $pos->isa('Bio::Map::GenePosition');
355 323         521 return $pos;
356             #*** could do sanity checking; there should only be 1 non-GenePosition
357             # object here, and it should have a relative of type 'map', and it
358             # should sort before or equal to all other positions
359             }
360             }
361              
362             =head2 add_transcript_position
363              
364             Title : add_transcript_position
365             Usage : $gene->add_transcript_position($position);
366             Function: Set the bounds of a transcript on a map (that of the supplied
367             position). All transcript positions added this way must have
368             coordinates relative to the main position of the 'gene' mappable on
369             this transcript's map. The first position added using this method
370             must have a start of 0. The supplied Position will be given a type of
371             'transcript' and relative of (gene => 0). The active_transcript for
372             the Position's map will be set to this one.
373             Returns : n/a
374             Args : Bio::Map::GenePosition (which must have its map() defined, and be for
375             a map this gene is on)
376              
377             =cut
378              
379             sub add_transcript_position {
380 2     2 1 5 my ($self, $pos) = @_;
381 2 50 33     17 ($pos && $pos->isa('Bio::Map::GenePosition')) || return;
382            
383 2   33     4 my $map = $pos->map || $self->throw("Supplied GenePosition has no map");
384 2 50       6 $self->in_map($map) || $self->throw("Supplied GenePosition is not on a map that this gene belong to");
385 2         6 my @transcripts = $self->get_transcript_positions($map);
386 2 100       5 if (@transcripts == 0) {
387             # first transcript needs start of 0
388 1 50       3 if ($pos->start != 0) {
389 0         0 $self->warn("The first transcript position added to a map needs a start of 0, not adding");
390 0         0 return;
391             }
392             }
393            
394 2         5 $pos->type('transcript');
395 2         6 $pos->relative->gene(0);
396 2         7 $self->SUPER::add_position($pos);
397            
398             # need to remember the order these were added, but remember what we store
399             # here could become invalid if positions are purged outside of this class
400 2         2 push(@{$self->{t_order}->{$map}}, $pos);
  2         6  
401            
402             # adjust main position's length to hold this transcript
403 2         3 my $main_pos = $self->position($map);
404 2         9 my $increase = ($pos->length + $pos->start($pos->absolute_relative)) - ($main_pos->end + 1);
405 2 50       4 if ($increase > 0) {
406 2         5 $main_pos->end($main_pos->end + $increase);
407             }
408            
409             # make this new transcript the active one
410 2         7 $self->active_transcript($map, scalar(@transcripts) + 1);
411             }
412              
413             =head2 active_transcript
414              
415             Title : active_transcript
416             Usage : my $active = $gene->active_transcript($map);
417             $gene->active_transcript($map, $int);
418             Function: Get/set the active transcript number (an int of 1 would mean the 1st
419             transcript position added to the object for the given map, ie. would
420             correspond to the the 1st Position object in the list returned by
421             get_transcript_positions($map)). The active transcript is the one
422             considered by other methods and objects when dealing with positions
423             relative to 'the' transcript.
424             Returns : int, 0 means there were no transcript positions on the given map,
425             undef is some other problem
426             Args : Just Bio::Map::GeneMap to get
427             Bio::Map::GeneMap AND int to set
428              
429             =cut
430              
431             sub active_transcript {
432 181     181 1 162 my ($self, $map, $int) = @_;
433 181 50       234 $map or return;
434            
435 181         225 my @transcripts = $self->get_transcript_positions($map);
436 181 100       297 if (@transcripts > 0) {
437 173 100       177 if (defined($int)) {
438 3 50 33     13 if ($int > 0 && $int <= @transcripts) {
439 3         5 $self->{active_transcript}->{$map} = $int;
440 3         10 return $int;
441             }
442             else {
443 0         0 $self->warn("Supplied int '$int' not a good number (higher than the number of transcripts on the map?)");
444 0         0 return;
445             }
446             }
447             else {
448 170 50       249 if (defined $self->{active_transcript}->{$map}) {
449 170         450 return $self->{active_transcript}->{$map};
450             }
451             else {
452             # default to the total number of transcripts on the map, ie. the
453             # most recently added
454 0         0 $self->{active_transcript}->{$map} = @transcripts;
455 0         0 return $self->{active_transcript}->{$map};
456             }
457             }
458             }
459 8         12 return 0;
460             }
461              
462             =head2 get_transcript_positions
463              
464             Title : get_transcript_positions
465             Usage : my @transcript_positions = $gene->get_transcript_positions($map);
466             Function: Get all the transcript positions of this gene on the given map, in
467             the order they were added to the map.
468             Returns : list of Bio::Map::GenePosition
469             Args : Bio::Map::GeneMap
470              
471             =cut
472              
473             sub get_transcript_positions {
474 285     285 1 215 my ($self, $map) = @_;
475 285 50       325 $map or return;
476 285 50       516 $map->isa('Bio::Map::GeneMap') or return;
477 285         330 return $self->_get_typed_positions($map, 'transcript');
478             }
479              
480             =head2 get_transcript_position
481              
482             Title : get_transcript_position
483             Usage : my $position = $gene->get_transcript_position($map, $int);
484             Function: Get the $int'th transcript position added to the map. If no
485             transcripts have been added to the map, and the default transcript
486             was requested, $gene->position is returned, as that will have the
487             same start and end as the first transcript.
488             Returns : Bio::Map::GenePosition
489             Args : Bio::Map::GeneMap AND int (if int not supplied, or 0, returns
490             the currently active transcript position)
491              
492             =cut
493              
494             sub get_transcript_position {
495 101     101 1 94 my ($self, $map, $value) = @_;
496 101 50       133 $map or return;
497 101   100     216 $value ||= $self->active_transcript($map);
498 101         155 my @transcripts = $self->get_transcript_positions($map);
499 101 100 66     182 if (@transcripts == 0 && $value == 0) {
500 4         5 return $self->position($map);
501             }
502 97         164 return $self->_get_list_element($value, @transcripts);
503             }
504              
505             =head2 coding_position
506              
507             Title : coding_position
508             Usage : $gene->coding_position($position, $transcript_number);
509             $gene->coding_position($map, $transcript_number);
510             Function: Get/set the bounds of a coding region of a given transcript on a map
511             (that of the supplied position).
512              
513             When setting, coordinates must be relative to the transcript start.
514             The supplied position will be given a type 'coding' and a relative
515             (-transcript => $transcript_number). There can be only one coding
516             position per transcript (hence this is a get/set).
517              
518             When getting, if a coding region has not been defined for the
519             requested transcript, $gene->get_transcript_position($map,
520             $transcript_number) is returned, as if assuming the entirety of the
521             transcript is coding.
522              
523             Returns : Bio::Map::GenePosition
524             Args : Bio::Map::GeneMap AND int (the transcript number) to get, OR to set:
525             Bio::Map::GenePosition (which must have its map() defined, and be for
526             a map this gene is on) AND int (the transcript number)
527             In both cases, if transcript number not supplied or 0 this will be
528             resolved to the current active transcript number - there must be at
529             least one transcript on the map
530              
531             =cut
532              
533             sub coding_position {
534 13     13 1 16 my ($self, $thing, $transcript_num) = @_;
535 13 50       31 ref($thing) || return;
536 13   50     35 $transcript_num ||= 0;
537            
538             # deliberate test for PositionI so _add_type_position can do nothing if
539             # its not a GenePosition
540 13 100       46 if ($thing->isa('Bio::Map::PositionI')) {
541 2   50     4 my $map = $thing->map || return;
542 2         7 my ($existing_pos) = $self->_get_typed_positions($map, 'coding', $transcript_num);
543 2 100       6 if ($existing_pos) {
544             # purge it
545 1         12 $self->purge_positions($existing_pos);
546             }
547 2         6 $self->_add_type_position('coding', $thing, $transcript_num);
548 2         3 $thing = $map;
549             }
550            
551 13         21 my ($pos) = $self->_get_typed_positions($thing, 'coding', $transcript_num);
552 13   66     49 return $pos || $self->get_transcript_position($thing, $transcript_num);
553             }
554              
555             =head2 add_exon_position
556              
557             Title : add_exon_position
558             Usage : $gene->add_exon_position($position, $transcript_number);
559             Function: Set the bounds of an exon of a given transcript on a map (that of the
560             supplied position). Coordinates must be relative to the transcript
561             start. The supplied position will be given a type 'exon' and a
562             relative (-transcript => $transcript_number).
563             Returns : n/a
564             Args : Bio::Map::GenePosition (which must have its map() defined, and be for
565             a map this gene is on) AND int (the transcript number; if not
566             supplied or 0 this will be resolved to the current active transcript
567             number - there must be at least one transcript on the map)
568              
569             =cut
570              
571             sub add_exon_position {
572 3     3 1 6 my $self = shift;
573 3         10 $self->_add_type_position('exon', @_);
574             }
575              
576             =head2 get_exon_positions
577              
578             Title : get_exon_positions
579             Usage : my @positions = $gene->get_exon_positions($map, $int);
580             Function: Get all the exon positions that are relative to the $int'th
581             transcript position added to the map. Exons are returned sorted by
582             their start positions.
583             Returns : array of Bio::Map::GenePosition
584             Args : Bio::Map::GeneMap AND int (the transcript number; if second int not
585             supplied, or 0, considers the currently active transcript)
586              
587             =cut
588              
589             sub get_exon_positions {
590 23     23 1 26 my ($self, $map, $value) = @_;
591 23 50       33 $map || return;
592 23   100     58 $value ||= 0;
593 23         36 return $self->_get_typed_positions($map, 'exon', $value);
594             }
595              
596             =head2 get_exon_position
597              
598             Title : get_exon_position
599             Usage : my $position = $gene->get_exon_position($map, $exon_num, $int);
600             Function: Get the $exon_num'th exon position that is relative to the $int'th
601             transcript position added to the map. Exons are numbered in Position
602             order, not the order they were added to the map. If no exons have
603             been added to the map, and the first exon was requested,
604             $gene->get_transcript_position($map, $int) is returned, as that will
605             have the same start as the first exon, and could have the same end
606             for a single exon gene.
607             Returns : Bio::Map::GenePosition
608             Args : Bio::Map::GeneMap AND int (the exon you want) AND int (the transcript
609             number; if second int not supplied, or 0, considers the currently
610             active transcript)
611              
612             =cut
613              
614             sub get_exon_position {
615 21     21 1 28 my ($self, $map, $exon_num, $value) = @_;
616 21         35 my @exons = $self->get_exon_positions($map, $value);
617 21 100 66     73 if (@exons == 0 && $exon_num == 1) {
618 9         17 return $self->get_transcript_position($map, $value);
619             }
620 12         23 return $self->_get_list_element($exon_num, @exons);
621             }
622              
623             =head2 add_intron_position
624              
625             Title : add_intron_position
626             Usage : $gene->add_intron_position($position, $transcript_number);
627             Function: Set the bounds of an intron of a given transcript on a map (that of
628             the supplied position). Coordinates must be relative to the
629             transcript start. The supplied position will be given a type 'intron'
630             and a relative (-transcript => $transcript_number).
631             Returns : n/a
632             Args : Bio::Map::GenePosition (which must have its map() defined, and be for
633             a map this gene is on) AND int (the transcript number; if not
634             supplied or 0 this will be resolved to the current active transcript
635             number - there must be at least one transcript on the map)
636              
637             =cut
638              
639             sub add_intron_position {
640 1     1 1 2 my $self = shift;
641 1         4 $self->_add_type_position('intron', @_);
642             }
643              
644             =head2 get_intron_positions
645              
646             Title : get_intron_positions
647             Usage : my @positions = $gene->get_intron_positions($map, $int);
648             Function: Get all the intron positions that are relative to the $int'th
649             transcript position added to the map. Introns are returned sorted by
650             their start positions.
651             Returns : array of Bio::Map::GenePosition
652             Args : Bio::Map::GeneMap AND int (the transcript number; if second int not
653             supplied, or 0, considers the currently active transcript)
654              
655             =cut
656              
657             sub get_intron_positions {
658 5     5 1 5 my ($self, $map, $value) = @_;
659 5 50       10 $map || return;
660 5   100     13 $value ||= 0;
661 5         10 return $self->_get_typed_positions($map, 'intron', $value);
662             }
663              
664             =head2 get_intron_position
665              
666             Title : get_intron_position
667             Usage : my $position = $gene->get_intron_position($map, $intron_num, $int);
668             Function: Get the $intron_num'th intron position that is relative to the
669             $int'th transcript position added to the map. Introns are numbered in
670             Position order, not the order they were added to the map.
671             Returns : Bio::Map::GenePosition
672             Args : Bio::Map::GeneMap AND int (the intron you want) AND int (the
673             transcript number; if second int not supplied, or 0, considers the
674             currently active transcript)
675              
676             =cut
677              
678             sub get_intron_position {
679 4     4 1 7 my ($self, $map, $intron_num, $value) = @_;
680 4         10 my @introns = $self->get_intron_positions($map, $value);
681 4         9 return $self->_get_list_element($intron_num, @introns);
682             }
683              
684             =head2 set_from_db
685              
686             Title : set_from_db
687             Usage : $gene->set_from_db(); # for an instance only
688             Bio::Map::Gene->set_from_db(); # decide that all future genes added
689             # to maps will be set from db
690             Function: Creates all the various types of positions (transcripts, coding,
691             exons, introns) for this gene on all its maps. The information comes
692             from an Ensembl database via Bio::Tools::Run::Ensembl. NB: will
693             purge any existing Bio::Map::GenePosition objects that were
694             previously on the maps this gene is one.
695             Returns : undef on failure, otherwise the number of maps that successfully
696             had positions added to them
697             Args : boolean (no argument/undef is treated as 1, ie. do set from db;
698             supply 0 to turn off)
699              
700             NB: Bio::Tools::Run::Ensembl is available in the bioperl-run package;
701             see it for details on setting up a database to use.
702              
703             Once set, any new maps (species) this gene is added to will
704             automatically also have their positions set_from_db
705              
706             =cut
707              
708             sub set_from_db {
709 0     0 1 0 my ($self, $bool) = @_;
710 0 0       0 return unless $USE_ENSEMBL;
711 0 0       0 return unless Bio::Tools::Run::Ensembl->registry_setup();
712 0 0       0 defined($bool) || ($bool = 1);
713            
714 0 0       0 unless (ref($self)) {
715 0         0 $SET_FROM_DB = $bool;
716 0         0 return 0;
717             }
718            
719 0         0 $self->{_set_from_db} = $bool;
720            
721 0         0 my $success = 0;
722 0         0 foreach my $map ($self->known_maps) {
723 0         0 $success += $self->_set_from_db($map);
724             }
725            
726 0         0 return $success;
727             }
728              
729             # set from db for a particular map (species)
730             sub _set_from_db {
731 9     9   7 my ($self, $map) = @_;
732 9   50     15 my $gene_name = $self->universal_name || return 0;
733 9 50 33     32 $SET_FROM_DB || $self->{_set_from_db} || return;
734            
735 0         0 my $species = $map->species;
736            
737 0   0     0 my $slice_adaptor = Bio::Tools::Run::Ensembl->get_adaptor($species, 'Slice') || return 0;
738 0   0     0 my $gene = Bio::Tools::Run::Ensembl->get_gene_by_name(-species => $species,
739             -name => $gene_name,
740             -use_orthologues => 'Homo sapiens',
741             -use_swiss_lookup => 1,
742             -use_entrez_lookup => 1) || return 0;
743            
744             # attach species(map)-specific gene info to self
745 0         0 $self->description($gene->description, $map);
746 0         0 $self->display_id($gene->display_id, $map);
747 0         0 $self->display_xref($gene->display_xref->display_id, $map);
748 0         0 $self->external_db($gene->external_db, $map);
749 0         0 $self->external_name($gene->external_name, $map);
750 0         0 $self->biotype($gene->biotype, $map);
751 0         0 $self->source($gene->source, $map);
752            
753             # get the transcripts for this map
754 0         0 my $trans_ref = $gene->get_all_Transcripts;
755 0 0 0     0 unless ($trans_ref && @{$trans_ref} > 0) {
  0         0  
756 0         0 return 0;
757             }
758            
759             # purge all existing GenePositions from the map
760 0         0 my $handler = $map->get_position_handler();
761 0         0 foreach my $pos ($map->get_positions) {
762 0 0       0 if ($pos->isa('Bio::Map::GenePosition')) {
763 0         0 $handler->purge_positions($pos);
764             }
765             }
766            
767             # assume all transcripts on the same strand, sort them
768 0         0 my $strand = ${$trans_ref}[0]->strand;
  0         0  
769 0 0       0 my @transcripts = sort { $strand == -1 ? ($b->end <=> $a->end) : ($a->start <=> $b->start) } @{$trans_ref};
  0         0  
  0         0  
770            
771             # store slice of first transcript so we can use it to get seq data, and
772             # add chromosome info to our map if not set
773 0         0 my $primary_slice = $slice_adaptor->fetch_by_transcript_stable_id($transcripts[0]->stable_id, 0);
774 0         0 my $uid = $map->unique_id;
775 0         0 @{$self->{_ensembl}->{$uid}} = ($slice_adaptor, $primary_slice, $strand);
  0         0  
776            
777             #my $cyto = $map->location || Bio::Map::CytoPosition->new();
778             #unless ($cyto->chr) {
779             # $cyto->chr($primary_slice->seq_region_name);
780             #}
781             #$map->location($cyto);
782            
783             # adjustment needed to make all transcript coords relative to the start of
784             # the first transcript which must start at 0
785 0 0       0 my $adjust = $strand == -1 ? $transcripts[0]->end : $transcripts[0]->start;
786 0         0 my $orig_adjust = $adjust;
787 0 0   0   0 my $adjustment = sub { return $strand == -1 ? $adjust - shift() : shift() - $adjust; };
  0         0  
788            
789             # go through all the transcripts, remembering the longest
790 0         0 my $longest_trans = 0;
791 0         0 my $longest = 1;
792 0         0 my $count = 1;
793 0         0 foreach my $transcript (@transcripts) {
794             # length is the total number of bases the exons cover, not genomic span
795 0         0 my $length = $transcript->length();
796 0 0       0 if ($length > $longest_trans) {
797 0         0 $longest_trans = $length;
798 0         0 $longest = $count;
799             }
800            
801             # make positions for this transcript
802 0         0 my $slice = $slice_adaptor->fetch_by_transcript_stable_id($transcript->stable_id, 0);
803 0         0 my $start = &$adjustment($slice->start());
804 0         0 my $end = &$adjustment($slice->end());
805 0 0       0 ($start, $end) = ($end, $start) if $start > $end;
806            
807 0         0 my $trans_pos = Bio::Map::GenePosition->new(-map => $map, -start => $start, -end => $end, -type => 'transcript');
808 0         0 $self->add_transcript_position($trans_pos);
809            
810             # all subsequent coordinates need to be relative to the start of this
811             # transcript
812 0 0       0 $adjust = $strand == -1 ? $slice->end : $slice->start;
813            
814             # there may not be a coding region
815 0 0       0 if (defined($transcript->coding_region_start)) {
816 0         0 my $atg = &$adjustment($transcript->coding_region_start());
817 0         0 my $stop = &$adjustment($transcript->coding_region_end());
818 0 0       0 ($atg, $stop) = ($stop, $atg) if $atg > $stop;
819            
820 0         0 my $cod_pos = Bio::Map::GenePosition->new(-map => $map, -start => $atg, -end => $stop, -type => 'coding');
821 0         0 $self->coding_position($cod_pos);
822             }
823            
824             # exons
825 0         0 foreach my $exon (@{$transcript->get_all_Exons}) {
  0         0  
826 0         0 my $start = &$adjustment($exon->start());
827 0         0 my $end = &$adjustment($exon->end());
828 0 0       0 ($start, $end) = ($end, $start) if $start > $end;
829            
830 0 0       0 my $throw_species = ref($species) ? $species->scientific_name : $species;
831 0 0       0 defined($end) || $self->throw("gene $gene_name in species $throw_species (".$gene->display_id.") had exon $start with no end");
832 0         0 my $pos = Bio::Map::GenePosition->new(-map => $map, -start => $start, -end => $end, -type => 'exon');
833 0         0 $self->add_exon_position($pos);
834             }
835            
836             # introns
837 0         0 foreach my $intron (@{$transcript->get_all_Introns}) {
  0         0  
838 0         0 my $start = &$adjustment($intron->start());
839 0         0 my $end = &$adjustment($intron->end());
840 0 0       0 ($start, $end) = ($end, $start) if $start > $end;
841            
842 0         0 my $pos = Bio::Map::GenePosition->new(-map => $map, -start => $start, -end => $end, -type => 'intron');
843 0         0 $self->add_intron_position($pos);
844             }
845            
846 0         0 $adjust = $orig_adjust;
847 0         0 } continue { $count++ };
848            
849 0         0 $self->active_transcript($map, $longest);
850            
851 0         0 return 1;
852             }
853              
854             # get safely sorted positions of a certain type
855             sub _get_typed_positions {
856 328     328   283 my ($self, $map, $type, $transcript_number) = @_;
857 328 100 100     580 if (defined $transcript_number && $transcript_number == 0) {
858 39         60 $transcript_number = $self->active_transcript($map);
859             }
860            
861 328         216 my @positions;
862 328         497 foreach my $pos ($self->get_positions($map, 1)) {
863 2027 100       3653 $pos->isa('Bio::Map::GenePosition') || next;
864 1699 100       1823 $pos->type eq $type || next;
865            
866 586 100       695 if (defined $transcript_number) {
867 44   50     63 my $rel = $pos->relative || next;
868 44 50       85 $rel->type eq 'transcript' || next;
869 44   33     60 my $rel_transcript_num = $rel->transcript || $self->active_transcript($map);
870 44 100       132 $rel_transcript_num == $transcript_number || next;
871             }
872            
873 585         497 push(@positions, $pos);
874             }
875            
876             # avoid sorting using $pos->sortable since we would go infinite from the
877             # call to absolute_conversion - we don't need absolute_conversion here
878             # since we know the raw starts are all relative to the same thing, or in
879             # the case of transcripts, we want them sorted in the way they were added
880 328 100       507 if (defined $transcript_number) {
881             # ensure we get raw start; ask for starts relative to the things
882             # the positions are relative to. Precompute answer for efficiency
883 43         62 my @sort = map { $_->[1] }
884 14         42 sort { $a->[0] <=> $b->[0] }
885 43         53 map { [$_->start($_->relative), $_] }
  43         68  
886             @positions;
887 43         106 return @sort;
888             }
889             else {
890 285 100       175 my @known_order = @{$self->{t_order}->{$map} || []};
  285         767  
891 285 100       391 @known_order || return;
892            
893             # transcripts might have been removed, so known_order could be invalid
894 272 50       703 return @known_order if @known_order == @positions; #*** dangerous assumption?
895 0         0 my %exists = map { $_ => $_ } @positions;
  0         0  
896 0         0 my @new_order;
897 0         0 foreach my $pos (@known_order) {
898 0 0       0 exists $exists{$pos} || next;
899 0         0 push(@new_order, $pos);
900             }
901 0         0 @{$self->{t_order}->{$map}} = @new_order;
  0         0  
902 0         0 return @new_order;
903             }
904             }
905              
906             # get a certain element from an array, checking the array has that element
907             sub _get_list_element {
908 113     113   136 my ($self, $wanted, @list) = @_;
909 113 50 33     361 ($wanted && $wanted > 0) || return;
910 113 100       145 @list > 0 || return;
911 111         98 my $index = $wanted - 1;
912 111 100 66     328 if ($index >= 0 && $index <= $#list) {
913 110         361 return $list[$index];
914             }
915 1         4 return;
916             }
917              
918             # add a certain type of posiiton
919             sub _add_type_position {
920 6     6   9 my ($self, $type, $pos, $transcript_num) = @_;
921 6 50 33     35 ($pos && $pos->isa('Bio::Map::GenePosition')) || return;
922            
923 6   33     14 my $map = $pos->map || $self->throw("Supplied GenePosition has no map");
924 6 50       14 $self->in_map($map) || $self->throw("Supplied GenePosition is not on a map that this gene belong to");
925            
926 6   33     18 $transcript_num ||= $self->active_transcript($map) || $self->throw("Asked to be relative to the active transcript, but there is no transcript");
      66        
927            
928             # sanity check - must be within the transcript
929 6   33     16 my $transcript_pos = $self->get_transcript_position($map, $transcript_num) || $self->throw("Asked to be relative to transcript $transcript_num, but there is no such transcript");
930 6 50 0     15 $transcript_pos->end || ($self->warn("no transcript pos end for pos for gene ".$self->universal_name." and species ".$pos->map->species."!") && exit);
931 6 50 0     9 $pos->end || ($self->warn("no pos end for pos for gene ".$self->universal_name." and species ".$pos->map->species."!") && exit);
932 6 50       26 unless ($transcript_pos->contains($pos)) {
933 0         0 $self->warn("$type coordinates must lie within those of the transcript, not adding $type");
934 0         0 return;
935             }
936            
937 6         16 $pos->type($type);
938 6         12 $pos->relative->transcript($transcript_num);
939 6         16 $self->SUPER::add_position($pos);
940             }
941              
942             # get/setter for general/map-specific data
943             sub _gene_data {
944 2     2   3 my ($self, $type, $thing, $map) = @_;
945 2 100 50     10 $thing or return ($self->{$type}->{general} || '');
946            
947 1 50 33     4 if (ref($thing) && $thing->isa('Bio::Map::GeneMap')) {
948 0   0     0 return $self->{$type}->{$thing} || '';
949             }
950            
951 1 50 33     5 if ($map && $map->isa('Bio::Map::GeneMap')) {
952 0         0 $self->{$type}->{$map} = $thing;
953             }
954             else {
955 1         4 $self->{$type}->{general} = $thing;
956             }
957 1         2 return $thing;
958             }
959              
960             # for exclusive use by GeneMap so it can get sequence data
961             sub _get_slice {
962 7     7   9 my ($self, $map) = @_;
963 7 50       15 $map || return;
964 7   50     18 my $uid = $map->unique_id || return;
965 7 50       16 if (defined $self->{_ensembl}->{$uid}) {
966 0         0 return @{$self->{_ensembl}->{$uid}};
  0         0  
967             }
968 7         14 return;
969             }
970              
971             1;