File Coverage

Bio/Map/GeneMap.pm
Criterion Covered Total %
statement 107 145 73.7
branch 43 84 51.1
condition 20 39 51.2
subroutine 15 18 83.3
pod 12 12 100.0
total 197 298 66.1


line stmt bran cond sub pod time code
1             # $Id: GeneMap.pm,v 1.17 2006/07/17 14:16:53 sendu Exp $
2             #
3             # BioPerl module for Bio::Map::GeneMap
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Sendu Bala
8             #
9             # Copyright Sendu bala
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Map::GeneMap - A MapI implementation to represent the area around a gene
18              
19             =head1 SYNOPSIS
20              
21             use Bio::Map::GeneMap;
22             use Bio::Map::Gene;
23             use Bio::Map::TranscriptionFactor;
24             use Bio::Map::GeneRelative;
25              
26             # make some maps that will represent an area around a particular gene in
27             # particular species (by default, the map represents the area in the genome
28             # 1000bp upstream of the gene)
29             my $map1 = Bio::Map::GeneMap->get(-gene => 'BRCA2',
30             -species => 'human',
31             -description => 'breast cancer 2, early onset');
32             my $map2 = Bio::Map::GeneMap->get(-gene => 'BRCA2',
33             -species => 'mouse');
34              
35             # model a TF that binds 500bp upstream of the BRCA2 gene in humans and
36             # 250bp upstream of BRCA2 in mice
37             my $rel = Bio::Map::GeneRelative->new(-description => "gene start");
38             my $tf = Bio::Map::TranscriptionFactor->get(-universal_name => 'tf1');
39             Bio::Map::Position->new(-map => $map1,
40             -element => $tf,
41             -start => -500,
42             -length => 10,
43             -relative => $rel);
44             Bio::Map::Position->new(-map => $map2,
45             -element => $tf,
46             -start => -250,
47             -length => 10,
48             -relative => $rel);
49              
50             # find out all the things that map near BRCA2 in all species
51             foreach my $map ($gene->known_maps) {
52             foreach my $thing ($map->get_elements) {
53             next if $thing eq $gene;
54             foreach my $pos ($thing->get_positions($map)) {
55             print "In species ", $map->species, ", ",
56             $thing->universal_name, " maps at ", $pos->value,
57             " relative to ", $pos->relative->description, " of gene ",
58             $gene->universal_name, "\n";
59             }
60             }
61             }
62            
63             # a GeneMap isa PrimarySeq and so can have sequence associated with it
64             $map1->seq('ATGC');
65             my $subseq = $map1->subseq(2,3); # TG
66              
67             =head1 DESCRIPTION
68              
69             Model the abstract notion of the area around a gene - you don't care exactly
70             where this area is in the genome, you just want to be able to say "something
71             binds upstream of gene X" and "something else binds 20bp upstream of the first
72             something" etc.
73              
74             It's useful for modelling transcription factor bindings sites, letting you find
75             out which transcription factors bind near a gene of interest, or which genes
76             are bound by a transcription factor of interest.
77              
78             See t/Map/Map.t for more example usage.
79              
80             =head1 FEEDBACK
81              
82             =head2 Mailing Lists
83              
84             User feedback is an integral part of the evolution of this and other
85             Bioperl modules. Send your comments and suggestions preferably to
86             the Bioperl mailing list. Your participation is much appreciated.
87              
88             bioperl-l@bioperl.org - General discussion
89             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
90              
91             =head2 Support
92              
93             Please direct usage questions or support issues to the mailing list:
94              
95             I
96              
97             rather than to the module maintainer directly. Many experienced and
98             reponsive experts will be able look at the problem and quickly
99             address it. Please include a thorough description of the problem
100             with code and data examples if at all possible.
101              
102             =head2 Reporting Bugs
103              
104             Report bugs to the Bioperl bug tracking system to help us keep track
105             of the bugs and their resolution. Bug reports can be submitted via the
106             web:
107              
108             https://github.com/bioperl/bioperl-live/issues
109              
110             =head1 AUTHOR - Sendu Bala
111              
112             Email bix@sendu.me.uk
113              
114             =head1 APPENDIX
115              
116             The rest of the documentation details each of the object methods.
117             Internal methods are usually preceded with a _
118              
119             =cut
120              
121             # Let the code begin...
122              
123             package Bio::Map::GeneMap;
124 1     1   1589 use strict;
  1         1  
  1         41  
125              
126 1     1   4 use Bio::Map::Gene;
  1         2  
  1         17  
127 1     1   3 use Bio::Map::Position;
  1         1  
  1         23  
128              
129 1     1   4 use base qw(Bio::Map::SimpleMap Bio::PrimarySeq);
  1         1  
  1         1285  
130              
131             our $GENEMAPS = {};
132              
133             =head2 new
134              
135             Title : new
136             Usage : my $obj = Bio::Map::GeneMap->new();
137             Function: Builds a new Bio::Map::GeneMap object (that has placed on it a
138             mappable element (Bio::Map::Gene) representing a gene).
139             Returns : Bio::Map::GeneMap
140             Args : -gene => string name of the gene this map will be for
141             (in a form common to all species that have the gene,
142             but unique amongst non-orthologous genes) or a
143             Bio::Map::Gene object, REQUIRED
144             -species => Bio::Taxon or string representing species, REQUIRED
145             -uid => string, unique identifier for this map (must be
146             unique amongst all gene/species combinations)
147             -description => string, free text description of the gene
148             -upstream => int, the number of bases the map extends before the
149             start of the gene element (default 1000).
150             -downstream => int, the number of bases the map extends beyond the
151             end of the gene element (default 0).
152             -seq => string, the sequence of the map, presumably the
153             genomic sequence -upstream bases of the gene,
154             including the gene, and -downstream bases of the gene
155              
156             =cut
157              
158             sub new {
159 9     9 1 13 my ($class, @args) = @_;
160 9         28 my $self = $class->SUPER::new(@args);
161            
162 9         27 my ($uid, $gene, $species, $desc, $up, $down, $seq) = $self->_rearrange([qw(UID
163             GENE
164             SPECIES
165             DESCRIPTION
166             UPSTREAM
167             DOWNSTREAM
168             SEQ)], @args);
169            
170 9 50 33     40 unless (defined $gene && defined $species) {
171 0         0 $self->throw("You must supply both -species and -gene");
172             }
173            
174 9         23 $self->gene(-gene => $gene, -description => $desc, -upstream => $up, -downstream => $down);
175 9 100       22 $self->seq($seq) if $seq;
176            
177 9 50       17 unless (defined($uid)) {
178             # trigger the special behaviour in our unique_id method by supplying it
179             # the unique_id we got from our parent class
180 9         13 $self->unique_id($self->unique_id);
181             }
182            
183 9         30 return $self;
184             }
185              
186             =head2 get
187              
188             Title : get
189             Usage : my $map = Bio::Map::GeneMap->get();
190             Function: Builds a new Bio::Map::GeneMap object (like new()), or gets a
191             pre-existing one that corresponds to your arguments.
192             Returns : Bio::Map::GeneMap
193             Args : -gene => string name of the gene this map will be for
194             (in a form common to all species that have the gene,
195             but unique amongst non-orthologous genes) or a
196             Bio::Map::Gene object, REQUIRED
197             -species => Bio::Taxon or string representing species, REQUIRED
198             -uid => string, unique identifier for this map (must be
199             unique amongst all gene/species combinations)
200             -description => string, free text description of the gene
201             -upstream => int, the number of bases the map extends before the
202             start of the gene element (default 1000).
203             -downstream => int, the number of bases the map extends beyond the
204             end of the gene element (default 0).
205             -seq => string, the sequence of the map, presumably the
206             genomic sequence -upstream bases of the gene,
207             including the gene, and -downstream bases of the gene
208              
209             If you supply a -uid, and a map had previously been created and
210             given that uid, that same map object will be returned. Otherwise, the
211             combination of -gene and -species will be used to determine
212             if the same map had previously been made. If a corresponding map
213             hadn't previously been made, a new map object will be created and
214             returned.
215              
216             =cut
217              
218             sub get {
219 20     20 1 579 my ($class, @args) = @_;
220 20         71 my ($uid, $gene, $species, $desc, $up, $down, $seq) = Bio::Root::Root->_rearrange([qw(UID
221             GENE
222             SPECIES
223             DESCRIPTION
224             UPSTREAM
225             DOWNSTREAM
226             SEQ)], @args);
227            
228 20         32 my $gene_map;
229 20 50 33     87 if ($uid && defined $GENEMAPS->{by_uid}->{$uid}) {
    50 33        
230 0         0 $gene_map = $GENEMAPS->{by_uid}->{$uid};
231             }
232             elsif ($gene && $species) {
233 20 100       32 my $name = ref($gene) ? $gene->universal_name : $gene;
234 20 100       45 if (defined $GENEMAPS->{by_ns}->{$name}->{$species}) {
235 11         13 $gene_map = $GENEMAPS->{by_ns}->{$name}->{$species};
236             }
237             }
238 20 100       25 if ($gene_map) {
239 11 50       14 $gene_map->gene->description($desc) if $desc;
240 11 50       15 $gene_map->upstream($up) if defined($up);
241 11 50       18 $gene_map->downstream($down) if defined($down);
242 11 50       13 $gene_map->seq($seq) if $seq;
243 11         44 return $gene_map;
244             }
245            
246 9         22 return $class->new(@args);
247             }
248              
249             =head2 unique_id
250              
251             Title : unique_id
252             Usage : my $id = $map->unique_id;
253             Function: Get/set the unique ID for this map
254             Returns : string
255             Args : none to get, OR string to set
256              
257             =cut
258              
259             sub unique_id {
260 76     76 1 69 my ($self, $id) = @_;
261 76 100       104 if (defined $id) {
262 9         17 delete $GENEMAPS->{by_uid}->{$self->{'_uid'}};
263 9         12 $self->{'_uid'} = $id;
264 9         16 $GENEMAPS->{by_uid}->{$id} = $self;
265             }
266 76         194 return $self->{'_uid'};
267             }
268              
269             =head2 species
270              
271             Title : species
272             Usage : my $species = $map->species;
273             Function: Get/set Species for a map. It is not recommended to change this once
274             set.
275             Returns : Bio::Taxon object or string
276             Args : none to get, OR Bio::Taxon or string to set
277              
278             =cut
279              
280             sub species {
281 30     30 1 1732 my ($self, $value) = @_;
282 30 100       42 if ($value) {
283 9         8 my $old_species = $self->{_species};
284 9         10 $self->{'_species'} = $value;
285 9   50     13 my $name = $self->universal_name || return $value;
286 0 0       0 if ($old_species) {
287 0         0 delete $GENEMAPS->{by_ns}->{$name}->{$old_species};
288             }
289 0         0 $GENEMAPS->{by_ns}->{$name}->{$value} = $self;
290             }
291 21         55 return $self->{'_species'};
292             }
293              
294             =head2 type
295              
296             Title : type
297             Usage : my $type = $map->type
298             Function: Get Map type
299             Returns : string 'gene'
300             Args : none
301              
302             =cut
303              
304             sub type {
305 0     0 1 0 return 'gene';
306             }
307              
308             =head2 gene
309              
310             Title : gene
311             Usage : my $gene = $map->gene;
312             $map->gene(-gene => $gene);
313             Function: Get/set the mappable element on this map that represents the gene
314             this map is for. Once set, it is not recommended to re-set the gene
315             to something else. Behaviour in that case is undefined.
316             Returns : Bio::Map::Gene
317             Args : none to get, OR to set:
318             -gene => Bio::Map::Gene or string of the universal name (see
319             Bio::Map::Gene docs), REQUIRED
320             -description => string, applied to the Bio::Map::Gene
321             -upstream => int, the number of bases the map extends before the
322             start of the gene element (default 1000).
323             -downstream => int, the number of bases the map extends beyond the
324             end of the gene element (default 0).
325              
326             =cut
327              
328             sub gene {
329 487     487 1 925 my ($self, @args) = @_;
330            
331 487 100       786 if (@args > 0) {
332 9         19 my ($gene, $desc, $up, $down) = $self->_rearrange([qw(GENE
333             DESCRIPTION
334             UPSTREAM
335             DOWNSTREAM)], @args);
336 9 50       19 $self->throw("You must supply -gene") unless $gene;
337            
338 9 100       30 my $gene_obj = ref($gene) ? $gene : Bio::Map::Gene->get(-universal_name => $gene, -description => $desc);
339 9 50       15 if (defined $self->{gene}) {
340 0 0       0 if ($self->{gene} ne $gene_obj) {
341 0         0 $self->warn("Changing the gene that this map is for, which could be bad");
342 0         0 $self->purge_positions($self->{gene});
343 0         0 delete $GENEMAPS->{by_ns}->{$self->universal_name}->{$self->species};
344 0         0 $self->{gene} = $gene_obj;
345             }
346            
347             # change the gene's position on us if necessary
348 0 0       0 $self->upstream($up) if defined $up;
349 0 0       0 $self->downstream($down) if defined $down;
350             }
351             else {
352             # give the gene object a position on us
353 9   100     19 $up ||= 1000;
354 9 50       27 $up >= 0 || $self->throw("-upstream must be a positive integer");
355 9         42 Bio::Map::Position->new(-map => $self, -start => ($up + 1), -element => $gene_obj);
356 9         22 $self->{gene} = $gene_obj;
357 9   50     34 $self->downstream($down || 0);
358            
359             # set other gene positions from db if already user-requested
360 9         23 $gene_obj->_set_from_db($self);
361             }
362            
363 9         17 $GENEMAPS->{by_ns}->{$self->universal_name}->{$self->species} = $self;
364             }
365            
366 487         722 return $self->{gene};
367             }
368              
369             =head2 universal_name
370              
371             Title : universal_name
372             Usage : my $name = $map->universal_name
373             Function: Get/set the name of Bio::Map::Gene object associated with this map.
374             It is not recommended to change this once set.
375             Returns : string
376             Args : none to get, OR string to set
377              
378             =cut
379              
380             sub universal_name {
381 19     19 1 17 my ($self, $value) = @_;
382 19 100       26 $self->gene || return;
383 10 50       22 if ($value) {
384 0         0 my $species = $self->species;
385 0         0 delete $GENEMAPS->{by_ns}->{$self->gene->universal_name}->{$species};
386 0         0 $self->gene->universal_name($value);
387 0         0 $GENEMAPS->{by_ns}->{$value}->{$species} = $self;
388             }
389 10         14 return $self->gene->universal_name;
390             }
391              
392             =head2 upstream
393              
394             Title : upstream
395             Usage : my $distance = $map->upstream;
396             $map->upstream($distance);
397             Function: Get/set how long the map is before the start of the Bio::Map::Gene
398             object on this map.
399             Returns : int
400             Args : none to get, OR int to set (the number of bases the map extends
401             before the start of the gene)
402              
403             =cut
404              
405             sub upstream {
406 13     13 1 17 my ($self, $value) = @_;
407            
408 13         21 my $pos = $self->gene->position($self);
409 13 50       28 if (defined($value)) {
410 0 0       0 $value >= 0 || $self->throw("Supplied value must be a positive integer");
411 0         0 $pos->start($value + 1);
412             }
413            
414 13         22 return $pos->start - 1;
415             }
416              
417             =head2 downstream
418              
419             Title : downstream
420             Usage : my $distance = $map->downstream;
421             $map->downstream($distance);
422             Function: Get/set the nominal end of the map relative to the end of the
423             Bio::Map::Gene object on this map.
424             Returns : int
425             Args : none to get, OR int to set (the number of bases the map extends
426             beyond the end of the gene)
427              
428             =cut
429              
430             sub downstream {
431 20     20 1 17 my $self = shift;
432 20 100       35 if (@_) { $self->{_downstream} = shift }
  9         12  
433 20   50     48 return $self->{_downstream} || 0;
434             }
435              
436             =head2 length
437              
438             Title : length
439             Usage : my $length = $map->length();
440             Function: Retrieves the length of the map. This is normally the length of the
441             upstream region + length of the gene + length of the downstream
442             region, but may be longer if positions have been placed on the map
443             beyond the end of the nominal downstream region.
444             Returns : int
445             Args : none
446              
447             =cut
448              
449             sub length {
450 11     11 1 13 my $self = shift;
451 11         18 my $expected_length = $self->gene->position($self)->length + $self->upstream + $self->downstream;
452 11         36 my $actual_length = $self->SUPER::length;
453 11 50       36 return $actual_length > $expected_length ? $actual_length : $expected_length;
454             }
455              
456             =head2 seq
457              
458             Title : seq
459             Usage : $string = $obj->seq()
460             Function: Get/set the sequence as a string of letters. When getting, If the
461             GeneMap object didn't have sequence attached directly to it for the
462             region requested, the map's gene's database will be asked for the
463             sequence, and failing that, the map's gene's positions will be asked
464             for their sequences. Areas for which no sequence could be found will
465             be filled with Ns, unless no sequence was found anywhere, in which
466             case undef is returned.
467             Returns : string
468             Args : Optionally on set the new value (a string). An optional second
469             argument presets the alphabet (otherwise it will be guessed).
470              
471             =cut
472              
473             sub seq {
474 7     7 1 14 my ($self, @args) = @_;
475 7         26 my $seq = $self->SUPER::seq(@args);
476 7         14 my $expected_length = $self->length;
477 7 50 66     29 if (! $seq || CORE::length($seq) < $expected_length) {
478 7   100     40 my @have = split('', $seq || '');
479 7         8 my @result;
480 7         18 for (0..($expected_length - 1)) {
481 7906   100     15299 $result[$_] = shift(@have) || 'N';
482             }
483            
484             # build map sequence by asking gene or positions
485 7         21 my @slice_stuff = $self->gene->_get_slice($self);
486 7 50       11 if (@slice_stuff) {
487 0         0 my ($slice_adaptor, $slice, $strand) = @slice_stuff;
488 0   0     0 my ($start, $end, $gene_start) = (CORE::length($seq || '') + 1, $expected_length, $self->upstream + 1);
489            
490             # convert map coords to genomic coords
491 0 0       0 my $adjust = $strand == -1 ? $slice->end : $slice->start;
492 0 0   0   0 my $adjustment = sub { return $strand == -1 ? $adjust - shift() : shift() + $adjust; };
  0         0  
493 0         0 my $converted_start = &$adjustment($start - $gene_start);
494 0         0 my $converted_end = &$adjustment($end - $gene_start);
495 0 0       0 ($converted_start, $converted_end) = ($converted_end, $converted_start) if $converted_start > $converted_end;
496            
497             # get sequence from a new slice of desired region
498             #*** what happens if desired region starts or ends off end of chromo?...
499 0         0 my $new_slice = $slice_adaptor->fetch_by_region($slice->coord_system_name, $slice->seq_region_name, $converted_start, $converted_end);
500 0 0 0     0 if ($new_slice && (my $seq_str = $new_slice->seq)) {
501 0 0       0 if ($strand == -1) {
502 0         0 $seq_str = $self->_revcom($seq_str);
503             }
504 0   0     0 splice(@result, CORE::length($seq || ''), CORE::length($seq_str), split('', $seq_str));
505             }
506             }
507             else {
508 7         20 foreach my $pos ($self->get_positions) {
509 18 100       79 next unless $pos->can('seq');
510 8   100     15 my @pos_seq = split('', $pos->seq(undef, undef, 1) || next);
511 1         2 for my $i ($pos->start($pos->absolute_relative)..$pos->end($pos->absolute_relative)) {
512 3         3 $i--;
513 3         3 my $base = shift(@pos_seq);
514 3 50       6 if ($result[$i] eq 'N') {
515 3         6 $result[$i] = $base;
516             }
517             }
518             }
519             }
520            
521 7         458 $seq = join('', @result);
522             }
523 7         33 return $seq;
524             }
525              
526             =head2 subseq
527              
528             Title : subseq
529             Usage : $substring = $obj->subseq(10, 40);
530             Function: Returns the subseq from start to end, where the first base
531             is 1 and the number is inclusive, ie 1-2 are the first two
532             bases of the sequence. If the GeneMap object didn't have sequence
533             attached directly to it for the region requested, the map's gene's
534             database will be asked for the sequence, and failing that, the map's
535             gene's positions will be asked for their sequences. Areas for which
536             no sequence could be found will be filled with Ns, unless no
537             sequence was found anywhere, in which case undef is returned. subseq
538             requests that extend beyond the end of the map will throw.
539             Returns : string
540             Args : integer for start position AND integer for end position
541             OR
542             Bio::LocationI location for subseq (strand honored)
543             OR
544             Bio::RangeI (eg. a Bio::Map::PositionI)
545              
546             =cut
547              
548             sub subseq {
549 4     4 1 9 my ($self, $start, $end) = @_;
550            
551 4 100 66     27 if ($start && ref($start) && $start->isa('Bio::RangeI')) {
      66        
552 1         1 my $thing = $start;
553 1 50       12 if ($start->isa('Bio::Map::Position')) {
554 1         3 ($start, $end) = ($thing->start($thing->absolute_relative), $thing->end($thing->absolute_relative));
555             }
556             else {
557 0         0 ($start, $end) = ($thing->start, $thing->end);
558             }
559             }
560            
561             # *** this implementation potentially wastefull? Should duplicate code
562             # from seq() to do this just for the desired region??
563 4         7 my $orig_seq = $self->{seq};
564 4         10 $self->{seq} = $self->seq();
565 4 50       25 my $subseq = $self->{seq} ? $self->SUPER::subseq($start, $end) : '';
566 4         9 $self->{seq} = $orig_seq;
567            
568 4         17 return $subseq;
569             }
570              
571             # quick revcom for strings (silly to create a PrimarySeq just to revcom and then
572             # return a string again)
573             sub _revcom {
574 0     0     my ($self, $seq) = @_;
575 0 0         $seq or return;
576 0           $seq = reverse($seq);
577 0           $seq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
578 0           return $seq;
579             }
580              
581             1;