File Coverage

Bio/LiveSeq/SeqI.pm
Criterion Covered Total %
statement 173 348 49.7
branch 81 184 44.0
condition 3 18 16.6
subroutine 25 33 75.7
pod 25 26 96.1
total 307 609 50.4


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::LiveSeq::SeqI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Joseph Insana
7             #
8             # Copyright Joseph Insana
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
17              
18             =head1 SYNOPSIS
19              
20             # documentation needed
21              
22             =head1 DESCRIPTION
23              
24             This class implements BioPerl PrimarySeqI interface for Live Seq objects.
25              
26             One of the main difference in LiveSequence compared to traditional
27             "string" sequences is that coordinate systems are flexible. Typically
28             gene nucleotide numbering starts from 1 at the first character of the
29             initiator codon (A in ATG). This means that negative positions are
30             possible and common!
31              
32             Secondly, the sequence manipulation methods do not return a new
33             sequence object but change the current object. The current status can
34             be written out to BioPerl sequence objects.
35              
36             =head1 FEEDBACK
37              
38             =head2 Mailing Lists
39              
40             User feedback is an integral part of the evolution of this and other
41             Bioperl modules. Send your comments and suggestions preferably to one
42             of the Bioperl mailing lists. Your participation is much appreciated.
43              
44             bioperl-l@bioperl.org - General discussion
45             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46              
47             =head2 Support
48              
49             Please direct usage questions or support issues to the mailing list:
50              
51             I
52              
53             rather than to the module maintainer directly. Many experienced and
54             reponsive experts will be able look at the problem and quickly
55             address it. Please include a thorough description of the problem
56             with code and data examples if at all possible.
57              
58             =head2 Reporting Bugs
59              
60             Report bugs to the Bioperl bug tracking system to help us keep track
61             the bugs and their resolution. Bug reports can be submitted via the
62             web:
63              
64             https://github.com/bioperl/bioperl-live/issues
65              
66             =head1 AUTHOR - Joseph A.L. Insana
67              
68             Email: Insana@ebi.ac.uk, jinsana@gmx.net
69              
70             =head1 APPENDIX
71              
72             The rest of the documentation details each of the object
73             methods. Internal methods are usually preceded with a _
74              
75             Some note on the terminology/notation of method names:
76             label: a unique pointer to a single nucleotide
77             position: the position of a nucleotide according to a particular coordinate
78             system (e.g. counting downstream from a particular label taken as
79             number 1)
80             base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
81              
82             a base is the "value" that an "element" of a "chain" can assume
83             (see documentation on the Chain datastructure if interested)
84              
85             =cut
86              
87             #'
88             # Let the code begin...
89              
90             package Bio::LiveSeq::SeqI;
91 2     2   8 use strict;
  2         4  
  2         44  
92 2     2   321 use Bio::Tools::CodonTable; # for the translate() function
  2         3  
  2         41  
93              
94 2     2   7 use base qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI);
  2         3  
  2         576  
95              
96             =head2 seq
97              
98             Title : seq
99             Usage : $string = $obj->seq()
100             Function: Returns the complete sequence of an object as a string of letters.
101             Suggested cases are upper case for proteins and lower case for
102             DNA sequence (IUPAC standard),
103             Returns : a string
104              
105              
106             =cut
107              
108             sub seq {
109 543     543 1 635 my $self = shift;
110 543         946 my ($start,$end) = ($self->start(),$self->end());
111 543 50       802 if ($self->strand() == 1) {
112 543         1240 return $self->{'seq'}->down_chain2string($start,undef,$end);
113             } else { # reverse strand
114 0         0 my $str = $self->{'seq'}->up_chain2string($start,undef,$end);
115 0         0 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
116 0         0 return $str;
117             }
118             }
119              
120             =head2 all_labels
121              
122             Title : all_labels
123             Usage : @labels = $obj->all_labels()
124             Function: all the labels of every nucleotide an object is composed of
125             Returns : an array of labels
126             Args : none
127              
128             =cut
129              
130             sub all_labels {
131 822     822 1 740 my $self = shift;
132 822         1154 my ($start,$end) = ($self->start(),$self->end());
133 822         680 my $labels;
134 822 50       1089 if ($self->strand() == 1) {
135 822         1703 $labels=$self->{'seq'}->down_labels($start,$end);
136             } else {
137 0         0 $labels=$self->{'seq'}->up_labels($start,$end);
138             }
139 822         1102 return (@{$labels});
  822         45368  
140             }
141              
142             =head2 labelsubseq
143              
144             Title : labelsubseq
145             Usage : $dna->labelsubseq();
146             : $dna->labelsubseq($startlabel);
147             : $dna->labelsubseq($startlabel,$length);
148             : $dna->labelsubseq($startlabel,undef,$endlabel);
149             e.g. : $dna->labelsubseq(4,undef,8);
150             Function: prints the sequence as string. The difference between labelsubseq
151             and normal subseq is that it uses /labels/ as arguments, instead
152             than positions. This allows for faster and more efficient lookup,
153             skipping the (usually) lengthy conversion of positions into labels.
154             This is especially useful for manipulating with high power
155             LiveSeq objects, knowing the labels and exploiting their
156             usefulness.
157             Returns : a string
158             Errorcode -1
159             Args : without arguments it returns the entire sequence
160             with a startlabel it returns the sequence downstream that label
161             if a length is specified, it returns only that number of bases
162             if an endlabel is specified, it overrides the length argument
163             and prints instead up to that label (included)
164             Defaults: $startlabel defaults to the beginning of the entire sequence
165             $endlabel defaults to the end of the entire sequence
166              
167             =cut
168              
169             # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, especially that they follow each other in the correct order!!!!
170              
171             sub labelsubseq {
172 58     58 1 112 my ($self,$start,$length,$end,$unsecuremode) = @_;
173 58 100 66     229 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested")
174             { # to skip security checks (faster)
175 21 100       58 unless ($start) {
176 5         16 $start=$self->start;
177             }
178 21 100       58 if ($end) {
179 13 100       40 if ($end == $start) {
180 2         6 $length=1;
181 2         6 undef $end;
182             } else {
183 11         48 undef $length;
184             }
185             } else {
186 8 100       26 unless ($length) {
187 7         22 $end=$self->end;
188             }
189             }
190             } else {
191 37 50       80 if ($start) {
192 37 50       135 unless ($self->{'seq'}->valid($start)) {
193 0         0 $self->warn("Start label not valid"); return (-1);
  0         0  
194             }
195             }
196 37 100       100 if ($end) {
197 31 50       83 if ($end == $start) {
198 0         0 $length=1;
199 0         0 undef $end;
200             } else {
201 31 50       89 unless ($self->{'seq'}->valid($end)) {
202 0         0 $self->warn("End label not valid"); return (-1);
  0         0  
203             }
204 31 50       101 unless ($self->follows($start,$end) == 1) {
205 0         0 $self->warn("End label does not follow Start label!"); return (-1);
  0         0  
206             }
207 31         75 undef $length;
208             }
209             }
210             }
211 58 50       137 if ($self->strand() == 1) {
212 58         203 return $self->{'seq'}->down_chain2string($start,$length,$end);
213             } else { # reverse strand
214 0         0 my $str = $self->{'seq'}->up_chain2string($start,$length,$end);
215 0         0 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
216 0         0 return $str;
217             }
218             }
219              
220             =head2 subseq
221              
222             Title : subseq
223             Usage : $substring = $obj->subseq(10,40);
224             : $substring = $obj->subseq(10,undef,4);
225             Function: returns the subseq from start to end, where the first base
226             is 1 and the number is inclusive, ie 1-2 are the first two
227             bases of the sequence
228              
229             Start cannot be larger than end but can be equal.
230              
231             Allows for negative numbers $obj->subseq(-10,-1). By
232             definition, there is no 0!
233             -5 -1 1 5
234             gctagcgcccaac atggctcgctg
235              
236             This allows one to retrieve sequences upstream from given position.
237              
238             The precedence is from left to right: if END is given LENGTH is
239             ignored.
240              
241             Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
242             $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
243              
244             Returns : a string
245             Errorcode: -1
246             Args : start, integer, defaults to start of the sequence
247             end, integer, '' or undef, defaults to end of the sequence
248             length, integer, '' or undef
249             an optional strand (1 or -1) 4th argument
250             if strand argument is not given, it will default to the object
251             argment. This argument is useful when a call is issued from a child
252             of a parent object containing the subseq method
253              
254             =cut
255              
256             #'
257             # check the fact about reverse strand!
258             # is it feasible? Is it correct? Should we do it? How about exons? Does it
259             # work when you ask subseq of an exon?
260             # eliminated now (Mon night)
261             sub subseq {
262             ##my ($self,$pos1,$pos2,$length,$strand) = @_;
263 0     0 1 0 my ($self,$pos1,$pos2,$length,$strand) = @_;
264             ##unless (defined ($strand)) { # if optional [strand] argument not given
265             ## $strand=$self->strand;
266             ##}
267 0         0 $strand=$self->strand;
268 0         0 my ($str,$startlabel,$endlabel);
269 0 0       0 if (defined ($length)) {
270 0 0       0 if ($length < 1) {
271 0         0 $self->warn("No sense asking for a subseq of length < 1");
272 0         0 return (-1);
273             }
274             }
275 0 0       0 unless (defined ($pos1)) {
276             #print "\n##### DEBUG pos1 not defined\n";
277 0         0 $startlabel=$self->start;
278             } else {
279 0 0       0 if ($pos1 == 0) { # if position = 0 complain
280 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
281             }
282             ##if ($strand == 1) { # CHECK THIS!
283 0 0 0     0 if ((defined ($pos2))&&($pos1>$pos2)) {
284 0         0 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0         0  
285             }
286             ##} else { # CHECK THIS!
287             ## if ((defined ($pos2))&&($pos1<$pos2)) {
288             ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1);
289             ## }
290             ##}
291 0         0 $startlabel=$self->label($pos1);
292 0 0       0 if ($startlabel < 1) {
293 0         0 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
  0         0  
294             }
295             }
296 0 0       0 unless (defined ($pos2)) {
297             #print "\n##### pos2 not defined\n";
298 0 0       0 unless (defined ($length)) {
299 0         0 $endlabel=$self->end;
300             }
301             } else {
302 0 0       0 if ($pos2 == 0) { # if position = 0 complain
303 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
304             }
305 0         0 undef $length;
306             ##if ($strand == 1) { # CHECK THIS!
307 0 0 0     0 if ((defined ($pos1))&&($pos1>$pos2)) {
308 0         0 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0         0  
309             }
310             ##} else { # CHECK THIS!
311             ## if ((defined ($pos1))&&($pos1<$pos2)) {
312             ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1);
313             ## }
314             ##}
315 0         0 $endlabel=$self->label($pos2);
316 0 0       0 if ($endlabel < 1) {
317 0         0 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
  0         0  
318             }
319             }
320             #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n";
321              
322 0 0       0 if ($strand == 1) {
323 0         0 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
324             } else { # reverse strand
325 0         0 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
326 0         0 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
327             }
328 0         0 return $str;
329             }
330              
331             =head2 length
332              
333             Title : length
334             Usage : $seq->length();
335             Function: returns the number of nucleotides (or the number of aminoacids)
336             in the entire sequence
337             Returns : an integer
338             Errorcode -1
339             Args : none
340              
341             =cut
342              
343             sub length {
344 216     216 1 215 my $self=shift;
345 216         355 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
346 216 50       369 if ($strand == 1) {
347 216         483 return $self->{'seq'}->down_subchain_length($start,$end);
348             } else {
349 0         0 return $self->{'seq'}->up_subchain_length($start,$end);
350             }
351             }
352              
353             =head2 display_id
354              
355             Title : display_id
356             Usage : $id_string = $obj->display_id();
357             Function: returns the display id, alias the common name of the object
358              
359             The semantics of this is that it is the most likely string
360             to be used as an identifier of the sequence, and likely to
361             have "human" readability. The id is equivalent to the ID
362             field of the GenBank/EMBL databanks and the id field of the
363             Swissprot/sptrembl database. In fasta format, the >(\S+) is
364             presumed to be the id, though some people overload the id
365             to embed other information.
366              
367             See also: accession_number
368             Returns : a string
369             Args : none
370              
371             =cut
372              
373             sub display_id {
374 7     7 1 12 my ($self,$value) = @_;
375 7 100       18 if(defined $value) {
376 6         15 $self->{'display_id'} = $value;
377             }
378 7         15 return $self->{'display_id'};
379             }
380              
381              
382             =head2 accession_number
383              
384             Title : accession_number
385             Usage : $unique_biological_key = $obj->accession_number;
386             Function: Returns the unique biological id for a sequence, commonly
387             called the accession_number.
388             Notice that primary_id() provides the unique id for the
389             implemetation, allowing multiple objects to have the same accession
390             number in a particular implementation.
391              
392             For objects with no accession_number this method returns "unknown".
393             Returns : a string
394             Args : none
395              
396             =cut
397              
398             sub accession_number {
399 12     12 1 19 my ($self,$value) = @_;
400 12 100       39 if (defined $value) {
401 6         15 $self->{'accession_number'} = $value;
402             }
403 12 50       30 unless (exists $self->{'accession_number'}) {
404 0         0 return "unknown";
405             } else {
406 12         44 return $self->{'accession_number'};
407             }
408             }
409              
410             =head2 primary_id
411              
412             Title : primary_id
413             Usage : $unique_implementation_key = $obj->primary_id;
414             Function: Returns the unique id for this object in this
415             implementation. This allows implementations to manage their own
416             object ids in a way the implementation can control. Clients can
417             expect one id to map to one object.
418              
419             For sequences with no primary_id, this method returns
420             a stringified memory location.
421              
422             Returns : A string
423             Args : None
424              
425             =cut
426              
427              
428             sub primary_id {
429 0     0 1 0 my ($self,$value) = @_;
430 0 0       0 if(defined $value) {
431 0         0 $self->{'primary_id'} = $value;
432             }
433 0 0       0 unless (exists $self->{'primary_id'}) {
434 0         0 return "$self";
435             } else {
436 0         0 return $self->{'primary_id'};
437             }
438             }
439              
440             =head2 change
441              
442             Title : change
443             Usage : $substring = $obj->change('AA', 10);
444             Function: changes, modifies, mutates the LiveSequence
445             Examples:
446             $obj->change('', 10); delete nucleotide #10
447             $obj->change('', 10, 2); delete two nucleotides starting from #10
448             $obj->change('G', 10); change nuc #10 to 'G'
449             $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA'
450             $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10);
451             $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10
452             $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted
453             $obj->change('GATC', 10, 2); GATC inserted before #10, #10 deleted
454             $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
455              
456              
457             Returns : a string of deleted bases (if any) or 1 (everything OK)
458             Errorcode: -1
459             Args : seq, string, or '' ('' = undef = 0 = deletion)
460             start, integer
461             length, integer (optional)
462              
463             =cut
464              
465             sub change {
466 0     0 1 0 &positionchange;
467             }
468              
469             =head2 positionchange
470              
471             Title : positionchange
472             Function: Exactly like change. I.e. change() defaults to positionchange()
473              
474             =cut
475              
476             sub positionchange {
477 0     0 1 0 my ($self,$newseq,$position,$length)=@_;
478 0 0       0 unless ($position) {
479 0         0 $self->warn("Position not given or position 0");
480 0         0 return (-1);
481             }
482 0         0 my $label=$self->label($position);
483 0 0       0 unless ($label > 0) { # label not found or error
484 0         0 $self->warn("No valid label found at that position!");
485 0         0 return (-1);
486             }
487 0         0 return ($self->labelchange($newseq,$label,$length));
488             }
489              
490             =head2 labelchange
491              
492             Title : labelchange
493             Function: Exactly like change but uses a /label/ instead than a position
494             as second argument. This allows for multiple changes in a LiveSeq
495             without the burden of recomputing positions. I.e. for a multiple
496             change in two different points of the LiveSeq, the approach would
497             be the following: fetch the correct labels out of the two different
498             positions (method: label($position)) and then use the labelchange()
499             method to modify the sequence using those labels instead than
500             relying on the positions (that would have modified after the
501             first change).
502              
503             =cut
504              
505             sub labelchange {
506 5     5 1 9 my ($self,$newseq,$label,$length)=@_;
507 5 50       21 unless ($self->valid($label)) {
508 0 0       0 if ($self->{'seq'}->valid($label)) {
509             #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that");
510 0         0 shift @_;
511 0         0 return($self->{'seq'}->labelchange(@_));
512             } else {
513 0         0 $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
514 0         0 return (-1);
515             }
516             }
517 5 50       21 unless ($newseq) { # it means this is a simple deletion
518 0 0       0 if (defined($length)) {
519 0 0       0 unless ($length >= 0) {
520 0         0 $self->warn("No sense having length < 0 in a deletion");
521 0         0 return (-1);
522             }
523             } else {
524 0         0 $self->warn("Length not defined for deletion!");
525 0         0 return (-1);
526             }
527 0         0 return $self->_delete($label,$length);
528             }
529 5         11 my $newseqlength=CORE::length($newseq);
530 5 50       15 if (defined($length)) {
531 5 50       18 unless ($length >= 0) {
532 0         0 $self->warn("No sense having length < 0 in a change()");
533 0         0 return (-1);
534             }
535             } else {
536 0         0 $length=$newseqlength; # defaults to pointmutation(s)
537             }
538 5 50       14 if ($length == 0) { # it means this is a simple insertion, length def&==0
539 0         0 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq);
540 0 0       0 if ($insertbegin == -1) {
541 0         0 return (-1);
542             } else {
543 0         0 return (1);
544             }
545             }
546 5 50       16 if ($newseqlength == $length) { # it means this is simple pointmutation(s)
547 5         27 return $self->_mutate($label,$newseq,$length);
548             }
549             # if we arrived here then change is complex mixture
550 0         0 my $strand=$self->strand();
551 0         0 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
552 0 0       0 unless ($afterendlabel > 0) { # label not found or error
553 0         0 $self->warn("No valid afterendlabel found for executing the complex mutation!");
554 0         0 return (-1);
555             }
556 0         0 my $deleted=$self->_delete($label,$length); # first delete length nucs
557 0 0       0 if ($deleted eq -1) { # if errors
558 0         0 return (-1);
559             } else { # then insert the newsequence
560 0         0 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq);
561 0 0       0 if ($insertbegin == -1) {
562 0         0 return (-1);
563             } else {
564 0         0 return (1);
565             }
566             }
567             }
568              
569             # internal methods for change()
570              
571             # arguments: label for beginning of deletion, new sequence to insert
572             # returns: labels of beginning and end of the inserted sequence
573             # errorcode: -1
574             sub _praeinsert {
575 0     0   0 my ($self,$label,$newseq)=@_;
576 0         0 my ($insertbegin,$insertend);
577 0         0 my $strand=$self->strand();
578 0 0       0 if ($strand == 1) {
579 0         0 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label));
580             } else { # since it's reverse strand and we insert in forward direction....
581 0         0 $newseq=reverse($newseq);
582 0         0 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
583 0         0 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label));
584             }
585 0 0 0     0 if (($insertbegin==0)||($insertend==0)) {
586 0         0 $self->warn("Some error occurred while inserting!");
587 0         0 return (-1);
588             } else {
589 0         0 return ($insertbegin,$insertend);
590             }
591             }
592              
593             # arguments: label for beginning of deletion, length of deletion
594             # returns: string of deleted bases
595             # errorcode: -1
596             sub _delete {
597 0     0   0 my ($self,$label,$length)=@_;
598 0         0 my $strand=$self->strand();
599 0         0 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
600 0 0       0 unless ($endlabel > 0) { # label not found or error
601 0         0 $self->warn("No valid endlabel found for executing the deletion!");
602 0         0 return (-1);
603             }
604             # this is important in Transcript to fix exon structure
605 0         0 $self->_deletecheck($label,$endlabel);
606 0         0 my $deletedseq;
607 0 0       0 if ($strand == 1) {
608 0         0 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel);
609             } else {
610 0         0 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label);
611 0         0 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway
612             # in forward direction
613 0         0 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
614             }
615 0         0 return ($deletedseq);
616             }
617              
618             # empty function, overridden in Transcript, not useful here
619       0     sub _deletecheck {
620             }
621              
622             # arguments: label for beginning of mutation, newsequence, number of mutations
623             # returns: 1 all OK
624             # errorcode: -1
625             sub _mutate {
626 5     5   10 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
627 5         7 my ($i,$base,$nextlabel);
628 0         0 my @labels; # array of labels
629 5         17 my $strand=$self->strand();
630 5 50       15 if ($length == 1) { # special cases first
631 5         16 @labels=($label);
632             } else {
633 0         0 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
634 0 0       0 unless ($endlabel > 0) { # label not found or error
635 0         0 $self->warn("No valid endlabel found for executing the mutation!");
636 0         0 return (-1);
637             }
638 0 0       0 if ($length == 2) { # another special case
639 0         0 @labels=($label,$endlabel);
640             } else { # more than 3 bases changed
641             # this wouldn't work for Transcript
642             #my $labelsarrayref;
643             #if ($strand == 1) {
644             #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel);
645             #} else {
646             #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel);
647             #}
648             #@labels=@{$labelsarrayref};
649             #if ($length != scalar(@labels)) { # not enough labels returned
650             #$self->warn("Not enough valid labels found for executing the mutation!");
651             #return (-1);
652             #}
653              
654             # this should be more general
655 0         0 @labels=($label); # put the first one
656 0         0 while ($label != $endlabel) {
657 0         0 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label
658 0         0 push (@labels,$nextlabel);
659 0         0 $label=$nextlabel; # move on reference
660             }
661             }
662             }
663 5 50       15 if ($strand == -1) { # only for reverse strand
664 0         0 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
665             }
666 5         7 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr
667 5         6 $i = 0;
668 5         24 foreach $base (split(//,$newseq)) {
669 5         30 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
670 5         10 $i++;
671             }
672 5 50       16 if ($errorcheck != $length) {
673 0         0 $self->warn("Some error occurred while mutating!");
674 0         0 return (-1);
675             } else {
676 5         14 return (1);
677             }
678             }
679              
680             =head2 valid
681              
682             Title : valid
683             Usage : $boolean = $obj->valid($label)
684             Function: tests if a label exists inside the object
685             Returns : boolean
686             Args : label
687              
688             =cut
689              
690             # argument: label
691             # returns: 1 YES 0 NO
692             sub valid {
693 157     157 1 177 my ($self,$label)=@_;
694 157         133 my $checkme;
695 157         274 my @labels=$self->all_labels;
696 157         3257 foreach $checkme (@labels) {
697 108565 100       115770 if ($label == $checkme) {
698 68         2159 return (1); # found
699             }
700             }
701 89         833 return (0); # not found
702             }
703              
704              
705             =head2 start
706              
707             Title : start
708             Usage : $startlabel=$obj->start()
709             Function: returns the label of the first nucleotide of the object (exon, CDS)
710             Returns : label
711             Args : none
712              
713             =cut
714              
715             sub start {
716 3448     3448 1 2649 my ($self) = @_;
717 3448         12709 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons)
718             }
719              
720             =head2 end
721              
722             Title : end
723             Usage : $endlabel=$obj->end()
724             Function: returns the label of the last nucleotide of the object (exon, CDS)
725             Returns : label
726             Args : none
727              
728             =cut
729              
730             sub end {
731 3508     3508 1 2412 my ($self) = @_;
732 3508         5965 return $self->{'end'};
733             }
734              
735             =head2 strand
736              
737             Title : strand
738             Usage : $strand=$obj->strand()
739             $obj->strand($strand)
740             Function: gets or sets strand information, being 1 or -1 (forward or reverse)
741             Returns : -1 or 1
742             Args : none OR -1 or 1
743              
744             =cut
745              
746             sub strand {
747 1882     1882 1 1421 my ($self,$strand) = @_;
748 1882 50       2779 if ($strand) {
749 0 0 0     0 if (($strand != 1)&&($strand != -1)) {
750 0         0 $self->warn("strand information not changed because strand identifier not valid");
751             } else {
752 0         0 $self->{'strand'} = $strand;
753             }
754             }
755 1882         3393 return $self->{'strand'};
756             }
757              
758             =head2 alphabet
759              
760             Title : alphabet
761             Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
762             Function: Returns the type of sequence being one of
763             'dna', 'rna' or 'protein'. This is case sensitive.
764              
765             Returns : a string either 'dna','rna','protein'.
766             Args : none
767              
768             =cut
769              
770              
771             sub alphabet {
772 91     91 1 196 my %valid_type = map {$_, 1} qw( dna rna protein );
  273         666  
773 91         195 my ($self,$value) = @_;
774 91 100       213 if (defined $value) {
775 34 100       196 $value = 'dna' if $value =~ /dna/i;
776 34 100       95 $value = 'rna' if $value =~ /rna/i;
777 34 50       100 unless ( $valid_type{$value} ) {
778 0         0 $self->warn("Molecular type '$value' is not a valid type");
779             }
780 34         107 $self->{'alphabet'} = $value;
781             }
782 91         372 return $self->{'alphabet'};
783             }
784              
785             =head2 coordinate_start
786              
787             Title : coordinate_start
788             Usage : $coordstartlabel=$obj->coordinate_start()
789             : $coordstartlabel=$obj->coordinate_start($label)
790             Function: returns and optionally sets the first label of the coordinate
791             system used
792             For some objects only labels inside the object or in frame (for
793             Translation objects) will be allowed to get set as coordinate start
794              
795             Returns : label. It returns 0 if label not found.
796             Errorcode -1
797             Args : an optional reference $label that is position 1
798              
799             =cut
800              
801              
802             sub coordinate_start {
803 28     28 1 37 my ($self,$label) = @_;
804 28 100       63 if ($label) {
805 5 50       13 if ($self->valid($label)) {
806 5         13 $self->{'coordinate_start'} = $label;
807             } else {
808 0         0 $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
809             }
810             }
811 28         48 my $coord_start = $self->{'coordinate_start'};
812 28 100       70 if ($coord_start) {
813 9         15 return $coord_start;
814             } else {
815 19         67 return $self->start();
816             }
817             }
818              
819             =head2 label
820              
821             Title : label
822             Usage : $seq->label($position)
823             : $seq->label($position,$firstlabel)
824             Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
825             : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
826              
827             Function: returns the label of the nucleotide at $position from current
828             coordinate start
829             Returns : a label. It returns 0 if label not found.
830             Errorcode -1
831             Args : a position,
832             an optional reference $firstlabel that is to be used as position 1
833             an optional strand (1 or -1) argument
834             if strand argument is not given, it will default to the object
835             argument. This argument is useful when a call is issued from a child
836             of a parent object containing the subseq method
837              
838             =cut
839              
840              
841             sub label {
842 19     19 1 29 my ($self,$position,$firstlabel,$strand)=@_;
843 19         22 my $label;
844 19 100       37 unless (defined ($firstlabel)) {
845 4         12 $firstlabel=$self->coordinate_start;
846             }
847 19 50       42 unless ($position) { # if position = 0 complain ?
848 0         0 $self->warn("Position not given or position 0");
849 0         0 return (-1);
850             }
851 19 50       38 unless (defined ($strand)) { # if optional [strand] argument not given
852 19         34 $strand=$self->strand;
853             }
854 19 50       38 if ($strand == 1) {
855 19 100       40 if ($position > 0) {
856 9         42 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel)
857             } else { # if < 0
858 10         43 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel)
859             }
860             } else {
861 0 0       0 if ($position > 0) {
862 0         0 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel)
863             } else { # if < 0
864 0         0 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel)
865             }
866             }
867 19         54 return $label;
868             }
869              
870              
871             =head2 position
872              
873             Title : position
874             Usage : $seq->position($label)
875             : $seq->position($label,$firstlabel)
876             Function: returns the position of nucleotide at $label
877             Returns : the position of the label from current coordinate start
878             Errorcode 0
879             Args : a label pointing to a certain nucleotide (e.g. start of exon)
880             an optional "firstlabel" as reference to count from
881             an optional strand (1 or -1) argument
882             if strand argument is not given, it will default to the object
883             argument. This argument is useful when a call is issued from a child
884             of a parent object containing the subseq method
885              
886             =cut
887              
888              
889             sub position {
890 5     5 1 13 my ($self,$label,$firstlabel,$strand)=@_;
891 5 50       22 unless (defined ($strand)) { # if optional [strand] argument not given
892 5         17 $strand=$self->strand;
893             }
894 5 50       17 unless (defined ($firstlabel)) {
895 5         20 $firstlabel=$self->coordinate_start;
896             }
897 5 50       18 unless ($self->valid($label)) {
898 0         0 $self->warn("label not valid");
899 0         0 return (0);
900             }
901 5 50       25 if ($firstlabel == $label) {
902 0         0 return (1);
903             }
904 5         6 my ($coordpos,$position0,$position);
905 5         27 $position0=$self->{'seq'}->down_get_pos_of_label($label);
906 5         23 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel);
907 5         15 $position=$position0-$coordpos+1;
908 5 50       16 if ($position <= 0) {
909 0         0 $position--;
910             }
911 5 50       15 if ($strand == -1) {
912             #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position;
913 0         0 return (1-$position);
914             } else {
915             #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position;
916 5         22 return ($position);
917             }
918             }
919              
920             =head2 follows
921              
922             Title : follows
923             Usage : $seq->follows($firstlabel,$secondlabel)
924             : $seq->follows($firstlabel,$secondlabel,$strand)
925             Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand
926             i.e. it checks downstream for forward strand and
927             upstream for reverse strand
928             Returns : 1 or 0
929             Errorcode -1
930             Args : two labels
931             an optional strand (1 or -1) argument
932             if strand argument is not given, it will default to the object
933             argument. This argument is useful when a call is issued from a child
934             of a parent object containing the subseq method
935              
936             =cut
937              
938             #'
939             # wraparound to is_downstream and is_upstream that chooses the correct one
940             # depending on the strand
941             sub follows {
942 129     129 1 152 my ($self,$firstlabel,$secondlabel,$strand)=@_;
943 129 100       209 unless (defined ($strand)) { # if optional [strand] argument not given
944 37         86 $strand=$self->strand;
945             }
946 129 50       196 if ($strand == 1) {
947 129         291 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel));
948             } else {
949 0         0 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel));
950             }
951             }
952             #
953             #=head2 translate
954             #
955             # Title : translate
956             # Usage : $protein_seq = $obj->translate
957             # Function: Provides the translation of the DNA sequence
958             # using full IUPAC ambiguities in DNA/RNA and amino acid codes.
959             #
960             # The resulting translation is identical to EMBL/TREMBL database
961             # translations.
962             #
963             # Returns : a string
964             # Args : character for terminator (optional) defaults to '*'
965             # character for unknown amino acid (optional) defaults to 'X'
966             # frame (optional) valid values 0, 1, 3, defaults to 0
967             # codon table id (optional) defaults to 1
968             #
969             #=cut
970             #
971             #sub translate {
972             # my ($self) = shift;
973             # return ($self->translate_string($self->seq,@_));
974             #}
975             #
976             #=head2 translate_string
977             #
978             # Title : translate_string
979             # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
980             # Function: Like translate, but can be used to translate subsequences after
981             # having retrieved them as string.
982             # Args : 1st argument is a string. Optional following arguments: like in
983             # the translate method
984             #
985             #=cut
986             #
987             #
988             #sub translate_string {
989             # my($self) = shift;
990             # my($seq) = shift;
991             # my($stop, $unknown, $frame, $tableid) = @_;
992             # my($i, $len, $output) = (0,0,'');
993             # my($codon) = "";
994             # my $aa;
995             #
996             #
997             # ## User can pass in symbol for stop and unknown codons
998             # unless(defined($stop) and $stop ne '') { $stop = "*"; }
999             # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
1000             # unless(defined($frame) and $frame ne '') { $frame = 0; }
1001             #
1002             # ## the codon table ID
1003             # if ($self->translation_table) {
1004             # $tableid = $self->translation_table;
1005             # }
1006             # unless(defined($tableid) and $tableid ne '') { $tableid = 1; }
1007             #
1008             # ##Error if monomer is "Amino"
1009             # $self->warn("Can't translate an amino acid sequence.")
1010             # if (defined $self->alphabet && $self->alphabet eq 'protein');
1011             #
1012             # ##Error if frame is not 0, 1 or 2
1013             # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
1014             # unless ($frame == 0 or $frame == 1 or $frame == 2);
1015             #
1016             # #thows a warning if ID is invalid
1017             # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
1018             #
1019             # # deal with frame offset.
1020             # if( $frame ) {
1021             # $seq = substr ($seq,$frame);
1022             # }
1023             #
1024             # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
1025             # my $aa = $codonTable->translate($codon);
1026             # if ($aa eq '*') {
1027             # $output .= $stop;
1028             # }
1029             # elsif ($aa eq 'X') {
1030             # $output .= $unknown;
1031             # }
1032             # else {
1033             # $output .= $aa ;
1034             # }
1035             # }
1036             # #if( substr($output,-1,1) eq $stop ) {
1037             # # chop $output;
1038             # #}
1039             #
1040             # return ($output);
1041             #}
1042              
1043             =head2 gene
1044              
1045             Title : gene
1046             Usage : my $gene=$obj->gene;
1047             Function: Gets or sets the reference to the LiveSeq::Gene object.
1048             Objects that are features of a LiveSeq Gene will have this
1049             attribute set automatically.
1050              
1051             Returns : reference to an object of class Gene
1052             Note : if Gene object is not set, this method will return 0;
1053             Args : none or reference to object of class Bio::LiveSeq::Gene
1054              
1055             =cut
1056              
1057             sub gene {
1058 74     74 1 89 my ($self,$value) = @_;
1059 74 100       114 if (defined $value) {
1060 33         43 $self->{'gene'} = $value;
1061             }
1062 74 50       151 unless (exists $self->{'gene'}) {
1063 0         0 return (0);
1064             } else {
1065 74         221 return $self->{'gene'};
1066             }
1067             }
1068              
1069             =head2 obj_valid
1070              
1071             Title : obj_valid
1072             Usage : if ($obj->obj_valid) {do something;}
1073             Function: Checks if start and end labels are still valid for the ojbect,
1074             i.e. tests if the LiveSeq object is still valid
1075             Returns : boolean
1076             Args : none
1077              
1078             =cut
1079              
1080             sub obj_valid {
1081 1642     1642 1 1181 my $self=shift;
1082 1642 50 33     2066 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) {
1083 0         0 return (0);
1084             }
1085 1642         2603 return (1);
1086             }
1087              
1088             =head2 name
1089              
1090             Title : name
1091             Usage : $name = $obj->name;
1092             : $name = $obj->name("ABCD");
1093             Function: Returns or sets the name of the object.
1094             If there is no name, it will return "unknown";
1095             Returns : A string
1096             Args : None
1097              
1098             =cut
1099              
1100             sub name {
1101 0     0 1 0 my ($self,$value) = @_;
1102 0 0       0 if (defined $value) {
1103 0         0 $self->{'name'} = $value;
1104             }
1105 0 0       0 unless (exists $self->{'name'}) {
1106 0         0 return "unknown";
1107             } else {
1108 0         0 return $self->{'name'};
1109             }
1110             }
1111              
1112             =head2 desc
1113              
1114             Title : desc
1115             Usage : $desc = $obj->desc;
1116             : $desc = $obj->desc("ABCD");
1117             Function: Returns or sets the description of the object.
1118             If there is no description, it will return "unknown";
1119             Returns : A string
1120             Args : None
1121              
1122             =cut
1123              
1124             sub desc {
1125 26     26 1 886 my ($self,$value) = @_;
1126 26 100       50 if (defined $value) {
1127 23         37 $self->{'desc'} = $value;
1128             }
1129 26 50       43 unless (exists $self->{'desc'}) {
1130 0         0 return "unknown";
1131             } else {
1132 26         39 return $self->{'desc'};
1133             }
1134             }
1135              
1136             =head2 source
1137              
1138             Title : source
1139             Usage : $name = $obj->source;
1140             : $name = $obj->source("Homo sapiens");
1141             Function: Returns or sets the organism that is source of the object.
1142             If there is no source, it will return "unknown";
1143             Returns : A string
1144             Args : None
1145              
1146             =cut
1147              
1148             sub source {
1149 7     7 1 14 my ($self,$value) = @_;
1150 7 100       18 if (defined $value) {
1151 6         12 $self->{'source'} = $value;
1152             }
1153 7 50       17 unless (exists $self->{'source'}) {
1154 0         0 return "unknown";
1155             } else {
1156 7         18 return $self->{'source'};
1157             }
1158             }
1159              
1160             sub delete_Obj {
1161 78     78 0 49 my $self = shift;
1162 78         53 my @values= values %{$self};
  78         2851  
1163 78         56 my @keys= keys %{$self};
  78         3550  
1164              
1165 78         426 foreach my $key ( @keys ) {
1166 13042         9424 delete $self->{$key};
1167             }
1168 78         69 foreach my $value ( @values ) {
1169 13042 100       22565 if (index(ref($value),"LiveSeq") != -1) { # object case
    100          
    50          
1170 67         42 eval {
1171             # delete $self->{$value};
1172 67         93 $value->delete_Obj;
1173             };
1174             } elsif (index(ref($value),"ARRAY") != -1) { # array case
1175 12851         7458 my @array=@{$value};
  12851         18337  
1176 12851         7685 my $element;
1177 12851         9736 foreach $element (@array) {
1178 38559         23949 eval {
1179 38559         172455 $element->delete_Obj;
1180             };
1181             }
1182             } elsif (index(ref($value),"HASH") != -1) { # object case
1183 0         0 my %hash=%{$value};
  0         0  
1184 0         0 my $element;
1185 0         0 foreach $element (%hash) {
1186 0         0 eval {
1187 0         0 $element->delete_Obj;
1188             };
1189             }
1190             }
1191             }
1192 78         5515 return(1);
1193             }
1194              
1195             1;