line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# bioperl module for Bio::LiveSeq::SeqI |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Joseph Insana |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Joseph Insana |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# documentation needed |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This class implements BioPerl PrimarySeqI interface for Live Seq objects. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
One of the main difference in LiveSequence compared to traditional |
27
|
|
|
|
|
|
|
"string" sequences is that coordinate systems are flexible. Typically |
28
|
|
|
|
|
|
|
gene nucleotide numbering starts from 1 at the first character of the |
29
|
|
|
|
|
|
|
initiator codon (A in ATG). This means that negative positions are |
30
|
|
|
|
|
|
|
possible and common! |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Secondly, the sequence manipulation methods do not return a new |
33
|
|
|
|
|
|
|
sequence object but change the current object. The current status can |
34
|
|
|
|
|
|
|
be written out to BioPerl sequence objects. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 FEEDBACK |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head2 Mailing Lists |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
41
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
42
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
45
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 Support |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
I |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
54
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
55
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
56
|
|
|
|
|
|
|
with code and data examples if at all possible. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 Reporting Bugs |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
61
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
62
|
|
|
|
|
|
|
web: |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 AUTHOR - Joseph A.L. Insana |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Email: Insana@ebi.ac.uk, jinsana@gmx.net |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 APPENDIX |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
73
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Some note on the terminology/notation of method names: |
76
|
|
|
|
|
|
|
label: a unique pointer to a single nucleotide |
77
|
|
|
|
|
|
|
position: the position of a nucleotide according to a particular coordinate |
78
|
|
|
|
|
|
|
system (e.g. counting downstream from a particular label taken as |
79
|
|
|
|
|
|
|
number 1) |
80
|
|
|
|
|
|
|
base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g") |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
a base is the "value" that an "element" of a "chain" can assume |
83
|
|
|
|
|
|
|
(see documentation on the Chain datastructure if interested) |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
#' |
88
|
|
|
|
|
|
|
# Let the code begin... |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
package Bio::LiveSeq::SeqI; |
91
|
2
|
|
|
2
|
|
12
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
45
|
|
92
|
2
|
|
|
2
|
|
273
|
use Bio::Tools::CodonTable; # for the translate() function |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
43
|
|
93
|
|
|
|
|
|
|
|
94
|
2
|
|
|
2
|
|
10
|
use base qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
513
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 seq |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Title : seq |
99
|
|
|
|
|
|
|
Usage : $string = $obj->seq() |
100
|
|
|
|
|
|
|
Function: Returns the complete sequence of an object as a string of letters. |
101
|
|
|
|
|
|
|
Suggested cases are upper case for proteins and lower case for |
102
|
|
|
|
|
|
|
DNA sequence (IUPAC standard), |
103
|
|
|
|
|
|
|
Returns : a string |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=cut |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub seq { |
109
|
543
|
|
|
543
|
1
|
684
|
my $self = shift; |
110
|
543
|
|
|
|
|
1126
|
my ($start,$end) = ($self->start(),$self->end()); |
111
|
543
|
50
|
|
|
|
977
|
if ($self->strand() == 1) { |
112
|
543
|
|
|
|
|
1427
|
return $self->{'seq'}->down_chain2string($start,undef,$end); |
113
|
|
|
|
|
|
|
} else { # reverse strand |
114
|
0
|
|
|
|
|
0
|
my $str = $self->{'seq'}->up_chain2string($start,undef,$end); |
115
|
0
|
|
|
|
|
0
|
$str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; |
116
|
0
|
|
|
|
|
0
|
return $str; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 all_labels |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Title : all_labels |
123
|
|
|
|
|
|
|
Usage : @labels = $obj->all_labels() |
124
|
|
|
|
|
|
|
Function: all the labels of every nucleotide an object is composed of |
125
|
|
|
|
|
|
|
Returns : an array of labels |
126
|
|
|
|
|
|
|
Args : none |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub all_labels { |
131
|
822
|
|
|
822
|
1
|
976
|
my $self = shift; |
132
|
822
|
|
|
|
|
1592
|
my ($start,$end) = ($self->start(),$self->end()); |
133
|
822
|
|
|
|
|
1089
|
my $labels; |
134
|
822
|
50
|
|
|
|
1230
|
if ($self->strand() == 1) { |
135
|
822
|
|
|
|
|
1830
|
$labels=$self->{'seq'}->down_labels($start,$end); |
136
|
|
|
|
|
|
|
} else { |
137
|
0
|
|
|
|
|
0
|
$labels=$self->{'seq'}->up_labels($start,$end); |
138
|
|
|
|
|
|
|
} |
139
|
822
|
|
|
|
|
1499
|
return (@{$labels}); |
|
822
|
|
|
|
|
46736
|
|
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 labelsubseq |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Title : labelsubseq |
145
|
|
|
|
|
|
|
Usage : $dna->labelsubseq(); |
146
|
|
|
|
|
|
|
: $dna->labelsubseq($startlabel); |
147
|
|
|
|
|
|
|
: $dna->labelsubseq($startlabel,$length); |
148
|
|
|
|
|
|
|
: $dna->labelsubseq($startlabel,undef,$endlabel); |
149
|
|
|
|
|
|
|
e.g. : $dna->labelsubseq(4,undef,8); |
150
|
|
|
|
|
|
|
Function: prints the sequence as string. The difference between labelsubseq |
151
|
|
|
|
|
|
|
and normal subseq is that it uses /labels/ as arguments, instead |
152
|
|
|
|
|
|
|
than positions. This allows for faster and more efficient lookup, |
153
|
|
|
|
|
|
|
skipping the (usually) lengthy conversion of positions into labels. |
154
|
|
|
|
|
|
|
This is especially useful for manipulating with high power |
155
|
|
|
|
|
|
|
LiveSeq objects, knowing the labels and exploiting their |
156
|
|
|
|
|
|
|
usefulness. |
157
|
|
|
|
|
|
|
Returns : a string |
158
|
|
|
|
|
|
|
Errorcode -1 |
159
|
|
|
|
|
|
|
Args : without arguments it returns the entire sequence |
160
|
|
|
|
|
|
|
with a startlabel it returns the sequence downstream that label |
161
|
|
|
|
|
|
|
if a length is specified, it returns only that number of bases |
162
|
|
|
|
|
|
|
if an endlabel is specified, it overrides the length argument |
163
|
|
|
|
|
|
|
and prints instead up to that label (included) |
164
|
|
|
|
|
|
|
Defaults: $startlabel defaults to the beginning of the entire sequence |
165
|
|
|
|
|
|
|
$endlabel defaults to the end of the entire sequence |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, especially that they follow each other in the correct order!!!! |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub labelsubseq { |
172
|
58
|
|
|
58
|
1
|
161
|
my ($self,$start,$length,$end,$unsecuremode) = @_; |
173
|
58
|
100
|
66
|
|
|
229
|
if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested") |
174
|
|
|
|
|
|
|
{ # to skip security checks (faster) |
175
|
21
|
100
|
|
|
|
68
|
unless ($start) { |
176
|
5
|
|
|
|
|
17
|
$start=$self->start; |
177
|
|
|
|
|
|
|
} |
178
|
21
|
100
|
|
|
|
59
|
if ($end) { |
179
|
13
|
100
|
|
|
|
50
|
if ($end == $start) { |
180
|
2
|
|
|
|
|
4
|
$length=1; |
181
|
2
|
|
|
|
|
7
|
undef $end; |
182
|
|
|
|
|
|
|
} else { |
183
|
11
|
|
|
|
|
30
|
undef $length; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
} else { |
186
|
8
|
100
|
|
|
|
23
|
unless ($length) { |
187
|
7
|
|
|
|
|
19
|
$end=$self->end; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
} else { |
191
|
37
|
50
|
|
|
|
104
|
if ($start) { |
192
|
37
|
50
|
|
|
|
138
|
unless ($self->{'seq'}->valid($start)) { |
193
|
0
|
|
|
|
|
0
|
$self->warn("Start label not valid"); return (-1); |
|
0
|
|
|
|
|
0
|
|
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
37
|
100
|
|
|
|
92
|
if ($end) { |
197
|
31
|
50
|
|
|
|
105
|
if ($end == $start) { |
198
|
0
|
|
|
|
|
0
|
$length=1; |
199
|
0
|
|
|
|
|
0
|
undef $end; |
200
|
|
|
|
|
|
|
} else { |
201
|
31
|
50
|
|
|
|
87
|
unless ($self->{'seq'}->valid($end)) { |
202
|
0
|
|
|
|
|
0
|
$self->warn("End label not valid"); return (-1); |
|
0
|
|
|
|
|
0
|
|
203
|
|
|
|
|
|
|
} |
204
|
31
|
50
|
|
|
|
121
|
unless ($self->follows($start,$end) == 1) { |
205
|
0
|
|
|
|
|
0
|
$self->warn("End label does not follow Start label!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
206
|
|
|
|
|
|
|
} |
207
|
31
|
|
|
|
|
68
|
undef $length; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} |
211
|
58
|
50
|
|
|
|
171
|
if ($self->strand() == 1) { |
212
|
58
|
|
|
|
|
233
|
return $self->{'seq'}->down_chain2string($start,$length,$end); |
213
|
|
|
|
|
|
|
} else { # reverse strand |
214
|
0
|
|
|
|
|
0
|
my $str = $self->{'seq'}->up_chain2string($start,$length,$end); |
215
|
0
|
|
|
|
|
0
|
$str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; |
216
|
0
|
|
|
|
|
0
|
return $str; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head2 subseq |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Title : subseq |
223
|
|
|
|
|
|
|
Usage : $substring = $obj->subseq(10,40); |
224
|
|
|
|
|
|
|
: $substring = $obj->subseq(10,undef,4); |
225
|
|
|
|
|
|
|
Function: returns the subseq from start to end, where the first base |
226
|
|
|
|
|
|
|
is 1 and the number is inclusive, ie 1-2 are the first two |
227
|
|
|
|
|
|
|
bases of the sequence |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Start cannot be larger than end but can be equal. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
Allows for negative numbers $obj->subseq(-10,-1). By |
232
|
|
|
|
|
|
|
definition, there is no 0! |
233
|
|
|
|
|
|
|
-5 -1 1 5 |
234
|
|
|
|
|
|
|
gctagcgcccaac atggctcgctg |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
This allows one to retrieve sequences upstream from given position. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
The precedence is from left to right: if END is given LENGTH is |
239
|
|
|
|
|
|
|
ignored. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1 |
242
|
|
|
|
|
|
|
$obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
Returns : a string |
245
|
|
|
|
|
|
|
Errorcode: -1 |
246
|
|
|
|
|
|
|
Args : start, integer, defaults to start of the sequence |
247
|
|
|
|
|
|
|
end, integer, '' or undef, defaults to end of the sequence |
248
|
|
|
|
|
|
|
length, integer, '' or undef |
249
|
|
|
|
|
|
|
an optional strand (1 or -1) 4th argument |
250
|
|
|
|
|
|
|
if strand argument is not given, it will default to the object |
251
|
|
|
|
|
|
|
argument. This argument is useful when a call is issued from a child |
252
|
|
|
|
|
|
|
of a parent object containing the subseq method |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
#' |
257
|
|
|
|
|
|
|
# check the fact about reverse strand! |
258
|
|
|
|
|
|
|
# is it feasible? Is it correct? Should we do it? How about exons? Does it |
259
|
|
|
|
|
|
|
# work when you ask subseq of an exon? |
260
|
|
|
|
|
|
|
# eliminated now (Mon night) |
261
|
|
|
|
|
|
|
sub subseq { |
262
|
|
|
|
|
|
|
##my ($self,$pos1,$pos2,$length,$strand) = @_; |
263
|
0
|
|
|
0
|
1
|
0
|
my ($self,$pos1,$pos2,$length,$strand) = @_; |
264
|
|
|
|
|
|
|
##unless (defined ($strand)) { # if optional [strand] argument not given |
265
|
|
|
|
|
|
|
## $strand=$self->strand; |
266
|
|
|
|
|
|
|
##} |
267
|
0
|
|
|
|
|
0
|
$strand=$self->strand; |
268
|
0
|
|
|
|
|
0
|
my ($str,$startlabel,$endlabel); |
269
|
0
|
0
|
|
|
|
0
|
if (defined ($length)) { |
270
|
0
|
0
|
|
|
|
0
|
if ($length < 1) { |
271
|
0
|
|
|
|
|
0
|
$self->warn("No sense asking for a subseq of length < 1"); |
272
|
0
|
|
|
|
|
0
|
return (-1); |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
0
|
0
|
|
|
|
0
|
unless (defined ($pos1)) { |
276
|
|
|
|
|
|
|
#print "\n##### DEBUG pos1 not defined\n"; |
277
|
0
|
|
|
|
|
0
|
$startlabel=$self->start; |
278
|
|
|
|
|
|
|
} else { |
279
|
0
|
0
|
|
|
|
0
|
if ($pos1 == 0) { # if position = 0 complain |
280
|
0
|
|
|
|
|
0
|
$self->warn("Position cannot be 0!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
##if ($strand == 1) { # CHECK THIS! |
283
|
0
|
0
|
0
|
|
|
0
|
if ((defined ($pos2))&&($pos1>$pos2)) { |
284
|
0
|
|
|
|
|
0
|
$self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
##} else { # CHECK THIS! |
287
|
|
|
|
|
|
|
## if ((defined ($pos2))&&($pos1<$pos2)) { |
288
|
|
|
|
|
|
|
## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1); |
289
|
|
|
|
|
|
|
## } |
290
|
|
|
|
|
|
|
##} |
291
|
0
|
|
|
|
|
0
|
$startlabel=$self->label($pos1); |
292
|
0
|
0
|
|
|
|
0
|
if ($startlabel < 1) { |
293
|
0
|
|
|
|
|
0
|
$self->warn("position $pos1 not valid as start of subseq!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
0
|
0
|
|
|
|
0
|
unless (defined ($pos2)) { |
297
|
|
|
|
|
|
|
#print "\n##### pos2 not defined\n"; |
298
|
0
|
0
|
|
|
|
0
|
unless (defined ($length)) { |
299
|
0
|
|
|
|
|
0
|
$endlabel=$self->end; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} else { |
302
|
0
|
0
|
|
|
|
0
|
if ($pos2 == 0) { # if position = 0 complain |
303
|
0
|
|
|
|
|
0
|
$self->warn("Position cannot be 0!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
304
|
|
|
|
|
|
|
} |
305
|
0
|
|
|
|
|
0
|
undef $length; |
306
|
|
|
|
|
|
|
##if ($strand == 1) { # CHECK THIS! |
307
|
0
|
0
|
0
|
|
|
0
|
if ((defined ($pos1))&&($pos1>$pos2)) { |
308
|
0
|
|
|
|
|
0
|
$self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
##} else { # CHECK THIS! |
311
|
|
|
|
|
|
|
## if ((defined ($pos1))&&($pos1<$pos2)) { |
312
|
|
|
|
|
|
|
## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1); |
313
|
|
|
|
|
|
|
## } |
314
|
|
|
|
|
|
|
##} |
315
|
0
|
|
|
|
|
0
|
$endlabel=$self->label($pos2); |
316
|
0
|
0
|
|
|
|
0
|
if ($endlabel < 1) { |
317
|
0
|
|
|
|
|
0
|
$self->warn("position $pos2 not valid as end of subseq!"); return (-1); |
|
0
|
|
|
|
|
0
|
|
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
#print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n"; |
321
|
|
|
|
|
|
|
|
322
|
0
|
0
|
|
|
|
0
|
if ($strand == 1) { |
323
|
0
|
|
|
|
|
0
|
$str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel); |
324
|
|
|
|
|
|
|
} else { # reverse strand |
325
|
0
|
|
|
|
|
0
|
$str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel); |
326
|
0
|
|
|
|
|
0
|
$str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; |
327
|
|
|
|
|
|
|
} |
328
|
0
|
|
|
|
|
0
|
return $str; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 length |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Title : length |
334
|
|
|
|
|
|
|
Usage : $seq->length(); |
335
|
|
|
|
|
|
|
Function: returns the number of nucleotides (or the number of aminoacids) |
336
|
|
|
|
|
|
|
in the entire sequence |
337
|
|
|
|
|
|
|
Returns : an integer |
338
|
|
|
|
|
|
|
Errorcode -1 |
339
|
|
|
|
|
|
|
Args : none |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub length { |
344
|
216
|
|
|
216
|
1
|
265
|
my $self=shift; |
345
|
216
|
|
|
|
|
456
|
my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); |
346
|
216
|
50
|
|
|
|
457
|
if ($strand == 1) { |
347
|
216
|
|
|
|
|
552
|
return $self->{'seq'}->down_subchain_length($start,$end); |
348
|
|
|
|
|
|
|
} else { |
349
|
0
|
|
|
|
|
0
|
return $self->{'seq'}->up_subchain_length($start,$end); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 display_id |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Title : display_id |
356
|
|
|
|
|
|
|
Usage : $id_string = $obj->display_id(); |
357
|
|
|
|
|
|
|
Function: returns the display id, alias the common name of the object |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
The semantics of this is that it is the most likely string |
360
|
|
|
|
|
|
|
to be used as an identifier of the sequence, and likely to |
361
|
|
|
|
|
|
|
have "human" readability. The id is equivalent to the ID |
362
|
|
|
|
|
|
|
field of the GenBank/EMBL databanks and the id field of the |
363
|
|
|
|
|
|
|
Swissprot/sptrembl database. In fasta format, the >(\S+) is |
364
|
|
|
|
|
|
|
presumed to be the id, though some people overload the id |
365
|
|
|
|
|
|
|
to embed other information. |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
See also: accession_number |
368
|
|
|
|
|
|
|
Returns : a string |
369
|
|
|
|
|
|
|
Args : none |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=cut |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
sub display_id { |
374
|
7
|
|
|
7
|
1
|
20
|
my ($self,$value) = @_; |
375
|
7
|
100
|
|
|
|
18
|
if(defined $value) { |
376
|
6
|
|
|
|
|
15
|
$self->{'display_id'} = $value; |
377
|
|
|
|
|
|
|
} |
378
|
7
|
|
|
|
|
11
|
return $self->{'display_id'}; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=head2 accession_number |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Title : accession_number |
385
|
|
|
|
|
|
|
Usage : $unique_biological_key = $obj->accession_number; |
386
|
|
|
|
|
|
|
Function: Returns the unique biological id for a sequence, commonly |
387
|
|
|
|
|
|
|
called the accession_number. |
388
|
|
|
|
|
|
|
Notice that primary_id() provides the unique id for the |
389
|
|
|
|
|
|
|
implementation, allowing multiple objects to have the same accession |
390
|
|
|
|
|
|
|
number in a particular implementation. |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
For objects with no accession_number this method returns "unknown". |
393
|
|
|
|
|
|
|
Returns : a string |
394
|
|
|
|
|
|
|
Args : none |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub accession_number { |
399
|
12
|
|
|
12
|
1
|
35
|
my ($self,$value) = @_; |
400
|
12
|
100
|
|
|
|
46
|
if (defined $value) { |
401
|
6
|
|
|
|
|
14
|
$self->{'accession_number'} = $value; |
402
|
|
|
|
|
|
|
} |
403
|
12
|
50
|
|
|
|
37
|
unless (exists $self->{'accession_number'}) { |
404
|
0
|
|
|
|
|
0
|
return "unknown"; |
405
|
|
|
|
|
|
|
} else { |
406
|
12
|
|
|
|
|
54
|
return $self->{'accession_number'}; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head2 primary_id |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
Title : primary_id |
413
|
|
|
|
|
|
|
Usage : $unique_implementation_key = $obj->primary_id; |
414
|
|
|
|
|
|
|
Function: Returns the unique id for this object in this |
415
|
|
|
|
|
|
|
implementation. This allows implementations to manage their own |
416
|
|
|
|
|
|
|
object ids in a way the implementation can control. Clients can |
417
|
|
|
|
|
|
|
expect one id to map to one object. |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
For sequences with no primary_id, this method returns |
420
|
|
|
|
|
|
|
a stringified memory location. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Returns : A string |
423
|
|
|
|
|
|
|
Args : None |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=cut |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
sub primary_id { |
429
|
0
|
|
|
0
|
1
|
0
|
my ($self,$value) = @_; |
430
|
0
|
0
|
|
|
|
0
|
if(defined $value) { |
431
|
0
|
|
|
|
|
0
|
$self->{'primary_id'} = $value; |
432
|
|
|
|
|
|
|
} |
433
|
0
|
0
|
|
|
|
0
|
unless (exists $self->{'primary_id'}) { |
434
|
0
|
|
|
|
|
0
|
return "$self"; |
435
|
|
|
|
|
|
|
} else { |
436
|
0
|
|
|
|
|
0
|
return $self->{'primary_id'}; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 change |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Title : change |
443
|
|
|
|
|
|
|
Usage : $substring = $obj->change('AA', 10); |
444
|
|
|
|
|
|
|
Function: changes, modifies, mutates the LiveSequence |
445
|
|
|
|
|
|
|
Examples: |
446
|
|
|
|
|
|
|
$obj->change('', 10); delete nucleotide #10 |
447
|
|
|
|
|
|
|
$obj->change('', 10, 2); delete two nucleotides starting from #10 |
448
|
|
|
|
|
|
|
$obj->change('G', 10); change nuc #10 to 'G' |
449
|
|
|
|
|
|
|
$obj->change('GA', 10, 4); replace #10 and 3 following with 'GA' |
450
|
|
|
|
|
|
|
$obj->change('GA', 10, 2)); is same as $obj->change('GA', 10); |
451
|
|
|
|
|
|
|
$obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10 |
452
|
|
|
|
|
|
|
$obj->change('GA', 10, 1); GA inserted before #10, #10 deleted |
453
|
|
|
|
|
|
|
$obj->change('GATC', 10, 2); GATC inserted before #10, #10 deleted |
454
|
|
|
|
|
|
|
$obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Returns : a string of deleted bases (if any) or 1 (everything OK) |
458
|
|
|
|
|
|
|
Errorcode: -1 |
459
|
|
|
|
|
|
|
Args : seq, string, or '' ('' = undef = 0 = deletion) |
460
|
|
|
|
|
|
|
start, integer |
461
|
|
|
|
|
|
|
length, integer (optional) |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=cut |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub change { |
466
|
0
|
|
|
0
|
1
|
0
|
&positionchange; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=head2 positionchange |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Title : positionchange |
472
|
|
|
|
|
|
|
Function: Exactly like change. I.e. change() defaults to positionchange() |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=cut |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub positionchange { |
477
|
0
|
|
|
0
|
1
|
0
|
my ($self,$newseq,$position,$length)=@_; |
478
|
0
|
0
|
|
|
|
0
|
unless ($position) { |
479
|
0
|
|
|
|
|
0
|
$self->warn("Position not given or position 0"); |
480
|
0
|
|
|
|
|
0
|
return (-1); |
481
|
|
|
|
|
|
|
} |
482
|
0
|
|
|
|
|
0
|
my $label=$self->label($position); |
483
|
0
|
0
|
|
|
|
0
|
unless ($label > 0) { # label not found or error |
484
|
0
|
|
|
|
|
0
|
$self->warn("No valid label found at that position!"); |
485
|
0
|
|
|
|
|
0
|
return (-1); |
486
|
|
|
|
|
|
|
} |
487
|
0
|
|
|
|
|
0
|
return ($self->labelchange($newseq,$label,$length)); |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 labelchange |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Title : labelchange |
493
|
|
|
|
|
|
|
Function: Exactly like change but uses a /label/ instead than a position |
494
|
|
|
|
|
|
|
as second argument. This allows for multiple changes in a LiveSeq |
495
|
|
|
|
|
|
|
without the burden of recomputing positions. I.e. for a multiple |
496
|
|
|
|
|
|
|
change in two different points of the LiveSeq, the approach would |
497
|
|
|
|
|
|
|
be the following: fetch the correct labels out of the two different |
498
|
|
|
|
|
|
|
positions (method: label($position)) and then use the labelchange() |
499
|
|
|
|
|
|
|
method to modify the sequence using those labels instead than |
500
|
|
|
|
|
|
|
relying on the positions (that would have modified after the |
501
|
|
|
|
|
|
|
first change). |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=cut |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
sub labelchange { |
506
|
5
|
|
|
5
|
1
|
17
|
my ($self,$newseq,$label,$length)=@_; |
507
|
5
|
50
|
|
|
|
28
|
unless ($self->valid($label)) { |
508
|
0
|
0
|
|
|
|
0
|
if ($self->{'seq'}->valid($label)) { |
509
|
|
|
|
|
|
|
#$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that"); |
510
|
0
|
|
|
|
|
0
|
shift @_; |
511
|
0
|
|
|
|
|
0
|
return($self->{'seq'}->labelchange(@_)); |
512
|
|
|
|
|
|
|
} else { |
513
|
0
|
|
|
|
|
0
|
$self->warn("Label \'$label\' not valid for executing a LiveSeq change"); |
514
|
0
|
|
|
|
|
0
|
return (-1); |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
} |
517
|
5
|
50
|
|
|
|
17
|
unless ($newseq) { # it means this is a simple deletion |
518
|
0
|
0
|
|
|
|
0
|
if (defined($length)) { |
519
|
0
|
0
|
|
|
|
0
|
unless ($length >= 0) { |
520
|
0
|
|
|
|
|
0
|
$self->warn("No sense having length < 0 in a deletion"); |
521
|
0
|
|
|
|
|
0
|
return (-1); |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
} else { |
524
|
0
|
|
|
|
|
0
|
$self->warn("Length not defined for deletion!"); |
525
|
0
|
|
|
|
|
0
|
return (-1); |
526
|
|
|
|
|
|
|
} |
527
|
0
|
|
|
|
|
0
|
return $self->_delete($label,$length); |
528
|
|
|
|
|
|
|
} |
529
|
5
|
|
|
|
|
12
|
my $newseqlength=CORE::length($newseq); |
530
|
5
|
50
|
|
|
|
14
|
if (defined($length)) { |
531
|
5
|
50
|
|
|
|
16
|
unless ($length >= 0) { |
532
|
0
|
|
|
|
|
0
|
$self->warn("No sense having length < 0 in a change()"); |
533
|
0
|
|
|
|
|
0
|
return (-1); |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
} else { |
536
|
0
|
|
|
|
|
0
|
$length=$newseqlength; # defaults to pointmutation(s) |
537
|
|
|
|
|
|
|
} |
538
|
5
|
50
|
|
|
|
19
|
if ($length == 0) { # it means this is a simple insertion, length def&==0 |
539
|
0
|
|
|
|
|
0
|
my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq); |
540
|
0
|
0
|
|
|
|
0
|
if ($insertbegin == -1) { |
541
|
0
|
|
|
|
|
0
|
return (-1); |
542
|
|
|
|
|
|
|
} else { |
543
|
0
|
|
|
|
|
0
|
return (1); |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
} |
546
|
5
|
50
|
|
|
|
15
|
if ($newseqlength == $length) { # it means this is simple pointmutation(s) |
547
|
5
|
|
|
|
|
38
|
return $self->_mutate($label,$newseq,$length); |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
# if we arrived here then change is complex mixture |
550
|
0
|
|
|
|
|
0
|
my $strand=$self->strand(); |
551
|
0
|
|
|
|
|
0
|
my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label |
552
|
0
|
0
|
|
|
|
0
|
unless ($afterendlabel > 0) { # label not found or error |
553
|
0
|
|
|
|
|
0
|
$self->warn("No valid afterendlabel found for executing the complex mutation!"); |
554
|
0
|
|
|
|
|
0
|
return (-1); |
555
|
|
|
|
|
|
|
} |
556
|
0
|
|
|
|
|
0
|
my $deleted=$self->_delete($label,$length); # first delete length nucs |
557
|
0
|
0
|
|
|
|
0
|
if ($deleted eq -1) { # if errors |
558
|
0
|
|
|
|
|
0
|
return (-1); |
559
|
|
|
|
|
|
|
} else { # then insert the newsequence |
560
|
0
|
|
|
|
|
0
|
my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq); |
561
|
0
|
0
|
|
|
|
0
|
if ($insertbegin == -1) { |
562
|
0
|
|
|
|
|
0
|
return (-1); |
563
|
|
|
|
|
|
|
} else { |
564
|
0
|
|
|
|
|
0
|
return (1); |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# internal methods for change() |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# arguments: label for beginning of deletion, new sequence to insert |
572
|
|
|
|
|
|
|
# returns: labels of beginning and end of the inserted sequence |
573
|
|
|
|
|
|
|
# errorcode: -1 |
574
|
|
|
|
|
|
|
sub _praeinsert { |
575
|
0
|
|
|
0
|
|
0
|
my ($self,$label,$newseq)=@_; |
576
|
0
|
|
|
|
|
0
|
my ($insertbegin,$insertend); |
577
|
0
|
|
|
|
|
0
|
my $strand=$self->strand(); |
578
|
0
|
0
|
|
|
|
0
|
if ($strand == 1) { |
579
|
0
|
|
|
|
|
0
|
($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label)); |
580
|
|
|
|
|
|
|
} else { # since it's reverse strand and we insert in forward direction.... |
581
|
0
|
|
|
|
|
0
|
$newseq=reverse($newseq); |
582
|
0
|
|
|
|
|
0
|
$newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases |
583
|
0
|
|
|
|
|
0
|
($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label)); |
584
|
|
|
|
|
|
|
} |
585
|
0
|
0
|
0
|
|
|
0
|
if (($insertbegin==0)||($insertend==0)) { |
586
|
0
|
|
|
|
|
0
|
$self->warn("Some error occurred while inserting!"); |
587
|
0
|
|
|
|
|
0
|
return (-1); |
588
|
|
|
|
|
|
|
} else { |
589
|
0
|
|
|
|
|
0
|
return ($insertbegin,$insertend); |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# arguments: label for beginning of deletion, length of deletion |
594
|
|
|
|
|
|
|
# returns: string of deleted bases |
595
|
|
|
|
|
|
|
# errorcode: -1 |
596
|
|
|
|
|
|
|
sub _delete { |
597
|
0
|
|
|
0
|
|
0
|
my ($self,$label,$length)=@_; |
598
|
0
|
|
|
|
|
0
|
my $strand=$self->strand(); |
599
|
0
|
|
|
|
|
0
|
my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label |
600
|
0
|
0
|
|
|
|
0
|
unless ($endlabel > 0) { # label not found or error |
601
|
0
|
|
|
|
|
0
|
$self->warn("No valid endlabel found for executing the deletion!"); |
602
|
0
|
|
|
|
|
0
|
return (-1); |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
# this is important in Transcript to fix exon structure |
605
|
0
|
|
|
|
|
0
|
$self->_deletecheck($label,$endlabel); |
606
|
0
|
|
|
|
|
0
|
my $deletedseq; |
607
|
0
|
0
|
|
|
|
0
|
if ($strand == 1) { |
608
|
0
|
|
|
|
|
0
|
$deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel); |
609
|
|
|
|
|
|
|
} else { |
610
|
0
|
|
|
|
|
0
|
$deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label); |
611
|
0
|
|
|
|
|
0
|
$deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway |
612
|
|
|
|
|
|
|
# in forward direction |
613
|
0
|
|
|
|
|
0
|
$deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases |
614
|
|
|
|
|
|
|
} |
615
|
0
|
|
|
|
|
0
|
return ($deletedseq); |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# empty function, overridden in Transcript, not useful here |
619
|
|
|
|
0
|
|
|
sub _deletecheck { |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# arguments: label for beginning of mutation, newsequence, number of mutations |
623
|
|
|
|
|
|
|
# returns: 1 all OK |
624
|
|
|
|
|
|
|
# errorcode: -1 |
625
|
|
|
|
|
|
|
sub _mutate { |
626
|
5
|
|
|
5
|
|
14
|
my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq) |
627
|
5
|
|
|
|
|
18
|
my ($i,$base,$nextlabel); |
628
|
5
|
|
|
|
|
0
|
my @labels; # array of labels |
629
|
5
|
|
|
|
|
17
|
my $strand=$self->strand(); |
630
|
5
|
50
|
|
|
|
12
|
if ($length == 1) { # special cases first |
631
|
5
|
|
|
|
|
14
|
@labels=($label); |
632
|
|
|
|
|
|
|
} else { |
633
|
0
|
|
|
|
|
0
|
my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label |
634
|
0
|
0
|
|
|
|
0
|
unless ($endlabel > 0) { # label not found or error |
635
|
0
|
|
|
|
|
0
|
$self->warn("No valid endlabel found for executing the mutation!"); |
636
|
0
|
|
|
|
|
0
|
return (-1); |
637
|
|
|
|
|
|
|
} |
638
|
0
|
0
|
|
|
|
0
|
if ($length == 2) { # another special case |
639
|
0
|
|
|
|
|
0
|
@labels=($label,$endlabel); |
640
|
|
|
|
|
|
|
} else { # more than 3 bases changed |
641
|
|
|
|
|
|
|
# this wouldn't work for Transcript |
642
|
|
|
|
|
|
|
#my $labelsarrayref; |
643
|
|
|
|
|
|
|
#if ($strand == 1) { |
644
|
|
|
|
|
|
|
#$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel); |
645
|
|
|
|
|
|
|
#} else { |
646
|
|
|
|
|
|
|
#$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel); |
647
|
|
|
|
|
|
|
#} |
648
|
|
|
|
|
|
|
#@labels=@{$labelsarrayref}; |
649
|
|
|
|
|
|
|
#if ($length != scalar(@labels)) { # not enough labels returned |
650
|
|
|
|
|
|
|
#$self->warn("Not enough valid labels found for executing the mutation!"); |
651
|
|
|
|
|
|
|
#return (-1); |
652
|
|
|
|
|
|
|
#} |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# this should be more general |
655
|
0
|
|
|
|
|
0
|
@labels=($label); # put the first one |
656
|
0
|
|
|
|
|
0
|
while ($label != $endlabel) { |
657
|
0
|
|
|
|
|
0
|
$nextlabel=$self->label(2,$label,$strand); # retrieve the next label |
658
|
0
|
|
|
|
|
0
|
push (@labels,$nextlabel); |
659
|
0
|
|
|
|
|
0
|
$label=$nextlabel; # move on reference |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
} |
663
|
5
|
50
|
|
|
|
15
|
if ($strand == -1) { # only for reverse strand |
664
|
0
|
|
|
|
|
0
|
$newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases |
665
|
|
|
|
|
|
|
} |
666
|
5
|
|
|
|
|
7
|
my $errorcheck; # if not equal to $length after summing for all changes, error did occurr |
667
|
5
|
|
|
|
|
10
|
$i = 0; |
668
|
5
|
|
|
|
|
20
|
foreach $base (split(//,$newseq)) { |
669
|
5
|
|
|
|
|
35
|
$errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]); |
670
|
5
|
|
|
|
|
12
|
$i++; |
671
|
|
|
|
|
|
|
} |
672
|
5
|
50
|
|
|
|
15
|
if ($errorcheck != $length) { |
673
|
0
|
|
|
|
|
0
|
$self->warn("Some error occurred while mutating!"); |
674
|
0
|
|
|
|
|
0
|
return (-1); |
675
|
|
|
|
|
|
|
} else { |
676
|
5
|
|
|
|
|
19
|
return (1); |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=head2 valid |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
Title : valid |
683
|
|
|
|
|
|
|
Usage : $boolean = $obj->valid($label) |
684
|
|
|
|
|
|
|
Function: tests if a label exists inside the object |
685
|
|
|
|
|
|
|
Returns : boolean |
686
|
|
|
|
|
|
|
Args : label |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=cut |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
# argument: label |
691
|
|
|
|
|
|
|
# returns: 1 YES 0 NO |
692
|
|
|
|
|
|
|
sub valid { |
693
|
157
|
|
|
157
|
1
|
325
|
my ($self,$label)=@_; |
694
|
157
|
|
|
|
|
175
|
my $checkme; |
695
|
157
|
|
|
|
|
333
|
my @labels=$self->all_labels; |
696
|
157
|
|
|
|
|
694
|
foreach $checkme (@labels) { |
697
|
108565
|
100
|
|
|
|
118013
|
if ($label == $checkme) { |
698
|
68
|
|
|
|
|
2185
|
return (1); # found |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
} |
701
|
89
|
|
|
|
|
855
|
return (0); # not found |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=head2 start |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
Title : start |
708
|
|
|
|
|
|
|
Usage : $startlabel=$obj->start() |
709
|
|
|
|
|
|
|
Function: returns the label of the first nucleotide of the object (exon, CDS) |
710
|
|
|
|
|
|
|
Returns : label |
711
|
|
|
|
|
|
|
Args : none |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=cut |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
sub start { |
716
|
3448
|
|
|
3448
|
1
|
3900
|
my ($self) = @_; |
717
|
3448
|
|
|
|
|
15068
|
return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons) |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=head2 end |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
Title : end |
723
|
|
|
|
|
|
|
Usage : $endlabel=$obj->end() |
724
|
|
|
|
|
|
|
Function: returns the label of the last nucleotide of the object (exon, CDS) |
725
|
|
|
|
|
|
|
Returns : label |
726
|
|
|
|
|
|
|
Args : none |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=cut |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
sub end { |
731
|
3508
|
|
|
3508
|
1
|
3906
|
my ($self) = @_; |
732
|
3508
|
|
|
|
|
7501
|
return $self->{'end'}; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
=head2 strand |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Title : strand |
738
|
|
|
|
|
|
|
Usage : $strand=$obj->strand() |
739
|
|
|
|
|
|
|
$obj->strand($strand) |
740
|
|
|
|
|
|
|
Function: gets or sets strand information, being 1 or -1 (forward or reverse) |
741
|
|
|
|
|
|
|
Returns : -1 or 1 |
742
|
|
|
|
|
|
|
Args : none OR -1 or 1 |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=cut |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
sub strand { |
747
|
1882
|
|
|
1882
|
1
|
2472
|
my ($self,$strand) = @_; |
748
|
1882
|
50
|
|
|
|
3002
|
if ($strand) { |
749
|
0
|
0
|
0
|
|
|
0
|
if (($strand != 1)&&($strand != -1)) { |
750
|
0
|
|
|
|
|
0
|
$self->warn("strand information not changed because strand identifier not valid"); |
751
|
|
|
|
|
|
|
} else { |
752
|
0
|
|
|
|
|
0
|
$self->{'strand'} = $strand; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} |
755
|
1882
|
|
|
|
|
3986
|
return $self->{'strand'}; |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=head2 alphabet |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Title : alphabet |
761
|
|
|
|
|
|
|
Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } |
762
|
|
|
|
|
|
|
Function: Returns the type of sequence being one of |
763
|
|
|
|
|
|
|
'dna', 'rna' or 'protein'. This is case sensitive. |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
Returns : a string either 'dna','rna','protein'. |
766
|
|
|
|
|
|
|
Args : none |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=cut |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
sub alphabet { |
772
|
91
|
|
|
91
|
1
|
258
|
my %valid_type = map {$_, 1} qw( dna rna protein ); |
|
273
|
|
|
|
|
772
|
|
773
|
91
|
|
|
|
|
209
|
my ($self,$value) = @_; |
774
|
91
|
100
|
|
|
|
330
|
if (defined $value) { |
775
|
34
|
100
|
|
|
|
236
|
$value = 'dna' if $value =~ /dna/i; |
776
|
34
|
100
|
|
|
|
109
|
$value = 'rna' if $value =~ /rna/i; |
777
|
34
|
50
|
|
|
|
126
|
unless ( $valid_type{$value} ) { |
778
|
0
|
|
|
|
|
0
|
$self->warn("Molecular type '$value' is not a valid type"); |
779
|
|
|
|
|
|
|
} |
780
|
34
|
|
|
|
|
94
|
$self->{'alphabet'} = $value; |
781
|
|
|
|
|
|
|
} |
782
|
91
|
|
|
|
|
430
|
return $self->{'alphabet'}; |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=head2 coordinate_start |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
Title : coordinate_start |
788
|
|
|
|
|
|
|
Usage : $coordstartlabel=$obj->coordinate_start() |
789
|
|
|
|
|
|
|
: $coordstartlabel=$obj->coordinate_start($label) |
790
|
|
|
|
|
|
|
Function: returns and optionally sets the first label of the coordinate |
791
|
|
|
|
|
|
|
system used |
792
|
|
|
|
|
|
|
For some objects only labels inside the object or in frame (for |
793
|
|
|
|
|
|
|
Translation objects) will be allowed to get set as coordinate start |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
Returns : label. It returns 0 if label not found. |
796
|
|
|
|
|
|
|
Errorcode -1 |
797
|
|
|
|
|
|
|
Args : an optional reference $label that is position 1 |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=cut |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
sub coordinate_start { |
803
|
28
|
|
|
28
|
1
|
64
|
my ($self,$label) = @_; |
804
|
28
|
100
|
|
|
|
64
|
if ($label) { |
805
|
5
|
50
|
|
|
|
14
|
if ($self->valid($label)) { |
806
|
5
|
|
|
|
|
16
|
$self->{'coordinate_start'} = $label; |
807
|
|
|
|
|
|
|
} else { |
808
|
0
|
|
|
|
|
0
|
$self->warn("The label you are trying to set as coordinate_start is not valid for this object"); |
809
|
|
|
|
|
|
|
} |
810
|
|
|
|
|
|
|
} |
811
|
28
|
|
|
|
|
63
|
my $coord_start = $self->{'coordinate_start'}; |
812
|
28
|
100
|
|
|
|
63
|
if ($coord_start) { |
813
|
9
|
|
|
|
|
20
|
return $coord_start; |
814
|
|
|
|
|
|
|
} else { |
815
|
19
|
|
|
|
|
69
|
return $self->start(); |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
=head2 label |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
Title : label |
822
|
|
|
|
|
|
|
Usage : $seq->label($position) |
823
|
|
|
|
|
|
|
: $seq->label($position,$firstlabel) |
824
|
|
|
|
|
|
|
Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label |
825
|
|
|
|
|
|
|
: $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Function: returns the label of the nucleotide at $position from current |
828
|
|
|
|
|
|
|
coordinate start |
829
|
|
|
|
|
|
|
Returns : a label. It returns 0 if label not found. |
830
|
|
|
|
|
|
|
Errorcode -1 |
831
|
|
|
|
|
|
|
Args : a position, |
832
|
|
|
|
|
|
|
an optional reference $firstlabel that is to be used as position 1 |
833
|
|
|
|
|
|
|
an optional strand (1 or -1) argument |
834
|
|
|
|
|
|
|
if strand argument is not given, it will default to the object |
835
|
|
|
|
|
|
|
argument. This argument is useful when a call is issued from a child |
836
|
|
|
|
|
|
|
of a parent object containing the subseq method |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=cut |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
sub label { |
842
|
19
|
|
|
19
|
1
|
37
|
my ($self,$position,$firstlabel,$strand)=@_; |
843
|
19
|
|
|
|
|
25
|
my $label; |
844
|
19
|
100
|
|
|
|
39
|
unless (defined ($firstlabel)) { |
845
|
4
|
|
|
|
|
13
|
$firstlabel=$self->coordinate_start; |
846
|
|
|
|
|
|
|
} |
847
|
19
|
50
|
|
|
|
40
|
unless ($position) { # if position = 0 complain ? |
848
|
0
|
|
|
|
|
0
|
$self->warn("Position not given or position 0"); |
849
|
0
|
|
|
|
|
0
|
return (-1); |
850
|
|
|
|
|
|
|
} |
851
|
19
|
50
|
|
|
|
49
|
unless (defined ($strand)) { # if optional [strand] argument not given |
852
|
19
|
|
|
|
|
38
|
$strand=$self->strand; |
853
|
|
|
|
|
|
|
} |
854
|
19
|
50
|
|
|
|
51
|
if ($strand == 1) { |
855
|
19
|
100
|
|
|
|
37
|
if ($position > 0) { |
856
|
9
|
|
|
|
|
46
|
$label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel) |
857
|
|
|
|
|
|
|
} else { # if < 0 |
858
|
10
|
|
|
|
|
46
|
$label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel) |
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
} else { |
861
|
0
|
0
|
|
|
|
0
|
if ($position > 0) { |
862
|
0
|
|
|
|
|
0
|
$label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel) |
863
|
|
|
|
|
|
|
} else { # if < 0 |
864
|
0
|
|
|
|
|
0
|
$label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel) |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
} |
867
|
19
|
|
|
|
|
63
|
return $label; |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=head2 position |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
Title : position |
874
|
|
|
|
|
|
|
Usage : $seq->position($label) |
875
|
|
|
|
|
|
|
: $seq->position($label,$firstlabel) |
876
|
|
|
|
|
|
|
Function: returns the position of nucleotide at $label |
877
|
|
|
|
|
|
|
Returns : the position of the label from current coordinate start |
878
|
|
|
|
|
|
|
Errorcode 0 |
879
|
|
|
|
|
|
|
Args : a label pointing to a certain nucleotide (e.g. start of exon) |
880
|
|
|
|
|
|
|
an optional "firstlabel" as reference to count from |
881
|
|
|
|
|
|
|
an optional strand (1 or -1) argument |
882
|
|
|
|
|
|
|
if strand argument is not given, it will default to the object |
883
|
|
|
|
|
|
|
argument. This argument is useful when a call is issued from a child |
884
|
|
|
|
|
|
|
of a parent object containing the subseq method |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=cut |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
sub position { |
890
|
5
|
|
|
5
|
1
|
14
|
my ($self,$label,$firstlabel,$strand)=@_; |
891
|
5
|
50
|
|
|
|
17
|
unless (defined ($strand)) { # if optional [strand] argument not given |
892
|
5
|
|
|
|
|
21
|
$strand=$self->strand; |
893
|
|
|
|
|
|
|
} |
894
|
5
|
50
|
|
|
|
15
|
unless (defined ($firstlabel)) { |
895
|
5
|
|
|
|
|
20
|
$firstlabel=$self->coordinate_start; |
896
|
|
|
|
|
|
|
} |
897
|
5
|
50
|
|
|
|
22
|
unless ($self->valid($label)) { |
898
|
0
|
|
|
|
|
0
|
$self->warn("label not valid"); |
899
|
0
|
|
|
|
|
0
|
return (0); |
900
|
|
|
|
|
|
|
} |
901
|
5
|
50
|
|
|
|
32
|
if ($firstlabel == $label) { |
902
|
0
|
|
|
|
|
0
|
return (1); |
903
|
|
|
|
|
|
|
} |
904
|
5
|
|
|
|
|
12
|
my ($coordpos,$position0,$position); |
905
|
5
|
|
|
|
|
32
|
$position0=$self->{'seq'}->down_get_pos_of_label($label); |
906
|
5
|
|
|
|
|
23
|
$coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel); |
907
|
5
|
|
|
|
|
15
|
$position=$position0-$coordpos+1; |
908
|
5
|
50
|
|
|
|
17
|
if ($position <= 0) { |
909
|
0
|
|
|
|
|
0
|
$position--; |
910
|
|
|
|
|
|
|
} |
911
|
5
|
50
|
|
|
|
15
|
if ($strand == -1) { |
912
|
|
|
|
|
|
|
#print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position; |
913
|
0
|
|
|
|
|
0
|
return (1-$position); |
914
|
|
|
|
|
|
|
} else { |
915
|
|
|
|
|
|
|
#print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position; |
916
|
5
|
|
|
|
|
24
|
return ($position); |
917
|
|
|
|
|
|
|
} |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head2 follows |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
Title : follows |
923
|
|
|
|
|
|
|
Usage : $seq->follows($firstlabel,$secondlabel) |
924
|
|
|
|
|
|
|
: $seq->follows($firstlabel,$secondlabel,$strand) |
925
|
|
|
|
|
|
|
Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand |
926
|
|
|
|
|
|
|
i.e. it checks downstream for forward strand and |
927
|
|
|
|
|
|
|
upstream for reverse strand |
928
|
|
|
|
|
|
|
Returns : 1 or 0 |
929
|
|
|
|
|
|
|
Errorcode -1 |
930
|
|
|
|
|
|
|
Args : two labels |
931
|
|
|
|
|
|
|
an optional strand (1 or -1) argument |
932
|
|
|
|
|
|
|
if strand argument is not given, it will default to the object |
933
|
|
|
|
|
|
|
argument. This argument is useful when a call is issued from a child |
934
|
|
|
|
|
|
|
of a parent object containing the subseq method |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=cut |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
#' |
939
|
|
|
|
|
|
|
# wraparound to is_downstream and is_upstream that chooses the correct one |
940
|
|
|
|
|
|
|
# depending on the strand |
941
|
|
|
|
|
|
|
sub follows { |
942
|
129
|
|
|
129
|
1
|
242
|
my ($self,$firstlabel,$secondlabel,$strand)=@_; |
943
|
129
|
100
|
|
|
|
220
|
unless (defined ($strand)) { # if optional [strand] argument not given |
944
|
37
|
|
|
|
|
95
|
$strand=$self->strand; |
945
|
|
|
|
|
|
|
} |
946
|
129
|
50
|
|
|
|
213
|
if ($strand == 1) { |
947
|
129
|
|
|
|
|
338
|
return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel)); |
948
|
|
|
|
|
|
|
} else { |
949
|
0
|
|
|
|
|
0
|
return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel)); |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
} |
952
|
|
|
|
|
|
|
# |
953
|
|
|
|
|
|
|
#=head2 translate |
954
|
|
|
|
|
|
|
# |
955
|
|
|
|
|
|
|
# Title : translate |
956
|
|
|
|
|
|
|
# Usage : $protein_seq = $obj->translate |
957
|
|
|
|
|
|
|
# Function: Provides the translation of the DNA sequence |
958
|
|
|
|
|
|
|
# using full IUPAC ambiguities in DNA/RNA and amino acid codes. |
959
|
|
|
|
|
|
|
# |
960
|
|
|
|
|
|
|
# The resulting translation is identical to EMBL/TREMBL database |
961
|
|
|
|
|
|
|
# translations. |
962
|
|
|
|
|
|
|
# |
963
|
|
|
|
|
|
|
# Returns : a string |
964
|
|
|
|
|
|
|
# Args : character for terminator (optional) defaults to '*' |
965
|
|
|
|
|
|
|
# character for unknown amino acid (optional) defaults to 'X' |
966
|
|
|
|
|
|
|
# frame (optional) valid values 0, 1, 3, defaults to 0 |
967
|
|
|
|
|
|
|
# codon table id (optional) defaults to 1 |
968
|
|
|
|
|
|
|
# |
969
|
|
|
|
|
|
|
#=cut |
970
|
|
|
|
|
|
|
# |
971
|
|
|
|
|
|
|
#sub translate { |
972
|
|
|
|
|
|
|
# my ($self) = shift; |
973
|
|
|
|
|
|
|
# return ($self->translate_string($self->seq,@_)); |
974
|
|
|
|
|
|
|
#} |
975
|
|
|
|
|
|
|
# |
976
|
|
|
|
|
|
|
#=head2 translate_string |
977
|
|
|
|
|
|
|
# |
978
|
|
|
|
|
|
|
# Title : translate_string |
979
|
|
|
|
|
|
|
# Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta"); |
980
|
|
|
|
|
|
|
# Function: Like translate, but can be used to translate subsequences after |
981
|
|
|
|
|
|
|
# having retrieved them as string. |
982
|
|
|
|
|
|
|
# Args : 1st argument is a string. Optional following arguments: like in |
983
|
|
|
|
|
|
|
# the translate method |
984
|
|
|
|
|
|
|
# |
985
|
|
|
|
|
|
|
#=cut |
986
|
|
|
|
|
|
|
# |
987
|
|
|
|
|
|
|
# |
988
|
|
|
|
|
|
|
#sub translate_string { |
989
|
|
|
|
|
|
|
# my($self) = shift; |
990
|
|
|
|
|
|
|
# my($seq) = shift; |
991
|
|
|
|
|
|
|
# my($stop, $unknown, $frame, $tableid) = @_; |
992
|
|
|
|
|
|
|
# my($i, $len, $output) = (0,0,''); |
993
|
|
|
|
|
|
|
# my($codon) = ""; |
994
|
|
|
|
|
|
|
# my $aa; |
995
|
|
|
|
|
|
|
# |
996
|
|
|
|
|
|
|
# |
997
|
|
|
|
|
|
|
# ## User can pass in symbol for stop and unknown codons |
998
|
|
|
|
|
|
|
# unless(defined($stop) and $stop ne '') { $stop = "*"; } |
999
|
|
|
|
|
|
|
# unless(defined($unknown) and $unknown ne '') { $unknown = "X"; } |
1000
|
|
|
|
|
|
|
# unless(defined($frame) and $frame ne '') { $frame = 0; } |
1001
|
|
|
|
|
|
|
# |
1002
|
|
|
|
|
|
|
# ## the codon table ID |
1003
|
|
|
|
|
|
|
# if ($self->translation_table) { |
1004
|
|
|
|
|
|
|
# $tableid = $self->translation_table; |
1005
|
|
|
|
|
|
|
# } |
1006
|
|
|
|
|
|
|
# unless(defined($tableid) and $tableid ne '') { $tableid = 1; } |
1007
|
|
|
|
|
|
|
# |
1008
|
|
|
|
|
|
|
# ##Error if monomer is "Amino" |
1009
|
|
|
|
|
|
|
# $self->warn("Can't translate an amino acid sequence.") |
1010
|
|
|
|
|
|
|
# if (defined $self->alphabet && $self->alphabet eq 'protein'); |
1011
|
|
|
|
|
|
|
# |
1012
|
|
|
|
|
|
|
# ##Error if frame is not 0, 1 or 2 |
1013
|
|
|
|
|
|
|
# $self->warn("Valid values for frame are 0, 1, 2, not [$frame].") |
1014
|
|
|
|
|
|
|
# unless ($frame == 0 or $frame == 1 or $frame == 2); |
1015
|
|
|
|
|
|
|
# |
1016
|
|
|
|
|
|
|
# #thows a warning if ID is invalid |
1017
|
|
|
|
|
|
|
# my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid); |
1018
|
|
|
|
|
|
|
# |
1019
|
|
|
|
|
|
|
# # deal with frame offset. |
1020
|
|
|
|
|
|
|
# if( $frame ) { |
1021
|
|
|
|
|
|
|
# $seq = substr ($seq,$frame); |
1022
|
|
|
|
|
|
|
# } |
1023
|
|
|
|
|
|
|
# |
1024
|
|
|
|
|
|
|
# for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) { |
1025
|
|
|
|
|
|
|
# my $aa = $codonTable->translate($codon); |
1026
|
|
|
|
|
|
|
# if ($aa eq '*') { |
1027
|
|
|
|
|
|
|
# $output .= $stop; |
1028
|
|
|
|
|
|
|
# } |
1029
|
|
|
|
|
|
|
# elsif ($aa eq 'X') { |
1030
|
|
|
|
|
|
|
# $output .= $unknown; |
1031
|
|
|
|
|
|
|
# } |
1032
|
|
|
|
|
|
|
# else { |
1033
|
|
|
|
|
|
|
# $output .= $aa ; |
1034
|
|
|
|
|
|
|
# } |
1035
|
|
|
|
|
|
|
# } |
1036
|
|
|
|
|
|
|
# #if( substr($output,-1,1) eq $stop ) { |
1037
|
|
|
|
|
|
|
# # chop $output; |
1038
|
|
|
|
|
|
|
# #} |
1039
|
|
|
|
|
|
|
# |
1040
|
|
|
|
|
|
|
# return ($output); |
1041
|
|
|
|
|
|
|
#} |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
=head2 gene |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
Title : gene |
1046
|
|
|
|
|
|
|
Usage : my $gene=$obj->gene; |
1047
|
|
|
|
|
|
|
Function: Gets or sets the reference to the LiveSeq::Gene object. |
1048
|
|
|
|
|
|
|
Objects that are features of a LiveSeq Gene will have this |
1049
|
|
|
|
|
|
|
attribute set automatically. |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
Returns : reference to an object of class Gene |
1052
|
|
|
|
|
|
|
Note : if Gene object is not set, this method will return 0; |
1053
|
|
|
|
|
|
|
Args : none or reference to object of class Bio::LiveSeq::Gene |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=cut |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
sub gene { |
1058
|
74
|
|
|
74
|
1
|
126
|
my ($self,$value) = @_; |
1059
|
74
|
100
|
|
|
|
119
|
if (defined $value) { |
1060
|
33
|
|
|
|
|
57
|
$self->{'gene'} = $value; |
1061
|
|
|
|
|
|
|
} |
1062
|
74
|
50
|
|
|
|
117
|
unless (exists $self->{'gene'}) { |
1063
|
0
|
|
|
|
|
0
|
return (0); |
1064
|
|
|
|
|
|
|
} else { |
1065
|
74
|
|
|
|
|
216
|
return $self->{'gene'}; |
1066
|
|
|
|
|
|
|
} |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=head2 obj_valid |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Title : obj_valid |
1072
|
|
|
|
|
|
|
Usage : if ($obj->obj_valid) {do something;} |
1073
|
|
|
|
|
|
|
Function: Checks if start and end labels are still valid for the ojbect, |
1074
|
|
|
|
|
|
|
i.e. tests if the LiveSeq object is still valid |
1075
|
|
|
|
|
|
|
Returns : boolean |
1076
|
|
|
|
|
|
|
Args : none |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
=cut |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
sub obj_valid { |
1081
|
1642
|
|
|
1642
|
1
|
1721
|
my $self=shift; |
1082
|
1642
|
50
|
33
|
|
|
2581
|
unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) { |
1083
|
0
|
|
|
|
|
0
|
return (0); |
1084
|
|
|
|
|
|
|
} |
1085
|
1642
|
|
|
|
|
3079
|
return (1); |
1086
|
|
|
|
|
|
|
} |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
=head2 name |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
Title : name |
1091
|
|
|
|
|
|
|
Usage : $name = $obj->name; |
1092
|
|
|
|
|
|
|
: $name = $obj->name("ABCD"); |
1093
|
|
|
|
|
|
|
Function: Returns or sets the name of the object. |
1094
|
|
|
|
|
|
|
If there is no name, it will return "unknown"; |
1095
|
|
|
|
|
|
|
Returns : A string |
1096
|
|
|
|
|
|
|
Args : None |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=cut |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
sub name { |
1101
|
0
|
|
|
0
|
1
|
0
|
my ($self,$value) = @_; |
1102
|
0
|
0
|
|
|
|
0
|
if (defined $value) { |
1103
|
0
|
|
|
|
|
0
|
$self->{'name'} = $value; |
1104
|
|
|
|
|
|
|
} |
1105
|
0
|
0
|
|
|
|
0
|
unless (exists $self->{'name'}) { |
1106
|
0
|
|
|
|
|
0
|
return "unknown"; |
1107
|
|
|
|
|
|
|
} else { |
1108
|
0
|
|
|
|
|
0
|
return $self->{'name'}; |
1109
|
|
|
|
|
|
|
} |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
=head2 desc |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
Title : desc |
1115
|
|
|
|
|
|
|
Usage : $desc = $obj->desc; |
1116
|
|
|
|
|
|
|
: $desc = $obj->desc("ABCD"); |
1117
|
|
|
|
|
|
|
Function: Returns or sets the description of the object. |
1118
|
|
|
|
|
|
|
If there is no description, it will return "unknown"; |
1119
|
|
|
|
|
|
|
Returns : A string |
1120
|
|
|
|
|
|
|
Args : None |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
=cut |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
sub desc { |
1125
|
26
|
|
|
26
|
1
|
1011
|
my ($self,$value) = @_; |
1126
|
26
|
100
|
|
|
|
44
|
if (defined $value) { |
1127
|
23
|
|
|
|
|
31
|
$self->{'desc'} = $value; |
1128
|
|
|
|
|
|
|
} |
1129
|
26
|
50
|
|
|
|
56
|
unless (exists $self->{'desc'}) { |
1130
|
0
|
|
|
|
|
0
|
return "unknown"; |
1131
|
|
|
|
|
|
|
} else { |
1132
|
26
|
|
|
|
|
47
|
return $self->{'desc'}; |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=head2 source |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
Title : source |
1139
|
|
|
|
|
|
|
Usage : $name = $obj->source; |
1140
|
|
|
|
|
|
|
: $name = $obj->source("Homo sapiens"); |
1141
|
|
|
|
|
|
|
Function: Returns or sets the organism that is source of the object. |
1142
|
|
|
|
|
|
|
If there is no source, it will return "unknown"; |
1143
|
|
|
|
|
|
|
Returns : A string |
1144
|
|
|
|
|
|
|
Args : None |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
=cut |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
sub source { |
1149
|
7
|
|
|
7
|
1
|
18
|
my ($self,$value) = @_; |
1150
|
7
|
100
|
|
|
|
20
|
if (defined $value) { |
1151
|
6
|
|
|
|
|
13
|
$self->{'source'} = $value; |
1152
|
|
|
|
|
|
|
} |
1153
|
7
|
50
|
|
|
|
21
|
unless (exists $self->{'source'}) { |
1154
|
0
|
|
|
|
|
0
|
return "unknown"; |
1155
|
|
|
|
|
|
|
} else { |
1156
|
7
|
|
|
|
|
20
|
return $self->{'source'}; |
1157
|
|
|
|
|
|
|
} |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
sub delete_Obj { |
1161
|
78
|
|
|
78
|
0
|
82
|
my $self = shift; |
1162
|
78
|
|
|
|
|
63
|
my @values= values %{$self}; |
|
78
|
|
|
|
|
2089
|
|
1163
|
78
|
|
|
|
|
68
|
my @keys= keys %{$self}; |
|
78
|
|
|
|
|
2654
|
|
1164
|
|
|
|
|
|
|
|
1165
|
78
|
|
|
|
|
124
|
foreach my $key ( @keys ) { |
1166
|
13042
|
|
|
|
|
13372
|
delete $self->{$key}; |
1167
|
|
|
|
|
|
|
} |
1168
|
78
|
|
|
|
|
81
|
foreach my $value ( @values ) { |
1169
|
13042
|
100
|
|
|
|
28304
|
if (index(ref($value),"LiveSeq") != -1) { # object case |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1170
|
67
|
|
|
|
|
61
|
eval { |
1171
|
|
|
|
|
|
|
# delete $self->{$value}; |
1172
|
67
|
|
|
|
|
1589
|
$value->delete_Obj; |
1173
|
|
|
|
|
|
|
}; |
1174
|
|
|
|
|
|
|
} elsif (index(ref($value),"ARRAY") != -1) { # array case |
1175
|
12851
|
|
|
|
|
12451
|
my @array=@{$value}; |
|
12851
|
|
|
|
|
22234
|
|
1176
|
12851
|
|
|
|
|
13074
|
my $element; |
1177
|
12851
|
|
|
|
|
13511
|
foreach $element (@array) { |
1178
|
38559
|
|
|
|
|
39139
|
eval { |
1179
|
38559
|
|
|
|
|
195448
|
$element->delete_Obj; |
1180
|
|
|
|
|
|
|
}; |
1181
|
|
|
|
|
|
|
} |
1182
|
|
|
|
|
|
|
} elsif (index(ref($value),"HASH") != -1) { # object case |
1183
|
0
|
|
|
|
|
0
|
my %hash=%{$value}; |
|
0
|
|
|
|
|
0
|
|
1184
|
0
|
|
|
|
|
0
|
my $element; |
1185
|
0
|
|
|
|
|
0
|
foreach $element (%hash) { |
1186
|
0
|
|
|
|
|
0
|
eval { |
1187
|
0
|
|
|
|
|
0
|
$element->delete_Obj; |
1188
|
|
|
|
|
|
|
}; |
1189
|
|
|
|
|
|
|
} |
1190
|
|
|
|
|
|
|
} |
1191
|
|
|
|
|
|
|
} |
1192
|
78
|
|
|
|
|
5833
|
return(1); |
1193
|
|
|
|
|
|
|
} |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
1; |