line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Seq |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Seq - Sequence object, with features |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# This is the main sequence object in Bioperl |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# gets a sequence from a file |
23
|
|
|
|
|
|
|
$seqio = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat'); |
24
|
|
|
|
|
|
|
$seqobj = $seqio->next_seq(); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# SeqIO can both read and write sequences; see Bio::SeqIO |
27
|
|
|
|
|
|
|
# for more information and examples |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# get from database |
30
|
|
|
|
|
|
|
$db = Bio::DB::GenBank->new(); |
31
|
|
|
|
|
|
|
$seqobj = $db->get_Seq_by_acc('X78121'); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# make from strings in script |
34
|
|
|
|
|
|
|
$seqobj = Bio::Seq->new( -display_id => 'my_id', |
35
|
|
|
|
|
|
|
-seq => $sequence_as_string); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# gets sequence as a string from sequence object |
38
|
|
|
|
|
|
|
$seqstr = $seqobj->seq(); # actual sequence as a string |
39
|
|
|
|
|
|
|
$seqstr = $seqobj->subseq(10,50); # slice in biological coordinates |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# retrieves information from the sequence |
42
|
|
|
|
|
|
|
# features must implement Bio::SeqFeatureI interface |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
@features = $seqobj->get_SeqFeatures(); # just top level |
45
|
|
|
|
|
|
|
foreach my $feat ( @features ) { |
46
|
|
|
|
|
|
|
print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ", |
47
|
|
|
|
|
|
|
$feat->end," strand ",$feat->strand,"\n"; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# features retain link to underlying sequence object |
50
|
|
|
|
|
|
|
print "Feature sequence is ",$feat->seq->seq(),"\n" |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# sequences may have a species |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
if( defined $seq->species ) { |
56
|
|
|
|
|
|
|
print "Sequence is from ",$species->binomial," [",$species->common_name,"]\n"; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# annotation objects are Bio::AnnotationCollectionI's |
60
|
|
|
|
|
|
|
$ann = $seqobj->annotation(); # annotation object |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# references is one type of annotations to get. Also get |
63
|
|
|
|
|
|
|
# comment and dblink. Look at Bio::AnnotationCollection for |
64
|
|
|
|
|
|
|
# more information |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
foreach my $ref ( $ann->get_Annotations('reference') ) { |
67
|
|
|
|
|
|
|
print "Reference ",$ref->title,"\n"; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# you can get truncations, translations and reverse complements, these |
71
|
|
|
|
|
|
|
# all give back Bio::Seq objects themselves, though currently with no |
72
|
|
|
|
|
|
|
# features transfered |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
my $trunc = $seqobj->trunc(100,200); |
75
|
|
|
|
|
|
|
my $rev = $seqobj->revcom(); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# there are many options to translate - check out the docs |
78
|
|
|
|
|
|
|
my $trans = $seqobj->translate(); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# these functions can be chained together |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate(); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 DESCRIPTION |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
A Seq object is a sequence with sequence features placed on it. The |
89
|
|
|
|
|
|
|
Seq object contains a PrimarySeq object for the actual sequence and |
90
|
|
|
|
|
|
|
also implements its interface. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
In Bioperl we have 3 main players that people are going to use frequently |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Bio::PrimarySeq - just the sequence and its names, nothing else. |
95
|
|
|
|
|
|
|
Bio::SeqFeatureI - a feature on a sequence, potentially with a sequence |
96
|
|
|
|
|
|
|
and a location and annotation. |
97
|
|
|
|
|
|
|
Bio::Seq - A sequence and a collection of sequence features |
98
|
|
|
|
|
|
|
(an aggregate) with its own annotation. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Although Bioperl is not tied heavily to file formats these distinctions do |
101
|
|
|
|
|
|
|
map to file formats sensibly and for some bioinformaticians this might help |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Bio::PrimarySeq - Fasta file of a sequence |
104
|
|
|
|
|
|
|
Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table |
105
|
|
|
|
|
|
|
Bio::Seq - A single EMBL/GenBank/DDBJ entry |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
By having this split we avoid a lot of nasty circular references |
108
|
|
|
|
|
|
|
(sequence features can hold a reference to a sequence without the sequence |
109
|
|
|
|
|
|
|
holding a reference to the sequence feature). See L and |
110
|
|
|
|
|
|
|
L for more information. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Ian Korf really helped in the design of the Seq and SeqFeature system. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 Examples |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
A simple and fundamental block of code: |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
use Bio::SeqIO; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object |
121
|
|
|
|
|
|
|
my $seqobj = $seqIOobj->next_seq; # get a Seq object |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
With the Seq object in hand one has access to a powerful set of Bioperl |
124
|
|
|
|
|
|
|
methods and related Bioperl objects. This next script will take a file of sequences |
125
|
|
|
|
|
|
|
in EMBL format and create a file of the reverse-complemented sequences |
126
|
|
|
|
|
|
|
in Fasta format using Seq objects. It also prints out details about the |
127
|
|
|
|
|
|
|
exons it finds as sequence features in Genbank Flat File format. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
use Bio::Seq; |
130
|
|
|
|
|
|
|
use Bio::SeqIO; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); |
133
|
|
|
|
|
|
|
$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
while((my $seqobj = $seqin->next_seq())) { |
136
|
|
|
|
|
|
|
print "Seen sequence ",$seqobj->display_id,", start of seq ", |
137
|
|
|
|
|
|
|
substr($seqobj->seq,1,10),"\n"; |
138
|
|
|
|
|
|
|
if( $seqobj->alphabet eq 'dna') { |
139
|
|
|
|
|
|
|
$rev = $seqobj->revcom; |
140
|
|
|
|
|
|
|
$id = $seqobj->display_id(); |
141
|
|
|
|
|
|
|
$id = "$id.rev"; |
142
|
|
|
|
|
|
|
$rev->display_id($id); |
143
|
|
|
|
|
|
|
$seqout->write_seq($rev); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
foreach $feat ( $seqobj->get_SeqFeatures() ) { |
147
|
|
|
|
|
|
|
if( $feat->primary_tag eq 'exon' ) { |
148
|
|
|
|
|
|
|
print STDOUT "Location ",$feat->start,":", |
149
|
|
|
|
|
|
|
$feat->end," GFF[",$feat->gff_string,"]\n"; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Let's examine the script. The lines below import the Bioperl modules. |
155
|
|
|
|
|
|
|
Seq is the main Bioperl sequence object and SeqIO is the Bioperl support |
156
|
|
|
|
|
|
|
for reading sequences from files and to files |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
use Bio::Seq; |
159
|
|
|
|
|
|
|
use Bio::SeqIO; |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
These two lines create two SeqIO streams: one for reading in sequences |
162
|
|
|
|
|
|
|
and one for outputting sequences: |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); |
165
|
|
|
|
|
|
|
$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Notice that in the "$seqout" case there is a greater-than sign, |
168
|
|
|
|
|
|
|
indicating the file is being opened for writing. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Using the |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
'-argument' => value |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
syntax is common in Bioperl. The file argument is like an argument |
175
|
|
|
|
|
|
|
to open() . You can also pass in filehandles or FileHandle objects by |
176
|
|
|
|
|
|
|
using the -fh argument (see L documentation for details). |
177
|
|
|
|
|
|
|
Many formats in Bioperl are handled, including Fasta, EMBL, GenBank, |
178
|
|
|
|
|
|
|
Swissprot (swiss), PIR, and GCG. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
$seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat'); |
181
|
|
|
|
|
|
|
$seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa'); |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
This is the main loop which will loop progressively through sequences |
184
|
|
|
|
|
|
|
in a file, and each call to $seqio-Enext_seq() provides a new Seq |
185
|
|
|
|
|
|
|
object from the file: |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
while((my $seqobj = $seqio->next_seq())) { |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
This print line below accesses fields in the Seq object directly. The |
190
|
|
|
|
|
|
|
$seqobj-Edisplay_id is the way to access the display_id attribute |
191
|
|
|
|
|
|
|
of the Seq object. The $seqobj-Eseq method gets the actual |
192
|
|
|
|
|
|
|
sequence out as string. Then you can do manipulation of this if |
193
|
|
|
|
|
|
|
you want to (there are however easy ways of doing truncation, |
194
|
|
|
|
|
|
|
reverse-complement and translation). |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
print "Seen sequence ",$seqobj->display_id,", start of seq ", |
197
|
|
|
|
|
|
|
substr($seqobj->seq,1,10),"\n"; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Bioperl has to guess the alphabet of the sequence, being either 'dna', |
200
|
|
|
|
|
|
|
'rna', or 'protein'. The alphabet attribute is one of these three |
201
|
|
|
|
|
|
|
possibilities. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
if( $seqobj->alphabet eq 'dna') { |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
The $seqobj-Erevcom method provides the reverse complement of the Seq |
206
|
|
|
|
|
|
|
object as another Seq object. Thus, the $rev variable is a reference to |
207
|
|
|
|
|
|
|
another Seq object. For example, one could repeat the above print line |
208
|
|
|
|
|
|
|
for this Seq object (putting $rev in place of $seqobj). In this |
209
|
|
|
|
|
|
|
case we are going to output the object into the file stream we built |
210
|
|
|
|
|
|
|
earlier on. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
$rev = $seqobj->revcom; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
When we output it, we want the id of the outputted object |
215
|
|
|
|
|
|
|
to be changed to "$id.rev", ie, with .rev on the end of the name. The |
216
|
|
|
|
|
|
|
following lines retrieve the id of the sequence object, add .rev |
217
|
|
|
|
|
|
|
to this and then set the display_id of the rev sequence object to |
218
|
|
|
|
|
|
|
this. Notice that to set the display_id attribute you just need |
219
|
|
|
|
|
|
|
call the same method, display_id(), with the new value as an argument. |
220
|
|
|
|
|
|
|
Getting and setting values with the same method is common in Bioperl. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
$id = $seqobj->display_id(); |
223
|
|
|
|
|
|
|
$id = "$id.rev"; |
224
|
|
|
|
|
|
|
$rev->display_id($id); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
The write_seq method on the SeqIO output object, $seqout, writes the |
227
|
|
|
|
|
|
|
$rev object to the filestream we built at the top of the script. |
228
|
|
|
|
|
|
|
The filestream knows that it is outputting in fasta format, and |
229
|
|
|
|
|
|
|
so it provides fasta output. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
$seqout->write_seq($rev); |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
This block of code loops over sequence features in the sequence |
234
|
|
|
|
|
|
|
object, trying to find ones who have been tagged as 'exon'. |
235
|
|
|
|
|
|
|
Features have start and end attributes and can be outputted |
236
|
|
|
|
|
|
|
in Genbank Flat File format, GFF, a standarized format for sequence |
237
|
|
|
|
|
|
|
features. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
foreach $feat ( $seqobj->get_SeqFeatures() ) { |
240
|
|
|
|
|
|
|
if( $feat->primary_tag eq 'exon' ) { |
241
|
|
|
|
|
|
|
print STDOUT "Location ",$feat->start,":", |
242
|
|
|
|
|
|
|
$feat->end," GFF[",$feat->gff_string,"]\n"; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
The code above shows how a few Bio::Seq methods suffice to read, parse, |
247
|
|
|
|
|
|
|
reformat and analyze sequences from a file. A full list of methods |
248
|
|
|
|
|
|
|
available to Bio::Seq objects is shown below. Bear in mind that some of |
249
|
|
|
|
|
|
|
these methods come from PrimarySeq objects, which are simpler |
250
|
|
|
|
|
|
|
than Seq objects, stripped of features (see L for |
251
|
|
|
|
|
|
|
more information). |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# these methods return strings, and accept strings in some cases: |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
$seqobj->seq(); # string of sequence |
256
|
|
|
|
|
|
|
$seqobj->subseq(5,10); # part of the sequence as a string |
257
|
|
|
|
|
|
|
$seqobj->accession_number(); # when there, the accession number |
258
|
|
|
|
|
|
|
$seqobj->alphabet(); # one of 'dna','rna',or 'protein' |
259
|
|
|
|
|
|
|
$seqobj->version() # when there, the version |
260
|
|
|
|
|
|
|
$seqobj->keywords(); # when there, the Keywords line |
261
|
|
|
|
|
|
|
$seqobj->length() # length |
262
|
|
|
|
|
|
|
$seqobj->desc(); # description |
263
|
|
|
|
|
|
|
$seqobj->primary_id(); # a unique id for this sequence regardless |
264
|
|
|
|
|
|
|
# of its display_id or accession number |
265
|
|
|
|
|
|
|
$seqobj->display_id(); # the human readable id of the sequence |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Some of these values map to fields in common formats. For example, The |
268
|
|
|
|
|
|
|
display_id() method returns the LOCUS name of a Genbank entry, |
269
|
|
|
|
|
|
|
the (\S+) following the E character in a Fasta file, the ID from |
270
|
|
|
|
|
|
|
a SwissProt file, and so on. The desc() method will return the DEFINITION |
271
|
|
|
|
|
|
|
line of a Genbank file, the description following the display_id in a |
272
|
|
|
|
|
|
|
Fasta file, and the DE field in a SwissProt file. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# the following methods return new Seq objects, but |
275
|
|
|
|
|
|
|
# do not transfer features across to the new object: |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
$seqobj->trunc(5,10) # truncation from 5 to 10 as new object |
278
|
|
|
|
|
|
|
$seqobj->revcom # reverse complements sequence |
279
|
|
|
|
|
|
|
$seqobj->translate # translation of the sequence |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# if new() can be called this method returns 1, else 0 |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
$seqobj->can_call_new |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# the following method determines if the given string will be accepted |
286
|
|
|
|
|
|
|
# by the seq() method - if the string is acceptable then validate() |
287
|
|
|
|
|
|
|
# returns 1, or 0 if not |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
$seqobj->validate_seq($string) |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# the following method returns or accepts a Species object: |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
$seqobj->species(); |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Please see L for more information on this object. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# the following method returns or accepts an Annotation object |
298
|
|
|
|
|
|
|
# which in turn allows access to Annotation::Reference |
299
|
|
|
|
|
|
|
# and Annotation::Comment objects: |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
$seqobj->annotation(); |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
These annotations typically refer to entire sequences, unlike |
304
|
|
|
|
|
|
|
features. See L, |
305
|
|
|
|
|
|
|
L, L, and |
306
|
|
|
|
|
|
|
L for details. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
It is also important to be able to describe defined portions of a |
309
|
|
|
|
|
|
|
sequence. The combination of some description and the corresponding |
310
|
|
|
|
|
|
|
sub-sequence is called a feature - an exon and its coordinates within |
311
|
|
|
|
|
|
|
a gene is an example of a feature, or a domain within a protein. |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# the following methods return an array of SeqFeatureI objects: |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
$seqobj->get_SeqFeatures # The 'top level' sequence features |
316
|
|
|
|
|
|
|
$seqobj->get_all_SeqFeatures # All sequence features, including sub-seq |
317
|
|
|
|
|
|
|
# features, such as features in an exon |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# to find out the number of features use: |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
$seqobj->feature_count |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
Here are just some of the methods available to SeqFeatureI objects: |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# these methods return numbers: |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
$feat->start # start position (1 is the first base) |
328
|
|
|
|
|
|
|
$feat->end # end position (2 is the second base) |
329
|
|
|
|
|
|
|
$feat->strand # 1 means forward, -1 reverse, 0 not relevant |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# these methods return or accept strings: |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
$feat->primary_tag # the name of the sequence feature, eg |
334
|
|
|
|
|
|
|
# 'exon', 'glycoslyation site', 'TM domain' |
335
|
|
|
|
|
|
|
$feat->source_tag # where the feature comes from, eg, 'EMBL_GenBank', |
336
|
|
|
|
|
|
|
# or 'BLAST' |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# this method returns the more austere PrimarySeq object, not a |
339
|
|
|
|
|
|
|
# Seq object - the main difference is that PrimarySeq objects do not |
340
|
|
|
|
|
|
|
# themselves contain sequence features |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
$feat->seq # the sequence between start,end on the |
343
|
|
|
|
|
|
|
# correct strand of the sequence |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
See L for more details on PrimarySeq objects. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# useful methods for feature comparisons, for start/end points |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
$feat->overlaps($other) # do $feat and $other overlap? |
350
|
|
|
|
|
|
|
$feat->contains($other) # is $other completely within $feat? |
351
|
|
|
|
|
|
|
$feat->equals($other) # do $feat and $other completely agree? |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# one can also add features |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
$seqobj->add_SeqFeature($feat) # returns 1 if successful |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# sub features. For complex join() statements, the feature |
358
|
|
|
|
|
|
|
# is one sequence feature with many sub SeqFeatures |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
$feat->sub_SeqFeature # returns array of sub seq features |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Please see L and L, |
363
|
|
|
|
|
|
|
for more information on sequence features. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
It is worth mentioning that one can also retrieve the start and end |
366
|
|
|
|
|
|
|
positions of a feature using a Bio::LocationI object: |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
$location = $feat->location # $location is a Bio::LocationI object |
369
|
|
|
|
|
|
|
$location->start; # start position |
370
|
|
|
|
|
|
|
$location->end; # end position |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
This is useful because one needs a Bio::Location::SplitLocationI object |
373
|
|
|
|
|
|
|
in order to retrieve the coordinates inside the Genbank or EMBL join() |
374
|
|
|
|
|
|
|
statements (e.g. "CDS join(51..142,273..495,1346..1474)"): |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
if ( $feat->location->isa('Bio::Location::SplitLocationI') && |
377
|
|
|
|
|
|
|
$feat->primary_tag eq 'CDS' ) { |
378
|
|
|
|
|
|
|
foreach $loc ( $feat->location->sub_Location ) { |
379
|
|
|
|
|
|
|
print $loc->start . ".." . $loc->end . "\n"; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
See L and L for more |
384
|
|
|
|
|
|
|
information. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head1 Implemented Interfaces |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
This class implements the following interfaces. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over 4 |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item Bio::SeqI |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Note that this includes implementing Bio::PrimarySeqI. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=item Bio::IdentifiableI |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=item Bio::DescribableI |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=item Bio::AnnotatableI |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=item Bio::FeatureHolderI |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=back |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=head1 FEEDBACK |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 Mailing Lists |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
412
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
413
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
416
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head2 Support |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
I |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
425
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
426
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
427
|
|
|
|
|
|
|
with code and data examples if at all possible. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 Reporting Bugs |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
432
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
433
|
|
|
|
|
|
|
web: |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
Jason Stajich Ejason@bioperl.orgE |
444
|
|
|
|
|
|
|
Mark A. Jensen maj -at- fortinbras -dot- us |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=head1 APPENDIX |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
450
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a "_". |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
#' |
455
|
|
|
|
|
|
|
# Let the code begin... |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
package Bio::Seq; |
459
|
181
|
|
|
181
|
|
9178
|
use strict; |
|
181
|
|
|
|
|
212
|
|
|
181
|
|
|
|
|
4238
|
|
460
|
|
|
|
|
|
|
|
461
|
181
|
|
|
181
|
|
24744
|
use Bio::Annotation::Collection; |
|
181
|
|
|
|
|
293
|
|
|
181
|
|
|
|
|
3592
|
|
462
|
181
|
|
|
181
|
|
21657
|
use Bio::PrimarySeq; |
|
181
|
|
|
|
|
245
|
|
|
181
|
|
|
|
|
5696
|
|
463
|
|
|
|
|
|
|
|
464
|
181
|
|
|
181
|
|
660
|
use base qw(Bio::Root::Root Bio::SeqI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::FeatureHolderI Bio::AnnotationCollectionI); |
|
181
|
|
|
|
|
200
|
|
|
181
|
|
|
|
|
58558
|
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 new |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Title : new |
469
|
|
|
|
|
|
|
Usage : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT', |
470
|
|
|
|
|
|
|
-id => 'human_id', |
471
|
|
|
|
|
|
|
-accession_number => 'AL000012', |
472
|
|
|
|
|
|
|
); |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Function: Returns a new Seq object from |
475
|
|
|
|
|
|
|
basic constructors, being a string for the sequence |
476
|
|
|
|
|
|
|
and strings for id and accession_number |
477
|
|
|
|
|
|
|
Returns : a new Bio::Seq object |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub new { |
482
|
626
|
|
|
626
|
1
|
4099
|
my($caller,@args) = @_; |
483
|
|
|
|
|
|
|
|
484
|
626
|
100
|
|
|
|
1488
|
if( $caller ne 'Bio::Seq') { |
485
|
461
|
50
|
|
|
|
918
|
$caller = ref($caller) if ref($caller); |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# we know our inherietance hierarchy |
489
|
626
|
|
|
|
|
2373
|
my $self = Bio::Root::Root->new(@args); |
490
|
626
|
|
|
|
|
924
|
bless $self,$caller; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# this is way too sneaky probably. We delegate the construction of |
493
|
|
|
|
|
|
|
# the Seq object onto PrimarySeq and then pop primary_seq into |
494
|
|
|
|
|
|
|
# our primary_seq slot |
495
|
|
|
|
|
|
|
|
496
|
626
|
|
|
|
|
2796
|
my $pseq = Bio::PrimarySeq->new(@args); |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# as we have just made this, we know it is ok to set hash directly |
499
|
|
|
|
|
|
|
# rather than going through the method |
500
|
|
|
|
|
|
|
|
501
|
626
|
|
|
|
|
1503
|
$self->{'primary_seq'} = $pseq; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# setting this array is now delayed until the final |
504
|
|
|
|
|
|
|
# moment, again speed ups for non feature containing things |
505
|
|
|
|
|
|
|
# $self->{'_as_feat'} = []; |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
508
|
626
|
|
|
|
|
2261
|
my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args); |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
# for a number of cases - reading fasta files - these are never set. This |
511
|
|
|
|
|
|
|
# gives a quick optimisation around testing things later on |
512
|
|
|
|
|
|
|
|
513
|
626
|
100
|
100
|
|
|
3473
|
if( defined $ann || defined $pid || defined $feat || defined $species ) { |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
514
|
359
|
100
|
|
|
|
1103
|
$pid && $self->primary_id($pid); |
515
|
359
|
100
|
|
|
|
1382
|
$species && $self->species($species); |
516
|
359
|
100
|
|
|
|
1236
|
$ann && $self->annotation($ann); |
517
|
|
|
|
|
|
|
|
518
|
359
|
100
|
|
|
|
669
|
if( defined $feat ) { |
519
|
211
|
50
|
|
|
|
1253
|
if( ref($feat) !~ /ARRAY/i ) { |
520
|
0
|
0
|
0
|
|
|
0
|
if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) { |
521
|
0
|
|
|
|
|
0
|
$self->add_SeqFeature($feat); |
522
|
|
|
|
|
|
|
} else { |
523
|
0
|
|
|
|
|
0
|
$self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self)); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
} else { |
526
|
211
|
|
|
|
|
497
|
foreach my $feature ( @$feat ) { |
527
|
9797
|
|
|
|
|
9784
|
$self->add_SeqFeature($feature); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
626
|
|
|
|
|
1892
|
return $self; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=head1 PrimarySeq interface |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
The PrimarySeq interface provides the basic sequence getting |
541
|
|
|
|
|
|
|
and setting methods for on all sequences. |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
These methods implement the Bio::PrimarySeq interface by delegating |
544
|
|
|
|
|
|
|
to the primary_seq inside the object. This means that you |
545
|
|
|
|
|
|
|
can use a Seq object wherever there is a PrimarySeq, and |
546
|
|
|
|
|
|
|
of course, you are free to use these functions anyway. |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=cut |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head2 seq |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
Title : seq |
553
|
|
|
|
|
|
|
Usage : $string = $obj->seq() |
554
|
|
|
|
|
|
|
Function: Get/Set the sequence as a string of letters. The |
555
|
|
|
|
|
|
|
case of the letters is left up to the implementer. |
556
|
|
|
|
|
|
|
Suggested cases are upper case for proteins and lower case for |
557
|
|
|
|
|
|
|
DNA sequence (IUPAC standard), |
558
|
|
|
|
|
|
|
but implementations are suggested to keep an open mind about |
559
|
|
|
|
|
|
|
case (some users... want mixed case!) |
560
|
|
|
|
|
|
|
Returns : A scalar |
561
|
|
|
|
|
|
|
Args : Optionally on set the new value (a string). An optional second |
562
|
|
|
|
|
|
|
argument presets the alphabet (otherwise it will be guessed). |
563
|
|
|
|
|
|
|
Both parameters may also be given in named parameter style |
564
|
|
|
|
|
|
|
with -seq and -alphabet being the names. |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub seq { |
569
|
839
|
|
|
839
|
1
|
5174
|
return shift->primary_seq()->seq(@_); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=head2 validate_seq |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Title : validate_seq |
576
|
|
|
|
|
|
|
Usage : if(! $seqobj->validate_seq($seq_str) ) { |
577
|
|
|
|
|
|
|
print "sequence $seq_str is not valid for an object of |
578
|
|
|
|
|
|
|
alphabet ",$seqobj->alphabet, "\n"; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
Function: Test that the given sequence is valid, i.e. contains only valid |
581
|
|
|
|
|
|
|
characters. The allowed characters are all letters (A-Z) and '-','.', |
582
|
|
|
|
|
|
|
'*','?','=' and '~'. Spaces are not valid. Note that this |
583
|
|
|
|
|
|
|
implementation does not take alphabet() into account. |
584
|
|
|
|
|
|
|
Returns : 1 if the supplied sequence string is valid, 0 otherwise. |
585
|
|
|
|
|
|
|
Args : - Sequence string to be validated |
586
|
|
|
|
|
|
|
- Boolean to throw an error if the sequence is invalid |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
sub validate_seq { |
591
|
0
|
|
|
0
|
1
|
0
|
return shift->primary_seq()->validate_seq(@_); |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=head2 length |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Title : length |
598
|
|
|
|
|
|
|
Usage : $len = $seq->length() |
599
|
|
|
|
|
|
|
Function: |
600
|
|
|
|
|
|
|
Example : |
601
|
|
|
|
|
|
|
Returns : Integer representing the length of the sequence. |
602
|
|
|
|
|
|
|
Args : None |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=cut |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
sub length { |
607
|
225
|
|
|
225
|
1
|
856
|
return shift->primary_seq()->length(@_); |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=head1 Methods from the Bio::PrimarySeqI interface |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
=head2 subseq |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
Title : subseq |
616
|
|
|
|
|
|
|
Usage : $substring = $obj->subseq(10,40); |
617
|
|
|
|
|
|
|
Function: Returns the subseq from start to end, where the first base |
618
|
|
|
|
|
|
|
is 1 and the number is inclusive, ie 1-2 are the first two |
619
|
|
|
|
|
|
|
bases of the sequence |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
Start cannot be larger than end but can be equal |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
Returns : A string |
624
|
|
|
|
|
|
|
Args : 2 integers |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
sub subseq { |
630
|
170
|
|
|
170
|
1
|
230
|
return shift->primary_seq()->subseq(@_); |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=head2 display_id |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Title : display_id |
637
|
|
|
|
|
|
|
Usage : $id = $obj->display_id or $obj->display_id($newid); |
638
|
|
|
|
|
|
|
Function: Gets or sets the display id, also known as the common name of |
639
|
|
|
|
|
|
|
the Seq object. |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
The semantics of this is that it is the most likely string |
642
|
|
|
|
|
|
|
to be used as an identifier of the sequence, and likely to |
643
|
|
|
|
|
|
|
have "human" readability. The id is equivalent to the LOCUS |
644
|
|
|
|
|
|
|
field of the GenBank/EMBL databanks and the ID field of the |
645
|
|
|
|
|
|
|
Swissprot/sptrembl database. In fasta format, the >(\S+) is |
646
|
|
|
|
|
|
|
presumed to be the id, though some people overload the id |
647
|
|
|
|
|
|
|
to embed other information. Bioperl does not use any |
648
|
|
|
|
|
|
|
embedded information in the ID field, and people are |
649
|
|
|
|
|
|
|
encouraged to use other mechanisms (accession field for |
650
|
|
|
|
|
|
|
example, or extending the sequence object) to solve this. |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Notice that $seq->id() maps to this function, mainly for |
653
|
|
|
|
|
|
|
legacy/convenience issues. |
654
|
|
|
|
|
|
|
Returns : A string |
655
|
|
|
|
|
|
|
Args : None or a new id |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=cut |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
sub display_id { |
660
|
363
|
|
|
363
|
1
|
16835
|
return shift->primary_seq->display_id(@_); |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=head2 accession_number |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Title : accession_number |
667
|
|
|
|
|
|
|
Usage : $unique_biological_key = $obj->accession_number; |
668
|
|
|
|
|
|
|
Function: Returns the unique biological id for a sequence, commonly |
669
|
|
|
|
|
|
|
called the accession_number. For sequences from established |
670
|
|
|
|
|
|
|
databases, the implementors should try to use the correct |
671
|
|
|
|
|
|
|
accession number. Notice that primary_id() provides the |
672
|
|
|
|
|
|
|
unique id for the implemetation, allowing multiple objects |
673
|
|
|
|
|
|
|
to have the same accession number in a particular implementation. |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
For sequences with no accession number, this method should return |
676
|
|
|
|
|
|
|
"unknown". |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Can also be used to set the accession number. |
679
|
|
|
|
|
|
|
Example : $key = $seq->accession_number or $seq->accession_number($key) |
680
|
|
|
|
|
|
|
Returns : A string |
681
|
|
|
|
|
|
|
Args : None or an accession number |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=cut |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
sub accession_number { |
686
|
208
|
|
|
208
|
1
|
3204
|
return shift->primary_seq->accession_number(@_); |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=head2 desc |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Title : desc |
693
|
|
|
|
|
|
|
Usage : $seqobj->desc($string) or $seqobj->desc() |
694
|
|
|
|
|
|
|
Function: Sets or gets the description of the sequence |
695
|
|
|
|
|
|
|
Example : |
696
|
|
|
|
|
|
|
Returns : The description |
697
|
|
|
|
|
|
|
Args : The description or none |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=cut |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
sub desc { |
702
|
127
|
|
|
127
|
1
|
3725
|
return shift->primary_seq->desc(@_); |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=head2 primary_id |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Title : primary_id |
709
|
|
|
|
|
|
|
Usage : $unique_implementation_key = $obj->primary_id; |
710
|
|
|
|
|
|
|
Function: Returns the unique id for this object in this |
711
|
|
|
|
|
|
|
implementation. This allows implementations to manage |
712
|
|
|
|
|
|
|
their own object ids in a way the implementation can control |
713
|
|
|
|
|
|
|
clients can expect one id to map to one object. |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
For sequences with no natural id, this method should return |
716
|
|
|
|
|
|
|
a stringified memory location. |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
Can also be used to set the primary_id (or unset to undef). |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
[Note this method name is likely to change in 1.3] |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
Example : $id = $seq->primary_id or $seq->primary_id($id) |
723
|
|
|
|
|
|
|
Returns : A string |
724
|
|
|
|
|
|
|
Args : None or an id, or undef to unset the primary id. |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=cut |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
sub primary_id { |
729
|
|
|
|
|
|
|
# Note: this used to not delegate to the primary seq. This is |
730
|
|
|
|
|
|
|
# really bad in very subtle ways. E.g., if you created the object |
731
|
|
|
|
|
|
|
# with a primary id given to the constructor and then later you |
732
|
|
|
|
|
|
|
# change the primary id, if this method wouldn't delegate you'd |
733
|
|
|
|
|
|
|
# have different values for primary id in the PrimarySeq object |
734
|
|
|
|
|
|
|
# compared to this instance. Not good. |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
# I can't remember why not delegating was ever deemed |
737
|
|
|
|
|
|
|
# advantageous, but I hereby claim that its problems far outweigh |
738
|
|
|
|
|
|
|
# its advantages, if there are any. Convince me otherwise if you |
739
|
|
|
|
|
|
|
# disagree. HL 2004/08/05 |
740
|
|
|
|
|
|
|
|
741
|
257
|
|
|
257
|
1
|
2396
|
return shift->primary_seq->primary_id(@_); |
742
|
|
|
|
|
|
|
} |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=head2 can_call_new |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
Title : can_call_new |
748
|
|
|
|
|
|
|
Usage : if ( $obj->can_call_new ) { |
749
|
|
|
|
|
|
|
$newobj = $obj->new( %param ); |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
Function: can_call_new returns 1 or 0 depending |
752
|
|
|
|
|
|
|
on whether an implementation allows new |
753
|
|
|
|
|
|
|
constructor to be called. If a new constructor |
754
|
|
|
|
|
|
|
is allowed, then it should take the followed hashed |
755
|
|
|
|
|
|
|
constructor list. |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
$myobject->new( -seq => $sequence_as_string, |
758
|
|
|
|
|
|
|
-display_id => $id |
759
|
|
|
|
|
|
|
-accession_number => $accession |
760
|
|
|
|
|
|
|
-alphabet => 'dna', |
761
|
|
|
|
|
|
|
); |
762
|
|
|
|
|
|
|
Example : |
763
|
|
|
|
|
|
|
Returns : 1 or 0 |
764
|
|
|
|
|
|
|
Args : None |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=cut |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
sub can_call_new { |
769
|
7
|
|
|
7
|
1
|
17
|
return 1; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=head2 alphabet |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Title : alphabet |
776
|
|
|
|
|
|
|
Usage : if ( $obj->alphabet eq 'dna' ) { /Do Something/ } |
777
|
|
|
|
|
|
|
Function: Get/Set the type of sequence being one of |
778
|
|
|
|
|
|
|
'dna', 'rna' or 'protein'. This is case sensitive. |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
This is not called because this would cause |
781
|
|
|
|
|
|
|
upgrade problems from the 0.5 and earlier Seq objects. |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Returns : A string either 'dna','rna','protein'. NB - the object must |
784
|
|
|
|
|
|
|
make a call of the type - if there is no type specified it |
785
|
|
|
|
|
|
|
has to guess. |
786
|
|
|
|
|
|
|
Args : optional string to set : 'dna' | 'rna' | 'protein' |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
=cut |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
sub alphabet { |
791
|
409
|
|
|
409
|
1
|
424
|
my $self = shift; |
792
|
409
|
100
|
66
|
|
|
1179
|
return $self->primary_seq->alphabet(@_) if @_ && defined $_[0]; |
793
|
339
|
|
|
|
|
610
|
return $self->primary_seq->alphabet(); |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=head2 is_circular |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
Title : is_circular |
800
|
|
|
|
|
|
|
Usage : if( $obj->is_circular) { /Do Something/ } |
801
|
|
|
|
|
|
|
Function: Returns true if the molecule is circular |
802
|
|
|
|
|
|
|
Returns : Boolean value |
803
|
|
|
|
|
|
|
Args : none |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
=cut |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
sub is_circular { |
808
|
1948
|
|
|
1948
|
1
|
2216
|
return shift->primary_seq()->is_circular(@_); |
809
|
|
|
|
|
|
|
} |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=head1 Methods for Bio::IdentifiableI compliance |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=head2 object_id |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Title : object_id |
817
|
|
|
|
|
|
|
Usage : $string = $obj->object_id() |
818
|
|
|
|
|
|
|
Function: a string which represents the stable primary identifier |
819
|
|
|
|
|
|
|
in this namespace of this object. For DNA sequences this |
820
|
|
|
|
|
|
|
is its accession_number, similarly for protein sequences |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
This is aliased to accession_number(). |
823
|
|
|
|
|
|
|
Returns : A scalar |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=cut |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
sub object_id { |
828
|
2
|
|
|
2
|
1
|
3
|
return shift->accession_number(@_); |
829
|
|
|
|
|
|
|
} |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=head2 version |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
Title : version |
835
|
|
|
|
|
|
|
Usage : $version = $obj->version() |
836
|
|
|
|
|
|
|
Function: a number which differentiates between versions of |
837
|
|
|
|
|
|
|
the same object. Higher numbers are considered to be |
838
|
|
|
|
|
|
|
later and more relevant, but a single object described |
839
|
|
|
|
|
|
|
the same identifier should represent the same concept |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Returns : A number |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=cut |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
sub version{ |
846
|
35
|
|
|
35
|
1
|
995
|
return shift->primary_seq->version(@_); |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
=head2 authority |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
Title : authority |
853
|
|
|
|
|
|
|
Usage : $authority = $obj->authority() |
854
|
|
|
|
|
|
|
Function: a string which represents the organisation which |
855
|
|
|
|
|
|
|
granted the namespace, written as the DNS name for |
856
|
|
|
|
|
|
|
organisation (eg, wormbase.org) |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
Returns : A scalar |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=cut |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
sub authority { |
863
|
3
|
|
|
3
|
1
|
6
|
return shift->primary_seq()->authority(@_); |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
=head2 namespace |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
Title : namespace |
870
|
|
|
|
|
|
|
Usage : $string = $obj->namespace() |
871
|
|
|
|
|
|
|
Function: A string representing the name space this identifier |
872
|
|
|
|
|
|
|
is valid in, often the database name or the name |
873
|
|
|
|
|
|
|
describing the collection |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
Returns : A scalar |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=cut |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
sub namespace{ |
880
|
20
|
|
|
20
|
1
|
515
|
return shift->primary_seq()->namespace(@_); |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
=head1 Methods for Bio::DescribableI compliance |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head2 display_name |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
Title : display_name |
889
|
|
|
|
|
|
|
Usage : $string = $obj->display_name() |
890
|
|
|
|
|
|
|
Function: A string which is what should be displayed to the user |
891
|
|
|
|
|
|
|
the string should have no spaces (ideally, though a cautious |
892
|
|
|
|
|
|
|
user of this interface would not assumme this) and should be |
893
|
|
|
|
|
|
|
less than thirty characters (though again, double checking |
894
|
|
|
|
|
|
|
this is a good idea) |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
This is aliased to display_id(). |
897
|
|
|
|
|
|
|
Returns : A scalar |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
=cut |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
sub display_name { |
902
|
2
|
|
|
2
|
1
|
9
|
return shift->display_id(@_); |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
=head2 description |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
Title : description |
908
|
|
|
|
|
|
|
Usage : $string = $obj->description() |
909
|
|
|
|
|
|
|
Function: A text string suitable for displaying to the user a |
910
|
|
|
|
|
|
|
description. This string is likely to have spaces, but |
911
|
|
|
|
|
|
|
should not have any newlines or formatting - just plain |
912
|
|
|
|
|
|
|
text. The string should not be greater than 255 characters |
913
|
|
|
|
|
|
|
and clients can feel justified at truncating strings at 255 |
914
|
|
|
|
|
|
|
characters for the purposes of display |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
This is aliased to desc(). |
917
|
|
|
|
|
|
|
Returns : A scalar |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=cut |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
sub description { |
922
|
6
|
|
|
6
|
1
|
1760
|
return shift->desc(@_); |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head1 Methods for implementing Bio::AnnotatableI |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=head2 annotation |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Title : annotation |
931
|
|
|
|
|
|
|
Usage : $ann = $seq->annotation or |
932
|
|
|
|
|
|
|
$seq->annotation($ann) |
933
|
|
|
|
|
|
|
Function: Gets or sets the annotation |
934
|
|
|
|
|
|
|
Returns : Bio::AnnotationCollectionI object |
935
|
|
|
|
|
|
|
Args : None or Bio::AnnotationCollectionI object |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
See L and L |
938
|
|
|
|
|
|
|
for more information |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
=cut |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
sub annotation { |
943
|
2779
|
|
|
2779
|
1
|
81029
|
my ($obj,$value) = @_; |
944
|
2779
|
100
|
|
|
|
6304
|
if( defined $value ) { |
|
|
100
|
|
|
|
|
|
945
|
406
|
50
|
|
|
|
1704
|
$obj->throw("object of class ".ref($value)." does not implement ". |
946
|
|
|
|
|
|
|
"Bio::AnnotationCollectionI. Too bad.") |
947
|
|
|
|
|
|
|
unless $value->isa("Bio::AnnotationCollectionI"); |
948
|
406
|
|
|
|
|
742
|
$obj->{'_annotation'} = $value; |
949
|
|
|
|
|
|
|
} elsif( ! defined $obj->{'_annotation'}) { |
950
|
31
|
|
|
|
|
180
|
$obj->{'_annotation'} = Bio::Annotation::Collection->new(); |
951
|
|
|
|
|
|
|
} |
952
|
2779
|
|
|
|
|
8874
|
return $obj->{'_annotation'}; |
953
|
|
|
|
|
|
|
} |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=head1 Methods for delegating Bio::AnnotationCollectionI |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=head2 get_Annotations() |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
Usage : my @annotations = $seq->get_Annotations('key') |
961
|
|
|
|
|
|
|
Function: Retrieves all the Bio::AnnotationI objects for a specific key |
962
|
|
|
|
|
|
|
for this object |
963
|
|
|
|
|
|
|
Returns : list of Bio::AnnotationI - empty if no objects stored for a key |
964
|
|
|
|
|
|
|
Args : string which is key for annotations |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
=cut |
967
|
|
|
|
|
|
|
|
968
|
2
|
|
|
2
|
1
|
5
|
sub get_Annotations { shift->annotation->get_Annotations(@_); } |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=head2 add_Annotation() |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
Usage : $seq->add_Annotation('reference',$object); |
974
|
|
|
|
|
|
|
$seq->add_Annotation($object,'Bio::MyInterface::DiseaseI'); |
975
|
|
|
|
|
|
|
$seq->add_Annotation($object); |
976
|
|
|
|
|
|
|
$seq->add_Annotation('disease',$object,'Bio::MyInterface::DiseaseI'); |
977
|
|
|
|
|
|
|
Function: Adds an annotation for a specific key for this sequence object. |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
If the key is omitted, the object to be added must provide a value |
980
|
|
|
|
|
|
|
via its tagname(). |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
If the archetype is provided, this and future objects added under |
983
|
|
|
|
|
|
|
that tag have to comply with the archetype and will be rejected |
984
|
|
|
|
|
|
|
otherwise. |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
Returns : none |
987
|
|
|
|
|
|
|
Args : annotation key ('disease', 'dblink', ...) |
988
|
|
|
|
|
|
|
object to store (must be Bio::AnnotationI compliant) |
989
|
|
|
|
|
|
|
[optional] object archetype to map future storage of object |
990
|
|
|
|
|
|
|
of these types to |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
=cut |
993
|
|
|
|
|
|
|
|
994
|
1
|
|
|
1
|
1
|
3
|
sub add_Annotation { shift->annotation->add_Annotation(@_) } |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=head2 remove_Annotations() |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
Usage : $seq->remove_Annotations() |
1000
|
|
|
|
|
|
|
Function: Remove the annotations for the specified key from this sequence |
1001
|
|
|
|
|
|
|
object |
1002
|
|
|
|
|
|
|
Returns : an list of Bio::AnnotationI compliant objects which were stored |
1003
|
|
|
|
|
|
|
under the given key(s) for this sequence object |
1004
|
|
|
|
|
|
|
Args : the key(s) (tag name(s), one or more strings) for which to |
1005
|
|
|
|
|
|
|
remove annotations (optional; if none given, flushes all |
1006
|
|
|
|
|
|
|
annotations) |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
=cut |
1009
|
|
|
|
|
|
|
|
1010
|
0
|
|
|
0
|
1
|
0
|
sub remove_Annotations { shift->annotation->remove_Annotations(@_) } |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
=head2 get_num_of_annotations() |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
Usage : my $count = $seq->get_num_of_annotations() |
1016
|
|
|
|
|
|
|
Alias : num_Annotations |
1017
|
|
|
|
|
|
|
Function: Returns the count of all annotations stored for this sequence |
1018
|
|
|
|
|
|
|
object |
1019
|
|
|
|
|
|
|
Returns : integer |
1020
|
|
|
|
|
|
|
Args : none |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
=cut |
1023
|
|
|
|
|
|
|
|
1024
|
0
|
|
|
0
|
1
|
0
|
sub get_num_of_annotations { shift->annotation->get_num_of_annotations(@_) } |
1025
|
0
|
|
|
0
|
0
|
0
|
sub num_Annotations { shift->get_num_of_annotations }; #DWYM |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=head1 Methods to implement Bio::FeatureHolderI |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
This includes methods for retrieving, adding, and removing features. |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=cut |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=head2 get_SeqFeatures |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
Title : get_SeqFeatures |
1037
|
|
|
|
|
|
|
Usage : |
1038
|
|
|
|
|
|
|
Function: Get the feature objects held by this feature holder. |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
Features which are not top-level are subfeatures of one or |
1041
|
|
|
|
|
|
|
more of the returned feature objects, which means that you |
1042
|
|
|
|
|
|
|
must traverse the subfeature arrays of each top-level |
1043
|
|
|
|
|
|
|
feature object in order to traverse all features associated |
1044
|
|
|
|
|
|
|
with this sequence. |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
Specific features can be obtained by primary tag, specified in |
1047
|
|
|
|
|
|
|
the argument. |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
Use get_all_SeqFeatures() if you want the feature tree |
1050
|
|
|
|
|
|
|
flattened into one single array. |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
Example : my @feats = $seq->get_SeqFeatures or |
1053
|
|
|
|
|
|
|
my @genefeats = $seq->get_SeqFeatures('gene') |
1054
|
|
|
|
|
|
|
Returns : an array of Bio::SeqFeatureI implementing objects |
1055
|
|
|
|
|
|
|
Args : [optional] string (feature tag) |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
=cut |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
sub get_SeqFeatures{ |
1060
|
251
|
|
|
251
|
1
|
7238
|
my $self = shift; |
1061
|
251
|
|
|
|
|
301
|
my $tag = shift; |
1062
|
|
|
|
|
|
|
|
1063
|
251
|
100
|
|
|
|
637
|
if( !defined $self->{'_as_feat'} ) { |
1064
|
40
|
|
|
|
|
68
|
$self->{'_as_feat'} = []; |
1065
|
|
|
|
|
|
|
} |
1066
|
251
|
100
|
|
|
|
494
|
if ($tag) { |
1067
|
4
|
100
|
|
|
|
4
|
return map { $_->primary_tag eq $tag ? $_ : () } @{$self->{'_as_feat'}}; |
|
15
|
|
|
|
|
22
|
|
|
4
|
|
|
|
|
8
|
|
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
else { |
1070
|
247
|
|
|
|
|
246
|
return @{$self->{'_as_feat'}}; |
|
247
|
|
|
|
|
6484
|
|
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
} |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
=head2 get_all_SeqFeatures |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Title : get_all_SeqFeatures |
1078
|
|
|
|
|
|
|
Usage : @feat_ary = $seq->get_all_SeqFeatures(); |
1079
|
|
|
|
|
|
|
Function: Returns the tree of feature objects attached to this |
1080
|
|
|
|
|
|
|
sequence object flattened into one single array. Top-level |
1081
|
|
|
|
|
|
|
features will still contain their subfeature-arrays, which |
1082
|
|
|
|
|
|
|
means that you will encounter subfeatures twice if you |
1083
|
|
|
|
|
|
|
traverse the subfeature tree of the returned objects. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
Use get_SeqFeatures() if you want the array to contain only |
1086
|
|
|
|
|
|
|
the top-level features. |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
Returns : An array of Bio::SeqFeatureI implementing objects. |
1089
|
|
|
|
|
|
|
Args : None |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
=cut |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# this implementation is inherited from FeatureHolderI |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=head2 feature_count |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
Title : feature_count |
1098
|
|
|
|
|
|
|
Usage : $seq->feature_count() |
1099
|
|
|
|
|
|
|
Function: Return the number of SeqFeatures attached to a sequence |
1100
|
|
|
|
|
|
|
Returns : integer representing the number of SeqFeatures |
1101
|
|
|
|
|
|
|
Args : None |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
=cut |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
sub feature_count { |
1106
|
10
|
|
|
10
|
1
|
24
|
my ($self) = @_; |
1107
|
|
|
|
|
|
|
|
1108
|
10
|
100
|
|
|
|
38
|
if (defined($self->{'_as_feat'})) { |
1109
|
9
|
|
|
|
|
15
|
return ($#{$self->{'_as_feat'}} + 1); |
|
9
|
|
|
|
|
52
|
|
1110
|
|
|
|
|
|
|
} else { |
1111
|
1
|
|
|
|
|
4
|
return 0; |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
} |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=head2 add_SeqFeature |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
Title : add_SeqFeature |
1119
|
|
|
|
|
|
|
Usage : $seq->add_SeqFeature($feat); |
1120
|
|
|
|
|
|
|
Function: Adds the given feature object to the feature array of this |
1121
|
|
|
|
|
|
|
sequence. The object passed is required to implement the |
1122
|
|
|
|
|
|
|
Bio::SeqFeatureI interface. |
1123
|
|
|
|
|
|
|
The 'EXPAND' qualifier (see L) is supported, but |
1124
|
|
|
|
|
|
|
has no effect, |
1125
|
|
|
|
|
|
|
Returns : 1 on success |
1126
|
|
|
|
|
|
|
Args : A Bio::SeqFeatureI implementing object. |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=cut |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
sub add_SeqFeature { |
1131
|
12487
|
|
|
12487
|
1
|
11777
|
my ($self, @feat) = @_; |
1132
|
|
|
|
|
|
|
|
1133
|
12487
|
100
|
|
|
|
15622
|
$self->{'_as_feat'} = [] unless $self->{'_as_feat'}; |
1134
|
|
|
|
|
|
|
|
1135
|
12487
|
100
|
|
|
|
15424
|
if (scalar @feat > 1) { |
1136
|
1
|
|
|
|
|
9
|
$self->deprecated( |
1137
|
|
|
|
|
|
|
-message => 'Providing an array of features to Bio::Seq add_SeqFeature()'. |
1138
|
|
|
|
|
|
|
' is deprecated and will be removed in a future version. '. |
1139
|
|
|
|
|
|
|
'Add a single feature at a time instead.', |
1140
|
|
|
|
|
|
|
-warn_version => 1.007, |
1141
|
|
|
|
|
|
|
-throw_version => 1.009, |
1142
|
|
|
|
|
|
|
); |
1143
|
|
|
|
|
|
|
} |
1144
|
|
|
|
|
|
|
|
1145
|
12487
|
|
|
|
|
10792
|
for my $feat ( @feat ) { |
1146
|
|
|
|
|
|
|
|
1147
|
12493
|
50
|
|
|
|
17743
|
next if $feat eq 'EXPAND'; # Need to support it for FeatureHolderI compliance |
1148
|
|
|
|
|
|
|
|
1149
|
12493
|
50
|
|
|
|
21544
|
if( !$feat->isa("Bio::SeqFeatureI") ) { |
1150
|
0
|
|
|
|
|
0
|
$self->throw("Expected a Bio::SeqFeatureI object, but got a $feat."); |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
# make sure we attach ourselves to the feature if the feature wants it |
1154
|
12493
|
|
|
|
|
11629
|
my $aseq = $self->primary_seq; |
1155
|
12493
|
50
|
|
|
|
22674
|
$feat->attach_seq($aseq) if $aseq; |
1156
|
|
|
|
|
|
|
|
1157
|
12493
|
|
|
|
|
7415
|
push(@{$self->{'_as_feat'}},$feat); |
|
12493
|
|
|
|
|
17113
|
|
1158
|
|
|
|
|
|
|
} |
1159
|
12487
|
|
|
|
|
16958
|
return 1; |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
=head2 remove_SeqFeatures |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
Title : remove_SeqFeatures |
1166
|
|
|
|
|
|
|
Usage : $seq->remove_SeqFeatures(); |
1167
|
|
|
|
|
|
|
Function: Removes all attached SeqFeatureI objects or those with the |
1168
|
|
|
|
|
|
|
specified primary tag |
1169
|
|
|
|
|
|
|
Example : my @gene_feats = $seq->remove_seqFeatures('gene') or |
1170
|
|
|
|
|
|
|
my @feats = $seq->remove_seqFeatures() |
1171
|
|
|
|
|
|
|
Returns : The array of Bio::SeqFeatureI objects removed from the sequence |
1172
|
|
|
|
|
|
|
Args : None, or a feature primary tag |
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
=cut |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
sub remove_SeqFeatures { |
1177
|
26
|
|
|
26
|
1
|
673
|
my ( $self, $tag ) = @_; |
1178
|
26
|
100
|
|
|
|
77
|
return () unless $self->{'_as_feat'}; |
1179
|
|
|
|
|
|
|
|
1180
|
25
|
100
|
|
|
|
67
|
if ( $tag ) { |
1181
|
1
|
|
|
|
|
2
|
my @selected_feats = grep { $_->primary_tag eq $tag } @{ $self->{'_as_feat'} }; |
|
11
|
|
|
|
|
14
|
|
|
1
|
|
|
|
|
4
|
|
1182
|
1
|
|
|
|
|
1
|
my @unselected_feats = grep { $_->primary_tag ne $tag } @{ $self->{'_as_feat'} }; |
|
11
|
|
|
|
|
14
|
|
|
1
|
|
|
|
|
2
|
|
1183
|
1
|
|
|
|
|
2
|
$self->{'_as_feat'} = \@unselected_feats; |
1184
|
1
|
|
|
|
|
4
|
return @selected_feats; |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
else { |
1187
|
24
|
|
|
|
|
29
|
my @all_feats = @{ $self->{'_as_feat'} }; |
|
24
|
|
|
|
|
1084
|
|
1188
|
24
|
|
|
|
|
42
|
$self->{'_as_feat'} = []; |
1189
|
24
|
|
|
|
|
413
|
return @all_feats; |
1190
|
|
|
|
|
|
|
} |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=head1 Methods provided in the Bio::PrimarySeqI interface |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
These methods are inherited from the PrimarySeq interface |
1196
|
|
|
|
|
|
|
and work as one expects, building new Bio::Seq objects |
1197
|
|
|
|
|
|
|
or other information as expected. See L |
1198
|
|
|
|
|
|
|
for more information. |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
Sequence Features are B transferred to the new objects. |
1201
|
|
|
|
|
|
|
To reverse complement and include the features use |
1202
|
|
|
|
|
|
|
L. |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
=head2 revcom |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
Title : revcom |
1207
|
|
|
|
|
|
|
Usage : $rev = $seq->revcom() |
1208
|
|
|
|
|
|
|
Function: Produces a new Bio::Seq object which |
1209
|
|
|
|
|
|
|
is the reversed complement of the sequence. For protein |
1210
|
|
|
|
|
|
|
sequences this throws an exception of "Sequence is a protein. |
1211
|
|
|
|
|
|
|
Cannot revcom" |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
The id is the same id as the original sequence, and the |
1214
|
|
|
|
|
|
|
accession number is also identical. If someone wants to track |
1215
|
|
|
|
|
|
|
that this sequence has be reversed, it needs to define its own |
1216
|
|
|
|
|
|
|
extensions |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
To do an in-place edit of an object you can go: |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
$seq = $seq->revcom(); |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
This of course, causes Perl to handle the garbage collection of |
1223
|
|
|
|
|
|
|
the old object, but it is roughly speaking as efficient as an |
1224
|
|
|
|
|
|
|
in-place edit. |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
Returns : A new (fresh) Bio::Seq object |
1227
|
|
|
|
|
|
|
Args : None |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
=head2 trunc |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
Title : trunc |
1232
|
|
|
|
|
|
|
Usage : $subseq = $myseq->trunc(10,100); |
1233
|
|
|
|
|
|
|
Function: Provides a truncation of a sequence |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
Example : |
1236
|
|
|
|
|
|
|
Returns : A fresh Seq object |
1237
|
|
|
|
|
|
|
Args : A Seq object |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
=head2 id |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
Title : id |
1242
|
|
|
|
|
|
|
Usage : $id = $seq->id() |
1243
|
|
|
|
|
|
|
Function: This is mapped on display_id |
1244
|
|
|
|
|
|
|
Returns : value of display_id() |
1245
|
|
|
|
|
|
|
Args : [optional] value to update display_id |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=cut |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
sub id { |
1250
|
70
|
|
|
70
|
1
|
941
|
return shift->display_id(@_); |
1251
|
|
|
|
|
|
|
} |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
=head1 Seq only methods |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
These methods are specific to the Bio::Seq object, and not |
1257
|
|
|
|
|
|
|
found on the Bio::PrimarySeq object |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
=head2 primary_seq |
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
Title : primary_seq |
1262
|
|
|
|
|
|
|
Usage : $seq->primary_seq or $seq->primary_seq($newval) |
1263
|
|
|
|
|
|
|
Function: Get or set a PrimarySeq object |
1264
|
|
|
|
|
|
|
Example : |
1265
|
|
|
|
|
|
|
Returns : PrimarySeq object |
1266
|
|
|
|
|
|
|
Args : None or PrimarySeq object |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=cut |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
sub primary_seq { |
1271
|
17153
|
|
|
17153
|
1
|
12582
|
my ($obj,$value) = @_; |
1272
|
|
|
|
|
|
|
|
1273
|
17153
|
100
|
|
|
|
20672
|
if( defined $value) { |
1274
|
25
|
50
|
33
|
|
|
90
|
if( ! ref $value || ! $value->isa('Bio::PrimarySeqI') ) { |
1275
|
0
|
|
|
|
|
0
|
$obj->throw("$value is not a Bio::PrimarySeq compliant object"); |
1276
|
|
|
|
|
|
|
} |
1277
|
|
|
|
|
|
|
|
1278
|
25
|
|
|
|
|
23
|
$obj->{'primary_seq'} = $value; |
1279
|
|
|
|
|
|
|
# descend down over all seqfeature objects, seeing whether they |
1280
|
|
|
|
|
|
|
# want an attached seq. |
1281
|
|
|
|
|
|
|
|
1282
|
25
|
|
|
|
|
51
|
foreach my $sf ( $obj->get_SeqFeatures() ) { |
1283
|
0
|
|
|
|
|
0
|
$sf->attach_seq($value); |
1284
|
|
|
|
|
|
|
} |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
} |
1287
|
17153
|
|
|
|
|
21553
|
return $obj->{'primary_seq'}; |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
} |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
=head2 species |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
Title : species |
1295
|
|
|
|
|
|
|
Usage : $species = $seq->species() or $seq->species($species) |
1296
|
|
|
|
|
|
|
Function: Gets or sets the species |
1297
|
|
|
|
|
|
|
Returns : L object |
1298
|
|
|
|
|
|
|
Args : None or L object |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
See L for more information |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
=cut |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
sub species { |
1305
|
621
|
|
|
621
|
1
|
6581
|
my ($self, $species) = @_; |
1306
|
621
|
100
|
|
|
|
1159
|
if ($species) { |
1307
|
341
|
|
|
|
|
620
|
$self->{'species'} = $species; |
1308
|
|
|
|
|
|
|
} else { |
1309
|
280
|
|
|
|
|
1132
|
return $self->{'species'}; |
1310
|
|
|
|
|
|
|
} |
1311
|
|
|
|
|
|
|
} |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
# Internal methods follow... |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
# keep AUTOLOAD happy |
1317
|
|
|
|
0
|
|
|
sub DESTROY { } |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
############################################################################ |
1320
|
|
|
|
|
|
|
# aliases due to name changes or to compensate for our lack of consistency # |
1321
|
|
|
|
|
|
|
############################################################################ |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
# in all other modules we use the object in the singular -- |
1324
|
|
|
|
|
|
|
# lack of consistency sucks |
1325
|
|
|
|
|
|
|
*flush_SeqFeature = \&remove_SeqFeatures; |
1326
|
|
|
|
|
|
|
*flush_SeqFeatures = \&remove_SeqFeatures; |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
# this is now get_SeqFeatures() (from FeatureHolderI) |
1329
|
|
|
|
|
|
|
*top_SeqFeatures = \&get_SeqFeatures; |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
# this is now get_all_SeqFeatures() in FeatureHolderI |
1332
|
|
|
|
|
|
|
sub all_SeqFeatures{ |
1333
|
16
|
|
|
16
|
0
|
748
|
return shift->get_all_SeqFeatures(@_); |
1334
|
|
|
|
|
|
|
} |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
sub accession { |
1337
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
1338
|
0
|
|
|
|
|
|
$self->warn(ref($self)."::accession is deprecated, ". |
1339
|
|
|
|
|
|
|
"use accession_number() instead"); |
1340
|
0
|
|
|
|
|
|
return $self->accession_number(@_); |
1341
|
|
|
|
|
|
|
} |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
1; |