line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Cluster::UniGene.pm |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Andrew Macgregor |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Andrew Macgregor, Jo-Ann Stanton, David Green |
9
|
|
|
|
|
|
|
# Molecular Embryology Group, Anatomy & Structural Biology, University of Otago |
10
|
|
|
|
|
|
|
# http://meg.otago.ac.nz/ |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# _history |
15
|
|
|
|
|
|
|
# April 17, 2002 - Initial implementation by Andrew Macgregor |
16
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Bio::Cluster::UniGene - UniGene object |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
use Bio::Cluster::UniGene; |
25
|
|
|
|
|
|
|
use Bio::ClusterIO; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$stream = Bio::ClusterIO->new('-file' => "Hs.data", |
28
|
|
|
|
|
|
|
'-format' => "unigene"); |
29
|
|
|
|
|
|
|
# note: we quote -format to keep older perl's from complaining. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
while ( my $in = $stream->next_cluster() ) { |
32
|
|
|
|
|
|
|
print $in->unigene_id() . "\n"; |
33
|
|
|
|
|
|
|
while ( my $sequence = $in->next_seq() ) { |
34
|
|
|
|
|
|
|
print $sequence->accession_number() . "\n"; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
This UniGene object implements the L interface |
41
|
|
|
|
|
|
|
for the representation if UniGene clusters in Bioperl. It is returned |
42
|
|
|
|
|
|
|
by the L parser for unigene format and contains all |
43
|
|
|
|
|
|
|
the data associated with one UniGene record. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
This class implements several interfaces and hence can be used |
46
|
|
|
|
|
|
|
wherever instances of such interfaces are expected. In particular, the |
47
|
|
|
|
|
|
|
interfaces are L as the base interface for all cluster |
48
|
|
|
|
|
|
|
representations, and in addition L and |
49
|
|
|
|
|
|
|
L. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
The following lists the UniGene specific methods that are available |
52
|
|
|
|
|
|
|
(see below for details). Be aware next_XXX iterators take a snapshot |
53
|
|
|
|
|
|
|
of the array property when they are first called, and this snapshot is |
54
|
|
|
|
|
|
|
not reset until the iterator is exhausted. Hence, once called you need |
55
|
|
|
|
|
|
|
to exhaust the iterator to see any changes that have been made to the |
56
|
|
|
|
|
|
|
property in the meantime. You will usually want to use the |
57
|
|
|
|
|
|
|
non-iterator equivalents and loop over the elements yourself. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
new() - standard new call |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
unigene_id() - set/get unigene_id |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
title() - set/get title (description) |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
gene() - set/get gene |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
cytoband() - set/get cytoband |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
mgi() - set/get mgi |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
locuslink() - set/get locuslink |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
homol() - set/get homologene |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
gnm_terminus() - set/get gnm_terminus |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
scount() - set/get scount |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
express() - set/get express, currently takes/returns a reference to an |
80
|
|
|
|
|
|
|
array of expressed tissues |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
next_express() - returns the next tissue expression from the expressed |
83
|
|
|
|
|
|
|
tissue array |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
chromosome() - set/get chromosome, currently takes/returns a reference |
86
|
|
|
|
|
|
|
to an array of chromosome lines |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
next_chromosome() - returns the next chromosome line from the array of |
89
|
|
|
|
|
|
|
chromosome lines |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sts() - set/get sts, currently takes/returns a reference to an array |
92
|
|
|
|
|
|
|
of sts lines |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
next_sts() - returns the next sts line from the array of sts lines |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
txmap() - set/get txmap, currently takes/returns a reference to an |
97
|
|
|
|
|
|
|
array of txmap lines |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
next_txmap() - returns the next txmap line from the array of txmap |
100
|
|
|
|
|
|
|
lines |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
protsim() - set/get protsim, currently takes/returns a reference to an |
103
|
|
|
|
|
|
|
array of protsim lines |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
next_protsim() - returns the next protsim line from the array of |
106
|
|
|
|
|
|
|
protsim lines |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sequences() - set/get sequence, currently takes/returns a reference to |
109
|
|
|
|
|
|
|
an array of references to seq info |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
next_seq() - returns a Seq object that currently only contains an |
112
|
|
|
|
|
|
|
accession number |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 Implemented Interfaces |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
This class implementes the following interfaces. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=over 4 |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item Bio::Cluster::UniGeneI |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
This includes implementing Bio::ClusterI. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item Bio::IdentifiableI |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=item Bio::DescribableI |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item Bio::AnnotatableI |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item Bio::Factory::SequenceStreamI |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=back |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 FEEDBACK |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 Mailing Lists |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
141
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
142
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
145
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 Support |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
I |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
154
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
155
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
156
|
|
|
|
|
|
|
with code and data examples if at all possible. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 Reporting Bugs |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
161
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
162
|
|
|
|
|
|
|
web: |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head1 AUTHOR - Andrew Macgregor |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Email andrew at cbbc.murdoch.edu.au |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Hilmar Lapp, hlapp at gmx.net |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 APPENDIX |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
178
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a "_". |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# Let the code begin... |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
package Bio::Cluster::UniGene; |
186
|
3
|
|
|
3
|
|
878
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
96
|
|
187
|
|
|
|
|
|
|
|
188
|
3
|
|
|
3
|
|
999
|
use Bio::Annotation::Collection; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
73
|
|
189
|
3
|
|
|
3
|
|
893
|
use Bio::Annotation::DBLink; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
73
|
|
190
|
3
|
|
|
3
|
|
15
|
use Bio::Annotation::SimpleValue; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
47
|
|
191
|
3
|
|
|
3
|
|
892
|
use Bio::Species; |
|
3
|
|
|
|
|
102
|
|
|
3
|
|
|
|
|
95
|
|
192
|
3
|
|
|
3
|
|
1024
|
use Bio::Seq::SeqFactory; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
90
|
|
193
|
|
|
|
|
|
|
|
194
|
3
|
|
|
3
|
|
18
|
use base qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI); |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
968
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
my %species_map = ( |
197
|
|
|
|
|
|
|
'Aga' => "Anopheles gambiae", |
198
|
|
|
|
|
|
|
'Ame' => "Apis mellifera", |
199
|
|
|
|
|
|
|
'At' => "Arabidopsis thaliana", |
200
|
|
|
|
|
|
|
'Bmo' => "Bombyx mori", |
201
|
|
|
|
|
|
|
'Bt' => "Bos taurus", |
202
|
|
|
|
|
|
|
'Cel' => "Caenorhabditis elegans", |
203
|
|
|
|
|
|
|
'Cfa' => "Canine familiaris", |
204
|
|
|
|
|
|
|
'Cin' => "Ciona intestinalis", |
205
|
|
|
|
|
|
|
'Cre' => "Chlamydomonas reinhardtii", |
206
|
|
|
|
|
|
|
'Csa' => "Ciona savignyi", |
207
|
|
|
|
|
|
|
'Csi' => "Citrus sinensis", |
208
|
|
|
|
|
|
|
'Ddi' => "Dictyostelium discoideum", |
209
|
|
|
|
|
|
|
'Dr' => "Danio rerio", |
210
|
|
|
|
|
|
|
'Dm' => "Drosophila melanogaster", |
211
|
|
|
|
|
|
|
'Gga' => "Gallus gallus", |
212
|
|
|
|
|
|
|
'Gma' => "Glycine max", |
213
|
|
|
|
|
|
|
'Han' => "Helianthus annus", |
214
|
|
|
|
|
|
|
'Hs' => "Homo sapiens", |
215
|
|
|
|
|
|
|
'Hma' => "Hydra magnipapillata", |
216
|
|
|
|
|
|
|
'Hv' => "Hordeum vulgare", |
217
|
|
|
|
|
|
|
'Lco' => "Lotus corniculatus", |
218
|
|
|
|
|
|
|
'Les' => "Lycopersicon esculentum", |
219
|
|
|
|
|
|
|
'Lsa' => "Lactuca sativa", |
220
|
|
|
|
|
|
|
'Mdo' => "Malus x domestica", |
221
|
|
|
|
|
|
|
'Mgr' => "Magnaporthe grisea", |
222
|
|
|
|
|
|
|
'Mm' => "Mus musculus", |
223
|
|
|
|
|
|
|
'Mtr' => "Medicago truncatula", |
224
|
|
|
|
|
|
|
'Ncr' => "Neurospora crassa", |
225
|
|
|
|
|
|
|
'Oar' => "Ovis aries", |
226
|
|
|
|
|
|
|
'Omy' => "Oncorhynchus mykiss", |
227
|
|
|
|
|
|
|
'Os' => "Oryza sativa", |
228
|
|
|
|
|
|
|
'Ola' => "Oryzias latipes", |
229
|
|
|
|
|
|
|
'Ppa' => "Physcomitrella patens", |
230
|
|
|
|
|
|
|
'Pta' => "Pinus taeda", |
231
|
|
|
|
|
|
|
'Ptp' => "Populus tremula x Populus tremuloides", |
232
|
|
|
|
|
|
|
'Rn' => "Rattus norvegicus", |
233
|
|
|
|
|
|
|
'Sbi' => "Sorghum bicolor", |
234
|
|
|
|
|
|
|
'Sma' => "Schistosoma mansoni", |
235
|
|
|
|
|
|
|
'Sof' => "Saccharum officinarum", |
236
|
|
|
|
|
|
|
'Spu' => "Strongylocentrotus purpuratus", |
237
|
|
|
|
|
|
|
'Ssa' => "Salmo salar", |
238
|
|
|
|
|
|
|
'Ssc' => "Sus scrofa", |
239
|
|
|
|
|
|
|
'Str' => "Xenopus tropicalis", |
240
|
|
|
|
|
|
|
'Stu' => "Solanum tuberosum", |
241
|
|
|
|
|
|
|
'Ta' => "Triticum aestivum", |
242
|
|
|
|
|
|
|
'Tgo' => "Toxoplasma gondii", |
243
|
|
|
|
|
|
|
'Tru' => "Takifugu rubripes", |
244
|
|
|
|
|
|
|
'Vvi' => "Vitis vinifera", |
245
|
|
|
|
|
|
|
'Xl' => "Xenopus laevis", |
246
|
|
|
|
|
|
|
'Zm' => "Zea mays", |
247
|
|
|
|
|
|
|
); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=head2 new |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Title : new |
253
|
|
|
|
|
|
|
Usage : used by ClusterIO |
254
|
|
|
|
|
|
|
Returns : a new Bio::Cluster::Unigene object |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub new { |
259
|
|
|
|
|
|
|
# standard new call.. |
260
|
6
|
|
|
6
|
1
|
225
|
my($caller,@args) = @_; |
261
|
6
|
|
|
|
|
32
|
my $self = $caller->SUPER::new(@args); |
262
|
|
|
|
|
|
|
|
263
|
6
|
|
|
|
|
39
|
my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) = |
264
|
|
|
|
|
|
|
$self->_rearrange([qw(UNIGENE_ID |
265
|
|
|
|
|
|
|
DESCRIPTION |
266
|
|
|
|
|
|
|
MEMBERS |
267
|
|
|
|
|
|
|
SIZE |
268
|
|
|
|
|
|
|
SPECIES |
269
|
|
|
|
|
|
|
DISPLAY_ID |
270
|
|
|
|
|
|
|
OBJECT_ID |
271
|
|
|
|
|
|
|
NAMESPACE |
272
|
|
|
|
|
|
|
AUTHORITY |
273
|
|
|
|
|
|
|
VERSION |
274
|
|
|
|
|
|
|
SEQFACTORY |
275
|
|
|
|
|
|
|
)], @args); |
276
|
|
|
|
|
|
|
|
277
|
6
|
|
|
|
|
30
|
$self->{'_alphabet'} = 'dna'; |
278
|
|
|
|
|
|
|
|
279
|
6
|
50
|
|
|
|
14
|
$self->unigene_id($ugid) if $ugid; |
280
|
6
|
100
|
|
|
|
17
|
$self->description($desc) if $desc; |
281
|
6
|
100
|
|
|
|
15
|
$self->sequences($mems) if $mems; |
282
|
6
|
100
|
|
|
|
15
|
$self->size($size) if defined($size); |
283
|
6
|
100
|
|
|
|
16
|
$self->display_id($dispid) if $dispid; # overwrites ugid |
284
|
6
|
50
|
|
|
|
12
|
$self->object_id($id) if $id; # overwrites dispid |
285
|
6
|
|
100
|
|
|
32
|
$self->namespace($ns || 'UniGene'); |
286
|
6
|
|
50
|
|
|
24
|
$self->authority($auth || 'NCBI'); |
287
|
6
|
50
|
|
|
|
17
|
$self->version($v) if defined($v); |
288
|
6
|
50
|
|
|
|
18
|
if( ! defined $seqfact ) { |
289
|
6
|
|
|
|
|
27
|
$seqfact = Bio::Seq::SeqFactory->new |
290
|
|
|
|
|
|
|
(-verbose => $self->verbose(), |
291
|
|
|
|
|
|
|
-type => 'Bio::Seq::RichSeq'); |
292
|
|
|
|
|
|
|
} |
293
|
6
|
|
|
|
|
25
|
$self->sequence_factory($seqfact); |
294
|
6
|
100
|
66
|
|
|
28
|
if( (! $species) && (defined $self->unigene_id() && |
|
|
|
66
|
|
|
|
|
295
|
|
|
|
|
|
|
$self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) { |
296
|
|
|
|
|
|
|
# try set a default one depending on the ID |
297
|
4
|
|
|
|
|
11
|
$species = $species_map{$1}; |
298
|
|
|
|
|
|
|
} |
299
|
6
|
|
|
|
|
21
|
$self->species($species); |
300
|
6
|
|
|
|
|
28
|
return $self; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 L methods |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 unigene_id |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Title : unigene_id |
311
|
|
|
|
|
|
|
Usage : unigene_id(); |
312
|
|
|
|
|
|
|
Function: Returns the unigene_id associated with the object. |
313
|
|
|
|
|
|
|
Example : $id = $unigene->unigene_id or $unigene->unigene_id($id) |
314
|
|
|
|
|
|
|
Returns : A string |
315
|
|
|
|
|
|
|
Args : None or an id |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub unigene_id { |
321
|
19
|
|
|
19
|
1
|
291
|
my ($obj,$value) = @_; |
322
|
19
|
100
|
|
|
|
38
|
if( defined $value) { |
323
|
5
|
|
|
|
|
9
|
$obj->{'unigene_id'} = $value; |
324
|
|
|
|
|
|
|
} |
325
|
19
|
|
|
|
|
70
|
return $obj->{'unigene_id'}; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head2 title |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Title : title |
333
|
|
|
|
|
|
|
Usage : title(); |
334
|
|
|
|
|
|
|
Function: Returns the title associated with the object. |
335
|
|
|
|
|
|
|
Example : $title = $unigene->title or $unigene->title($title) |
336
|
|
|
|
|
|
|
Returns : A string |
337
|
|
|
|
|
|
|
Args : None or a title |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=cut |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub title { |
343
|
8
|
|
|
8
|
1
|
15
|
my ($obj,$value) = @_; |
344
|
8
|
100
|
|
|
|
15
|
if( defined $value) { |
345
|
4
|
|
|
|
|
7
|
$obj->{'title'} = $value; |
346
|
|
|
|
|
|
|
} |
347
|
8
|
|
|
|
|
21
|
return $obj->{'title'}; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=head2 gene |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Title : gene |
354
|
|
|
|
|
|
|
Usage : gene(); |
355
|
|
|
|
|
|
|
Function: Returns the gene associated with the object. |
356
|
|
|
|
|
|
|
Example : $gene = $unigene->gene or $unigene->gene($gene) |
357
|
|
|
|
|
|
|
Returns : A string |
358
|
|
|
|
|
|
|
Args : None or a gene |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=cut |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub gene { |
364
|
4
|
|
|
4
|
1
|
7
|
my $self = shift; |
365
|
4
|
|
|
|
|
9
|
return $self->_annotation_value('gene_name', @_); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head2 cytoband |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Title : cytoband |
372
|
|
|
|
|
|
|
Usage : cytoband(); |
373
|
|
|
|
|
|
|
Function: Returns the cytoband associated with the object. |
374
|
|
|
|
|
|
|
Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband) |
375
|
|
|
|
|
|
|
Returns : A string |
376
|
|
|
|
|
|
|
Args : None or a cytoband |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=cut |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub cytoband { |
382
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
383
|
4
|
|
|
|
|
8
|
return $self->_annotation_value('cyto_band', @_); |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 mgi |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Title : mgi |
389
|
|
|
|
|
|
|
Usage : mgi(); |
390
|
|
|
|
|
|
|
Function: Returns the mgi associated with the object. |
391
|
|
|
|
|
|
|
Example : $mgi = $unigene->mgi or $unigene->mgi($mgi) |
392
|
|
|
|
|
|
|
Returns : A string |
393
|
|
|
|
|
|
|
Args : None or a mgi |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub mgi { |
399
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
400
|
0
|
|
|
|
|
0
|
my $acc; |
401
|
|
|
|
|
|
|
|
402
|
0
|
0
|
|
|
|
0
|
if(@_) { |
403
|
|
|
|
|
|
|
# purge first |
404
|
0
|
|
|
|
|
0
|
$self->_remove_dblink('dblink','MGI'); |
405
|
|
|
|
|
|
|
# then add if a valid value is present |
406
|
0
|
0
|
|
|
|
0
|
if($acc = shift) { |
407
|
0
|
|
|
|
|
0
|
$self->_annotation_dblink('dblink','MGI',$acc); |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
} else { |
410
|
0
|
|
|
|
|
0
|
($acc) = $self->_annotation_dblink('dblink','MGI'); |
411
|
|
|
|
|
|
|
} |
412
|
0
|
|
|
|
|
0
|
return $acc; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head2 locuslink |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Title : locuslink |
419
|
|
|
|
|
|
|
Usage : locuslink(); |
420
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing locuslink data. |
421
|
|
|
|
|
|
|
Returns : An array reference |
422
|
|
|
|
|
|
|
Args : None or an array reference |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=cut |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
sub locuslink { |
427
|
7
|
|
|
7
|
1
|
14
|
my ($self,$ll) = @_; |
428
|
|
|
|
|
|
|
|
429
|
7
|
100
|
|
|
|
17
|
if($ll) { |
430
|
|
|
|
|
|
|
# purge first |
431
|
4
|
|
|
|
|
11
|
$self->_remove_dblink('dblink','LocusLink'); |
432
|
|
|
|
|
|
|
# then add as many accessions as are present |
433
|
4
|
|
|
|
|
6
|
foreach my $acc (@$ll) { |
434
|
3
|
|
|
|
|
9
|
$self->_annotation_dblink('dblink','LocusLink',$acc); |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
} else { |
437
|
3
|
|
|
|
|
9
|
my @accs = $self->_annotation_dblink('dblink','LocusLink'); |
438
|
3
|
|
|
|
|
8
|
$ll = [@accs]; |
439
|
|
|
|
|
|
|
} |
440
|
7
|
|
|
|
|
18
|
return $ll; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=head2 homol |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
Title : homol |
447
|
|
|
|
|
|
|
Usage : homol(); |
448
|
|
|
|
|
|
|
Function: Returns the homol entry associated with the object. |
449
|
|
|
|
|
|
|
Example : $homol = $unigene->homol or $unigene->homol($homol) |
450
|
|
|
|
|
|
|
Returns : A string |
451
|
|
|
|
|
|
|
Args : None or a homol entry |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=cut |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub homol { |
456
|
6
|
|
|
6
|
1
|
12
|
my $self = shift; |
457
|
6
|
|
|
|
|
12
|
return $self->_annotation_value('homol', @_); |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 restr_expr |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Title : restr_expr |
464
|
|
|
|
|
|
|
Usage : restr_expr(); |
465
|
|
|
|
|
|
|
Function: Returns the restr_expr entry associated with the object. |
466
|
|
|
|
|
|
|
Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr) |
467
|
|
|
|
|
|
|
Returns : A string |
468
|
|
|
|
|
|
|
Args : None or a restr_expr entry |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub restr_expr { |
473
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
474
|
4
|
|
|
|
|
9
|
return $self->_annotation_value('restr_expr', @_); |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head2 gnm_terminus |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Title : gnm_terminus |
481
|
|
|
|
|
|
|
Usage : gnm_terminus(); |
482
|
|
|
|
|
|
|
Function: Returns the gnm_terminus associated with the object. |
483
|
|
|
|
|
|
|
Example : $gnm_terminus = $unigene->gnm_terminus or |
484
|
|
|
|
|
|
|
$unigene->gnm_terminus($gnm_terminus) |
485
|
|
|
|
|
|
|
Returns : A string |
486
|
|
|
|
|
|
|
Args : None or a gnm_terminus |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=cut |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub gnm_terminus { |
491
|
4
|
|
|
4
|
1
|
7
|
my $self = shift; |
492
|
4
|
|
|
|
|
8
|
return $self->_annotation_value('gnm_terminus', @_); |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head2 scount |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Title : scount |
498
|
|
|
|
|
|
|
Usage : scount(); |
499
|
|
|
|
|
|
|
Function: Returns the scount associated with the object. |
500
|
|
|
|
|
|
|
Example : $scount = $unigene->scount or $unigene->scount($scount) |
501
|
|
|
|
|
|
|
Returns : A string |
502
|
|
|
|
|
|
|
Args : None or a scount |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=cut |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
sub scount { |
507
|
4
|
|
|
4
|
1
|
9
|
my ($obj,$value) = @_; |
508
|
4
|
100
|
66
|
|
|
21
|
if( defined $value) { |
|
|
100
|
|
|
|
|
|
509
|
1
|
|
|
|
|
3
|
$obj->{'scount'} = $value; |
510
|
|
|
|
|
|
|
} elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) { |
511
|
2
|
|
|
|
|
7
|
$obj->{'scount'} = $obj->size(); |
512
|
|
|
|
|
|
|
} |
513
|
4
|
|
|
|
|
10
|
return $obj->{'scount'}; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head2 express |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
Title : express |
520
|
|
|
|
|
|
|
Usage : express(); |
521
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing |
522
|
|
|
|
|
|
|
tissue expression data |
523
|
|
|
|
|
|
|
Returns : An array reference |
524
|
|
|
|
|
|
|
Args : None or an array reference |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=cut |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
sub express { |
529
|
7
|
|
|
7
|
1
|
861
|
my $self = shift; |
530
|
|
|
|
|
|
|
|
531
|
7
|
|
|
|
|
18
|
return $self->_annotation_value_ary('expressed',@_); |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=head2 chromosome |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Title : chromosome |
538
|
|
|
|
|
|
|
Usage : chromosome(); |
539
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing |
540
|
|
|
|
|
|
|
chromosome lines |
541
|
|
|
|
|
|
|
Returns : An array reference |
542
|
|
|
|
|
|
|
Args : None or an array reference |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=cut |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
sub chromosome { |
547
|
7
|
|
|
7
|
1
|
447
|
my $self = shift; |
548
|
|
|
|
|
|
|
|
549
|
7
|
|
|
|
|
14
|
return $self->_annotation_value_ary('chromosome',@_); |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head2 sts |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Title : sts |
556
|
|
|
|
|
|
|
Usage : sts(); |
557
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing sts lines |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Returns : An array reference |
560
|
|
|
|
|
|
|
Args : None or an array reference |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=cut |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub sts { |
565
|
7
|
|
|
7
|
1
|
896
|
my $self = shift; |
566
|
|
|
|
|
|
|
|
567
|
7
|
|
|
|
|
16
|
return $self->_annotation_value_ary('sts',@_); |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 txmap |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Title : txmap |
574
|
|
|
|
|
|
|
Usage : txmap(); |
575
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing txmap lines |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Returns : An array reference |
578
|
|
|
|
|
|
|
Args : None or an array reference |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=cut |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
sub txmap { |
583
|
6
|
|
|
6
|
1
|
851
|
my $self = shift; |
584
|
|
|
|
|
|
|
|
585
|
6
|
|
|
|
|
16
|
return $self->_annotation_value_ary('txmap',@_); |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=head2 protsim |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
Title : protsim |
592
|
|
|
|
|
|
|
Usage : protsim(); |
593
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing protsim lines |
594
|
|
|
|
|
|
|
This should really only be used by ClusterIO, not directly |
595
|
|
|
|
|
|
|
Returns : An array reference |
596
|
|
|
|
|
|
|
Args : None or an array reference |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=cut |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
sub protsim { |
601
|
7
|
|
|
7
|
1
|
848
|
my $self = shift; |
602
|
|
|
|
|
|
|
|
603
|
7
|
|
|
|
|
15
|
return $self->_annotation_value_ary('protsim',@_); |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head2 sequences |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Title : sequences |
610
|
|
|
|
|
|
|
Usage : sequences(); |
611
|
|
|
|
|
|
|
Function: Returns or stores a reference to an array containing |
612
|
|
|
|
|
|
|
sequence data. |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
This is mostly reserved for ClusterIO parsers. You should |
615
|
|
|
|
|
|
|
use get_members() for get and add_member()/remove_members() |
616
|
|
|
|
|
|
|
for set. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Returns : An array reference, or undef |
619
|
|
|
|
|
|
|
Args : None or an array reference or undef |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=cut |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
sub sequences { |
624
|
27
|
|
|
27
|
1
|
31
|
my $self = shift; |
625
|
|
|
|
|
|
|
|
626
|
27
|
100
|
|
|
|
86
|
return $self->{'members'} = shift if @_; |
627
|
22
|
|
|
|
|
55
|
return $self->{'members'}; |
628
|
|
|
|
|
|
|
} |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=head2 species |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
Title : species |
633
|
|
|
|
|
|
|
Usage : $obj->species($newval) |
634
|
|
|
|
|
|
|
Function: Get/set the species object for this Unigene cluster. |
635
|
|
|
|
|
|
|
Example : |
636
|
|
|
|
|
|
|
Returns : value of species (a L object) |
637
|
|
|
|
|
|
|
Args : on set, new value (a L object or |
638
|
|
|
|
|
|
|
the binomial name, or undef, optional) |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=cut |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
sub species{ |
644
|
92
|
|
|
92
|
1
|
128
|
my $self = shift; |
645
|
|
|
|
|
|
|
|
646
|
92
|
100
|
|
|
|
139
|
if(@_) { |
647
|
6
|
|
|
|
|
11
|
my $species = shift; |
648
|
6
|
100
|
66
|
|
|
22
|
if($species && (! ref($species))) { |
649
|
4
|
|
|
|
|
14
|
my @class = reverse(split(' ',$species)); |
650
|
4
|
|
|
|
|
24
|
$species = Bio::Species->new(-classification => \@class); |
651
|
|
|
|
|
|
|
} |
652
|
6
|
|
|
|
|
17
|
return $self->{'species'} = $species; |
653
|
|
|
|
|
|
|
} |
654
|
86
|
|
|
|
|
321
|
return $self->{'species'}; |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head1 L methods |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=cut |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 display_id |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Title : display_id |
665
|
|
|
|
|
|
|
Usage : |
666
|
|
|
|
|
|
|
Function: Get/set the display name or identifier for the cluster |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
This is aliased to unigene_id(). |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
Returns : a string |
671
|
|
|
|
|
|
|
Args : optional, on set the display ID ( a string) |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=cut |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
sub display_id{ |
676
|
5
|
|
|
5
|
1
|
27
|
return shift->unigene_id(@_); |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head2 description |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Title : description |
682
|
|
|
|
|
|
|
Usage : Bio::ClusterI->description("POLYUBIQUITIN") |
683
|
|
|
|
|
|
|
Function: get/set for the consensus description of the cluster |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
This is aliased to title(). |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Returns : the description string |
688
|
|
|
|
|
|
|
Args : Optional the description string |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=cut |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
sub description{ |
693
|
4
|
|
|
4
|
1
|
11
|
return shift->title(@_); |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=head2 size |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Title : size |
699
|
|
|
|
|
|
|
Usage : Bio::ClusterI->size(); |
700
|
|
|
|
|
|
|
Function: get for the size of the family, |
701
|
|
|
|
|
|
|
calculated from the number of members |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
This is aliased to scount(). |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Returns : the size of the cluster |
706
|
|
|
|
|
|
|
Args : |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=cut |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
sub size { |
711
|
6
|
|
|
6
|
1
|
545
|
my $self = shift; |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# hard-wiring the size is allowed if there are no sequences |
714
|
6
|
50
|
|
|
|
10
|
return $self->scount(@_) unless defined($self->sequences()); |
715
|
|
|
|
|
|
|
# but we can't change the number of members through this method |
716
|
6
|
|
|
|
|
9
|
my $n = scalar(@{$self->sequences()}); |
|
6
|
|
|
|
|
10
|
|
717
|
6
|
50
|
66
|
|
|
23
|
if(@_ && ($n != $_[0])) { |
718
|
0
|
|
|
|
|
0
|
$self->throw("Cannot change cluster size using size() from $n to ". |
719
|
|
|
|
|
|
|
$_[0]); |
720
|
|
|
|
|
|
|
} |
721
|
6
|
|
|
|
|
12
|
return $n; |
722
|
|
|
|
|
|
|
} |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=head2 cluster_score |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
Title : cluster_score |
727
|
|
|
|
|
|
|
Usage : $cluster ->cluster_score(100); |
728
|
|
|
|
|
|
|
Function: get/set for cluster_score which |
729
|
|
|
|
|
|
|
represent the score in which the clustering |
730
|
|
|
|
|
|
|
algorithm assigns to this cluster. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
For UniGene clusters, there really is no cluster score that |
733
|
|
|
|
|
|
|
would come with the data. However, we provide an |
734
|
|
|
|
|
|
|
implementation here so that you can score UniGene clusters |
735
|
|
|
|
|
|
|
if you want to. |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Returns : a number |
738
|
|
|
|
|
|
|
Args : optionally, on set a number |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
=cut |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub cluster_score{ |
743
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
744
|
|
|
|
|
|
|
|
745
|
0
|
0
|
|
|
|
0
|
return $self->{'cluster_score'} = shift if @_; |
746
|
0
|
|
|
|
|
0
|
return $self->{'cluster_score'}; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=head2 get_members |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Title : get_members |
752
|
|
|
|
|
|
|
Usage : Bio::ClusterI->get_members(($seq1, $seq2)); |
753
|
|
|
|
|
|
|
Function: retrieve the members of the family by some criteria |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
Will return all members if no criteria are provided. |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
At this time this implementation does not support |
758
|
|
|
|
|
|
|
specifying criteria and will always return all members. |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Returns : the array of members |
761
|
|
|
|
|
|
|
Args : |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=cut |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub get_members { |
766
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
767
|
|
|
|
|
|
|
|
768
|
2
|
|
50
|
|
|
4
|
my $mems = $self->sequences() || []; |
769
|
|
|
|
|
|
|
# already objects? |
770
|
2
|
50
|
33
|
|
|
13
|
if(@$mems && (ref($mems->[0]) eq "HASH")) { |
771
|
|
|
|
|
|
|
# nope, we need to build the object list from scratch |
772
|
2
|
|
|
|
|
4
|
my @memlist = (); |
773
|
2
|
|
|
|
|
4
|
while(my $seq = $self->next_seq()) { |
774
|
57
|
|
|
|
|
143
|
push(@memlist, $seq); |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
# we cache this array of objects as the new member list |
777
|
2
|
|
|
|
|
4
|
$mems = \@memlist; |
778
|
2
|
|
|
|
|
5
|
$self->sequences($mems); |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
# done |
781
|
2
|
|
|
|
|
12
|
return @$mems; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=head1 Annotatable view at the object properties |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=cut |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
=head2 annotation |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
Title : annotation |
792
|
|
|
|
|
|
|
Usage : $obj->annotation($newval) |
793
|
|
|
|
|
|
|
Function: Get/set the L object for |
794
|
|
|
|
|
|
|
this UniGene cluster. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Many attributes of this class are actually stored within |
797
|
|
|
|
|
|
|
the annotation collection object as L |
798
|
|
|
|
|
|
|
compliant objects, so you can conveniently access them |
799
|
|
|
|
|
|
|
through the same interface as you would e.g. access |
800
|
|
|
|
|
|
|
L annotation properties. |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
If you call this method in set mode and replace the |
803
|
|
|
|
|
|
|
annotation collection with another one you should know |
804
|
|
|
|
|
|
|
exactly what you are doing. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
Example : |
807
|
|
|
|
|
|
|
Returns : a L compliant object |
808
|
|
|
|
|
|
|
Args : on set, new value (a L |
809
|
|
|
|
|
|
|
compliant object or undef, optional) |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=cut |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
sub annotation{ |
815
|
72
|
|
|
72
|
1
|
76
|
my $self = shift; |
816
|
|
|
|
|
|
|
|
817
|
72
|
50
|
|
|
|
160
|
if(@_) { |
|
|
100
|
|
|
|
|
|
818
|
0
|
|
|
|
|
0
|
return $self->{'annotation'} = shift; |
819
|
|
|
|
|
|
|
} elsif(! exists($self->{'annotation'})) { |
820
|
3
|
|
|
|
|
12
|
$self->{'annotation'} = Bio::Annotation::Collection->new(); |
821
|
|
|
|
|
|
|
} |
822
|
72
|
|
|
|
|
132
|
return $self->{'annotation'}; |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=head1 Implementation specific methods |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
These are mostly for adding/removing to array properties, and for |
829
|
|
|
|
|
|
|
methods with special functionality. |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
=cut |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=head2 add_member |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
Title : add_member |
836
|
|
|
|
|
|
|
Usage : |
837
|
|
|
|
|
|
|
Function: Adds a member object to the list of members. |
838
|
|
|
|
|
|
|
Example : |
839
|
|
|
|
|
|
|
Returns : TRUE if the new member was successfully added, and FALSE |
840
|
|
|
|
|
|
|
otherwise. |
841
|
|
|
|
|
|
|
Args : The member to add. |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=cut |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
sub add_member{ |
847
|
0
|
|
|
0
|
1
|
0
|
my ($self,@mems) = @_; |
848
|
|
|
|
|
|
|
|
849
|
0
|
|
0
|
|
|
0
|
my $memlist = $self->{'members'} || []; |
850
|
|
|
|
|
|
|
# this is an object interface; is the member list already objects? |
851
|
0
|
0
|
0
|
|
|
0
|
if(@$memlist && (ref($memlist->[0]) eq "HASH")) { |
852
|
|
|
|
|
|
|
# nope, convert to objects |
853
|
0
|
|
|
|
|
0
|
$memlist = [$self->get_members()]; |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
# add new member(s) |
856
|
0
|
|
|
|
|
0
|
push(@$memlist, @mems); |
857
|
|
|
|
|
|
|
# store if we created this array ref ourselves |
858
|
0
|
|
|
|
|
0
|
$self->sequences($memlist); |
859
|
|
|
|
|
|
|
# done |
860
|
0
|
|
|
|
|
0
|
return 1; |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
=head2 remove_members |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
Title : remove_members |
866
|
|
|
|
|
|
|
Usage : |
867
|
|
|
|
|
|
|
Function: Remove the list of members for this cluster such that the |
868
|
|
|
|
|
|
|
member list is undefined afterwards (as opposed to zero members). |
869
|
|
|
|
|
|
|
Example : |
870
|
|
|
|
|
|
|
Returns : the previous list of members |
871
|
|
|
|
|
|
|
Args : none |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=cut |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
sub remove_members{ |
877
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
878
|
|
|
|
|
|
|
|
879
|
0
|
|
|
|
|
0
|
my @mems = $self->get_members(); |
880
|
0
|
|
|
|
|
0
|
$self->sequences(undef); |
881
|
0
|
|
|
|
|
0
|
return @mems; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
=head2 next_locuslink |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
Title : next_locuslink |
888
|
|
|
|
|
|
|
Usage : next_locuslink(); |
889
|
|
|
|
|
|
|
Function: Returns the next locuslink from an array referred |
890
|
|
|
|
|
|
|
to using $obj->{'locuslink'} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
893
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
894
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
895
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Example : while ( my $locuslink = $in->next_locuslink() ) { |
898
|
|
|
|
|
|
|
print "$locuslink\n"; |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
Returns : String |
901
|
|
|
|
|
|
|
Args : None |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
=cut |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
sub next_locuslink { |
906
|
3
|
|
|
3
|
1
|
10
|
my ($obj) = @_; |
907
|
|
|
|
|
|
|
|
908
|
3
|
|
|
|
|
6
|
return $obj->_next_element("ll","locuslink"); |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=head2 next_express |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
Title : next_express |
914
|
|
|
|
|
|
|
Usage : next_express(); |
915
|
|
|
|
|
|
|
Function: Returns the next tissue from an array referred |
916
|
|
|
|
|
|
|
to using $obj->{'express'} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
919
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
920
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
921
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
Example : while ( my $express = $in->next_express() ) { |
924
|
|
|
|
|
|
|
print "$express\n"; |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
Returns : String |
927
|
|
|
|
|
|
|
Args : None |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=cut |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
sub next_express { |
932
|
5
|
|
|
5
|
1
|
19
|
my ($obj) = @_; |
933
|
|
|
|
|
|
|
|
934
|
5
|
|
|
|
|
7
|
return $obj->_next_element("express","express"); |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
=head2 next_chromosome |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Title : next_chromosome |
941
|
|
|
|
|
|
|
Usage : next_chromosome(); |
942
|
|
|
|
|
|
|
Function: Returns the next chromosome line from an array referred |
943
|
|
|
|
|
|
|
to using $obj->{'chromosome'} |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
946
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
947
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
948
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
Example : while ( my $chromosome = $in->next_chromosome() ) { |
951
|
|
|
|
|
|
|
print "$chromosome\n"; |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
Returns : String |
954
|
|
|
|
|
|
|
Args : None |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=cut |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
sub next_chromosome { |
959
|
3
|
|
|
3
|
1
|
11
|
my ($obj) = @_; |
960
|
|
|
|
|
|
|
|
961
|
3
|
|
|
|
|
4
|
return $obj->_next_element("chr","chromosome"); |
962
|
|
|
|
|
|
|
} |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
=head2 next_protsim |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
Title : next_protsim |
968
|
|
|
|
|
|
|
Usage : next_protsim(); |
969
|
|
|
|
|
|
|
Function: Returns the next protsim line from an array referred |
970
|
|
|
|
|
|
|
to using $obj->{'protsim'} |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
973
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
974
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
975
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
Example : while ( my $protsim = $in->next_protsim() ) { |
978
|
|
|
|
|
|
|
print "$protsim\n"; |
979
|
|
|
|
|
|
|
} |
980
|
|
|
|
|
|
|
Returns : String |
981
|
|
|
|
|
|
|
Args : None |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=cut |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
sub next_protsim { |
986
|
3
|
|
|
3
|
1
|
13
|
my ($obj) = @_; |
987
|
|
|
|
|
|
|
|
988
|
3
|
|
|
|
|
5
|
return $obj->_next_element("protsim","protsim"); |
989
|
|
|
|
|
|
|
} |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
=head2 next_sts |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
Title : next_sts |
995
|
|
|
|
|
|
|
Usage : next_sts(); |
996
|
|
|
|
|
|
|
Function: Returns the next sts line from an array referred |
997
|
|
|
|
|
|
|
to using $obj->{'sts'} |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
1000
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
1001
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
1002
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
Example : while ( my $sts = $in->next_sts() ) { |
1005
|
|
|
|
|
|
|
print "$sts\n"; |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
Returns : String |
1008
|
|
|
|
|
|
|
Args : None |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
=cut |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
sub next_sts { |
1013
|
3
|
|
|
3
|
1
|
11
|
my ($obj) = @_; |
1014
|
|
|
|
|
|
|
|
1015
|
3
|
|
|
|
|
5
|
return $obj->_next_element("sts","sts"); |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
=head2 next_txmap |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
Title : next_txmap |
1022
|
|
|
|
|
|
|
Usage : next_txmap(); |
1023
|
|
|
|
|
|
|
Function: Returns the next txmap line from an array |
1024
|
|
|
|
|
|
|
referred to using $obj->{'txmap'} |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
If you call this iterator again after it returned undef, it |
1027
|
|
|
|
|
|
|
will re-cycle through the list of elements. Changes in the |
1028
|
|
|
|
|
|
|
underlying array property while you loop over this iterator |
1029
|
|
|
|
|
|
|
will not be reflected until you exhaust the iterator. |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
Example : while ( my $tsmap = $in->next_txmap() ) { |
1032
|
|
|
|
|
|
|
print "$txmap\n"; |
1033
|
|
|
|
|
|
|
} |
1034
|
|
|
|
|
|
|
Returns : String |
1035
|
|
|
|
|
|
|
Args : None |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
=cut |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
sub next_txmap { |
1040
|
3
|
|
|
3
|
1
|
10
|
my ($obj) = @_; |
1041
|
|
|
|
|
|
|
|
1042
|
3
|
|
|
|
|
5
|
return $obj->_next_element("txmap","txmap"); |
1043
|
|
|
|
|
|
|
} |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
############################### |
1046
|
|
|
|
|
|
|
# private method |
1047
|
|
|
|
|
|
|
# |
1048
|
|
|
|
|
|
|
# args: prefix name for the queue |
1049
|
|
|
|
|
|
|
# name of the method from which to re-fill |
1050
|
|
|
|
|
|
|
# returns: the next element from that queue, or undef if the queue is empty |
1051
|
|
|
|
|
|
|
############################### |
1052
|
|
|
|
|
|
|
sub _next_element{ |
1053
|
20
|
|
|
20
|
|
26
|
my ($self,$queuename,$meth) = @_; |
1054
|
|
|
|
|
|
|
|
1055
|
20
|
|
|
|
|
29
|
$queuename = "_".$queuename."_queue"; |
1056
|
20
|
100
|
|
|
|
28
|
if(! exists($self->{$queuename})) { |
1057
|
|
|
|
|
|
|
# re-initialize from array of sequence data |
1058
|
6
|
|
|
|
|
7
|
$self->{$queuename} = [@{$self->$meth() }]; |
|
6
|
|
|
|
|
13
|
|
1059
|
|
|
|
|
|
|
} |
1060
|
20
|
|
|
|
|
23
|
my $queue = $self->{$queuename}; |
1061
|
|
|
|
|
|
|
# is queue exhausted (equivalent to end of stream)? |
1062
|
20
|
100
|
|
|
|
29
|
if(! @$queue) { |
1063
|
|
|
|
|
|
|
# yes, remove queue and signal to the caller |
1064
|
6
|
|
|
|
|
9
|
delete $self->{$queuename}; |
1065
|
6
|
|
|
|
|
12
|
return; |
1066
|
|
|
|
|
|
|
} |
1067
|
14
|
|
|
|
|
25
|
return shift(@$queue); |
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
=head1 L methods |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
=cut |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
=head2 object_id |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
Title : object_id |
1077
|
|
|
|
|
|
|
Usage : $string = $obj->object_id() |
1078
|
|
|
|
|
|
|
Function: a string which represents the stable primary identifier |
1079
|
|
|
|
|
|
|
in this namespace of this object. For DNA sequences this |
1080
|
|
|
|
|
|
|
is its accession_number, similarly for protein sequences |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
This is aliased to unigene_id(). |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
Returns : A scalar |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
=cut |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
sub object_id { |
1090
|
0
|
|
|
0
|
1
|
0
|
return shift->unigene_id(@_); |
1091
|
|
|
|
|
|
|
} |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
=head2 version |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
Title : version |
1096
|
|
|
|
|
|
|
Usage : $version = $obj->version() |
1097
|
|
|
|
|
|
|
Function: a number which differentiates between versions of |
1098
|
|
|
|
|
|
|
the same object. Higher numbers are considered to be |
1099
|
|
|
|
|
|
|
later and more relevant, but a single object described |
1100
|
|
|
|
|
|
|
the same identifier should represent the same concept |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
Unigene clusters usually won't have a version, so this |
1103
|
|
|
|
|
|
|
will be mostly undefined. |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
Returns : A number |
1106
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=cut |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
sub version { |
1112
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1113
|
|
|
|
|
|
|
|
1114
|
0
|
0
|
|
|
|
0
|
return $self->{'version'} = shift if @_; |
1115
|
0
|
|
|
|
|
0
|
return $self->{'version'}; |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
=head2 authority |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
Title : authority |
1122
|
|
|
|
|
|
|
Usage : $authority = $obj->authority() |
1123
|
|
|
|
|
|
|
Function: a string which represents the organisation which |
1124
|
|
|
|
|
|
|
granted the namespace, written as the DNS name for |
1125
|
|
|
|
|
|
|
organisation (eg, wormbase.org) |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Returns : A scalar |
1128
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
=cut |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
sub authority { |
1134
|
91
|
|
|
91
|
1
|
119
|
my $self = shift; |
1135
|
|
|
|
|
|
|
|
1136
|
91
|
100
|
|
|
|
155
|
return $self->{'authority'} = shift if @_; |
1137
|
85
|
|
|
|
|
161
|
return $self->{'authority'}; |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
=head2 namespace |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
Title : namespace |
1144
|
|
|
|
|
|
|
Usage : $string = $obj->namespace() |
1145
|
|
|
|
|
|
|
Function: A string representing the name space this identifier |
1146
|
|
|
|
|
|
|
is valid in, often the database name or the name |
1147
|
|
|
|
|
|
|
describing the collection |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Returns : A scalar |
1150
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
=cut |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
sub namespace { |
1156
|
7
|
|
|
7
|
1
|
9
|
my $self = shift; |
1157
|
|
|
|
|
|
|
|
1158
|
7
|
100
|
|
|
|
19
|
return $self->{'namespace'} = shift if @_; |
1159
|
1
|
|
|
|
|
5
|
return $self->{'namespace'}; |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
=head1 L methods |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
=cut |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
=head2 display_name |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
Title : display_name |
1169
|
|
|
|
|
|
|
Usage : $string = $obj->display_name() |
1170
|
|
|
|
|
|
|
Function: A string which is what should be displayed to the user |
1171
|
|
|
|
|
|
|
the string should have no spaces (ideally, though a cautious |
1172
|
|
|
|
|
|
|
user of this interface would not assume this) and should be |
1173
|
|
|
|
|
|
|
less than thirty characters (though again, double checking |
1174
|
|
|
|
|
|
|
this is a good idea) |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
This is aliased to unigene_id(). |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
Returns : A scalar |
1179
|
|
|
|
|
|
|
Status : Virtual |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
=cut |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
sub display_name { |
1184
|
0
|
|
|
0
|
1
|
0
|
return shift->unigene_id(@_); |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head2 description() |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
Title : description |
1191
|
|
|
|
|
|
|
Usage : $string = $obj->description() |
1192
|
|
|
|
|
|
|
Function: A text string suitable for displaying to the user a |
1193
|
|
|
|
|
|
|
description. This string is likely to have spaces, but |
1194
|
|
|
|
|
|
|
should not have any newlines or formatting - just plain |
1195
|
|
|
|
|
|
|
text. The string should not be greater than 255 characters |
1196
|
|
|
|
|
|
|
and clients can feel justified at truncating strings at 255 |
1197
|
|
|
|
|
|
|
characters for the purposes of display |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
This is already demanded by Bio::ClusterI and hence is |
1200
|
|
|
|
|
|
|
present anyway. |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Returns : A scalar |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
=cut |
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
=head1 L methods |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
=cut |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
=head2 next_seq |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
Title : next_seq |
1215
|
|
|
|
|
|
|
Usage : next_seq(); |
1216
|
|
|
|
|
|
|
Function: Returns the next seq as a Seq object as defined by |
1217
|
|
|
|
|
|
|
$seq->sequence_factory(), |
1218
|
|
|
|
|
|
|
at present an empty Bio::Seq::RichSeq object with |
1219
|
|
|
|
|
|
|
just the accession_number() and pid() set |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
This iterator will not exhaust the array of member |
1222
|
|
|
|
|
|
|
sequences. If you call next_seq() again after it returned |
1223
|
|
|
|
|
|
|
undef, it will re-cycle through the list of member |
1224
|
|
|
|
|
|
|
sequences. |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
Example : while ( my $sequence = $in->next_seq() ) { |
1227
|
|
|
|
|
|
|
print $sequence->accession_number() . "\n"; |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
Returns : Bio::PrimarySeqI object |
1230
|
|
|
|
|
|
|
Args : None |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
=cut |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
sub next_seq { |
1235
|
90
|
|
|
90
|
1
|
182
|
my ($obj) = @_; |
1236
|
|
|
|
|
|
|
|
1237
|
90
|
100
|
|
|
|
149
|
if(! exists($obj->{'_seq_queue'})) { |
1238
|
|
|
|
|
|
|
# re-initialize from array of sequence data |
1239
|
5
|
|
|
|
|
6
|
$obj->{'_seq_queue'} = [@{$obj->sequences()}]; |
|
5
|
|
|
|
|
8
|
|
1240
|
|
|
|
|
|
|
} |
1241
|
90
|
|
|
|
|
94
|
my $queue = $obj->{'_seq_queue'}; |
1242
|
|
|
|
|
|
|
# is queue exhausted (equivalent to end of stream)? |
1243
|
90
|
100
|
|
|
|
135
|
if(! @$queue) { |
1244
|
|
|
|
|
|
|
# yes, remove queue and signal to the caller |
1245
|
3
|
|
|
|
|
6
|
delete $obj->{'_seq_queue'}; |
1246
|
3
|
|
|
|
|
12
|
return; |
1247
|
|
|
|
|
|
|
} |
1248
|
|
|
|
|
|
|
# no, still data in the queue: get the next one from the queue |
1249
|
87
|
|
|
|
|
89
|
my $seq_h = shift(@$queue); |
1250
|
|
|
|
|
|
|
# if this is not a simple hash ref, it's an object already, and we'll |
1251
|
|
|
|
|
|
|
# return just that |
1252
|
87
|
100
|
|
|
|
155
|
return $seq_h if(ref($seq_h) ne 'HASH'); |
1253
|
|
|
|
|
|
|
# nope, we need to assemble this object from scratch |
1254
|
|
|
|
|
|
|
# |
1255
|
|
|
|
|
|
|
# assemble the annotation collection |
1256
|
84
|
|
|
|
|
180
|
my $ac = Bio::Annotation::Collection->new(); |
1257
|
84
|
|
|
|
|
267
|
foreach my $k (keys %$seq_h) { |
1258
|
589
|
100
|
|
|
|
1653
|
next if $k =~ /acc|pid|nid|version/; |
1259
|
|
|
|
|
|
|
my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k, |
1260
|
325
|
|
|
|
|
765
|
-value => $seq_h->{$k}); |
1261
|
325
|
|
|
|
|
626
|
$ac->add_Annotation($ann); |
1262
|
|
|
|
|
|
|
} |
1263
|
|
|
|
|
|
|
# assemble the initialization parameters and create object |
1264
|
|
|
|
|
|
|
my $seqobj = $obj->sequence_factory->create( |
1265
|
|
|
|
|
|
|
-accession_number => $seq_h->{acc}, |
1266
|
|
|
|
|
|
|
-pid => $seq_h->{pid}, |
1267
|
|
|
|
|
|
|
# why does NCBI prepend a 'g' to its own identifiers?? |
1268
|
|
|
|
|
|
|
-primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ? |
1269
|
|
|
|
|
|
|
substr($seq_h->{nid},1) : $seq_h->{nid}, |
1270
|
|
|
|
|
|
|
-display_id => $seq_h->{acc}, |
1271
|
|
|
|
|
|
|
-seq_version => $seq_h->{version}, |
1272
|
|
|
|
|
|
|
-alphabet => $obj->{'_alphabet'}, |
1273
|
84
|
50
|
33
|
|
|
172
|
-namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank', |
|
|
100
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
-authority => $obj->authority(), # default is NCBI |
1275
|
|
|
|
|
|
|
-species => $obj->species(), |
1276
|
|
|
|
|
|
|
-annotation => $ac |
1277
|
|
|
|
|
|
|
); |
1278
|
84
|
|
|
|
|
356
|
return $seqobj; |
1279
|
|
|
|
|
|
|
} |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=head2 sequence_factory |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
Title : sequence_factory |
1284
|
|
|
|
|
|
|
Usage : $seqio->sequence_factory($seqfactory) |
1285
|
|
|
|
|
|
|
Function: Get/Set the Bio::Factory::SequenceFactoryI |
1286
|
|
|
|
|
|
|
Returns : Bio::Factory::SequenceFactoryI |
1287
|
|
|
|
|
|
|
Args : [optional] Bio::Factory::SequenceFactoryI |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
=cut |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
sub sequence_factory { |
1293
|
90
|
|
|
90
|
1
|
128
|
my ($self,$obj) = @_; |
1294
|
90
|
100
|
|
|
|
139
|
if( defined $obj ) { |
1295
|
6
|
50
|
33
|
|
|
64
|
if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) { |
1296
|
0
|
|
|
|
|
0
|
$self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()"); |
1297
|
|
|
|
|
|
|
} |
1298
|
6
|
|
|
|
|
17
|
$self->{'_seqfactory'} = $obj; |
1299
|
|
|
|
|
|
|
} |
1300
|
90
|
|
|
|
|
692
|
$self->{'_seqfactory'}; |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=head1 Private methods |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=cut |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
=head2 _annotation_value |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
Title : _annotation_value |
1310
|
|
|
|
|
|
|
Usage : |
1311
|
|
|
|
|
|
|
Function: Private method. |
1312
|
|
|
|
|
|
|
Example : |
1313
|
|
|
|
|
|
|
Returns : the value (a string) |
1314
|
|
|
|
|
|
|
Args : annotation key (a string) |
1315
|
|
|
|
|
|
|
on set, annotation value (a string) |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=cut |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
sub _annotation_value{ |
1321
|
22
|
|
|
22
|
|
26
|
my $self = shift; |
1322
|
22
|
|
|
|
|
25
|
my $key = shift; |
1323
|
|
|
|
|
|
|
|
1324
|
22
|
|
|
|
|
21
|
my ($ann, $val); |
1325
|
22
|
100
|
|
|
|
39
|
if(@_) { |
1326
|
11
|
|
|
|
|
14
|
$val = shift; |
1327
|
11
|
50
|
|
|
|
20
|
if(! defined($val)) { |
1328
|
0
|
|
|
|
|
0
|
($ann) = $self->annotation->remove_Annotations($key); |
1329
|
0
|
0
|
|
|
|
0
|
return $ann ? $ann->value() : undef; |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
} |
1332
|
22
|
|
|
|
|
35
|
($ann) = $self->annotation->get_Annotations($key); |
1333
|
22
|
100
|
100
|
|
|
71
|
if(defined $ann && (! $val)) { |
|
|
50
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
# get mode and exists |
1335
|
11
|
|
|
|
|
18
|
$val = $ann->value(); |
1336
|
|
|
|
|
|
|
} elsif($val) { |
1337
|
|
|
|
|
|
|
# set mode |
1338
|
11
|
100
|
|
|
|
16
|
if(!defined $ann) { |
1339
|
6
|
|
|
|
|
16
|
$ann = Bio::Annotation::SimpleValue->new(-tagname => $key); |
1340
|
6
|
|
|
|
|
11
|
$self->annotation->add_Annotation($ann); |
1341
|
|
|
|
|
|
|
} |
1342
|
11
|
|
|
|
|
20
|
$ann->value($val); |
1343
|
|
|
|
|
|
|
} |
1344
|
22
|
|
|
|
|
50
|
return $val; |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=head2 _annotation_value_ary |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
Title : _annotation_value_ary |
1351
|
|
|
|
|
|
|
Usage : |
1352
|
|
|
|
|
|
|
Function: Private method. |
1353
|
|
|
|
|
|
|
Example : |
1354
|
|
|
|
|
|
|
Returns : reference to the array of values |
1355
|
|
|
|
|
|
|
Args : annotation key (a string) |
1356
|
|
|
|
|
|
|
on set, reference to an array holding the values |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=cut |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
sub _annotation_value_ary{ |
1362
|
34
|
|
|
34
|
|
48
|
my ($self,$key,$arr) = @_; |
1363
|
|
|
|
|
|
|
|
1364
|
34
|
|
|
|
|
54
|
my $ac = $self->annotation; |
1365
|
34
|
100
|
|
|
|
51
|
if($arr) { |
1366
|
|
|
|
|
|
|
# purge first |
1367
|
20
|
|
|
|
|
38
|
$ac->remove_Annotations($key); |
1368
|
|
|
|
|
|
|
# then add as many values as are present |
1369
|
20
|
|
|
|
|
38
|
foreach my $val (@$arr) { |
1370
|
53
|
|
|
|
|
118
|
my $ann = Bio::Annotation::SimpleValue->new(-value => $val, |
1371
|
|
|
|
|
|
|
-tagname => $key |
1372
|
|
|
|
|
|
|
); |
1373
|
53
|
|
|
|
|
98
|
$ac->add_Annotation($ann); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
} else { |
1376
|
14
|
|
|
|
|
36
|
my @vals = map { $_->value(); } $ac->get_Annotations($key); |
|
52
|
|
|
|
|
66
|
|
1377
|
14
|
|
|
|
|
32
|
$arr = [@vals]; |
1378
|
|
|
|
|
|
|
} |
1379
|
34
|
|
|
|
|
89
|
return $arr; |
1380
|
|
|
|
|
|
|
} |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
=head2 _annotation_dblink |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
Title : _annotation_dblink |
1386
|
|
|
|
|
|
|
Usage : |
1387
|
|
|
|
|
|
|
Function: Private method. |
1388
|
|
|
|
|
|
|
Example : |
1389
|
|
|
|
|
|
|
Returns : array of accessions for the given database (namespace) |
1390
|
|
|
|
|
|
|
Args : annotation key (a string) |
1391
|
|
|
|
|
|
|
dbname (a string) (optional on get, mandatory on set) |
1392
|
|
|
|
|
|
|
on set, accession or ID (a string), and version |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
=cut |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
sub _annotation_dblink{ |
1398
|
6
|
|
|
6
|
|
13
|
my ($self,$key,$dbname,$acc,$version) = @_; |
1399
|
|
|
|
|
|
|
|
1400
|
6
|
100
|
|
|
|
12
|
if($acc) { |
1401
|
|
|
|
|
|
|
# set mode -- this is adding here |
1402
|
3
|
|
|
|
|
19
|
my $ann = Bio::Annotation::DBLink->new(-tagname => $key, |
1403
|
|
|
|
|
|
|
-primary_id => $acc, |
1404
|
|
|
|
|
|
|
-database => $dbname, |
1405
|
|
|
|
|
|
|
-version => $version); |
1406
|
3
|
|
|
|
|
8
|
$self->annotation->add_Annotation($ann); |
1407
|
3
|
|
|
|
|
6
|
return 1; |
1408
|
|
|
|
|
|
|
} else { |
1409
|
|
|
|
|
|
|
# get mode |
1410
|
3
|
|
|
|
|
9
|
my @anns = $self->annotation->get_Annotations($key); |
1411
|
|
|
|
|
|
|
# filter out those that don't match the requested database |
1412
|
3
|
50
|
|
|
|
8
|
if($dbname) { |
1413
|
3
|
|
|
|
|
4
|
@anns = grep { $_->database() eq $dbname; } @anns; |
|
4
|
|
|
|
|
7
|
|
1414
|
|
|
|
|
|
|
} |
1415
|
3
|
|
|
|
|
6
|
return map { $_->primary_id(); } @anns; |
|
4
|
|
|
|
|
6
|
|
1416
|
|
|
|
|
|
|
} |
1417
|
|
|
|
|
|
|
} |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
=head2 _remove_dblink |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
Title : _remove_dblink |
1422
|
|
|
|
|
|
|
Usage : |
1423
|
|
|
|
|
|
|
Function: Private method. |
1424
|
|
|
|
|
|
|
Example : |
1425
|
|
|
|
|
|
|
Returns : array of accessions for the given database (namespace) |
1426
|
|
|
|
|
|
|
Args : annotation key (a string) |
1427
|
|
|
|
|
|
|
dbname (a string) (optional) |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=cut |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
sub _remove_dblink{ |
1433
|
4
|
|
|
4
|
|
9
|
my ($self,$key,$dbname) = @_; |
1434
|
|
|
|
|
|
|
|
1435
|
4
|
|
|
|
|
7
|
my $ac = $self->annotation(); |
1436
|
4
|
|
|
|
|
6
|
my @anns = (); |
1437
|
4
|
50
|
|
|
|
8
|
if($dbname) { |
1438
|
4
|
|
|
|
|
10
|
foreach my $ann ($ac->remove_Annotations($key)) { |
1439
|
1
|
50
|
|
|
|
4
|
if($ann->database() eq $dbname) { |
1440
|
1
|
|
|
|
|
2
|
push(@anns, $ann); |
1441
|
|
|
|
|
|
|
} else { |
1442
|
0
|
|
|
|
|
0
|
$ac->add_Annotation($ann); |
1443
|
|
|
|
|
|
|
} |
1444
|
|
|
|
|
|
|
} |
1445
|
|
|
|
|
|
|
} else { |
1446
|
0
|
|
|
|
|
0
|
@anns = $ac->remove_Annotations($key); |
1447
|
|
|
|
|
|
|
} |
1448
|
4
|
|
|
|
|
8
|
return map { $_->primary_id(); } @anns; |
|
1
|
|
|
|
|
5
|
|
1449
|
|
|
|
|
|
|
} |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
##################################################################### |
1453
|
|
|
|
|
|
|
# aliases for naming consistency or other reasons # |
1454
|
|
|
|
|
|
|
##################################################################### |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
*sequence = \&sequences; |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
1; |