| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# $Id: Chado.pm,v 1.11 2009-08-25 19:29:43 scottcain Exp $ |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Bio::DB::Das::Chado - DAS-style access to a chado database |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Open up a feature database |
|
10
|
|
|
|
|
|
|
$db = Bio::DB::Das::Chado->new( |
|
11
|
|
|
|
|
|
|
-dsn => 'dbi:Pg:dbname=gadfly;host=lajolla', |
|
12
|
|
|
|
|
|
|
-user => 'jimbo', |
|
13
|
|
|
|
|
|
|
-pass => 'supersecret', |
|
14
|
|
|
|
|
|
|
); |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
@segments = $db->segment(-name => '2L', |
|
17
|
|
|
|
|
|
|
-start => 1, |
|
18
|
|
|
|
|
|
|
-end => 1000000); |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# segments are Bio::Das::SegmentI - compliant objects |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# fetch a list of features |
|
23
|
|
|
|
|
|
|
@features = $db->features(-type=>['type1','type2','type3']); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# invoke a callback over features |
|
26
|
|
|
|
|
|
|
$db->features(-type=>['type1','type2','type3'], |
|
27
|
|
|
|
|
|
|
-callback => sub { ... } |
|
28
|
|
|
|
|
|
|
); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# get all feature types |
|
31
|
|
|
|
|
|
|
@types = $db->types; |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# count types |
|
34
|
|
|
|
|
|
|
%types = $db->types(-enumerate=>1); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
@feature = $db->get_feature_by_name($class=>$name); |
|
37
|
|
|
|
|
|
|
@feature = $db->get_feature_by_target($target_name); |
|
38
|
|
|
|
|
|
|
@feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2); |
|
39
|
|
|
|
|
|
|
$feature = $db->get_feature_by_id($id); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
$error = $db->error; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Bio::DB::Das::Chado allows DAS style access to a Chado database, getting |
|
46
|
|
|
|
|
|
|
SeqFeatureI-compliant BioPerl objects and allowing GBrowse to access |
|
47
|
|
|
|
|
|
|
a Chado database directly. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
54
|
|
|
|
|
|
|
GMOD modules. Send your comments and suggestions preferably to one |
|
55
|
|
|
|
|
|
|
of the GMOD mailing lists. Your participation is much appreciated. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
gmod-gbrowse@lists.sourceforge.com |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Report bugs to the GMOD bug tracking system at SourceForge to help us keep |
|
62
|
|
|
|
|
|
|
track of the bugs and their resolution. |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
http://sourceforge.net/tracker/?group_id=27707&atid=391291 |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 AUTHOR |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Scott Cain <scain@cpan.org> |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 LICENSE |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
This software may be redistributed under the same license as perl. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 APPENDIX |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
78
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=cut |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
#' |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
package Bio::DB::Das::Chado; |
|
85
|
1
|
|
|
1
|
|
27346
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
52
|
|
|
86
|
|
|
|
|
|
|
|
|
87
|
1
|
|
|
1
|
|
1023
|
use Bio::DB::Das::Chado::Segment; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
48
|
|
|
88
|
1
|
|
|
1
|
|
31
|
use Bio::Root::Root; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
23
|
|
|
89
|
1
|
|
|
1
|
|
1042
|
use Bio::DasI; |
|
|
1
|
|
|
|
|
2018
|
|
|
|
1
|
|
|
|
|
37
|
|
|
90
|
1
|
|
|
1
|
|
8
|
use Bio::PrimarySeq; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
91
|
1
|
|
|
1
|
|
5
|
use Bio::DB::GFF::Typename; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
27
|
|
|
92
|
1
|
|
|
1
|
|
4598
|
use DBI; |
|
|
1
|
|
|
|
|
21941
|
|
|
|
1
|
|
|
|
|
88
|
|
|
93
|
1
|
|
|
1
|
|
1255
|
use Bio::SeqFeature::Lite; |
|
|
1
|
|
|
|
|
5796
|
|
|
|
1
|
|
|
|
|
54
|
|
|
94
|
1
|
|
|
1
|
|
8
|
use Carp qw(longmess); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
73
|
|
|
95
|
1
|
|
|
1
|
|
7
|
use vars qw($VERSION @ISA); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
60
|
|
|
96
|
|
|
|
|
|
|
|
|
97
|
1
|
|
|
1
|
|
6
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
54
|
|
|
98
|
|
|
|
|
|
|
|
|
99
|
1
|
|
|
1
|
|
7
|
use constant SEGCLASS => 'Bio::DB::Das::Chado::Segment'; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
73
|
|
|
100
|
1
|
|
|
1
|
|
5
|
use constant MAP_REFERENCE_TYPE => 'MapReferenceType'; #dgg |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
44
|
|
|
101
|
1
|
|
|
1
|
|
5
|
use constant DEBUG => 0; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
11009
|
|
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$VERSION = 0.34; |
|
104
|
|
|
|
|
|
|
@ISA = qw(Bio::Root::Root Bio::DasI); |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head2 new |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Title : new |
|
109
|
|
|
|
|
|
|
Usage : $db = Bio::DB::Das::Chado->new( |
|
110
|
|
|
|
|
|
|
-dsn => 'dbi:Pg:dbname=gadfly;host=lajolla', |
|
111
|
|
|
|
|
|
|
-user => 'jimbo', |
|
112
|
|
|
|
|
|
|
-pass => 'supersecret', |
|
113
|
|
|
|
|
|
|
); |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Function: Open up a Bio::DB::DasI interface to a Chado database |
|
116
|
|
|
|
|
|
|
Returns : a new Bio::DB::Das::Chado object |
|
117
|
|
|
|
|
|
|
Args : |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=over |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item -dsn [dsn string] |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
A full dbi dsn string for the database, optionally including host and port |
|
124
|
|
|
|
|
|
|
information, like "dbi:Pg:dbname=chado;host=localhost;port=5432". |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item -user [username] |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The database user name. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item -pass [password] |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The user's password for the database. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item -organism [common_name|abbreviation|"Genus species"] |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Used to specify the organism that the features should be drawn from in |
|
137
|
|
|
|
|
|
|
Chado instances that have more than one organism. The argument can be |
|
138
|
|
|
|
|
|
|
the common name, the abbreviation or "Genus species". Since common name |
|
139
|
|
|
|
|
|
|
and abbreviation are not guaranteed to be unique, if one of those is supplied |
|
140
|
|
|
|
|
|
|
and it corresponds to more than one organism_id, the Chado adaptor will die. |
|
141
|
|
|
|
|
|
|
Since the combination is guaranteed to be unique by table constraints, |
|
142
|
|
|
|
|
|
|
supplying "Genus species" should always work. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item -srcfeatureslice [1|0] default: 1 |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Setting this to 1 will enable searching for features using a function and |
|
147
|
|
|
|
|
|
|
a corresponding index that can significantly speed searches, as long as |
|
148
|
|
|
|
|
|
|
the featureloc_slice function is present in the Chado instance (all |
|
149
|
|
|
|
|
|
|
"modern" instances of Chado do have this function). Since it is available |
|
150
|
|
|
|
|
|
|
in nearly all Chado instances, the default value of -srcfeatureslice |
|
151
|
|
|
|
|
|
|
is 1 (on). |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item -inferCDS [1|0] default: 0 |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Given mRNA features that have exons and polypeptide features as children, |
|
156
|
|
|
|
|
|
|
when inferCDS is set, the Chado adaptor will calculate the intersection |
|
157
|
|
|
|
|
|
|
of the exons and polypeptide features and create CDS features that result. |
|
158
|
|
|
|
|
|
|
This is generally needed when using gene and mRNA features with glyphs in |
|
159
|
|
|
|
|
|
|
GBrowse that show subparts, like the gene and processed_transcript glyphs. |
|
160
|
|
|
|
|
|
|
Since this is almost always required, in a future release of this adaptor, |
|
161
|
|
|
|
|
|
|
the default will be switched to 1 (on). |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=item -fulltext [1|0] default: 0 |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
This item allows full text searching of various Chado text fields, |
|
166
|
|
|
|
|
|
|
including feature.name, feature.uniquename, synonym.synonym_sgml, |
|
167
|
|
|
|
|
|
|
dbxref.accession, and all_feature_names.name (which frequently includes |
|
168
|
|
|
|
|
|
|
featureprop.value, depending on how all_feature_names is configured). Note |
|
169
|
|
|
|
|
|
|
that to use -fulltext, you must run the preparation script, |
|
170
|
|
|
|
|
|
|
gmod_chado_fts_prep.pl, on the database, and in addition, it might |
|
171
|
|
|
|
|
|
|
be a good idea to set up a cronjob to keep the all_feature_names |
|
172
|
|
|
|
|
|
|
materialized view up to date with the materialized view tool, |
|
173
|
|
|
|
|
|
|
gmod_materialized_view_tool.pl. |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=item -recursivMapping [1|0] default: 0 |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
In the case where features are mapped to a "small" srcfeature (like |
|
178
|
|
|
|
|
|
|
a contig) and then that small feature is mapped to a larger feature |
|
179
|
|
|
|
|
|
|
(like a chromosome), setting -recursivMapping will allow the Chado |
|
180
|
|
|
|
|
|
|
adaptor to calculate the coordinates of the feature on the larger |
|
181
|
|
|
|
|
|
|
feature even though it isn't explicitly mapped to it. The Chado adaptor |
|
182
|
|
|
|
|
|
|
suffers an approximately 20% performance penalty to do this mapping. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item -allow_obsolete [1|0] default: 0 |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
If set to 1, allow_obsolete will tell the Chado adaptor to ignore the |
|
187
|
|
|
|
|
|
|
feature.is_obsolete column when querying to find features. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item -enable_seqscan [1|0] default: 1 |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
If set to zero, the -enable_seqscan will send a query planner hint to the |
|
192
|
|
|
|
|
|
|
PostgreSQL server to make it more costly to do sequential scans on a table. |
|
193
|
|
|
|
|
|
|
This is generally not necessary, as the query planner in Pg 8+ is smarter |
|
194
|
|
|
|
|
|
|
than it used to be. |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=item -do2Level [1|0] default: 0 |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
do2Level is a flag for specifying that two "levels" at most of features should |
|
199
|
|
|
|
|
|
|
be fetched when getting child features. This flag is generally unnecessary as |
|
200
|
|
|
|
|
|
|
Bio::Graphics::Glyph supports specifying on a per glyph basis what should |
|
201
|
|
|
|
|
|
|
be fetched. Use of this flag is incompatible with the -recursivMapping flag. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=item -reference_class [SO type name] |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Used to specify what the "base type" is. Typically, this would be chromosome |
|
206
|
|
|
|
|
|
|
or contig, but setting it is only necessary in the case where features |
|
207
|
|
|
|
|
|
|
are mapped to more than one srcfeature and you don't want to use the |
|
208
|
|
|
|
|
|
|
one that is lowest on the graph. For example, you have polypeptides that are |
|
209
|
|
|
|
|
|
|
mapped to chromosomes and motifs that are mapped to polypeptides. If you |
|
210
|
|
|
|
|
|
|
want to display the motifs on the polypeptide, you need to set "polypeptide" |
|
211
|
|
|
|
|
|
|
as the argument for -reference_class. |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=item -tripal [1|0] default: 0 |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
If turned on, the tripal flag tells the adaptor that it is dealing with |
|
216
|
|
|
|
|
|
|
a Chado instance that is working with Tripal, and so the query to fetch |
|
217
|
|
|
|
|
|
|
features may fail with regard to analysis features. This flag attempts to |
|
218
|
|
|
|
|
|
|
prevent that. It may mean that analysis features (like similarity results) |
|
219
|
|
|
|
|
|
|
will be inaccessible to the adaptor, or at least scores associated with them |
|
220
|
|
|
|
|
|
|
will be, depending on how they were loaded. |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=back |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=cut |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# create new database accessor object |
|
227
|
|
|
|
|
|
|
# takes all the same args as a Bio::DB::BioDB class |
|
228
|
|
|
|
|
|
|
# Constructor: opens the database connection, caches the cvterm lookup
# tables and applies the adaptor options.
#
# Arguments (a flat list of -key => value pairs):
#   -dsn, -user, -pass   passed to DBI->connect and remembered in
#                        $self->{db_args}
#   -reference_class     term name; resolved through name2term() and
#                        stored with refclass()
#   -organism            when true, handed to organism_id()
#   -recursivMapping, -inferCDS, -allow_obsolete, -do2Level
#                        stored as given when true, otherwise as 0
#   -srcfeatureslice     stored as given when defined, otherwise 1
#   -enable_seqscan      when supplied and false, "set enable_seqscan=0"
#                        is issued on the connection
#   -fulltext, -tripal   handed to the accessors of the same name
#
# Returns the new object.  Throws (via $self->throw) when the connection
# cannot be opened or the cvterm query cannot be prepared or executed.
sub new {
  my $proto = shift;
  my $self  = bless {}, ref($proto) || $proto;

  my %arg = @_;

  my $dsn      = $arg{-dsn};
  my $username = $arg{-user};
  my $password = $arg{-pass};
  my $refclass = $arg{-reference_class};

  $self->{db_args}->{dsn}      = $dsn;
  $self->{db_args}->{username} = $username;
  $self->{db_args}->{password} = $password;

  my $dbh = DBI->connect( $dsn, $username, $password )
    or $self->throw("unable to open db handle");
  $self->dbh($dbh);

  warn "$dbh\n" if DEBUG;

  # determine which cv to use for SO terms

  $self->sofa_id(1);

  warn "SOFA id to use: ",$self->sofa_id() if DEBUG;

  # get the cvterm relationships here and save for later use

  my $cvterm_query="select ct.cvterm_id,ct.name as name, c.name as cvname
                    from cvterm ct, cv c
                    where ct.cv_id=c.cv_id and
                    (c.name IN (
                      'relationship',
                      'relationship type','Relationship Ontology',
                      'autocreated')
                    OR c.cv_id = ?)";

  warn "cvterm query: $cvterm_query\n" if DEBUG;

  # A failed prepare used to be reported with a plain warn, after which
  # the execute below died on an undefined statement handle; fail with
  # a meaningful exception instead.
  my $sth = $self->dbh->prepare($cvterm_query)
    or $self->throw("unable to prepare select cvterms");

  $sth->execute($self->sofa_id()) or $self->throw("unable to select cvterms");

  # Both lookup tables start out empty.  (Initialising them from
  # ({},{}) stored a stringified hash reference as a bogus key in
  # %term2name.)
  my (%term2name, %name2term);
  my %termcv = ();

  while (my $hashref = $sth->fetchrow_hashref) {
    my $term_id   = $hashref->{cvterm_id};
    my $term_name = $hashref->{name};

    $term2name{$term_id} = $term_name;
    $termcv{$term_id}    = $hashref->{cvname}; # dgg

    #this addresses a bug in gmod_load_gff3 (Scott!), which creates a 'part_of'
    #term in addition to the OBO_REL one that already exists!  this will also
    #help with names that exist in both GO and SO, like 'protein'.
    # dgg: but this array is bad for callers of name2term() who expect scalar result
    #    mostly want only sofa terms

    if (defined $name2term{$term_name}) { #already seen this name

      if (ref($name2term{$term_name}) ne 'ARRAY') { #not yet array-converted
        $name2term{$term_name} = [ $name2term{$term_name} ];
      }

      push @{ $name2term{$term_name} }, $term_id;

    } else {

      $name2term{$term_name} = $term_id;

    }
  }
  $sth->finish;

  $self->term2name(\%term2name);
  $self->name2term(\%name2term, \%termcv);

  #Recursive Mapping
  $self->recursivMapping($arg{-recursivMapping} ? $arg{-recursivMapping} : 0);

  $self->inferCDS($arg{-inferCDS} ? $arg{-inferCDS} : 0);
  $self->allow_obsolete($arg{-allow_obsolete} ? $arg{-allow_obsolete} : 0);

  # Only touch the planner setting when the caller explicitly passed a
  # false -enable_seqscan.
  if (exists($arg{-enable_seqscan}) && ! $arg{-enable_seqscan}) {
    $self->dbh->do("set enable_seqscan=0");
  }

  $self->srcfeatureslice(defined $arg{-srcfeatureslice} ? $arg{-srcfeatureslice} : 1);
  $self->do2Level($arg{-do2Level} ? $arg{-do2Level} : 0);

  if ($arg{-organism}) {
    $self->organism_id($arg{-organism});
  }

  #determine if all_feature_names view or table exist
  #$self->use_all_feature_names();

  #determine the type_id of the ref class and cache it
  $self->refclass($self->name2term($refclass));

  $self->fulltext($arg{-fulltext});
  $self->tripal($arg{-tripal});

  return $self;
}
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head2 feature_summary |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=over |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item Usage |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
$obj->feature_summary() |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=item Function |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
This function is based on Bio::DB::SeqFeature::Store->feature_summary. |
|
351
|
|
|
|
|
|
|
The text that follows comes from its documentation: |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
This method is used to get coverage density information across a |
|
354
|
|
|
|
|
|
|
region of interest. You provide it with a region of interest, optionally |
|
355
|
|
|
|
|
|
|
a list of feature types, and a count of the number of bins over which |
|
356
|
|
|
|
|
|
|
you want to calculate the coverage density. An object is returned |
|
357
|
|
|
|
|
|
|
corresponding to the requested region. It contains a tag called |
|
358
|
|
|
|
|
|
|
"coverage" that will return an array ref of "bins" length. Each |
|
359
|
|
|
|
|
|
|
element of the array describes the number of features that overlap the |
|
360
|
|
|
|
|
|
|
bin at this position. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Note that this method uses an approximate algorithm that is only |
|
363
|
|
|
|
|
|
|
accurate to 500 bp, so when dealing with bins that are smaller than |
|
364
|
|
|
|
|
|
|
1000 bp, you may see some shifting of counts between adjacent bins. |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
Although an -iterator option is provided, the method only ever returns |
|
367
|
|
|
|
|
|
|
a single feature, so this is fairly useless. |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=item Returns |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
A single feature containing summary data, or an iterator containing |
|
372
|
|
|
|
|
|
|
that one feature. |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item Arguments |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
-seq_id Sequence ID for the region |
|
377
|
|
|
|
|
|
|
-start Start of region |
|
378
|
|
|
|
|
|
|
-end End of region |
|
379
|
|
|
|
|
|
|
-type/-types Feature type of interest or array ref of types |
|
380
|
|
|
|
|
|
|
-bins Number of bins across region. Defaults to 1000. |
|
381
|
|
|
|
|
|
|
-iterator Return an iterator across the region |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=back |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=cut |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Build a single summary feature describing feature density over a region.
#
# Named arguments (parsed with _rearrange):
#   -seqid / -seq_id / -ref      name of the reference sequence
#   -start                       start of the region
#   -end / -stop                 end of the region (-end wins if both given)
#   -types / -type / -primary_tag   type or array ref of types
#   -bins                        number of bins, passed to coverage_array()
#   -iterator                    when true, wrap the result in an iterator
#
# Returns nothing when coverage_array() yields no data.  Otherwise
# returns a Bio::SeqFeature::Lite whose score is the mean of the
# coverage values and whose "coverage" attribute holds the coverage
# array reference, or a Bio::DB::Das::ChadoIterator over that one
# feature when -iterator is set.
sub feature_summary {
  my $self = shift;
  my ($seq_name,$seq_id,$ref,$start,$stop,$end,$types,$type,$primary_tag,$bins,$iterator) =
    $self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
                       'TYPES','TYPE','PRIMARY_TAG',
                       'BINS',
                       'ITERATOR',
                      ],@_);

  $seq_name ||= $seq_id ||= $ref;
  # Fall back to -stop when -end was not given.  (This used to read
  # "$end ||= $end", which silently discarded -stop.)
  $end      ||= $stop;
  $types    ||= $type ||= $primary_tag;

  warn $types if DEBUG;

  my ($coverage,$tag) = $self->coverage_array(-seqid=> $seq_name,
                                              -start=> $start,
                                              -end  => $end,
                                              -type => $types,
                                              -bins => $bins) or return;

  # coverage_array() can hand back an empty array reference; averaging
  # over it would be a division by zero.
  return unless @$coverage;

  my $score = 0;
  for (@$coverage) { $score += $_ }
  $score /= @$coverage;

  my $feature = Bio::SeqFeature::Lite->new(-seq_id => $seq_name,
                                           -start  => $start,
                                           -end    => $end,
                                           -type   => $tag,
                                           -score  => $score,
                                           -attributes =>
                                             { coverage => [$coverage] });

  my @features = ($feature);
  return $iterator
    ? Bio::DB::Das::ChadoIterator->new(\@features)
    : $feature;
}
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=head2 coverage_array |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=over |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=item Usage |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
$obj->coverage_array() |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=item Function |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Calculates the coverage/density of a particular feature type |
|
437
|
|
|
|
|
|
|
over a range. |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=item Returns |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
A reference to the coverage array, or if called in an array |
|
442
|
|
|
|
|
|
|
context, a two element array with the reference to the coverage |
|
443
|
|
|
|
|
|
|
array first and the type that it was called with as the second |
|
444
|
|
|
|
|
|
|
element. |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=item Arguments |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
seqid |
|
449
|
|
|
|
|
|
|
start |
|
450
|
|
|
|
|
|
|
stop |
|
451
|
|
|
|
|
|
|
type |
|
452
|
|
|
|
|
|
|
bins |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=back |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
This is based on the method of the same name in |
|
457
|
|
|
|
|
|
|
Bio::DB::SeqFeature::Store::DBI::mysql |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
# Compute per-bin feature counts for one or more types over a region,
# using the cumulative counts stored in the gff_interval_stats table.
#
# Named arguments (parsed with _rearrange):
#   -seqid / -seq_id / -ref      name of the reference sequence
#   -start                       start of the region (default 1)
#   -stop / -end                 end of the region (default: segment end)
#   -types / -type / -primary_tag   type or array ref of types
#   -bins                        number of bins (default 1000)
#
# Returns, in list context, (\@counts, $tag) where $tag is the
# comma-joined list of types that produced data; in scalar context just
# \@counts.  Returns an empty array reference when the segment has no
# feature_id, and nothing at all when no rows could be collected.
# Throws when the segment cannot be found.
sub coverage_array {
  my $self = shift;
  # The locals are listed in the same order as the keys handed to
  # _rearrange, so $stop really holds -stop and $end really holds -end.
  my ($seq_name,$seq_id,$ref,$start,$stop,$end,$types,$type,$primary_tag,$bins) =
    $self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
                       'TYPES','TYPE','PRIMARY_TAG','BINS'],@_);

  $seq_name ||= $seq_id ||= $ref;
  $types    ||= $type ||= $primary_tag;
  # -stop takes precedence over -end when both are supplied; this keeps
  # the precedence this method has always had.
  $end = $stop || $end;

  my $summary_bin_size = 1000;
  $bins  ||= 1000;
  $start ||= 1;
  my $segment = $self->segment(-name => $seq_name)
    or $self->throw("unknown seq_id $seq_name");
  $end ||= $segment->end;

  my $binsize = ($end-$start+1)/$bins;
  my $seqid   = $segment->feature_id;

  warn "$seqid in coverage array" if DEBUG;

  return [] unless $seqid;

  # where each bin starts
  my @his_bin_array = map {$start + $binsize * $_} (0..$bins);
  # index of the summary bin (of $summary_bin_size bases) each start falls in
  my @sum_bin_array = map {int(($_-1)/$summary_bin_size)} @his_bin_array;

  my $interval_stats = 'gff_interval_stats';

  # pick up the type ids

  #WARNING: potential bug below.  This query and the loop that processes
  #it is from Lincoln's implementation for SeqFeature::Store.  The query
  #seems to rely on getting the results back sorted even though the
  #query doesn't explicitly sort (the ORDER BY commented out was from me)
  #With sorting the processing takes much longer, so I'm leaving it out
  #for now, but reimplementing might be a good idea.

  my %bins;
  my $sql = <<END;
SELECT bin,cum_count
  FROM $interval_stats
  WHERE (typeid=? OR typeid like ? ) AND bin >=? AND srcfeature_id =?
  -- ORDER BY bin
  LIMIT 1
END

  my $sth = $self->dbh->prepare($sql);

  my @t = ref $types eq 'ARRAY' ? @$types : ($types);

  warn join(" ", @t) . " types in coverage array" if DEBUG;

  # Best effort: any database failure in here simply leaves %bins
  # short or empty, and an empty %bins makes the method return nothing.
  eval {
    for my $typeid (@t) {
      my $typestr = $self->_types_sql($typeid);

      warn "$typestr typestr in coverage array" if DEBUG;

      for my $sum_bin (@sum_bin_array) {

        my @args = ($typestr,$typestr,$sum_bin,$seqid);

        $sth->execute(@args) or $self->throw($sth->errstr);
        my ($bin,$cum_count) = $sth->fetchrow_array;
        # Only one row is read per execute; release the handle.
        $sth->finish;
        push @{$bins{$typeid}},[$bin,$cum_count];
      }
    }
  };
  warn "coverage_array query failed: $@" if $@ && DEBUG;

  return unless %bins;

  my @tags;
  my @merged_bins;
  # NOTE(review): $slot restarts at 0 for every type and the slot is
  # assigned, not added to, so with several types the type visited last
  # overwrites the others -- confirm whether summing was intended.
  for my $bin_type (keys %bins) {
    push @tags, $bin_type;
    my $rows       = $bins{$bin_type};
    my $last_count = $rows->[0][1];
    my $last_bin   = -1;
    my $slot       = 0;
    my $delta;
    for my $row (@$rows) {
      my ($bin,$count) = @$row;
      # cumulative counts -> per-bin difference; $delta is only
      # recomputed when the bin number advances
      $delta = $count - $last_count if $bin > $last_bin;
      $merged_bins[$slot++] = $delta;
      $last_count = $count;
      $last_bin   = $bin;
    }
  }

  my $report_tag = join(",",@tags);
  return wantarray ? (\@merged_bins,$report_tag) : \@merged_bins;
}
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# Turn a type specification into the "method:source" string used to
# match the typeid column.
#
# $type is either a Bio::DB::GFF::Typename object or a plain string of
# the form "method" or "method:source".  A missing source becomes the
# SQL wildcard ("method:%"); an empty method with a source present
# becomes "%:source".
sub _types_sql {
  my ($self, $type) = @_;

  my $method;
  my $source;

  if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
    $method = $type->method;
    $source = $type->source;
  }
  else {
    # Split on the first colon only; anything after it is the source.
    ($method, $source) = split ':', $type, 2;
  }

  return "$method:%" unless defined $source;

  return length($method)
    ? "$method:$source"
    : "%:$source";
}
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=head2 tripal |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=over |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=item Usage |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
$obj->tripal() #get existing value |
|
598
|
|
|
|
|
|
|
$obj->tripal($newval) #set new value |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=item Function |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
Flag to identify Chado databases that are working with Tripal |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=item Returns |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
value of tripal (a scalar) |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=item Arguments |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
new value of tripal (to set) |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=back |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=cut |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
# Get or set the value stored in $self->{tripal}.
#
# With a defined argument the value is stored and returned; with no
# argument (or undef) the stored value is returned unchanged.
sub tripal {
  my $self   = shift;
  # A plain shift yields undef when no argument was passed.  The former
  # "shift if defined(@_)" is a compile-time error on Perl 5.22+.
  my $tripal = shift;

  return $self->{'tripal'} = $tripal if defined($tripal);
  return $self->{'tripal'};
}
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
=head2 fulltext |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=over |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=item Usage |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
$obj->fulltext() #get existing value |
|
632
|
|
|
|
|
|
|
$obj->fulltext($newval) #set new value |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=item Function |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Flag to govern the use of full text searching queries |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
=item Returns |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
value of fulltext (a scalar) |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=item Arguments |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
new value of fulltext (to set) |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=back |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
# Get or set the value stored in $self->{fulltext}.
#
# With a defined argument the value is stored and returned; with no
# argument (or undef) the stored value is returned unchanged.
sub fulltext {
  my $self     = shift;
  # A plain shift yields undef when no argument was passed.  The former
  # "shift if defined(@_)" is a compile-time error on Perl 5.22+.
  my $fulltext = shift;

  return $self->{'fulltext'} = $fulltext if defined($fulltext);
  return $self->{'fulltext'};
}
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head2 refclass |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=over |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=item Usage |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
$obj->refclass() #get existing value |
|
665
|
|
|
|
|
|
|
$obj->refclass($newval) #set new value |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
=item Function |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=item Returns |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
value of the reference class's cvterm_id (a scalar) |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=item Arguments |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
new value of the reference class's cvterm_id (to set) |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=back |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=cut |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
# Get or set the value stored in $self->{refclass}.
#
# With a defined argument the value is stored and returned; with no
# argument (or undef) the stored value is returned unchanged.
sub refclass {
  my $self     = shift;
  # A plain shift yields undef when no argument was passed.  The former
  # "shift if defined(@_)" is a compile-time error on Perl 5.22+.
  my $refclass = shift;

  return $self->{'refclass'} = $refclass if defined($refclass);
  return $self->{'refclass'};
}
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=head2 use_all_feature_names |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Title : use_all_feature_names |
|
692
|
|
|
|
|
|
|
Usage : $obj->use_all_feature_names() |
|
693
|
|
|
|
|
|
|
Function: set or return flag indicating that all_feature_names view is present |
|
694
|
|
|
|
|
|
|
Returns : 1 if all_feature_names present, 0 if not |
|
695
|
|
|
|
|
|
|
Args : to return the flag, none; to set, 1 |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
# Get, set or detect the flag saying whether a relation named
# all_feature_names is available.
#
# With a defined $flag the value is stored and returned.  Without one,
# a previously stored value is returned if there is one; otherwise
# pg_class is queried for a relation called all_feature_names and the
# result (1 for a table or view, 0 for anything else or nothing) is
# stored and returned.
sub use_all_feature_names {
  my ($self, $flag) = @_;

  return $self->{use_all_feature_names} = $flag
    if defined($flag);
  return $self->{use_all_feature_names}
    if defined $self->{use_all_feature_names};

  #now determine if either a view or table named all_feature_names is present
  my $query
    = "SELECT relkind FROM pg_class WHERE relname = 'all_feature_names'";

  my $exists = $self->dbh->prepare($query);
  # Report the DBI error; $! (the OS errno) says nothing about a failed
  # statement.
  $exists->execute
    or warn "all_feature_names query failed: " . $exists->errstr;

  my ($kind) = $exists->fetchrow_array;

  # relkind 'r' is an ordinary table, 'v' a view.
  my $is_table_or_view = ($kind and ($kind eq 'r' or $kind eq 'v')) ? 1 : 0;

  if ($kind and not $is_table_or_view) {
    warn "all_feature_names: This option shouldn't happen--setting use_all_feature_names to zero.";
  }
  $self->{use_all_feature_names} = $is_table_or_view;

  $exists->finish;

  return $self->{use_all_feature_names};
}
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=head2 organism_id |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
Title : organism_id |
|
734
|
|
|
|
|
|
|
Usage : $obj->organism_id() |
|
735
|
|
|
|
|
|
|
Function: set or return the organism_id |
|
736
|
|
|
|
|
|
|
Returns : the value of the id |
|
737
|
|
|
|
|
|
|
Args : to return the id, none; to set, the common name, 'genus species', or abbreviation of the organism |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
If -organism is set when the Chado feature is instantiated, this method |
|
740
|
|
|
|
|
|
|
queries the database with the common name to cache the organism_id. |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=cut |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# Get the cached organism_id, or resolve an organism name to its id.
#
# Without an argument (or with a false one) the cached id is returned.
# Otherwise the organism table is searched, in this order:
#   1. genus + species, when the name contains whitespace;
#   2. common_name;
#   3. abbreviation.
# The first lookup that yields exactly one row with a true id is cached in
# $self->{organism_id} and returned.  An ambiguous common name or
# abbreviation, or a name that matches nothing, is reported via
# $self->throw.  A failing query dies with the DBI error text.
sub organism_id {
    my ($self, $organism_name) = @_;

    return $self->{'organism_id'} if !$organism_name;

    my $dbh = $self->dbh;

    # Run one lookup and return every matching id.  Fetching all rows gives
    # a reliable count (DBI does not guarantee $sth->rows for SELECT) and
    # leaves no statement handle open on any return path.
    my $ids_for = sub {
        my ($failure, $sql, @bind) = @_;
        my $ids = $dbh->selectcol_arrayref($sql, undef, @bind)
            or die "$failure:" . ($dbh->errstr || 'unknown error');
        return @$ids;
    };

    # If there is a space in the name, check genus species.
    if ($organism_name =~ /(\S+?)\s+(.+)/) {
        my ($genus, $species) = ($1, $2);
        my @ids = $ids_for->(
            'organism genus species query failed',
            'SELECT organism_id FROM organism WHERE genus = ? and species = ?',
            $genus, $species,
        );

        # Anything other than a single hit falls through to the other
        # lookups, which receive the complete name.
        return $self->{'organism_id'} = $ids[0]
            if @ids == 1 && $ids[0];
    }

    my @lookups = (
        {   label   => 'common',
            failure => 'organism query failed',
            sql     => 'SELECT organism_id FROM organism WHERE common_name = ?',
        },
        {   label   => 'abbreviated',
            failure => 'organism abbrev query failed',
            sql     => 'SELECT organism_id FROM organism WHERE abbreviation = ?',
        },
    );

    for my $lookup (@lookups) {
        my @ids = $ids_for->($lookup->{failure}, $lookup->{sql}, $organism_name);

        $self->throw("The $lookup->{label} organism name, $organism_name, is present more than once in the organism table; please use a more precice representation of the organism.")
            if @ids > 1;

        # No match: do nothing here, let the next lookup have a go.
        return $self->{'organism_id'} = $ids[0]
            if @ids == 1 && $ids[0];
    }

    $self->throw("Tried everything to get an organism_id for '$organism_name' but failed; try 'genus species'");
    return;    # only reached if throw() returns
}
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head2 inferCDS |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Title : inferCDS |
|
823
|
|
|
|
|
|
|
Usage : $obj->inferCDS() |
|
824
|
|
|
|
|
|
|
Function: set or return the inferCDS flag |
|
825
|
|
|
|
|
|
|
Returns : the value of the inferCDS flag |
|
826
|
|
|
|
|
|
|
Args : to return the flag, none; to set, 1 |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
Often, chado databases will be populated without CDS features, since |
|
829
|
|
|
|
|
|
|
they can be inferred from a union of exons and polypeptide features. |
|
830
|
|
|
|
|
|
|
Setting this flag tells the adaptor to do the inference to get |
|
831
|
|
|
|
|
|
|
those derived CDS features (at some small performance penalty). |
|
832
|
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=cut |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
# Accessor for the inferCDS flag: a defined argument is stored first, and
# the current value is returned either way.
sub inferCDS {
    my ($self, $flag) = @_;

    $self->{inferCDS} = $flag if defined $flag;

    return $self->{inferCDS};
}
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=head2 allow_obsolete |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
Title : allow_obsolete |
|
846
|
|
|
|
|
|
|
Usage : $obj->allow_obsolete() |
|
847
|
|
|
|
|
|
|
Function: set or return the allow_obsolete flag |
|
848
|
|
|
|
|
|
|
Returns : the value of the allow_obsolete flag |
|
849
|
|
|
|
|
|
|
Args : to return the flag, none; to set, 1 |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
The chado feature table has a flag column called 'is_obsolete'. |
|
852
|
|
|
|
|
|
|
Normally, these features should be ignored by GBrowse, but |
|
853
|
|
|
|
|
|
|
the -allow_obsolete method is provided to allow displaying |
|
854
|
|
|
|
|
|
|
obsolete features. |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=cut |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
# Get or set the allow_obsolete flag.
#
#   $obj->allow_obsolete()       returns the stored flag
#   $obj->allow_obsolete($flag)  stores $flag and returns it
#
# An undefined argument never overwrites the stored flag.
sub allow_obsolete {
    my ($self, $allow_obsolete) = @_;

    # This replaces "my $allow_obsolete = shift if defined(@_)":
    # defined(@array) is a fatal error on modern perls, and a conditional
    # "my" declaration has undefined behaviour.
    $self->{'allow_obsolete'} = $allow_obsolete if defined $allow_obsolete;

    return $self->{'allow_obsolete'};
}
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=head2 sofa_id |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
Title : sofa_id |
|
869
|
|
|
|
|
|
|
Usage : $obj->sofa_id() |
|
870
|
|
|
|
|
|
|
Function: get or return the ID to use for SO terms |
|
871
|
|
|
|
|
|
|
Returns : the cv.cv_id for the SO ontology to use |
|
872
|
|
|
|
|
|
|
Args : to return the id, none; to determine the id, 1 |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=cut |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
# Get the cached cv_id of the sequence ontology, or look it up.
#
# Without arguments the cached value is returned.  With any argument the cv
# table is searched by name: first the SOFA spellings, then the full
# Sequence Ontology spellings.  The first true cv_id found is cached in
# $self->{sofa_id} and returned.  Throws when a query fails or when neither
# ontology is present.
sub sofa_id {
    my $self = shift;
    return $self->{'sofa_id'} unless @_;

    # Candidate cv names in order of preference; the leading label is only
    # used in the error message.
    my @candidates = (
        [ 'SOFA', 'SOFA', 'Sequence Ontology Feature Annotation', 'sofa.ontology' ],
        [ 'SO',   'Sequence Ontology', 'sequence', 'SO' ],
    );

    for my $candidate (@candidates) {
        my ($label, @names) = @$candidate;

        # One bound placeholder per name instead of literals in the SQL.
        my $placeholders = join ', ', ('?') x @names;
        my $sth = $self->dbh->prepare(
            "select cv_id from cv where name in ($placeholders)");
        $sth->execute(@names) or $self->throw("trying to find $label");

        # Only the first matching row is considered.
        my ($cv_id) = $sth->fetchrow_array;
        $sth->finish;

        return $self->{'sofa_id'} = $cv_id if $cv_id;
    }

    $self->throw("unable to find SO or SOFA in the database!");
}
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=head2 recursivMapping |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
Title : recursivMapping |
|
914
|
|
|
|
|
|
|
Usage : $obj->recursivMapping($newval) |
|
915
|
|
|
|
|
|
|
Function: Flag for activating the recursive mapping (deactivated by default) |
|
916
|
|
|
|
|
|
|
Returns : value of recursivMapping (a scalar) |
|
917
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
Goal : When we have a clone mapped on a chromosome, the recursive mapping maps the features of the clone on the chromosome. |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=cut |
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
# Accessor for the recursivMapping flag.  Any argument, even undef, is
# stored; the current value is returned.
sub recursivMapping {
    my ($self, @new_value) = @_;

    $self->{'recursivMapping'} = $new_value[0] if @new_value;

    return $self->{'recursivMapping'};
}
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=head2 srcfeatureslice |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Title : srcfeatureslice |
|
933
|
|
|
|
|
|
|
Usage : $obj->srcfeatureslice |
|
934
|
|
|
|
|
|
|
Function: Flag for activating |
|
935
|
|
|
|
|
|
|
Returns : value of srcfeatureslice |
|
936
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
937
|
|
|
|
|
|
|
Desc : Allows to use a featureslice of type featureloc_slice(srcfeat_id, int, int) |
|
938
|
|
|
|
|
|
|
Important : this and recursivMapping are mutually exclusive |
|
939
|
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
=cut |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# Accessor for the srcfeatureslice flag.  Any argument, even undef, is
# stored; the current value is returned.
sub srcfeatureslice {
    my ($self, @new_value) = @_;

    $self->{'srcfeatureslice'} = $new_value[0] if @new_value;

    return $self->{'srcfeatureslice'};
}
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
=head2 do2Level |
|
949
|
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
Title : do2Level |
|
951
|
|
|
|
|
|
|
Usage : $obj->do2Level |
|
952
|
|
|
|
|
|
|
Function: Flag for activating the fetching of 2levels in segment->features |
|
953
|
|
|
|
|
|
|
Returns : value of do2Level |
|
954
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
955
|
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=cut |
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# Accessor for the do2Level flag.  Any argument, even undef, is stored; the
# current value is returned.
sub do2Level {
    my ($self, @new_value) = @_;

    $self->{'do2Level'} = $new_value[0] if @new_value;

    return $self->{'do2Level'};
}
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
=head2 dbh |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
Title : dbh |
|
968
|
|
|
|
|
|
|
Usage : $obj->dbh($newval) |
|
969
|
|
|
|
|
|
|
Function: |
|
970
|
|
|
|
|
|
|
Returns : value of dbh (a scalar) |
|
971
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
=cut |
|
975
|
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
# Get, set, or lazily create the database handle.
#
# With an argument the handle is replaced and returned.  Without one the
# stored handle is returned; if none is stored, a new connection is opened
# from $self->{db_args} (dsn, username, password), cached and returned.
# Throws, including the DBI error text, when the connection cannot be
# opened.
sub dbh {
    my ($self, @new_handle) = @_;

    return $self->{'dbh'} = $new_handle[0] if @new_handle;
    return $self->{'dbh'} if defined $self->{'dbh'};

    # Uh oh, there isn't already a dbh object; try to create one.
    my $dsn      = $self->{db_args}->{dsn};
    my $username = $self->{db_args}->{username};
    my $password = $self->{db_args}->{password};

    # Report why the connection failed, not merely that it did.
    my $dbh = DBI->connect($dsn, $username, $password)
        or $self->throw("unable to open db handle: "
                        . ($DBI::errstr || 'unknown error'));
    $self->{'dbh'} = $dbh;

    # Sequential scans are switched off only when -enable_seqscan was
    # explicitly given a false value.
    if (exists($self->{-enable_seqscan}) && !$self->{-enable_seqscan}) {
        $dbh->do("set enable_seqscan=0");
    }

    return $self->{'dbh'};
}
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=head2 term2name |
|
999
|
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
Title : term2name |
|
1001
|
|
|
|
|
|
|
Usage : $obj->term2name($newval) |
|
1002
|
|
|
|
|
|
|
Function: When called with a hashref, sets cvterm.cvterm_id to cvterm.name |
|
1003
|
|
|
|
|
|
|
mapping hashref; when called with an int, returns the name |
|
1004
|
|
|
|
|
|
|
corresponding to that cvterm_id; called with no arguments, returns |
|
1005
|
|
|
|
|
|
|
the hashref. |
|
1006
|
|
|
|
|
|
|
Returns : see above |
|
1007
|
|
|
|
|
|
|
Args : on set, a hashref; to retrieve a name, an int; to retrieve the |
|
1008
|
|
|
|
|
|
|
hashref, none. |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
Note: should be replaced by Bio::GMOD::Util->term2name |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
=cut |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
# cvterm_id => name mapping.
#   hashref argument     : replaces the mapping and returns it
#   other true argument  : returns the name stored for that cvterm_id
#   no (or false) arg    : returns the mapping hashref
sub term2name {
    my ($self, $arg) = @_;

    return $self->{'term2name'} = $arg if ref($arg) eq 'HASH';
    return $self->{'term2name'}{$arg}  if $arg;

    return $self->{'term2name'};
}
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=head2 name2term |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
Title : name2term |
|
1031
|
|
|
|
|
|
|
Usage : $obj->name2term($newval) |
|
1032
|
|
|
|
|
|
|
Function: When called with a hashref, sets cvterm.name to cvterm.cvterm_id |
|
1033
|
|
|
|
|
|
|
mapping hashref; when called with a string, returns the cvterm_id |
|
1034
|
|
|
|
|
|
|
corresponding to that name; called with no arguments, returns |
|
1035
|
|
|
|
|
|
|
the hashref. |
|
1036
|
|
|
|
|
|
|
Returns : see above |
|
1037
|
|
|
|
|
|
|
Args : on set, a hashref; to retrieve a cvterm_id, a string; to retrieve |
|
1038
|
|
|
|
|
|
|
the hashref, none. |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
Note: Should be replaced by Bio::GMOD::Util->name2term |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
=cut |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
# name => cvterm_id mapping.
#   hashref first argument  : replaces the mapping and returns it
#   other true first arg    : returns whatever is stored under that name
#   no (or false) first arg : returns the mapping hashref
# A hashref passed as the second argument is stored as $self->{termcvs}
# before any of the above happens.
sub name2term {
    my ($self, $arg, $cvnames) = @_;

    $self->{'termcvs'} = $cvnames if ref($cvnames) eq 'HASH';

    return $self->{'name2term'} = $arg if ref($arg) eq 'HASH';
    return $self->{'name2term'} unless $arg;

    # Rather than trying to guess what a caller wants (for example picking
    # the sequence-ontology term when a name maps to several ids), the
    # stored value is handed back untouched and the caller has to deal with
    # whatever comes.
    return $self->{'name2term'}{$arg};
}
|
1073
|
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
=head2 segment |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
Title : segment |
|
1077
|
|
|
|
|
|
|
Usage : $db->segment(@args); |
|
1078
|
|
|
|
|
|
|
Function: create a segment object |
|
1079
|
|
|
|
|
|
|
Returns : segment object(s) |
|
1080
|
|
|
|
|
|
|
Args : see below |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
This method generates a Bio::Das::SegmentI object (see |
|
1083
|
|
|
|
|
|
|
L<Bio::Das::SegmentI>). The segment can be used to find overlapping |
|
1084
|
|
|
|
|
|
|
features and the raw sequence. |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
When making the segment() call, you specify the ID of a sequence |
|
1087
|
|
|
|
|
|
|
landmark (e.g. an accession number, a clone or contig), and a |
|
1088
|
|
|
|
|
|
|
positional range relative to the landmark. If no range is specified, |
|
1089
|
|
|
|
|
|
|
then the entire region spanned by the landmark is used to generate the |
|
1090
|
|
|
|
|
|
|
segment. |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
Arguments are -option=E<gt>value pairs as follows: |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
-name ID of the landmark sequence. |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
-class A namespace qualifier. It is not necessary for the |
|
1097
|
|
|
|
|
|
|
database to honor namespace qualifiers, but if it |
|
1098
|
|
|
|
|
|
|
does, this is where the qualifier is indicated. |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
-version Version number of the landmark. It is not necessary for |
|
1101
|
|
|
|
|
|
|
the database to honor versions, but if it does, this is |
|
1102
|
|
|
|
|
|
|
where the version is indicated. |
|
1103
|
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
-start Start of the segment relative to landmark. Positions |
|
1105
|
|
|
|
|
|
|
follow standard 1-based sequence rules. If not specified, |
|
1106
|
|
|
|
|
|
|
defaults to the beginning of the landmark. |
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
-end End of the segment relative to the landmark. If not specified, |
|
1109
|
|
|
|
|
|
|
defaults to the end of the landmark. |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
The return value is a list of Bio::Das::SegmentI objects. If the method |
|
1112
|
|
|
|
|
|
|
is called in a scalar context and there is no more than one segment |
|
1113
|
|
|
|
|
|
|
that satisfy the request, then it is allowed to return the segment. |
|
1114
|
|
|
|
|
|
|
Otherwise, the method must throw a "multiple segment exception". |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=cut |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# Build a segment by handing the parsed arguments to the segment class,
# which does all the lifting.  -stop is accepted as a fallback for -end;
# -class and -version are parsed but not passed on.
sub segment {
    my ($self, @args) = @_;

    my @wanted = qw(NAME START STOP END CLASS VERSION
                    DB_ID FEATURE_ID SRCFEATURE_ID);
    my %arg;
    @arg{@wanted} = $self->_rearrange([@wanted], @args);

    my $end = $arg{END} || $arg{STOP};

    return $self->_segclass->new(
        $arg{NAME}, $self, $arg{START}, $end,
        $arg{DB_ID}, 0, $arg{FEATURE_ID}, $arg{SRCFEATURE_ID},
    );
}
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=head2 features |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
Title : features |
|
1139
|
|
|
|
|
|
|
Usage : $db->features(@args) |
|
1140
|
|
|
|
|
|
|
Function: get all features, possibly filtered by type |
|
1141
|
|
|
|
|
|
|
Returns : a list of Bio::SeqFeatureI objects |
|
1142
|
|
|
|
|
|
|
Args : see below |
|
1143
|
|
|
|
|
|
|
Status : public |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
This routine will retrieve features in the database regardless of |
|
1146
|
|
|
|
|
|
|
position. It can be used to return all features, or a subset based on |
|
1147
|
|
|
|
|
|
|
their type |
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Arguments are -option=E<gt>value pairs as follows: |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
-type List of feature types to return. Argument is an array |
|
1152
|
|
|
|
|
|
|
of Bio::Das::FeatureTypeI objects or a set of strings |
|
1153
|
|
|
|
|
|
|
that can be converted into FeatureTypeI objects. |
|
1154
|
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
-callback A callback to invoke on each feature. The subroutine |
|
1156
|
|
|
|
|
|
|
will be passed each Bio::SeqFeatureI object in turn. |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
-attributes A hash reference containing attributes to match. |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
The -attributes argument is a hashref containing one or more attributes |
|
1161
|
|
|
|
|
|
|
to match against: |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
-attributes => { Gene => 'abc-1', |
|
1164
|
|
|
|
|
|
|
Note => 'confirmed' } |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
Attribute matching is simple exact string matching, and multiple |
|
1167
|
|
|
|
|
|
|
attributes are ANDed together. |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
If one provides a callback, it will be invoked on each feature in |
|
1170
|
|
|
|
|
|
|
turn. If the callback returns a false value, iteration will be |
|
1171
|
|
|
|
|
|
|
interrupted. When a callback is provided, the method returns undef. |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
=cut |
|
1174
|
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
# Fetch features regardless of position by delegating to the segment
# class's features() method, with this object passed as -factory.
# -types is accepted as a synonym for -type.  Returns the list of features
# the segment class produced.
sub features {
    my ($self, @args) = @_;

    my @wanted = qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR
                    FEATURE_ID SEQ_ID START END);
    my %arg;
    @arg{@wanted} = $self->_rearrange([@wanted], @args);

    my $type = $arg{TYPE} || $arg{TYPES};    #GRRR

    warn "Chado,features: $type\n" if DEBUG;

    # The call is made in list context whatever the caller's context is.
    my @found = $self->_segclass->features(
        -type       => $type,
        -attributes => $arg{ATTRIBUTES},
        -callback   => $arg{CALLBACK},
        -iterator   => $arg{ITERATOR},
        -factory    => $self,
        -feature_id => $arg{FEATURE_ID},
        -seq_id     => $arg{SEQ_ID},
        -start      => $arg{START},
        -end        => $arg{END},
    );

    return @found;
}
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
# Like features(), but wraps the result in a Bio::DB::Das::ChadoIterator.
#
# Accepts the same -option => value pairs as features().  The features are
# fetched through the segment class's features() method, with this object
# passed as -factory, and handed to the iterator as an array reference.
sub get_seq_stream {
    my ($self, @args) = @_;

    my ($type, $types, $callback, $attributes, $iterator,
        $feature_id, $seq_id, $start, $end)
        = $self->_rearrange(
            [qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR
                FEATURE_ID SEQ_ID START END)],
            @args);

    # -types is a synonym for -type, exactly as in features(); it used to
    # be parsed here and then silently ignored.
    $type ||= $types;

    my @features = $self->_segclass->features(
        -type       => $type,
        -attributes => $attributes,
        -callback   => $callback,
        -iterator   => $iterator,
        -factory    => $self,
        -feature_id => $feature_id,
        -seq_id     => $seq_id,
        -start      => $start,
        -end        => $end,
    );

    return Bio::DB::Das::ChadoIterator->new(\@features);
}
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=head2 types |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
Title : types |
|
1223
|
|
|
|
|
|
|
Usage : $db->types(@args) |
|
1224
|
|
|
|
|
|
|
Function: return list of feature types in database |
|
1225
|
|
|
|
|
|
|
Returns : a list of Bio::Das::FeatureTypeI objects |
|
1226
|
|
|
|
|
|
|
Args : see below |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
This routine returns a list of feature types known to the database. It |
|
1229
|
|
|
|
|
|
|
is also possible to find out how many times each feature occurs. |
|
1230
|
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
Arguments are -option=E<gt>value pairs as follows: |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
-enumerate if true, count the features |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
The returned value will be a list of Bio::Das::FeatureTypeI objects |
|
1236
|
|
|
|
|
|
|
(see L<Bio::Das::FeatureTypeI>). |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
If -enumerate is true, then the function returns a hash (not a hash |
|
1239
|
|
|
|
|
|
|
reference) in which the keys are the stringified versions of |
|
1240
|
|
|
|
|
|
|
Bio::Das::FeatureTypeI and the values are the number of times each |
|
1241
|
|
|
|
|
|
|
feature appears in the database. |
|
1242
|
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
NOTE: This currently raises a "not-implemented" exception, as the |
|
1244
|
|
|
|
|
|
|
BioSQL API does not appear to provide this functionality. |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=cut |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
# Listing feature types is not implemented: -enumerate is parsed and then
# the not-implemented exception is raised unconditionally.
sub types {
    my ($self, @args) = @_;

    my ($enumerate) = $self->_rearrange(['ENUMERATE'], @args);

    # If Lincoln didn't need to implement it, neither do I!
    $self->throw_not_implemented;
}
|
1254
|
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
=head2 get_feature_by_alias, get_features_by_alias |
|
1256
|
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
Title : get_features_by_alias |
|
1258
|
|
|
|
|
|
|
Usage : $db->get_feature_by_alias(@args) |
|
1259
|
|
|
|
|
|
|
Function: return list of feature whose name or synonyms match |
|
1260
|
|
|
|
|
|
|
Returns : a list of Bio::Das::Chado::Segment::Feature objects |
|
1261
|
|
|
|
|
|
|
Args : See below |
|
1262
|
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
This method finds features matching the criteria outlined by the |
|
1264
|
|
|
|
|
|
|
supplied arguments. Wildcards (*) are allowed. Valid arguments are: |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=over |
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=item -name |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=item -class |
|
1271
|
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
=item -ref (reference sequence) |
|
1273
|
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=item -start |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=item -end |
|
1277
|
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
=back |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
=cut |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
# Find features whose name or synonyms match.  A single positional argument
# is treated as the -name; the search itself is done by
# _by_alias_by_name() with -operation => 'by_alias' appended.
sub get_feature_by_alias {
    my ($self, @args) = @_;

    # A lone argument is the name to look for.
    unshift @args, '-name' if @args == 1;

    return $self->_by_alias_by_name(@args, -operation => 'by_alias');
}

# The plural spelling is the same routine.
*get_features_by_alias = \&get_feature_by_alias;
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
=head2 get_feature_by_name, get_features_by_name |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
Title : get_features_by_name |
|
1301
|
|
|
|
|
|
|
Usage : $db->get_features_by_name(@args) |
|
1302
|
|
|
|
|
|
|
Function: return list of feature whose names match |
|
1303
|
|
|
|
|
|
|
Returns : a list of Bio::Das::Chado::Segment::Feature objects |
|
1304
|
|
|
|
|
|
|
Args : See below |
|
1305
|
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
This method finds features matching the criteria outlined by the |
|
1307
|
|
|
|
|
|
|
supplied arguments. Wildcards (*) are allowed. Valid arguments are: |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=over |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=item -name |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
=item -class |
|
1314
|
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
=item -ref (reference sequence) |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
=item -start |
|
1318
|
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
=item -end |
|
1320
|
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=back |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
=cut |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
# The plural spelling is the same routine.
*get_features_by_name = \&get_feature_by_name;

# Find features whose names match.  A single positional argument is treated
# as the -name; the search itself is done by _by_alias_by_name() with
# -operation => 'by_name' appended.
sub get_feature_by_name {
    my ($self, @args) = @_;

    warn "in get_feature_by_name, args:@args" if DEBUG;

    # A lone argument is the name to look for.
    unshift @args, '-name' if @args == 1;

    return $self->_by_alias_by_name(@args, -operation => 'by_name');
}
|
1342
|
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
=head2 _by_alias_by_name |
|
1344
|
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Title : _by_alias_by_name |
|
1346
|
|
|
|
|
|
|
Usage : $db->_by_alias_by_name(@args) |
|
1347
|
|
|
|
|
|
|
Function: return list of feature whose names match |
|
1348
|
|
|
|
|
|
|
Returns : a list of Bio::Das::Chado::Segment::Feature objects |
|
1349
|
|
|
|
|
|
|
Args : See below |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
A private method that implements the get_features_by_name and |
|
1352
|
|
|
|
|
|
|
get_features_by_alias methods. It accepts the same args as |
|
1353
|
|
|
|
|
|
|
those methods, plus an additional one (-operation) which is |
|
1354
|
|
|
|
|
|
|
either 'by_alias' or 'by_name' to indicate what rule it is to |
|
1355
|
|
|
|
|
|
|
use for finding features. |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=cut |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
# Shared implementation behind the by-name and by-alias feature searches.
#
# Arguments (named, unpacked with _rearrange):
#   -name       name to search for; '*' is a wildcard, "id:NNN" is a direct
#               feature_id lookup, and "source:name" is reduced to "name"
#               when "source" is a known GFF source
#   -class      feature type name used to restrict the search (ignored for
#               wildcard searches)
#   -ref        accepted but not used by this method
#   -start/-end only used when the hit turns out to be a srcfeature, in which
#               case they are handed on to segment()
#   -operation  'by_alias' to search aliases; anything else searches f.name
#
# Returns a list of feature/segment objects; an empty list when the class is
# unknown or when a matching feature_id has no location and is not itself a
# srcfeature.
sub _by_alias_by_name {
    my $self = shift;

    my ($name, $class, $ref, $base_start, $stop, $operation)
        = $self->_rearrange([qw(NAME CLASS REF START END OPERATION)], @_);

    # Missing arguments behave exactly like the empty name / name search did
    # before, just without "uninitialized value" warnings.
    $name      = ''        unless defined $name;
    $operation = 'by_name' unless defined $operation;

    # "id:NNN" short-circuits to a primary key lookup.
    if ($name =~ /^id:(\d+)/) {
        return $self->get_feature_by_feature_id($1);
    }

    # A name of the form "source:name" comes from a multiple-hit search
    # result; keep only the name part when the prefix is a known source.
    my @name_parts = split /:/, $name;
    if (@name_parts == 2 && ($self->source2dbxref($name_parts[0]) || 0) > 0) {
        warn "assuming that the name with a colon ($name) is coming from a multiple hit search result (ie, is of the form 'source:name'";
        $name = $name_parts[1];
    }

    # For full text search all query terms are AND-ed together; this only
    # makes sense when full text searching is enabled.
    $name = $self->_search_name_prep_spaces($name) if $self->fulltext;

    # A wildcard search ignores the class restriction.
    my $wildcard = 0;
    if ($name =~ /\*/) {
        $wildcard = 1;
        undef $class;
    }

    warn "name:$name in get_feature_by_name" if DEBUG;

    # ------------------------------------------------------------------
    # Step 1: build and run the query that yields matching feature_ids.
    # ------------------------------------------------------------------
    my ($select_part, $from_part);
    my $where_part = '';

    if ($class) {
        my $type = ($class eq 'CDS' && $self->inferCDS)
                 ? $self->name2term('polypeptide')
                 : $self->name2term($class);
        return unless $type;

        # name2term may hand back several ids for one name.
        if (ref $type eq 'ARRAY') {
            $type = join(',', @$type);
        }
        elsif (ref $type eq 'HASH') {
            $type = join(',', map { $type->{$_} } keys %$type);
        }
        $from_part   = " feature f ";
        $where_part .= " AND f.type_id in ( $type ) ";
    }

    my $organism_id = $self->organism_id;
    if ($organism_id) {
        if ($operation eq 'by_alias' && $self->use_all_feature_names()) {
            $where_part .= " AND afn.organism_id =" . $organism_id;
        }
        else {
            $where_part .= " AND f.organism_id =" . $organism_id;
            # The filter refers to table alias "f", so "feature f" has to be
            # in the FROM clause even when no class was given (previously the
            # synonym-table alias search produced SQL without it).
            $from_part ||= " feature f ";
        }
    }

    # Pick the tables/columns for the kind of search, then build one WHERE
    # clause from them.  With full text search there is no difference
    # between wildcard and non-wildcard queries.
    my ($where_prefix, $plain_column, $fulltext_column);
    if ($operation eq 'by_alias') {
        if ($self->use_all_feature_names()) {
            $select_part = "select distinct afn.feature_id \n";
            $from_part   = $from_part
                ? "$from_part join all_feature_names afn using (feature_id) "
                : "all_feature_names afn ";
            $where_prefix    = "where ";
            $plain_column    = 'afn.name';
            $fulltext_column = 'afn.searchable_name';
        }
        else {    # need to use the synonym table
            $select_part = "select distinct fs.feature_id \n";
            $from_part   = $from_part
                ? "$from_part join feature_synonym fs using (feature_id), synonym s "
                : "feature_synonym fs, synonym s ";
            $where_prefix    = "where fs.synonym_id = s.synonym_id and\n";
            $plain_column    = 's.synonym_sgml';
            $fulltext_column = 's.searchable_synonym_sgml';
        }
    }
    else {        # searching by name only
        $select_part     = "select f.feature_id ";
        $from_part       = " feature f ";
        $where_prefix    = "where ";
        $plain_column    = 'f.name';
        $fulltext_column = 'f.searchable_name';
    }

    my $match_clause
        = $self->fulltext ? $fulltext_column . ' @@ to_tsquery(?)'
        : $wildcard       ? "lower($plain_column) like ?"
        :                   "lower($plain_column) = ?";

    $where_part = $where_part
        ? "$where_prefix$match_clause $where_part"
        : "$where_prefix$match_clause";

    my $query = $select_part . ' FROM ' . $from_part . $where_part;

    # Strip ALL leading and trailing whitespace (the old pattern lacked a
    # '+' on the trailing side and removed a single character only).
    $query =~ s/\A[ \t\r\n]+|[ \t\r\n]+\z//g;

    warn "first get_feature_by_name query:$query" if DEBUG;

    my $sth = $self->dbh->prepare($query);

    if ($wildcard) {
        $name = $self->_search_name_prep($name);
        warn "name after protecting _ and % in the string:$name\n" if DEBUG;
    }

    # The SQL compares against lower(...), so the bind value is lower-cased.
    $sth->execute(lc($name)) or $self->throw("getting the feature_ids failed");

    # ------------------------------------------------------------------
    # Step 2: for every feature_id fetch its details and build objects.
    # ------------------------------------------------------------------
    my $isth = $self->dbh->prepare("
        select f.feature_id, f.name, f.type_id,f.uniquename,af.significance as score,
               fl.fmin,fl.fmax,fl.strand,fl.phase, fl.srcfeature_id, fd.dbxref_id,
               f.is_obsolete,f.seqlen
        from feature f join featureloc fl using (feature_id)
             left join analysisfeature af using (feature_id)
             left join feature_dbxref fd using (feature_id)
        where
          f.feature_id = ? and fl.rank=0 and
          (fd.dbxref_id is null or fd.dbxref_id in
            (select dbxref_id from dbxref where db_id = ?))
        order by fl.srcfeature_id
    ");

    my $jsth = $self->dbh->prepare("select name from feature
                                    where feature_id = ?");

    # Turns a term name into a SQL predicate on a type_id column
    # ("= 12" or "in (12,13)").  name2term is the name -> id mapping; the
    # previous code called term2name here, which is the id -> name mapping
    # used elsewhere in this method to label features.
    my $type_predicate = sub {
        my ($term_name) = @_;
        my $type = $self->name2term($term_name);
        $self->throw("no cvterm id found for '$term_name'") unless $type;
        my @ids = ref $type eq 'ARRAY' ? @$type
                : ref $type eq 'HASH'  ? values %$type
                :                        ($type);
        return @ids == 1 ? "= $ids[0]" : 'in (' . join(',', @ids) . ')';
    };

    # Statements used inside the loops; prepared on first use only.
    my ($recurs_sth, $transcript_sth, $exon_sth);

    my @features;

    while (my $feature_id_ref = $sth->fetchrow_hashref) {
        my $feature_id = $feature_id_ref->{'feature_id'};

        warn "feature_id in features method loop:" . $feature_id if DEBUG;

        $isth->execute($feature_id, $self->gff_source_db_id)
            or $self->throw("getting feature info failed");

        if ($isth->rows == 0) {    # this might be a srcfeature
            warn "$name might be a srcfeature" if DEBUG;

            my $is_srcfeature_query = $self->dbh->prepare("
                select srcfeature_id from featureloc where srcfeature_id=? limit 1
            ");
            $is_srcfeature_query->execute($feature_id)
                or $self->throw("checking if feature is a srcfeature failed");

            # Both outcomes below return, so release the other handles now.
            $sth->finish;
            $isth->finish;
            $jsth->finish;

            if ($is_srcfeature_query->rows == 1) {    # yep, it's a srcfeature
                warn "Yep, $name is a srcfeature" if DEBUG;

                # Build a segment out of the srcfeature, honouring the
                # caller's -start/-end.
                my @args = ($name);
                push @args, $base_start if $base_start;
                push @args, $stop       if $stop;

                warn "srcfeature args:$args[0]" if DEBUG;

                my @seg = ($self->segment(@args));
                $is_srcfeature_query->finish;
                return @seg;
            }

            $is_srcfeature_query->finish;
            return;    # nothing found
        }

        # Rows are ordered by srcfeature_id, so the parent segment only has
        # to be rebuilt when that id changes.
        my $old_srcfeature_id = -1;
        my $parent_segment;

        FEATURE_ROW:
        while (my $row = $isth->fetchrow_hashref) {

            next FEATURE_ROW if ($row->{'is_obsolete'} and !$self->allow_obsolete);

            my $refclass = $self->refclass;
            if ($refclass && $row->{'type_id'} == $refclass) {
                # this feature is supposed to be a reference feature
                push @features,
                    Bio::DB::Das::Chado::Segment->new(
                        $row->{'name'},
                        $self,
                        1, $row->{'seqlen'},
                        $row->{'uniquename'},
                        undef,
                        $row->{'feature_id'},
                        undef);
                next FEATURE_ROW;
            }

            if ($row->{'srcfeature_id'} != $old_srcfeature_id) {
                $jsth->execute($row->{'srcfeature_id'})
                    or die("getting assembly info failed");
                my $src_name = $jsth->fetchrow_hashref;
                warn "src_name:$$src_name{'name'}" if DEBUG;
                $parent_segment = Bio::DB::Das::Chado::Segment->new(
                    $$src_name{'name'}, $self,
                    undef, undef, undef, undef,
                    $row->{'srcfeature_id'});
                $old_srcfeature_id = $row->{'srcfeature_id'};
            }

            # now build the feature

            if ($self->{recursivMapping}) {
                # Fetch the recursively mapped position.  If there is none
                # we assume the feature is already mapped on the lowest
                # refseq and use the featureloc values from $row.
                $recurs_sth ||= $self->dbh->prepare(
                    "select fl.fmin,fl.fmax,fl.strand,fl.phase
                     from feat_remapping(?) fl
                     where fl.rank=0");
                $recurs_sth->execute($feature_id);
                my $remapped = $recurs_sth->fetchrow_hashref;
                $recurs_sth->finish;

                my $loc = $remapped ? $remapped : $row;

                # Chado stores interbase coordinates; +1 gives base
                # coordinates.  A dedicated variable is used so the caller's
                # -start ($base_start) is never overwritten.
                my $feature_start = $loc->{'fmin'} + 1;

                # NOTE(review): unlike the two branches below, this branch
                # passes the bare type name rather than a Typename object;
                # kept as it was.
                push @features,
                    Bio::DB::Das::Chado::Segment::Feature->new(
                        $self,
                        $parent_segment,
                        $parent_segment->seq_id,
                        $feature_start, $loc->{'fmax'},
                        $self->term2name($row->{'type_id'}),
                        $row->{'score'},
                        $loc->{'strand'},
                        $loc->{'phase'},
                        $row->{'name'},
                        $row->{'uniquename'},
                        $row->{'feature_id'});
            }
            elsif ($class && $class eq 'CDS' && $self->inferCDS) {
                # $row holds info for the polypeptide; CDS features are the
                # parts of the parent transcript's exons that lie within it.
                my $poly_min = $row->{'fmin'};
                my $poly_max = $row->{'fmax'};

                # get the feature_id of the parent transcript
                $transcript_sth ||= $self->dbh->prepare(
                    "SELECT object_id FROM feature_relationship
                     WHERE type_id " . $type_predicate->('derives_from') . "
                       AND subject_id = ?");
                $transcript_sth->execute($row->{'feature_id'});
                my ($trans_id) = $transcript_sth->fetchrow_array;
                $transcript_sth->finish;

                # Without a parent transcript there are no exons to clip.
                next FEATURE_ROW unless defined $trans_id;

                # now get exons that are part of the transcript
                $exon_sth ||= $self->dbh->prepare(
                    "SELECT f.feature_id,f.name,f.type_id,f.uniquename,
                            af.significance as score,fl.fmin,fl.fmax,fl.strand,
                            fl.phase, fl.srcfeature_id, fd.dbxref_id,f.is_obsolete
                     FROM feature f join featureloc fl using (feature_id)
                          left join analysisfeature af using (feature_id)
                          left join feature_dbxref fd using (feature_id)
                     WHERE
                       f.type_id " . $type_predicate->('exon') . " and f.feature_id in
                         (select subject_id from feature_relationship where object_id = ? and
                                 type_id " . $type_predicate->('part_of') . " ) and
                       fl.rank=0 and
                       (fd.dbxref_id is null or fd.dbxref_id in
                         (select dbxref_id from dbxref where db_id = ?))");
                $exon_sth->execute($trans_id, $self->gff_source_db_id);

                while (my $exonref = $exon_sth->fetchrow_hashref) {
                    # skip exons that do not overlap the polypeptide at all
                    next if ($$exonref{fmax} < $poly_min);
                    next if ($$exonref{fmin} > $poly_max);
                    next if ($$exonref{is_obsolete} and !$self->allow_obsolete);

                    # Clip the exon to the polypeptide on each side
                    # independently.  (The old test required the exon to span
                    # the whole polypeptide before clipping either end, so a
                    # partly overlapping exon kept its own bounds.)
                    my $cds_start = ($$exonref{fmin} <= $poly_min
                                        ? $poly_min
                                        : $$exonref{fmin}) + 1;
                    my $cds_stop  = $$exonref{fmax} >= $poly_max
                                        ? $poly_max
                                        : $$exonref{fmax};

                    my $type_obj = Bio::DB::GFF::Typename->new(
                        'CDS',
                        $self->dbxref2source($row->{'dbxref_id'}) || '');

                    push @features,
                        Bio::DB::Das::Chado::Segment::Feature->new(
                            $self,
                            $parent_segment,
                            $parent_segment->seq_id,
                            $cds_start, $cds_stop,
                            $type_obj,
                            $row->{'score'},
                            $row->{'strand'},
                            $row->{'phase'},
                            $row->{'name'},
                            $row->{'uniquename'},
                            $row->{'feature_id'});
                }
                $exon_sth->finish;
            }
            else {
                # the normal case where you don't infer CDS features
                my $feature_start = $row->{'fmin'} + 1;    # interbase -> base

                my $type_obj = Bio::DB::GFF::Typename->new(
                    $self->term2name($row->{'type_id'}),
                    $self->dbxref2source($row->{'dbxref_id'}) || '');

                # Reference-class rows were already handled (and skipped)
                # above, so a parent segment is always passed here; the old
                # "my $srcf = 1 if ..." switch could never be set.
                push @features,
                    Bio::DB::Das::Chado::Segment::Feature->new(
                        $self,
                        $parent_segment,
                        $parent_segment->seq_id,
                        $feature_start, $row->{'fmax'},
                        $type_obj,
                        $row->{'score'},
                        $row->{'strand'},
                        $row->{'phase'},
                        $row->{'name'},
                        $row->{'uniquename'},
                        $row->{'feature_id'});
            }
        }
    }

    $sth->finish;
    $isth->finish;
    $jsth->finish;
    return @features;
}
|
1814
|
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
# Turn a whitespace-separated search phrase into an AND-ed query string by
# joining the terms with ' & '.
#
# Any ' & ' the user already typed is treated as plain whitespace first, so
# it is not turned into ' & & & '.  Runs of whitespace count as ONE
# separator and leading/trailing whitespace is dropped; otherwise input such
# as 'foo  bar' or ' foo' would produce empty operands ('foo &  & bar',
# ' & foo').  [LP]
#
#   $name   the raw search phrase (undef is returned unchanged)
#   returns the phrase with its terms joined by ' & '
sub _search_name_prep_spaces {
    my ($self, $name) = @_;

    return $name unless defined $name;

    $name =~ s/(?:\s+&)+\s+/ /g;     # user-defined ' & ' becomes a plain separator
    $name =~ s/\A\s+|\s+\z//g;       # no separator before the first / after the last term
    $name =~ s/\s+/ & /g;            # every run of whitespace becomes a single ' & '

    return $name;
}
|
1826
|
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
# Alias: fetch_feature_by_name is installed as another name for get_feature_by_name.
*fetch_feature_by_name = \&get_feature_by_name; |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
# Fetch features by feature_id.
#
#   $feature_id  value passed to features() as -feature_id
#   returns      every object features() yields for that id
#                (in scalar context, the number of them)
sub get_feature_by_feature_id {
    my ($self, $feature_id) = @_;

    # features() is called in list context and its results are returned
    # through an array, so the scalar-context value is the element count.
    my @matches = $self->features( -feature_id => $feature_id );

    return @matches;
}
|
1837
|
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
# Fetch a single feature by feature_id.
#
#   $feature_id  value passed to features() as -feature_id
#   returns      the first object features() yields, or undef when it
#                yields nothing
sub get_feature_by_id {
    my ($self, $feature_id) = @_;

    # Only the first hit is of interest; any further ones are discarded.
    my ($first_match) = $self->features( -feature_id => $feature_id );

    return $first_match;
}
|
1845
|
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
# Aliases: both fetch and get_feature_by_primary_id are installed as other
# names for get_feature_by_feature_id.
*fetch = *get_feature_by_primary_id = \&get_feature_by_feature_id; |
|
1847
|
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
# Build (but do not run) a substring search against the gffatts relation.
#
#   $name   search term; a '%' is added at the front and at the end unless
#           one is already there
#   $class  optional feature type name used to restrict the search
#
# Returns ($name, $query): the wildcarded term to bind to the single
# placeholder, and the SQL text.  Returns nothing when $class is given but
# name2term knows no id for it.
sub _complex_search {
    my ($self, $name, $class) = @_;

    warn "name before wildcard subs:$name\n" if DEBUG;

    $name = "\%$name" unless (0 == index($name, "%"));
    $name = "$name%"  unless (0 == index(reverse($name), "%"));

    warn "name after wildcard subs:$name\n" if DEBUG;

    my $select_part = "select ga.feature_id ";
    my $from_part   = "from gffatts ga ";
    my $where_part  = "where lower(ga.attribute) like ? ";

    if ($class) {
        my $type = $self->name2term($class);
        return unless $type;

        # name2term can yield several ids for one name (see the ARRAY/HASH
        # handling in _by_alias_by_name); interpolating such a reference
        # directly would put "ARRAY(0x...)" into the SQL.
        my @type_ids = ref $type eq 'ARRAY' ? @$type
                     : ref $type eq 'HASH'  ? values %$type
                     :                        ($type);
        my $type_clause = @type_ids == 1
            ? "f.type_id = $type_ids[0]"
            : "f.type_id in (" . join(',', @type_ids) . ")";

        $from_part  .= ", feature f ";
        $where_part .= "and ga.feature_id = f.feature_id and " . $type_clause;
    }

    $where_part .= " and organism_id = " . $self->organism_id
        if $self->organism_id;

    my $query = $select_part . $from_part . $where_part;
    return ($name, $query);
}
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
# Translate the user-facing '*' wildcard in a search name into the form
# expected by the query that will be run.
#
# Without full text search the name is used with LIKE: literal '_' and '%'
# are backslash-escaped and every '*' becomes '%'.
#
# With full text search only prefix matching is available, so a '*' at the
# start of the query or of a word is dropped and a '*' at the end of the
# query or of a word becomes ':*'.  Underscores are backslash-escaped here
# as well.  [LP]
#
#   $name   the search name
#   returns the rewritten name
sub _search_name_prep {
    my ($self, $name) = @_;

    unless ($self->fulltext) {
        for ($name) {
            s/_/\\_/g;     # escape underscores in name
            s/\%/\\%/g;    # ditto for percent signs
            s/\*/%/g;      # user wildcard -> SQL wildcard
        }
        return $name;
    }

    for ($name) {
        s/_/\\_/g;             # escape underscores in name
        s/(?<=\s)\*//g;        # lose prefix wildcards (word start)
        s/(?<=^)\*//g;         # lose prefix wildcards (query start)
        s/\*(?=$)/:\*/g;       # convert trailing * (query end) into :*
        s/\*(?=\s)/:\*/g;      # convert trailing * (word end) into :*
    }
    return $name;
}
|
1906
|
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
=head2 srcfeature2name |
|
1909
|
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
returns a srcfeature name given a srcfeature_id |
|
1911
|
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=cut |
|
1913
|
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
# Map a srcfeature_id to the name of that feature.
#
# Results are remembered in $self->{'srcfeature_id'}; a remembered value is
# only reused when it is true, otherwise the database is asked again.
#
#   $id     feature_id of the srcfeature
#   returns the feature's name, or undef when no row is found
sub srcfeature2name {
    my ($self, $id) = @_;

    my $cache = $self->{'srcfeature_id'} ||= {};

    if (my $known = $cache->{$id}) {
        return $known;
    }

    my $sth = $self->dbh->prepare("select name from feature where feature_id = ?");
    $sth->execute($id);
    my $row = $sth->fetchrow_hashref;
    $cache->{$id} = $row->{'name'};
    $sth->finish;

    return $cache->{$id};
}
|
1930
|
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
=head2 gff_source_db_id |
|
1932
|
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
Title : gff_source_db_id |
|
1934
|
|
|
|
|
|
|
Function: caches the chado db_id from the chado db table |
|
1935
|
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
=cut |
|
1937
|
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
# Return the db_id of the row in the db table whose name is 'GFF_source'.
#
# The value is looked up on first use and kept in
# $self->{'gff_source_db_id'}; as long as the kept value is false the query
# is run again on the next call.
#
#   returns the db_id, or undef when there is no such row
sub gff_source_db_id {
    my ($self) = @_;

    unless ($self->{'gff_source_db_id'}) {
        my $sth = $self->dbh->prepare("
            select db_id from db
            where name = 'GFF_source'");
        $sth->execute();

        my $row = $sth->fetchrow_hashref;
        $self->{'gff_source_db_id'} = $row->{'db_id'};

        $sth->finish;
    }

    return $self->{'gff_source_db_id'};
}
|
1953
|
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
=head2 source2dbxref |
|
1955
|
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
Gets dbxref_id for features that have a gff source associated |
|
1957
|
|
|
|
|
|
|
|
|
1958
|
|
|
|
|
|
|
=cut |
|
1959
|
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
# Map a GFF source string to its dbxref_id.
#
# On a cache miss every dbxref row whose db_id equals gff_source_db_id is
# read, filling both lookup directions at once:
#   $self->{'source_dbxref'}  accession -> dbxref_id
#   $self->{'dbxref_source'}  dbxref_id -> accession
#
#   $source  the source string (compared against dbxref.accession)
#   returns  the dbxref_id, or undef when the source is not present
sub source2dbxref {
    my ($self, $source) = @_;

    if (my $cached = $self->{'source_dbxref'}->{$source}) {
        return $cached;
    }

    my $sth = $self->dbh->prepare("
        select dbxref_id,accession from dbxref where db_id= ?"
    );
    $sth->execute($self->gff_source_db_id);

    while (my $row = $sth->fetchrow_hashref) {
        warn "s2d:accession:$$row{accession}, dbxref_id:$$row{dbxref_id}\n" if DEBUG;

        my ($accession, $dbxref_id) = @{$row}{qw(accession dbxref_id)};
        $self->{'source_dbxref'}->{$accession} = $dbxref_id;
        $self->{'dbxref_source'}->{$dbxref_id} = $accession;
    }

    $sth->finish;
    return $self->{'source_dbxref'}->{$source};
}
|
1985
|
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
=head2 dbxref2source |
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
returns the source (string) when given a dbxref_id |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
=cut |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
sub dbxref2source {
    my ($self, $dbxref) = @_;

    # Without a 'GFF_source' db there is nothing to translate; "." is the
    # placeholder returned for an unknown source.
    my $db_id = $self->gff_source_db_id;
    return '.' unless defined $db_id;

    warn "d2s:dbxref:$dbxref\n" if DEBUG;

    # Serve from the cache when this dbxref_id has been seen before.
    if (defined ($self->{'dbxref_source'}) && $dbxref
        && defined ($self->{'dbxref_source'}->{$dbxref})) {
        return $self->{'dbxref_source'}->{$dbxref};
    }

    my $sth = $self->dbh->prepare("
    select dbxref_id,accession from dbxref where db_id=?"
    );
    $sth->execute($db_id);

    # Count the rows actually fetched rather than asking $sth->rows: DBI
    # does not guarantee a meaningful row count for a SELECT before all
    # rows have been fetched, so trusting it could wrongly report "no rows".
    my $rows_seen = 0;
    while (my $hashref = $sth->fetchrow_hashref) {
        $rows_seen++;
        warn "d2s:accession:$$hashref{accession}, dbxref_id:$$hashref{dbxref_id}\n"
            if DEBUG;

        $self->{'source_dbxref'}->{$$hashref{accession}} = $$hashref{dbxref_id};
        $self->{'dbxref_source'}->{$$hashref{dbxref_id}} = $$hashref{accession};
    }
    $sth->finish;

    # No GFF source dbxrefs at all: answer "." without caching anything.
    return "." unless $rows_seen;

    if (defined $self->{'dbxref_source'} && $dbxref
        && defined $self->{'dbxref_source'}->{$dbxref}) {
        return $self->{'dbxref_source'}->{$dbxref};
    }

    # Unknown dbxref_id: remember the miss so the table is not re-read for it.
    $self->{'dbxref_source'}->{$dbxref} = "." if $dbxref;
    return ".";
}
|
2032
|
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
=head2 source_dbxref_list |
|
2034
|
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
Title : source_dbxref_list |
|
2036
|
|
|
|
|
|
|
Usage : @all_dbxref_ids = $db->source_dbxref_list() |
|
2037
|
|
|
|
|
|
|
Function: Gets a list of all dbxref_ids that are used for GFF sources |
|
2038
|
|
|
|
|
|
|
Returns : a comma delimited string that is a list of dbxref_ids |
|
2039
|
|
|
|
|
|
|
Args : none |
|
2040
|
|
|
|
|
|
|
Status : public |
|
2041
|
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
This method queries the database for all dbxref_ids that are used |
|
2043
|
|
|
|
|
|
|
to store GFF source terms. |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
=cut |
|
2046
|
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
sub source_dbxref_list {
    my ($self) = @_;

    # Build the comma-delimited list once; any defined cached value
    # (including the empty string) is reused afterwards.
    unless (defined $self->{'source_dbxref_list'}) {
        my $sth = $self->dbh->prepare("select dbxref_id from dbxref where db_id = ?");
        $sth->execute($self->gff_source_db_id);

        #unpack it here to make it easier
        my @ids;
        while (my $row = $sth->fetchrow_arrayref) {
            push @ids, $row->[0];
        }
        $sth->finish;

        $self->{'source_dbxref_list'} = join(",", @ids);
    }

    return $self->{'source_dbxref_list'};
}
|
2065
|
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
=head2 search_notes |
|
2068
|
|
|
|
|
|
|
|
|
2069
|
|
|
|
|
|
|
Title : search_notes |
|
2070
|
|
|
|
|
|
|
Usage : $db->search_notes($search_term,$max_results) |
|
2071
|
|
|
|
|
|
|
Function: full-text search on features, ENSEMBL-style |
|
2072
|
|
|
|
|
|
|
Returns : an array of [$name,$description,$score] |
|
2073
|
|
|
|
|
|
|
Args : see below |
|
2074
|
|
|
|
|
|
|
Status : public |
|
2075
|
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
This routine performs a full-text search on feature attributes (which |
|
2077
|
|
|
|
|
|
|
attributes depend on implementation) and returns a list of |
|
2078
|
|
|
|
|
|
|
[$name,$description,$score], where $name is the feature ID (accession?), |
|
2079
|
|
|
|
|
|
|
$description is a human-readable description such as a locus line, and |
|
2080
|
|
|
|
|
|
|
$score is the match strength. |
|
2081
|
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
=cut |
|
2083
|
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=head2 ** NOT YET ACTIVE: search_notes IS IN TESTING STAGE ** |
|
2085
|
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
sub search_notes { |
|
2087
|
|
|
|
|
|
|
my $self = shift; |
|
2088
|
|
|
|
|
|
|
my ($search_string,$limit) = @_; |
|
2089
|
|
|
|
|
|
|
my $limit_str; |
|
2090
|
|
|
|
|
|
|
if (defined $limit) { |
|
2091
|
|
|
|
|
|
|
$limit_str = " LIMIT $limit "; |
|
2092
|
|
|
|
|
|
|
} else { |
|
2093
|
|
|
|
|
|
|
$limit_str = ""; |
|
2094
|
|
|
|
|
|
|
} |
|
2095
|
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
# so here's the plan: |
|
2097
|
|
|
|
|
|
|
# if there is only 1 word, do 1-3 |
|
2098
|
|
|
|
|
|
|
# 1. search for accessions like $string.'%'--if any are found, quit and return them |
|
2099
|
|
|
|
|
|
|
# 2. search for feature.name like $string.'%'--if found, keep and continue |
|
2100
|
|
|
|
|
|
|
# 3. search somewhere in analysis like $string.'%'--if found, keep and continue |
|
2101
|
|
|
|
|
|
|
# if there is more than one word, don't search accessions |
|
2102
|
|
|
|
|
|
|
# 4. search each word anded together like '%'.$string.'%' --if found, keep and continue |
|
2103
|
|
|
|
|
|
|
# 5. search somewhere in analysis like '%'.$string.'%' |
|
2104
|
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
# $self->dbh->trace(1); |
|
2106
|
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
my @search_str = split /\s+/, $search_string; |
|
2108
|
|
|
|
|
|
|
my $qsearch_term = $self->dbh->quote($search_str[0]); |
|
2109
|
|
|
|
|
|
|
my $like_str = "( (dbx.accession ~* $qsearch_term OR \n" |
|
2110
|
|
|
|
|
|
|
." f.name ~* $qsearch_term) "; |
|
2111
|
|
|
|
|
|
|
for (my $i=1;$i<(scalar @search_str);$i++) { |
|
2112
|
|
|
|
|
|
|
$qsearch_term = $self->dbh->quote($search_str[$i]); |
|
2113
|
|
|
|
|
|
|
$like_str .= "and \n"; |
|
2114
|
|
|
|
|
|
|
$like_str .= " (dbx.accession ~* $qsearch_term OR \n" |
|
2115
|
|
|
|
|
|
|
." f.name ~* $qsearch_term) "; |
|
2116
|
|
|
|
|
|
|
} |
|
2117
|
|
|
|
|
|
|
$like_str .= ")"; |
|
2118
|
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
my $sth = $self->dbh->prepare(" |
|
2120
|
|
|
|
|
|
|
select dbx.accession,f.name,0 |
|
2121
|
|
|
|
|
|
|
from feature f, dbxref dbx, feature_dbxref fd |
|
2122
|
|
|
|
|
|
|
where |
|
2123
|
|
|
|
|
|
|
f.feature_id = fd.feature_id and |
|
2124
|
|
|
|
|
|
|
fd.dbxref_id = dbx.dbxref_id and |
|
2125
|
|
|
|
|
|
|
$like_str |
|
2126
|
|
|
|
|
|
|
$limit_str |
|
2127
|
|
|
|
|
|
|
"); |
|
2128
|
|
|
|
|
|
|
$sth->execute or throw ("couldn't execute keyword query"); |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
my @results; |
|
2131
|
|
|
|
|
|
|
while (my ($acc, $name, $score) = $sth->fetchrow_array) { |
|
2132
|
|
|
|
|
|
|
$score = sprintf("%.2f",$score); |
|
2133
|
|
|
|
|
|
|
push @results, [$acc, $name, $score]; |
|
2134
|
|
|
|
|
|
|
} |
|
2135
|
|
|
|
|
|
|
$sth->finish; |
|
2136
|
|
|
|
|
|
|
return @results; |
|
2137
|
|
|
|
|
|
|
} |
|
2138
|
|
|
|
|
|
|
|
|
2139
|
|
|
|
|
|
|
=cut |
|
2140
|
|
|
|
|
|
|
|
|
2141
|
|
|
|
|
|
|
=head2 attributes |
|
2142
|
|
|
|
|
|
|
|
|
2143
|
|
|
|
|
|
|
Title : attributes |
|
2144
|
|
|
|
|
|
|
Usage : @attributes = $db->attributes($id,$name) |
|
2145
|
|
|
|
|
|
|
Function: get the "attributes" on a particular feature |
|
2146
|
|
|
|
|
|
|
Returns : an array of strings |
|
2147
|
|
|
|
|
|
|
Args : feature ID [, attribute name] |
|
2148
|
|
|
|
|
|
|
Status : public |
|
2149
|
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
This method is intended as a "work-alike" to Bio::DB::GFF's |
|
2151
|
|
|
|
|
|
|
attributes method, which has the following returns: |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
Called in list context, it returns a list. If called in a |
|
2154
|
|
|
|
|
|
|
scalar context, it returns the first value of the attribute |
|
2155
|
|
|
|
|
|
|
if an attribute name is provided, otherwise it returns a |
|
2156
|
|
|
|
|
|
|
hash reference in which the keys are attribute names |
|
2157
|
|
|
|
|
|
|
and the values are anonymous arrays containing the values. |
|
2158
|
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
=cut |
|
2160
|
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
sub attributes {
    my $self = shift;
    my ($id,$tag) = @_;

    # $id is matched against feature.uniquename; $tag optionally restricts
    # the result to one attribute type.
    #
    # Return value:
    #   list context          - flat list of the fetched columns
    #                           (values only with a tag, type/value pairs without)
    #   scalar context, tag   - the first value
    #   scalar context, none  - hash ref of attribute type => [values]
    #   nothing found         - empty list / undef

    my $dbh = $self->dbh;

    #get feature_id
    # The organism restriction is bound as a placeholder rather than being
    # concatenated into the SQL text.
    my $query = "select feature_id from feature where uniquename = ?";
    my @bind  = ($id);
    my $organism_id = $self->organism_id;
    if ($organism_id) {
        $query .= " and organism_id = ?";
        push @bind, $organism_id;
    }

    my $sth = $dbh->prepare($query);
    $sth->execute(@bind) or $self->throw("failed to get feature_id in attributes");
    my $hashref = $sth->fetchrow_hashref;
    $sth->finish;
    my $feature_id = $$hashref{'feature_id'};

    if (defined $tag) {
        $sth = $dbh->prepare("SELECT attribute FROM gfffeatureatts(?) WHERE type = ?");
        $sth->execute($feature_id,$tag);
    } else {
        $sth = $dbh->prepare("SELECT type,attribute FROM gfffeatureatts(?)");
        $sth->execute($feature_id);
    }
    my @array = @{ $sth->fetchall_arrayref };
    $sth->finish;

    ## dgg; ugly patch to copy polypeptide/protein residues into 'translation' attribute
    # need to add to gfffeatureatts ..
    #
    # This runs BEFORE the "nothing found" check below.  Previously the
    # empty-result return came first, so the residues were only reachable
    # when gfffeatureatts already returned at least one row (hence the old
    # workaround of adding a dummy 'translation' featureprop).
    if (!defined $tag || $tag eq 'translation') {
        $sth = $dbh->prepare("select type_id from feature where feature_id = ?");
        $sth->execute($feature_id);
        $hashref = $sth->fetchrow_hashref;
        $sth->finish;
        my $type_id = $$hashref{'type_id'};

        # Is the feature typed as 'polypeptide' or 'protein'?
        my $is_protein = 0;
        if ($type_id) {
            for my $so_name ('polypeptide', 'protein') {
                my $term_id = $self->name2term($so_name);
                if ($term_id && $type_id == $term_id) {
                    $is_protein = 1;
                    last;
                }
            }
        }

        if ($is_protein) {
            $sth = $dbh->prepare("select residues from feature where feature_id = ?");
            $sth->execute($feature_id);
            $hashref = $sth->fetchrow_hashref;
            $sth->finish;
            my $aa = $$hashref{'residues'};

            if ($aa) {
                # Row shape mirrors the query used above: value only when a
                # tag was requested, (type, value) otherwise.
                push @array, ($tag ? [ $aa ] : [ 'translation', $aa ]);
            }
        }
    }

    # Nothing found at all.
    return unless @array;

    my @result = map { @$_ } @array;
    return @result if wantarray;

    return $result[0] if $tag;

    my %result;
    foreach my $lineref (@array) {
        my ($key,$value) = @$lineref;
        push @{$result{$key}},$value;
    }
    return \%result;
}
|
2239
|
|
|
|
|
|
|
|
|
2240
|
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
=head2 _segclass |
|
2243
|
|
|
|
|
|
|
|
|
2244
|
|
|
|
|
|
|
Title : _segclass |
|
2245
|
|
|
|
|
|
|
Usage : $class = $db->_segclass |
|
2246
|
|
|
|
|
|
|
Function: returns the perl class that we use for segment() calls |
|
2247
|
|
|
|
|
|
|
Returns : a string containing the segment class |
|
2248
|
|
|
|
|
|
|
Args : none |
|
2249
|
|
|
|
|
|
|
Status : reserved for subclass use |
|
2250
|
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
=cut |
|
2252
|
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
#sub default_class {return 'Sequence' } |
|
2254
|
|
|
|
|
|
|
## URGI changes |
|
2255
|
|
|
|
|
|
|
sub default_class {
    my ($self, @requested) = @_;

    #dgg
    if (@requested) {
        # Setter: the supplied class must be backed by a feature in the
        # database, otherwise throw.
        unless ( $self->check_chado_reference_class(@requested) ) {
            $self->throw("unable to find reference_class '$requested[0]' feature in the database");
        }
        $self->{'reference_class'} = $requested[0] || 'Sequence';
    }
    elsif ( !$self->{'reference_class'} ) {
        # Getter with nothing stored yet: fall back to the class that
        # chado_reference_class() reports.
        $self->{'reference_class'} = $self->chado_reference_class();
    }

    return $self->{'reference_class'};
}
|
2276
|
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
sub check_chado_reference_class {
    my ($self, $refclass) = @_;

    # Called without a class name: nothing to check, return explicitly
    # (undef / empty list) instead of falling off the end of the sub.
    return unless @_ > 1;

    # Translate the class name into a type_id, then look for a feature of
    # that type.  One row is enough to prove existence, so the query is
    # limited to a single row instead of selecting every such feature.
    my $type_id = $self->name2term($refclass);
    my $query = "select feature_id from feature where type_id = ? limit 1";
    my $sth = $self->dbh->prepare($query);
    $sth->execute($type_id) or $self->throw("trying to find chado_reference_class");
    my $data = $sth->fetchrow_hashref();
    $sth->finish;

    ## warn("check_chado_reference_class: $refclass = $type_id -> $refid"); # DEBUG

    # feature_id of a matching feature, or undef when there is none.
    return $data ? $$data{'feature_id'} : undef;
}
|
2293
|
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
=head2 chado_reference_class |
|
2295
|
|
|
|
|
|
|
|
|
2296
|
|
|
|
|
|
|
Title : chado_reference_class |
|
2297
|
|
|
|
|
|
|
Usage : $obj->chado_reference_class() |
|
2298
|
|
|
|
|
|
|
Function: get or return the ID to use for Gbrowse map reference class |
|
2299
|
|
|
|
|
|
|
using cvtermprop table, value = MAP_REFERENCE_TYPE |
|
2300
|
|
|
|
|
|
|
Returns : the cvterm.name |
|
2301
|
|
|
|
|
|
|
Args : to return the id, none; to determine the id, 1 |
|
2302
|
|
|
|
|
|
|
See also: default_class, refclass_feature_id |
|
2303
|
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
Optionally test that user/config supplied ref class is indeed a proper |
|
2305
|
|
|
|
|
|
|
chado feature type. |
|
2306
|
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
=cut |
|
2308
|
|
|
|
|
|
|
|
|
2309
|
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
sub chado_reference_class {
    my ($self) = @_;

    # Reuse a previously determined (true) class name.
    if ( my $cached = $self->{'chado_reference_class'} ) {
        return $cached;
    }

    my $class_name = 'Sequence'; # default ?

    # Step 1: find the cvterm flagged with the MAP_REFERENCE_TYPE value.
    my $prop_sth = $self->dbh->prepare("select cvterm_id from cvtermprop where value = ?");
    $prop_sth->execute(MAP_REFERENCE_TYPE) or $self->throw("trying to find chado_reference_class");
    my $prop_row = $prop_sth->fetchrow_hashref(); #? FIXME: could be many values *?
    my $ref_cvtermid = $$prop_row{'cvterm_id'};
    $prop_sth->finish;

    # Step 2: translate that cvterm_id into its name; a true name replaces
    # the default.
    if ($ref_cvtermid) {
        my $term_sth = $self->dbh->prepare("select name from cvterm where cvterm_id = ?");
        $term_sth->execute($ref_cvtermid) or $self->throw("trying to find chado_reference_class");
        my $term_row = $term_sth->fetchrow_hashref();
        if ( $$term_row{'name'} ) {
            $class_name = $$term_row{'name'};
        }
        # warn("chado_reference_class: $chado_reference_class = $ref_cvtermid"); # DEBUG
        $term_sth->finish;
    }

    return $self->{'chado_reference_class'} = $class_name;
}
|
2335
|
|
|
|
|
|
|
|
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
=head2 refclass_feature_id |
|
2338
|
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
Title : refclass_feature_id |
|
2340
|
|
|
|
|
|
|
Usage : $self->refclass_feature_id() |
|
2341
|
|
|
|
|
|
|
Function: Used to store the feature_id of the reference class feature we are working on (e.g. contig, supercontig) |
|
2342
|
|
|
|
|
|
|
With this feature_id we can filter all requests to be sure we are extracting features located on |
|
2343
|
|
|
|
|
|
|
the reference class feature. |
|
2344
|
|
|
|
|
|
|
Returns : A scalar |
|
2345
|
|
|
|
|
|
|
Args : The feature_id on setting |
|
2346
|
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
=cut |
|
2348
|
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
sub refclass_feature_id {
    my ($self, @value) = @_;

    # Combined getter/setter: store the first argument when one is given,
    # then report whatever is currently stored.
    if (@value) {
        $self->{'refclass_feature_id'} = $value[0];
    }

    return $self->{'refclass_feature_id'};
}
|
2358
|
|
|
|
|
|
|
|
|
2359
|
|
|
|
|
|
|
|
|
2360
|
0
|
|
|
0
|
|
|
sub _segclass {
    # Hands back the value of the file-level SEGCLASS constant.
    return SEGCLASS;
}
|
2361
|
|
|
|
|
|
|
|
|
2362
|
0
|
|
|
0
|
0
|
|
sub absolute {
    # Always returns nothing: undef in scalar context, an empty list in
    # list context.  Any arguments are ignored.
    return;
}
|
2363
|
|
|
|
|
|
|
|
|
2364
|
|
|
|
|
|
|
#implemented exactly the same as Bio::DB::SeqFeature::Store::DBI::mysql |
|
2365
|
|
|
|
|
|
|
sub clone {
    my ($self) = @_;

    #this is EO's implementation for the BDSFS::DBI::Pg implementation
    #he says Pg's clone method is flawed
    #
    #  my $dsn  = $self->{db_args}->{dsn};
    #  my $user = $self->{db_args}->{username};
    #  my $pass = $self->{db_args}->{password};
    #
    #  $self->dbh()->{InactiveDestroy} = 1;
    #  my $new_dbh = DBI->connect($dsn,$user,$pass) or $self->throw($DBI::errstr);
    #  $new_dbh->{InactiveDestroy} = 1;
    #  $self->{dbh} = $new_dbh unless $self->is_temp;

    # this is the BDSFS::DBI::mysql implementation
    # Flag the current handle with InactiveDestroy before it is replaced.
    $self->{dbh}->{InactiveDestroy} = 1;

    #magic from perlmonks to silence a warning:
    # http://www.perlmonks.org/?node_id=594175
    # without the empty {} you get warnings about unrecognised attribute name
    my $fresh_dbh = $self->{dbh}->clone({});   # unless $self->is_temp;
    $self->{dbh} = $fresh_dbh;

    # The new handle is the return value.
    return $fresh_dbh;
}
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
|
|
2390
|
|
|
|
|
|
|
#this sub doesn't work and just causes annoying warnings |
|
2391
|
|
|
|
|
|
|
#sub DESTROY { |
|
2392
|
|
|
|
|
|
|
# my $self = shift; |
|
2393
|
|
|
|
|
|
|
# $self->dbh->disconnect; |
|
2394
|
|
|
|
|
|
|
# return; |
|
2395
|
|
|
|
|
|
|
#} |
|
2396
|
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
=head1 LEFTOVERS FROM BIO::DB::GFF NEEDED FOR DAS |
|
2398
|
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
these methods should probably be declared in an interface class |
|
2400
|
|
|
|
|
|
|
that Bio::DB::GFF implements. for instance, the aggregator methods |
|
2401
|
|
|
|
|
|
|
could be described in Bio::SeqFeature::AggregatorI |
|
2402
|
|
|
|
|
|
|
|
|
2403
|
|
|
|
|
|
|
=cut |
|
2404
|
|
|
|
|
|
|
|
|
2405
|
0
|
|
|
0
|
0
|
|
sub aggregators {
    # Always returns an empty list (undef in scalar context).
    return;
}
|
2406
|
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
=head1 END LEFTOVERS |
|
2408
|
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
=cut |
|
2410
|
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
package Bio::DB::Das::ChadoIterator;

# Minimal iterator: the object IS the array reference it is given, and
# next_seq() consumes that array from the front.

# new($features)
#   Blesses the supplied array reference itself (no copy is made) into
#   this package and returns it.
sub new {
    my ($package, $features) = @_;
    return bless $features, $package;
}

# next_seq()
#   Removes and returns the first remaining element; returns nothing
#   (empty list / undef) once the array is exhausted.
sub next_seq {
    my ($self) = @_;

    return unless @$self;
    return shift @$self;
}
|
2426
|
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
1; |
|
2428
|
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
|
|
2430
|
|
|
|
|
|
|
|