line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::OntologyIO::dagflat |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Hilmar Lapp, hlapp at gmx.net |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002. |
9
|
|
|
|
|
|
|
# (c) Hilmar Lapp, hlapp at gmx.net, 2003. |
10
|
|
|
|
|
|
|
# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself. |
13
|
|
|
|
|
|
|
# Refer to the Perl Artistic License (see the license accompanying this |
14
|
|
|
|
|
|
|
# software package, or see http://www.perl.com/language/misc/Artistic.html) |
15
|
|
|
|
|
|
|
# for the terms under which you may use, modify, and redistribute this module. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
18
|
|
|
|
|
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
19
|
|
|
|
|
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 NAME |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Bio::OntologyIO::dagflat - a base class parser for GO flat-file type formats |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 SYNOPSIS |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use Bio::OntologyIO; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# do not use directly -- use via Bio::OntologyIO |
34
|
|
|
|
|
|
|
# e.g., the GO parser is a simple extension of this class |
35
|
|
|
|
|
|
|
my $parser = Bio::OntologyIO->new |
36
|
|
|
|
|
|
|
( -format => "go", |
37
|
|
|
|
|
|
|
-defs_file => "/home/czmasek/GO/GO.defs", |
38
|
|
|
|
|
|
|
-files => ["/home/czmasek/GO/component.ontology", |
39
|
|
|
|
|
|
|
"/home/czmasek/GO/function.ontology", |
40
|
|
|
|
|
|
|
"/home/czmasek/GO/process.ontology"] ); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
my $go_ontology = $parser->next_ontology(); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" ); |
45
|
|
|
|
|
|
|
my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" ); |
46
|
|
|
|
|
|
|
my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" ); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Needs Graph.pm from CPAN. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 FEEDBACK |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 Mailing Lists |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
57
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to the |
58
|
|
|
|
|
|
|
Bioperl mailing lists Your participation is much appreciated. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
61
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Support |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
I |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
70
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
71
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
72
|
|
|
|
|
|
|
with code and data examples if at all possible. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Reporting Bugs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
77
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
78
|
|
|
|
|
|
|
web: |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Christian M. Zmasek |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Email: czmasek-at-burnham.org or cmzmasek@yahoo.com |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
WWW: http://monochrome-effect.net/ |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Address: |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Genomics Institute of the Novartis Research Foundation |
93
|
|
|
|
|
|
|
10675 John Jay Hopkins Drive |
94
|
|
|
|
|
|
|
San Diego, CA 92121 |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 CONTRIBUTOR |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Hilmar Lapp, hlapp at gmx.net |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 APPENDIX |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
103
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Let the code begin... |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
package Bio::OntologyIO::dagflat; |
112
|
|
|
|
|
|
|
|
113
|
2
|
|
|
2
|
|
9
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
44
|
|
114
|
|
|
|
|
|
|
|
115
|
2
|
|
|
2
|
|
6
|
use Bio::Root::IO; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
35
|
|
116
|
2
|
|
|
2
|
|
797
|
use Bio::Ontology::OBOEngine; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
43
|
|
117
|
2
|
|
|
2
|
|
10
|
use Bio::Ontology::Ontology; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
36
|
|
118
|
2
|
|
|
2
|
|
6
|
use Bio::Ontology::OntologyStore; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
29
|
|
119
|
2
|
|
|
2
|
|
5
|
use Bio::Ontology::TermFactory; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
27
|
|
120
|
2
|
|
|
2
|
|
5
|
use Bio::Annotation::DBLink; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
36
|
|
121
|
|
|
|
|
|
|
|
122
|
2
|
|
|
2
|
|
6
|
use constant TRUE => 1; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
88
|
|
123
|
2
|
|
|
2
|
|
6
|
use constant FALSE => 0; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
68
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
126
|
2
|
|
|
2
|
|
6
|
use base qw(Bio::OntologyIO); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
4979
|
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 new |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Title : new |
132
|
|
|
|
|
|
|
Usage : $parser = Bio::OntologyIO->new( |
133
|
|
|
|
|
|
|
-format => "go", |
134
|
|
|
|
|
|
|
-defs_file => "/path/to/GO.defs", |
135
|
|
|
|
|
|
|
-files => ["/path/to/component.ontology", |
136
|
|
|
|
|
|
|
"/path/to/function.ontology", |
137
|
|
|
|
|
|
|
"/path/to/process.ontology"] ); |
138
|
|
|
|
|
|
|
Function: Creates a new dagflat parser. |
139
|
|
|
|
|
|
|
Returns : A new dagflat parser object, implementing Bio::OntologyIO. |
140
|
|
|
|
|
|
|
Args : -defs_file => the name of the file holding the term |
141
|
|
|
|
|
|
|
definitions |
142
|
|
|
|
|
|
|
-files => a single ontology flat file holding the |
143
|
|
|
|
|
|
|
term relationships, or an array ref holding |
144
|
|
|
|
|
|
|
the file names (for GO, there will usually be |
145
|
|
|
|
|
|
|
3 files: component.ontology, function.ontology, |
146
|
|
|
|
|
|
|
process.ontology) |
147
|
|
|
|
|
|
|
-file => if there is only a single flat file, it may |
148
|
|
|
|
|
|
|
also be specified via the -file parameter |
149
|
|
|
|
|
|
|
-ontology_name => the name of the ontology; if not specified the |
150
|
|
|
|
|
|
|
parser will auto-discover it by using the term |
151
|
|
|
|
|
|
|
that starts with a $, and converting underscores |
152
|
|
|
|
|
|
|
to spaces |
153
|
|
|
|
|
|
|
-engine => the Bio::Ontology::OntologyEngineI object |
154
|
|
|
|
|
|
|
to be reused (will be created otherwise); note |
155
|
|
|
|
|
|
|
that every Bio::Ontology::OntologyI will |
156
|
|
|
|
|
|
|
qualify as well since that one inherits from the |
157
|
|
|
|
|
|
|
former. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
See L. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# in reality, we let OntologyIO::new do the instantiation, and override |
164
|
|
|
|
|
|
|
# _initialize for all initialization work |
165
|
|
|
|
|
|
|
sub _initialize { |
166
|
4
|
|
|
4
|
|
7
|
my ($self, %arg) = @_; |
167
|
|
|
|
|
|
|
|
168
|
4
|
|
|
|
|
22
|
my ( $defs_file_name,$files,$defs_url,$url,$name,$eng ) = |
169
|
|
|
|
|
|
|
$self->_rearrange([qw( DEFS_FILE |
170
|
|
|
|
|
|
|
FILES |
171
|
|
|
|
|
|
|
DEFS_URL |
172
|
|
|
|
|
|
|
URL |
173
|
|
|
|
|
|
|
ONTOLOGY_NAME |
174
|
|
|
|
|
|
|
ENGINE) |
175
|
|
|
|
|
|
|
], |
176
|
|
|
|
|
|
|
%arg ); |
177
|
|
|
|
|
|
|
|
178
|
4
|
|
|
|
|
12
|
delete($arg{-url}); #b/c GO has 3 files... |
179
|
|
|
|
|
|
|
|
180
|
4
|
|
|
|
|
19
|
$self->SUPER::_initialize( %arg ); |
181
|
|
|
|
|
|
|
|
182
|
4
|
|
|
|
|
15
|
$self->_done( FALSE ); |
183
|
4
|
|
|
|
|
9
|
$self->_not_first_record( FALSE ); |
184
|
4
|
|
|
|
|
8
|
$self->_term( "" ); |
185
|
4
|
|
|
|
|
5
|
delete $self->{'_ontologies'}; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# ontology engine (and possibly name if it's an OntologyI) |
188
|
4
|
50
|
|
|
|
23
|
$eng = Bio::Ontology::OBOEngine->new() unless $eng; |
189
|
4
|
50
|
|
|
|
25
|
if($eng->isa("Bio::Ontology::OntologyI")) { |
190
|
0
|
|
|
|
|
0
|
$self->ontology_name($eng->name()); |
191
|
0
|
0
|
|
|
|
0
|
$eng = $eng->engine() if $eng->can('engine'); |
192
|
|
|
|
|
|
|
} |
193
|
4
|
|
|
|
|
15
|
$self->_ont_engine($eng); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# flat files to parse |
196
|
4
|
50
|
66
|
|
|
15
|
if(defined($defs_file_name) && defined($defs_url)){ |
197
|
0
|
|
|
|
|
0
|
$self->throw('cannot provide both -defs_file and -defs_url'); |
198
|
|
|
|
|
|
|
} else { |
199
|
4
|
100
|
|
|
|
13
|
defined($defs_file_name) && $self->defs_file( $defs_file_name ); |
200
|
4
|
50
|
|
|
|
8
|
defined($defs_url) && $self->defs_url( $defs_url ); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
4
|
50
|
66
|
|
|
21
|
if(defined($files) && defined($url)){ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
204
|
|
|
|
|
|
|
} elsif(defined($files)){ |
205
|
1
|
50
|
|
|
|
6
|
$self->{_flat_files} = $files ? ref($files) ? $files : [$files] : []; |
|
|
50
|
|
|
|
|
|
206
|
|
|
|
|
|
|
} elsif(defined($url)){ |
207
|
0
|
|
|
|
|
0
|
$self->url($url); |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# ontology name (overrides implicit one through OntologyI engine) |
211
|
4
|
50
|
|
|
|
15
|
$self->ontology_name($name) if $name; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
} # _initialize |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 ontology_name |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Title : ontology_name |
218
|
|
|
|
|
|
|
Usage : $obj->ontology_name($newval) |
219
|
|
|
|
|
|
|
Function: Get/set the name of the ontology parsed by this module. |
220
|
|
|
|
|
|
|
Example : |
221
|
|
|
|
|
|
|
Returns : value of ontology_name (a scalar) |
222
|
|
|
|
|
|
|
Args : on set, new value (a scalar or undef, optional) |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub ontology_name{ |
228
|
14
|
|
|
14
|
1
|
14
|
my $self = shift; |
229
|
|
|
|
|
|
|
|
230
|
14
|
100
|
|
|
|
28
|
return $self->{'ontology_name'} = shift if @_; |
231
|
11
|
|
|
|
|
41
|
return $self->{'ontology_name'}; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head2 parse |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Title : parse() |
238
|
|
|
|
|
|
|
Usage : $parser->parse(); |
239
|
|
|
|
|
|
|
Function: Parses the files set with "new" or with methods |
240
|
|
|
|
|
|
|
defs_file and _flat_files. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Normally you should not need to call this method as it will |
243
|
|
|
|
|
|
|
be called automatically upon the first call to |
244
|
|
|
|
|
|
|
next_ontology(). |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
Returns : [Bio::Ontology::OntologyEngineI] |
247
|
|
|
|
|
|
|
Args : |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=cut |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub parse { |
252
|
4
|
|
|
4
|
1
|
4
|
my $self = shift; |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
#warn "PARSING"; |
255
|
|
|
|
|
|
|
# setup the default term factory if not done by anyone yet |
256
|
4
|
50
|
|
|
|
22
|
$self->term_factory(Bio::Ontology::TermFactory->new( |
257
|
|
|
|
|
|
|
-type => "Bio::Ontology::Term")) |
258
|
|
|
|
|
|
|
unless $self->term_factory(); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# create the ontology object itself |
261
|
4
|
|
|
|
|
14
|
my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(), |
262
|
|
|
|
|
|
|
-engine => $self->_ont_engine()); |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# parse definitions |
265
|
4
|
|
|
|
|
14
|
while( my $term = $self->_next_term() ) { |
266
|
9
|
|
|
|
|
14
|
$self->_add_term( $term, $ont ); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# set up the ontology of the relationship types |
270
|
4
|
|
|
|
|
12
|
foreach ($self->_part_of_relationship(), $self->_is_a_relationship(), $self->_related_to_relationship()) { |
271
|
12
|
|
|
|
|
18
|
$_->ontology($ont); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# pre-seed the IO system with the first flat file if -file wasn't provided |
275
|
4
|
100
|
|
|
|
11
|
if(! $self->_fh) { |
276
|
1
|
50
|
|
|
|
6
|
if($self->url){ |
|
|
50
|
|
|
|
|
|
277
|
0
|
0
|
|
|
|
0
|
if(ref($self->url) eq 'ARRAY'){ |
278
|
|
|
|
|
|
|
#warn "BA"; |
279
|
0
|
|
|
|
|
0
|
foreach my $url (@{ $self->url }){ |
|
0
|
|
|
|
|
0
|
|
280
|
|
|
|
|
|
|
#warn $url; |
281
|
|
|
|
|
|
|
#warn $ont; |
282
|
|
|
|
|
|
|
#warn scalar($ont->get_all_terms()); |
283
|
0
|
|
|
|
|
0
|
$self->_initialize_io(-url => $url); |
284
|
0
|
|
|
|
|
0
|
$self->_parse_flat_file($ont); |
285
|
|
|
|
|
|
|
} |
286
|
0
|
|
|
|
|
0
|
$self->close(); |
287
|
|
|
|
|
|
|
} else { |
288
|
0
|
|
|
|
|
0
|
$self->_initialize_io(-url => $self->url); |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
} elsif($self->_flat_files){ |
291
|
1
|
|
|
|
|
3
|
$self->_initialize_io(-file => shift(@{$self->_flat_files()})); |
|
1
|
|
|
|
|
3
|
|
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
4
|
|
|
|
|
9
|
while($self->_fh) { |
296
|
4
|
|
|
|
|
13
|
$self->_parse_flat_file($ont); |
297
|
|
|
|
|
|
|
# advance to next flat file if more are available |
298
|
4
|
50
|
|
|
|
6
|
if(@{$self->_flat_files()}) { |
|
4
|
|
|
|
|
21
|
|
299
|
0
|
|
|
|
|
0
|
$self->close(); |
300
|
0
|
|
|
|
|
0
|
$self->_initialize_io(-file => shift(@{$self->_flat_files()})); |
|
0
|
|
|
|
|
0
|
|
301
|
|
|
|
|
|
|
} else { |
302
|
4
|
|
|
|
|
7
|
last; # nothing else to parse so terminate the loop |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} |
305
|
4
|
|
|
|
|
17
|
$self->_add_ontology($ont); |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# not needed anywhere, only because of backward compatibility |
308
|
4
|
|
|
|
|
8
|
return $self->_ont_engine(); |
309
|
|
|
|
|
|
|
} # parse |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 next_ontology |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Title : next_ontology |
314
|
|
|
|
|
|
|
Usage : |
315
|
|
|
|
|
|
|
Function: Get the next available ontology from the parser. This is the |
316
|
|
|
|
|
|
|
method prescribed by Bio::OntologyIO. |
317
|
|
|
|
|
|
|
Example : |
318
|
|
|
|
|
|
|
Returns : An object implementing Bio::Ontology::OntologyI, and undef if |
319
|
|
|
|
|
|
|
there is no more ontology in the input. |
320
|
|
|
|
|
|
|
Args : |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub next_ontology { |
326
|
5
|
|
|
5
|
1
|
24
|
my $self = shift; |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
# parse if not done already |
329
|
5
|
100
|
|
|
|
19
|
$self->parse() unless exists($self->{'_ontologies'}); |
330
|
|
|
|
|
|
|
# return next available ontology |
331
|
5
|
50
|
|
|
|
14
|
if(exists($self->{'_ontologies'})){ |
332
|
5
|
|
|
|
|
4
|
my $ont = shift (@{$self->{'_ontologies'}}); |
|
5
|
|
|
|
|
10
|
|
333
|
5
|
100
|
|
|
|
12
|
if($ont){ |
334
|
4
|
|
|
|
|
32
|
my $store = Bio::Ontology::OntologyStore->new(); |
335
|
4
|
|
|
|
|
11
|
$store->register_ontology($ont); |
336
|
4
|
|
|
|
|
13
|
return $ont; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
} |
339
|
1
|
|
|
|
|
2
|
return; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head2 defs_file |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Title : defs_file |
345
|
|
|
|
|
|
|
Usage : $parser->defs_file( "GO.defs" ); |
346
|
|
|
|
|
|
|
Function: Set/get for the term definitions filename. |
347
|
|
|
|
|
|
|
Returns : The term definitions file name [string]. |
348
|
|
|
|
|
|
|
Args : On set, the term definitions file name [string] (optional). |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=cut |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub defs_file { |
353
|
2
|
|
|
2
|
1
|
2
|
my $self = shift; |
354
|
|
|
|
|
|
|
|
355
|
2
|
50
|
|
|
|
5
|
if ( @_ ) { |
356
|
2
|
|
|
|
|
4
|
my $f = shift; |
357
|
2
|
|
|
|
|
3
|
$self->{ "_defs_file_name" } = $f; |
358
|
2
|
50
|
|
|
|
7
|
$self->_defs_io->close() if $self->_defs_io(); |
359
|
2
|
50
|
|
|
|
5
|
if(defined($f)) { |
360
|
2
|
|
|
|
|
9
|
$self->_defs_io( Bio::Root::IO->new( -input => $f ) ); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
2
|
|
|
|
|
4
|
return $self->{ "_defs_file_name" }; |
364
|
|
|
|
|
|
|
} # defs_file |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub defs_url { |
367
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
368
|
0
|
|
|
|
|
0
|
my $val = shift; |
369
|
0
|
0
|
|
|
|
0
|
if(defined($val)){ |
370
|
0
|
|
|
|
|
0
|
$self->{'_defs_url'} = $val; |
371
|
|
|
|
|
|
|
|
372
|
0
|
0
|
|
|
|
0
|
$self->_defs_io->close() if $self->_defs_io(); |
373
|
0
|
|
|
|
|
0
|
$self->_defs_io( Bio::Root::IO->new( -url => $val ) ); |
374
|
|
|
|
|
|
|
} |
375
|
0
|
|
|
|
|
0
|
return $self->{'_defs_url'}; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
sub url { |
379
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
380
|
1
|
|
|
|
|
1
|
my $val = shift; |
381
|
1
|
50
|
|
|
|
4
|
if(defined($val)){ |
382
|
0
|
|
|
|
|
0
|
$self->{'_url'} = $val; |
383
|
|
|
|
|
|
|
} |
384
|
1
|
|
|
|
|
3
|
return $self->{'_url'}; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head2 close |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Title : close |
390
|
|
|
|
|
|
|
Usage : |
391
|
|
|
|
|
|
|
Function: Closes this ontology stream and associated file handles. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Clients should call this method especially when they write |
394
|
|
|
|
|
|
|
ontologies. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
We need to override this here in order to close the file |
397
|
|
|
|
|
|
|
handle for the term definitions file. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Example : |
400
|
|
|
|
|
|
|
Returns : none |
401
|
|
|
|
|
|
|
Args : none |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub close{ |
407
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# first call the inherited implementation |
410
|
4
|
|
|
|
|
26
|
$self->SUPER::close(); |
411
|
|
|
|
|
|
|
# then close the defs file io (if there is one) |
412
|
4
|
100
|
|
|
|
12
|
$self->_defs_io->close() if $self->_defs_io(); |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=head2 _flat_files |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Title : _flat_files |
418
|
|
|
|
|
|
|
Usage : $files_to_parse = $parser->_flat_files(); |
419
|
|
|
|
|
|
|
Function: Get the array of ontology flat files that need to be parsed. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Note that this array will decrease in elements over the |
422
|
|
|
|
|
|
|
parsing process. Therefore, it\'s value outside of this |
423
|
|
|
|
|
|
|
module will be limited. Also, be careful not to alter the |
424
|
|
|
|
|
|
|
array unless you know what you are doing. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Returns : a reference to an array of zero or more strings |
427
|
|
|
|
|
|
|
Args : none |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub _flat_files { |
432
|
6
|
|
|
6
|
|
10
|
my $self = shift; |
433
|
|
|
|
|
|
|
|
434
|
6
|
100
|
|
|
|
16
|
$self->{_flat_files} = [] unless exists($self->{_flat_files}); |
435
|
6
|
|
|
|
|
17
|
return $self->{_flat_files}; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# INTERNAL METHODS |
440
|
|
|
|
|
|
|
# ---------------- |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=head2 _defs_io |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Title : _defs_io |
445
|
|
|
|
|
|
|
Usage : $obj->_defs_io($newval) |
446
|
|
|
|
|
|
|
Function: Get/set the Bio::Root::IO instance representing the |
447
|
|
|
|
|
|
|
definition file, if provided (see defs_file()). |
448
|
|
|
|
|
|
|
Example : |
449
|
|
|
|
|
|
|
Returns : value of _defs_io (a Bio::Root::IO object) |
450
|
|
|
|
|
|
|
Args : on set, new value (a Bio::Root::IO object or undef, optional) |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub _defs_io{ |
455
|
85
|
|
|
85
|
|
63
|
my $self = shift; |
456
|
|
|
|
|
|
|
|
457
|
85
|
100
|
|
|
|
119
|
return $self->{'_defs_io'} = shift if @_; |
458
|
83
|
|
|
|
|
149
|
return $self->{'_defs_io'}; |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub _add_ontology { |
462
|
4
|
|
|
4
|
|
4
|
my $self = shift; |
463
|
4
|
50
|
|
|
|
15
|
$self->{'_ontologies'} = [] unless exists($self->{'_ontologies'}); |
464
|
4
|
|
|
|
|
7
|
foreach my $ont (@_) { |
465
|
4
|
50
|
33
|
|
|
28
|
$self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI") |
466
|
|
|
|
|
|
|
unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI"); |
467
|
|
|
|
|
|
|
# the ontology name may have been auto-discovered while parsing |
468
|
|
|
|
|
|
|
# the file |
469
|
4
|
50
|
|
|
|
19
|
$ont->name($self->ontology_name) unless $ont->name(); |
470
|
4
|
|
|
|
|
5
|
push(@{$self->{'_ontologies'}}, $ont); |
|
4
|
|
|
|
|
14
|
|
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine. |
475
|
|
|
|
|
|
|
sub _add_term { |
476
|
227
|
|
|
227
|
|
215
|
my ( $self, $term, $ont ) = @_; |
477
|
227
|
50
|
33
|
|
|
518
|
$term->ontology($ont) if $ont && (! $term->ontology); |
478
|
227
|
|
|
|
|
314
|
$self->_ont_engine()->add_term( $term ); |
479
|
|
|
|
|
|
|
} # _add_term |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine |
484
|
|
|
|
|
|
|
sub _part_of_relationship { |
485
|
51
|
|
|
51
|
|
36
|
my $self = shift; |
486
|
|
|
|
|
|
|
|
487
|
51
|
|
|
|
|
68
|
return $self->_ont_engine()->part_of_relationship(@_); |
488
|
|
|
|
|
|
|
} # _part_of_relationship |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine |
493
|
|
|
|
|
|
|
sub _is_a_relationship { |
494
|
211
|
|
|
211
|
|
159
|
my $self = shift; |
495
|
|
|
|
|
|
|
|
496
|
211
|
|
|
|
|
254
|
return $self->_ont_engine()->is_a_relationship(@_); |
497
|
|
|
|
|
|
|
} # _is_a_relationship |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine |
500
|
|
|
|
|
|
|
sub _related_to_relationship { |
501
|
4
|
|
|
4
|
|
4
|
my $self = shift; |
502
|
|
|
|
|
|
|
|
503
|
4
|
|
|
|
|
6
|
return $self->_ont_engine()->related_to_relationship(@_); |
504
|
|
|
|
|
|
|
} # _is_a_relationship |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine |
509
|
|
|
|
|
|
|
sub _add_relationship { |
510
|
254
|
|
|
254
|
|
256
|
my ( $self, $parent, $child, $type, $ont ) = @_; |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# note the triple terminology (subject,predicate,object) corresponds to |
513
|
|
|
|
|
|
|
# (child,type,parent) |
514
|
254
|
|
|
|
|
258
|
$self->_ont_engine()->add_relationship( $child, $type, $parent, $ont ); |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
} # _add_relationship |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# This simply delegates. See SimpleGOEngine |
521
|
|
|
|
|
|
|
sub _has_term { |
522
|
258
|
|
|
258
|
|
181
|
my $self = shift; |
523
|
|
|
|
|
|
|
|
524
|
258
|
|
|
|
|
345
|
return $self->_ont_engine()->has_term( @_ ); |
525
|
|
|
|
|
|
|
} # _add_term |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# This parses the relationships files |
530
|
|
|
|
|
|
|
sub _parse_flat_file { |
531
|
4
|
|
|
4
|
|
9
|
my $self = shift; |
532
|
4
|
|
|
|
|
4
|
my $ont = shift; |
533
|
|
|
|
|
|
|
|
534
|
4
|
|
|
|
|
10
|
my @stack = (); |
535
|
4
|
|
|
|
|
3
|
my $prev_spaces = -1; |
536
|
4
|
|
|
|
|
5
|
my $prev_term = ""; |
537
|
|
|
|
|
|
|
|
538
|
4
|
|
|
|
|
19
|
while ( my $line = $self->_readline() ) { |
539
|
|
|
|
|
|
|
|
540
|
248
|
100
|
|
|
|
397
|
if ( $line =~ /^!/ ) { |
541
|
26
|
|
|
|
|
42
|
next; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# split into term specifications |
545
|
222
|
|
|
|
|
964
|
my @termspecs = split(/ (?=[%<])/, $line); |
546
|
|
|
|
|
|
|
# the first element is whitespace only |
547
|
222
|
100
|
|
|
|
573
|
shift(@termspecs) if $termspecs[0] =~ /^\s*$/; |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# parse out the focus term |
550
|
222
|
|
|
|
|
384
|
my $current_term = $self->_get_first_termid( $termspecs[0] ); |
551
|
222
|
|
|
|
|
319
|
my @syns = $self->_get_synonyms( $termspecs[0] ); |
552
|
222
|
|
|
|
|
299
|
my @sec_go_ids = $self->_get_secondary_termids( $termspecs[0] ); |
553
|
222
|
|
|
|
|
307
|
my @cross = $self->_get_db_cross_refs( $termspecs[0] ); |
554
|
222
|
|
|
|
|
154
|
my @cross_refs; |
555
|
222
|
|
|
|
|
233
|
foreach my $cross_ref (@cross) { |
556
|
81
|
100
|
|
|
|
141
|
$cross_ref eq $current_term && next; |
557
|
4
|
|
|
|
|
6
|
push(@cross_refs, $cross_ref); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
# parse out the parents of the focus term |
561
|
222
|
|
|
|
|
166
|
shift(@termspecs); |
562
|
222
|
|
|
|
|
202
|
my @isa_parents = (); |
563
|
222
|
|
|
|
|
155
|
my @partof_parents = (); |
564
|
222
|
|
|
|
|
179
|
foreach my $parent (@termspecs) { |
565
|
36
|
100
|
|
|
|
56
|
if (index($parent, "%") == 0) { |
|
|
50
|
|
|
|
|
|
566
|
25
|
|
|
|
|
39
|
push(@isa_parents, $self->_get_first_termid($parent)); |
567
|
|
|
|
|
|
|
} elsif (index($parent, "<") == 0) { |
568
|
11
|
|
|
|
|
12
|
push(@partof_parents, $self->_get_first_termid($parent)); |
569
|
|
|
|
|
|
|
} else { |
570
|
0
|
|
|
|
|
0
|
$self->warn("unhandled relationship type in '".$parent."'"); |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
222
|
100
|
|
|
|
303
|
if ( ! $self->_has_term( $current_term ) ) { |
575
|
204
|
|
|
|
|
266
|
my $term =$self->_create_ont_entry($self->_get_name($line, |
576
|
|
|
|
|
|
|
$current_term), |
577
|
|
|
|
|
|
|
$current_term ); |
578
|
204
|
|
|
|
|
374
|
$self->_add_term( $term, $ont ); |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
222
|
|
|
|
|
366
|
my $current_term_object = $self->_ont_engine()->get_terms( $current_term ); |
582
|
222
|
|
|
|
|
392
|
my $anno = $self->_to_annotation(\@cross_refs); |
583
|
222
|
|
|
|
|
513
|
$current_term_object->add_dbxref(-dbxrefs => $anno); |
584
|
222
|
|
|
|
|
413
|
$current_term_object->add_secondary_id( @sec_go_ids ); |
585
|
222
|
|
|
|
|
324
|
$current_term_object->add_synonym( @syns ); |
586
|
222
|
100
|
|
|
|
363
|
unless ( $line =~ /^\$/ ) { |
587
|
218
|
|
|
|
|
305
|
$current_term_object->ontology( $ont ); |
588
|
|
|
|
|
|
|
} |
589
|
222
|
|
|
|
|
224
|
foreach my $parent ( @isa_parents ) { |
590
|
25
|
100
|
|
|
|
32
|
if ( ! $self->_has_term( $parent ) ) { |
591
|
11
|
|
|
|
|
23
|
my $term = $self->_create_ont_entry($self->_get_name($line, |
592
|
|
|
|
|
|
|
$parent), |
593
|
|
|
|
|
|
|
$parent ); |
594
|
11
|
|
|
|
|
24
|
$self->_add_term( $term, $ont ); |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
|
597
|
25
|
|
|
|
|
43
|
$self->_add_relationship( $parent, |
598
|
|
|
|
|
|
|
$current_term, |
599
|
|
|
|
|
|
|
$self->_is_a_relationship(), |
600
|
|
|
|
|
|
|
$ont); |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
} |
603
|
222
|
|
|
|
|
181
|
foreach my $parent ( @partof_parents ) { |
604
|
11
|
100
|
|
|
|
14
|
if ( ! $self->_has_term( $parent ) ) { |
605
|
3
|
|
|
|
|
9
|
my $term = $self->_create_ont_entry($self->_get_name($line, |
606
|
|
|
|
|
|
|
$parent), |
607
|
|
|
|
|
|
|
$parent ); |
608
|
3
|
|
|
|
|
7
|
$self->_add_term( $term, $ont ); |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
|
611
|
11
|
|
|
|
|
18
|
$self->_add_relationship( $parent, |
612
|
|
|
|
|
|
|
$current_term, |
613
|
|
|
|
|
|
|
$self->_part_of_relationship(), |
614
|
|
|
|
|
|
|
$ont); |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
222
|
|
|
|
|
292
|
my $current_spaces = $self->_count_spaces( $line ); |
618
|
|
|
|
|
|
|
|
619
|
222
|
100
|
|
|
|
318
|
if ( $current_spaces != $prev_spaces ) { |
620
|
|
|
|
|
|
|
|
621
|
116
|
100
|
|
|
|
161
|
if ( $current_spaces == $prev_spaces + 1 ) { |
|
|
50
|
|
|
|
|
|
622
|
75
|
|
|
|
|
90
|
push( @stack, $prev_term ); |
623
|
|
|
|
|
|
|
} elsif ( $current_spaces < $prev_spaces ) { |
624
|
41
|
|
|
|
|
40
|
my $n = $prev_spaces - $current_spaces; |
625
|
41
|
|
|
|
|
68
|
for ( my $i = 0; $i < $n; ++$i ) { |
626
|
60
|
|
|
|
|
106
|
pop( @stack ); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
} else { |
629
|
0
|
|
|
|
|
0
|
$self->throw( "format error (file ".$self->file.")" ); |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
222
|
|
|
|
|
289
|
my $parent = $stack[ @stack - 1 ]; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# add a relationship if the line isn\'t the one with the root term |
636
|
|
|
|
|
|
|
# of the ontology (which is also the name of the ontology) |
637
|
222
|
100
|
|
|
|
387
|
if ( index($line,'$') != 0 ) { |
638
|
|
|
|
|
|
|
#adding @reltype@ syntax |
639
|
218
|
50
|
|
|
|
447
|
if ( $line !~ /^\s*([<%~]|\@\w+?\@)/ ) { |
640
|
0
|
|
|
|
|
0
|
$self->throw( "format error (file ".$self->file.") offending line:\n$line" ); |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
218
|
|
|
|
|
410
|
my($relstring) = $line =~ /^\s*([<%~]|\@[^\@]+?\@)/; |
644
|
|
|
|
|
|
|
|
645
|
218
|
|
|
|
|
163
|
my $reltype; |
646
|
|
|
|
|
|
|
|
647
|
218
|
100
|
|
|
|
365
|
if ($relstring eq '<') { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
648
|
36
|
|
|
|
|
59
|
$reltype = $self->_part_of_relationship; |
649
|
|
|
|
|
|
|
} elsif ($relstring eq '%') { |
650
|
182
|
|
|
|
|
264
|
$reltype = $self->_is_a_relationship; |
651
|
|
|
|
|
|
|
} elsif ($relstring eq '~') { |
652
|
0
|
|
|
|
|
0
|
$reltype = $self->_related_to_relationship; |
653
|
|
|
|
|
|
|
} else { |
654
|
0
|
|
|
|
|
0
|
$relstring =~ s/\@//g; |
655
|
0
|
0
|
|
|
|
0
|
if ($self->_ont_engine->get_relationship_type($relstring)) { |
656
|
0
|
|
|
|
|
0
|
$reltype = $self->_ont_engine->get_relationship_type($relstring); |
657
|
|
|
|
|
|
|
} else { |
658
|
0
|
|
|
|
|
0
|
$self->_ont_engine->add_relationship_type($relstring, $ont); |
659
|
0
|
|
|
|
|
0
|
$reltype = $self->_ont_engine->get_relationship_type($relstring); |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
#my $reltype = ($line =~ /^\s*) ? |
664
|
|
|
|
|
|
|
#$self->_part_of_relationship() : |
665
|
|
|
|
|
|
|
#$self->_is_a_relationship(); |
666
|
218
|
|
|
|
|
300
|
$self->_add_relationship( $parent, $current_term, $reltype, $ont); |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
|
669
|
222
|
|
|
|
|
178
|
$prev_spaces = $current_spaces; |
670
|
222
|
|
|
|
|
738
|
$prev_term = $current_term; |
671
|
|
|
|
|
|
|
} |
672
|
4
|
|
|
|
|
9
|
return $ont; |
673
|
|
|
|
|
|
|
} # _parse_relationships_file |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
# Parses the 1st term id number out of line. |
678
|
|
|
|
|
|
|
sub _get_first_termid { |
679
|
258
|
|
|
258
|
|
241
|
my ( $self, $line ) = @_; |
680
|
258
|
50
|
|
|
|
664
|
if ( $line =~ /;\s*([A-Z_]{1,8}:\d{1,})/ ) { |
681
|
|
|
|
|
|
|
# if ( $line =~ /;\s*(\w+:\w+)/ ) { |
682
|
258
|
|
|
|
|
590
|
return $1; |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
else { |
685
|
0
|
|
|
|
|
0
|
$self->throw( "format error: no term id in line \"$line\"" ); |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
} # _get_first_termid |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# Parses the name out of line. |
693
|
|
|
|
|
|
|
sub _get_name { |
694
|
218
|
|
|
218
|
|
204
|
my ( $self, $line, $termid ) = @_; |
695
|
|
|
|
|
|
|
|
696
|
218
|
50
|
|
|
|
4234
|
if ( $line =~ /([^;<%~]+);\s*$termid/ ) { |
697
|
218
|
|
|
|
|
334
|
my $name = $1; |
698
|
|
|
|
|
|
|
# remove trailing and leading whitespace |
699
|
218
|
|
|
|
|
517
|
$name =~ s/\s+$//; |
700
|
218
|
|
|
|
|
251
|
$name =~ s/^\s+//; |
701
|
218
|
|
|
|
|
163
|
$name =~ s/\@.+?\@//; |
702
|
|
|
|
|
|
|
# remove leading dollar character; also we default the name of the |
703
|
|
|
|
|
|
|
# ontology to this name unless it is preset to something else |
704
|
218
|
100
|
|
|
|
381
|
if(index($name,'$') == 0) { |
705
|
3
|
|
|
|
|
6
|
$name = substr($name,1); |
706
|
|
|
|
|
|
|
# replace underscores by spaces for setting the ontology name |
707
|
3
|
50
|
|
|
|
8
|
$self->ontology_name(join(" ",split(/_/,$name))) |
708
|
|
|
|
|
|
|
unless $self->ontology_name(); |
709
|
|
|
|
|
|
|
} |
710
|
218
|
|
|
|
|
557
|
return $name; |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
else { |
713
|
0
|
|
|
|
|
0
|
return; |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
} # _get_name |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
# Parses the synonyms out of line. |
719
|
|
|
|
|
|
|
sub _get_synonyms { |
720
|
222
|
|
|
222
|
|
200
|
my ( $self, $line ) = @_; |
721
|
|
|
|
|
|
|
|
722
|
222
|
|
|
|
|
186
|
my @synonyms = (); |
723
|
|
|
|
|
|
|
|
724
|
222
|
|
|
|
|
433
|
while ( $line =~ /synonym\s*:\s*([^;<%~]+)/g ) { |
725
|
38
|
|
|
|
|
45
|
my $syn = $1; |
726
|
38
|
|
|
|
|
88
|
$syn =~ s/\s+$//; |
727
|
38
|
|
|
|
|
43
|
$syn =~ s/^\s+//; |
728
|
38
|
|
|
|
|
81
|
push( @synonyms, $syn ); |
729
|
|
|
|
|
|
|
} |
730
|
222
|
|
|
|
|
270
|
return @synonyms; |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
} # _get_synonyms |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
# Parses the db cross refs out of line. |
737
|
|
|
|
|
|
|
sub _get_db_cross_refs { |
738
|
222
|
|
|
222
|
|
171
|
my ( $self, $line ) = @_; |
739
|
|
|
|
|
|
|
|
740
|
222
|
|
|
|
|
164
|
my @refs = (); |
741
|
|
|
|
|
|
|
|
742
|
222
|
|
|
|
|
632
|
while ( $line =~ /;([^;<%~:]+:[^;<%~:]+)/g ) { |
743
|
264
|
|
|
|
|
304
|
my $ref = $1; |
744
|
264
|
100
|
100
|
|
|
1106
|
if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) { |
745
|
183
|
|
|
|
|
332
|
next; |
746
|
|
|
|
|
|
|
} |
747
|
81
|
|
|
|
|
173
|
$ref =~ s/\s+$//; |
748
|
81
|
|
|
|
|
124
|
$ref =~ s/^\s+//; |
749
|
|
|
|
|
|
|
|
750
|
81
|
|
|
|
|
166
|
$ref = $self->unescape( $ref ); |
751
|
|
|
|
|
|
|
|
752
|
81
|
50
|
|
|
|
242
|
push( @refs, $ref ) if defined $ref; |
753
|
|
|
|
|
|
|
} |
754
|
222
|
|
|
|
|
292
|
return @refs; |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
# Parses the secondary go ids out of a line |
760
|
|
|
|
|
|
|
sub _get_secondary_termids { |
761
|
222
|
|
|
222
|
|
197
|
my ( $self, $line ) = @_; |
762
|
222
|
|
|
|
|
166
|
my @secs = (); |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
# while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) { |
765
|
222
|
|
|
|
|
358
|
while ( $line =~ /,\s*(\w+:\w+)/g ) { |
766
|
1
|
|
|
|
|
3
|
my $sec = $1; |
767
|
1
|
|
|
|
|
2
|
push( @secs, $sec ); |
768
|
|
|
|
|
|
|
} |
769
|
222
|
|
|
|
|
192
|
return @secs; |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
} # _get_secondary_termids |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# Counts the spaces at the beginning of a line in the relationships files |
775
|
|
|
|
|
|
|
sub _count_spaces { |
776
|
222
|
|
|
222
|
|
213
|
my ( $self, $line ) = @_; |
777
|
|
|
|
|
|
|
|
778
|
222
|
100
|
|
|
|
518
|
if ( $line =~ /^(\s+)/ ) { |
779
|
218
|
|
|
|
|
790
|
return length( $1 ); |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
else { |
782
|
4
|
|
|
|
|
7
|
return 0; |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
} # _count_spaces |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# "next" method for parsing the defintions file |
788
|
|
|
|
|
|
|
sub _next_term { |
789
|
13
|
|
|
13
|
|
10
|
my ( $self ) = @_; |
790
|
|
|
|
|
|
|
|
791
|
13
|
100
|
100
|
|
|
21
|
if ( ($self->_done() == TRUE) || (! $self->_defs_io())) { |
792
|
4
|
|
|
|
|
8
|
return; |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
9
|
|
|
|
|
9
|
my $line = ""; |
796
|
9
|
|
|
|
|
6
|
my $termid = ""; |
797
|
9
|
|
|
|
|
13
|
my $next_term = $self->_term(); |
798
|
9
|
|
|
|
|
8
|
my $def = ""; |
799
|
9
|
|
|
|
|
7
|
my $comment = ""; |
800
|
9
|
|
|
|
|
8
|
my @def_refs = (); |
801
|
9
|
|
|
|
|
8
|
my $isobsolete; |
802
|
|
|
|
|
|
|
|
803
|
9
|
|
|
|
|
12
|
while( $line = ( $self->_defs_io->_readline() ) ) { |
804
|
62
|
100
|
100
|
|
|
309
|
if ( $line !~ /\S/ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
805
|
|
|
|
|
|
|
|| $line =~ /^\s*!/ ) { |
806
|
26
|
|
|
|
|
34
|
next; |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*term:\s*(.+)/ ) { |
809
|
9
|
|
|
|
|
14
|
$self->_term( $1 ); |
810
|
9
|
100
|
|
|
|
11
|
last if $self->_not_first_record(); |
811
|
2
|
|
|
|
|
3
|
$next_term = $1; |
812
|
2
|
|
|
|
|
4
|
$self->_not_first_record( TRUE ); |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) { |
815
|
9
|
|
|
|
|
20
|
$termid = $1; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*definition:\s*(.+)/ ) { |
818
|
9
|
|
|
|
|
23
|
$def = $self->unescape($1); |
819
|
9
|
50
|
|
|
|
26
|
$isobsolete = 1 if index($def,"OBSOLETE") == 0; |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) { |
822
|
9
|
|
|
|
|
14
|
push( @def_refs, $self->unescape($1) ); |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*comment:\s*(.+)/ ) { |
825
|
0
|
|
|
|
|
0
|
$comment = $self->unescape($1); |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
} |
828
|
9
|
100
|
|
|
|
16
|
$self->_done( TRUE ) unless $line; # we'll come back until done |
829
|
9
|
|
|
|
|
17
|
return $self->_create_ont_entry( $next_term, $termid, $def, |
830
|
|
|
|
|
|
|
$comment, \@def_refs, $isobsolete); |
831
|
|
|
|
|
|
|
} # _next_term |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
# Holds the GO engine to be parsed into |
836
|
|
|
|
|
|
|
sub _ont_engine { |
837
|
1239
|
|
|
1239
|
|
925
|
my ( $self, $value ) = @_; |
838
|
|
|
|
|
|
|
|
839
|
1239
|
100
|
|
|
|
1628
|
if ( defined $value ) { |
840
|
4
|
|
|
|
|
6
|
$self->{ "_ont_engine" } = $value; |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
|
843
|
1239
|
|
|
|
|
2382
|
return $self->{ "_ont_engine" }; |
844
|
|
|
|
|
|
|
} # _ont_engine |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
# Used to create ontology terms. |
848
|
|
|
|
|
|
|
# Arguments: name, id |
849
|
|
|
|
|
|
|
sub _create_ont_entry { |
850
|
227
|
|
|
227
|
|
276
|
my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_; |
851
|
|
|
|
|
|
|
|
852
|
227
|
50
|
33
|
|
|
842
|
if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) { |
853
|
0
|
|
|
|
|
0
|
$obsolete = 1; |
854
|
|
|
|
|
|
|
} |
855
|
227
|
|
|
|
|
357
|
my $anno = $self->_to_annotation($dbxrefs); |
856
|
227
|
|
|
|
|
473
|
my $term = $self->term_factory->create_object(-name => $name, |
857
|
|
|
|
|
|
|
-identifier => $termid, |
858
|
|
|
|
|
|
|
-definition => $def, |
859
|
|
|
|
|
|
|
-comment => $cmt, |
860
|
|
|
|
|
|
|
-dbxrefs => $anno, |
861
|
|
|
|
|
|
|
-is_obsolete => $obsolete); |
862
|
|
|
|
|
|
|
|
863
|
227
|
|
|
|
|
376
|
return $term; |
864
|
|
|
|
|
|
|
} # _create_ont_entry |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
# Holds whether first record or not |
869
|
|
|
|
|
|
|
sub _not_first_record { |
870
|
15
|
|
|
15
|
|
15
|
my ( $self, $value ) = @_; |
871
|
|
|
|
|
|
|
|
872
|
15
|
100
|
|
|
|
24
|
if ( defined $value ) { |
873
|
6
|
|
|
|
|
9
|
$self->{ "_not_first_record" } = $value; |
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
|
876
|
15
|
|
|
|
|
22
|
return $self->{ "_not_first_record" }; |
877
|
|
|
|
|
|
|
} # _not_first_record |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# Holds whether done or not |
882
|
|
|
|
|
|
|
sub _done { |
883
|
19
|
|
|
19
|
|
16
|
my ( $self, $value ) = @_; |
884
|
|
|
|
|
|
|
|
885
|
19
|
100
|
|
|
|
31
|
if ( defined $value ) { |
886
|
6
|
|
|
|
|
8
|
$self->{ "_done" } = $value; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
19
|
|
|
|
|
45
|
return $self->{ "_done" }; |
890
|
|
|
|
|
|
|
} # _done |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# Holds a term. |
894
|
|
|
|
|
|
|
sub _term { |
895
|
22
|
|
|
22
|
|
25
|
my ( $self, $value ) = @_; |
896
|
|
|
|
|
|
|
|
897
|
22
|
100
|
|
|
|
28
|
if ( defined $value ) { |
898
|
13
|
|
|
|
|
24
|
$self->{ "_term" } = $value; |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
|
901
|
22
|
|
|
|
|
25
|
return $self->{ "_term" }; |
902
|
|
|
|
|
|
|
} # _term |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
# convert simple strings to Bio::Annotation::DBLinks |
905
|
|
|
|
|
|
|
sub _to_annotation { |
906
|
449
|
|
|
449
|
|
357
|
my ($self , $links) = @_; |
907
|
449
|
100
|
|
|
|
697
|
return unless $links; |
908
|
231
|
|
|
|
|
149
|
my @dbxrefs; |
909
|
231
|
|
|
|
|
149
|
for my $string (@{$links}) { |
|
231
|
|
|
|
|
293
|
|
910
|
13
|
|
|
|
|
33
|
my ($db, $id) = split(':',$string); |
911
|
13
|
|
|
|
|
55
|
push @dbxrefs, Bio::Annotation::DBLink->new(-database => $db, -primary_id => $id); |
912
|
|
|
|
|
|
|
} |
913
|
231
|
|
|
|
|
260
|
return \@dbxrefs; |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
1; |