line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package GO::OntologyProvider::OboParser; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# File : OboParser.pm |
4
|
|
|
|
|
|
|
# Authors : Elizabeth Boyle; Gavin Sherlock |
5
|
|
|
|
|
|
|
# Date Begun : Summer 2001 |
6
|
|
|
|
|
|
|
# Rewritten : September 29th 2002 |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Updated to parse the gene ontology info from the obo file. |
9
|
|
|
|
|
|
|
# August 2006, Shuai Weng |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# $Id: OboParser.pm,v 1.4 2007/11/15 18:32:12 sherlock Exp $ |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# License information (the MIT license) |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Copyright (c) 2003 Gavin Sherlock; Stanford University |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Permission is hereby granted, free of charge, to any person |
18
|
|
|
|
|
|
|
# obtaining a copy of this software and associated documentation files |
19
|
|
|
|
|
|
|
# (the "Software"), to deal in the Software without restriction, |
20
|
|
|
|
|
|
|
# including without limitation the rights to use, copy, modify, merge, |
21
|
|
|
|
|
|
|
# publish, distribute, sublicense, and/or sell copies of the Software, |
22
|
|
|
|
|
|
|
# and to permit persons to whom the Software is furnished to do so, |
23
|
|
|
|
|
|
|
# subject to the following conditions: |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# The above copyright notice and this permission notice shall be |
26
|
|
|
|
|
|
|
# included in all copies or substantial portions of the Software. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
29
|
|
|
|
|
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
30
|
|
|
|
|
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
31
|
|
|
|
|
|
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
32
|
|
|
|
|
|
|
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
33
|
|
|
|
|
|
|
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
34
|
|
|
|
|
|
|
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
35
|
|
|
|
|
|
|
# SOFTWARE. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=pod |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 NAME |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
GO::OntologyProvider::OboParser - Provides API for retrieving data from Gene Ontology obo file. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 SYNOPSIS |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use GO::OntologyProvider::OboParser; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => "gene_ontology.obo", |
48
|
|
|
|
|
|
|
aspect => [P|F|C]); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
print "The ancestors of GO:0006177 are:\n"; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my $node = $ontology->nodeFromId("GO:0006177"); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
foreach my $ancestor ($node->ancestors){ |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
print $ancestor->goid, " ", $ancestor->term, "\n"; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
$ontology->printOntology(); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 DESCRIPTION |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
GO::OntologyProvider::OboParser implements the interface defined by |
66
|
|
|
|
|
|
|
GO::OntologyProvider, and parses the gene ontology obo file (GO) in |
67
|
|
|
|
|
|
|
plain text (not XML) format. These files can be obtained from the |
68
|
|
|
|
|
|
|
Gene Ontology Consortium web site, http://www.geneontology.org/. From |
69
|
|
|
|
|
|
|
the information in the file, it creates a directed acyclic graph (DAG) |
70
|
|
|
|
|
|
|
structure in memory. This means that GO terms are arranged into |
71
|
|
|
|
|
|
|
tree-like structures where each GO node can have multiple parent nodes |
72
|
|
|
|
|
|
|
and multiple child nodes. The file MUST be named with a .obo suffix. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
This data structure can be used in conjunction with files in which |
75
|
|
|
|
|
|
|
certain genes are annotated to corresponding GO nodes. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Each GO ID (e.g. "GO:1234567") has associated with it a GO node. That |
78
|
|
|
|
|
|
|
GO node contains the name of the GO term, a list of the nodes directly |
79
|
|
|
|
|
|
|
above the node ("parent nodes"), and a list of the nodes directly |
80
|
|
|
|
|
|
|
below the current node ("child nodes"). The "ancestor nodes" of a |
81
|
|
|
|
|
|
|
certain node are all of the nodes that are in a path from the current |
82
|
|
|
|
|
|
|
node to the root of the ontology, with all repetitions removed. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The example format is as follows: |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
[Term] |
87
|
|
|
|
|
|
|
id: GO:0000006 |
88
|
|
|
|
|
|
|
name: high affinity zinc uptake transporter activity |
89
|
|
|
|
|
|
|
namespace: molecular_function |
90
|
|
|
|
|
|
|
def: "Catalysis of the reaction: Zn2+(out) = Zn2+(in), probably powered by proton motive force." [TC:2.A.5.1.1] |
91
|
|
|
|
|
|
|
xref_analog: TC:2.A.5.1.1 |
92
|
|
|
|
|
|
|
is_a: GO:0005385 ! zinc ion transporter activity |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
[Term] |
96
|
|
|
|
|
|
|
id: GO:0000005 |
97
|
|
|
|
|
|
|
name: ribosomal chaperone activity |
98
|
|
|
|
|
|
|
namespace: molecular_function |
99
|
|
|
|
|
|
|
def: "OBSOLETE. Assists in the correct assembly of ribosomes or ribosomal subunits in vivo, but is not a component of the assembled ribosome when performing its normal biological function." [GOC:jl, PMID:12150913] |
100
|
|
|
|
|
|
|
comment: This term was made obsolete because it refers to a class of gene products and a biological process rather than a molecular |
101
|
|
|
|
|
|
|
function. To update annotations, consider the molecular function term 'unfolded protein binding ; GO:0051082' and the biological process |
102
|
|
|
|
|
|
|
term 'ribosome biogenesis and assembly ; GO:0042254' and its children. |
103
|
|
|
|
|
|
|
is_obsolete: true |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
################################################################## |
108
|
|
|
|
|
|
|
################################################################## |
109
|
|
|
|
|
|
|
|
110
|
2
|
|
|
2
|
|
248807
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
94
|
|
111
|
2
|
|
|
2
|
|
12
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
70
|
|
112
|
2
|
|
|
2
|
|
12
|
use diagnostics; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
16
|
|
113
|
|
|
|
|
|
|
|
114
|
2
|
|
|
2
|
|
380
|
use base qw (GO::OntologyProvider); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
2927
|
|
115
|
2
|
|
|
2
|
|
631
|
use GO::Node; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
66
|
|
116
|
2
|
|
|
2
|
|
1440
|
use Storable qw (nstore); |
|
2
|
|
|
|
|
5178
|
|
|
2
|
|
|
|
|
155
|
|
117
|
|
|
|
|
|
|
|
118
|
2
|
|
|
2
|
|
1068
|
use IO::File; |
|
2
|
|
|
|
|
25294
|
|
|
2
|
|
|
|
|
5160
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
our $VERSION = 0.01;
our $PACKAGE = "GO::OntologyProvider::OntologyOboParser";

##################################################################
#
# CLASS ATTRIBUTES
#
##################################################################

# Everything declared below is a constant, whose value is fixed here
# when the module is compiled.

my $DEBUG = 0;

# The keys under which instance data are stored in the object's hash.
# Each key is the value of $PACKAGE, followed by '::__' and the name
# of the attribute.

my ($kFile, $kAspect, $kRootNode, $kNodes, $kSecondaryIds, $kParent)
    = map { $PACKAGE . '::__' . $_ }
      qw (file aspect rootNode nodes secondaryIds parent);

# Maps each one letter aspect code to the name of the namespace that
# is compared against the 'namespace:' lines of the obo file.

my %kAspects = (P => 'biological_process',
                F => 'molecular_function',
                C => 'cellular_component');
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
################################################################## |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# The constructor, and associated initialization methods |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
################################################################## |
152
|
|
|
|
|
|
|
sub new{
##################################################################
# This is the constructor for an OboParser object.
#
# It takes named arguments, in one of two forms :
#
#   ontologyFile and aspect : the ontology is parsed from the named
#                             file, which is checked by __setFile and
#                             read by __init.
#
#   objectFile              : a previously serialized object is read
#                             back from disk, using Storable.
#
# If an objectFile argument is present it is used, whether or not an
# ontologyFile argument was also passed.  If neither argument is
# present, nothing is constructed and undef is returned.
#
# Usage :
#
#    my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => $file,
#                                                        aspect       => [P|F|C]);
#
#    my $ontology = GO::OntologyProvider::OboParser->new(objectFile => $file);
#

    my $class = shift;
    my %args  = @_;

    if (exists $args{'objectFile'}){

        # resurrect an object that was previously stored on disk

        my $stored = Storable::retrieve($args{'objectFile'});

        return ($stored);

    }

    # without an ontology file there is nothing to build

    return (undef) unless exists $args{'ontologyFile'};

    my $self = bless {}, $class;

    $self->__setFile($args{'ontologyFile'}, $args{'aspect'});

    $self->__init;

    return ($self);

}
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
############################################################################ |
198
|
|
|
|
|
|
|
sub __setFile{
############################################################################
# This private method checks the name of the ontology file and the GO
# aspect that were passed to the constructor, and then stores both of
# them inside the object's hash.
#
# It dies if :
#
#    - no file name was passed
#    - the file does not exist, is a directory, or is not readable
#    - the file name does not contain '.obo'
#    - no aspect was passed, or the aspect is not a key of %kAspects
#
# Usage :
#
#    $self->__setFile($file, $aspect);

    my ($self, $file, $aspect) = @_;

    if (!defined $file){

	# the file tests below would only produce 'uninitialized value'
	# warnings and a message about a file with no name, so report
	# what is actually wrong

	die "You have to pass the name of an ontology file to the ", ref($self);

    }elsif (!-e $file){

	die "$file does not exist";

    }elsif (-d $file){

	die "$file is a directory";

    }elsif (!-r $file){

	die "$file is not readable";

    }elsif ($file !~ /\.obo/){

	# NOTE(review): the pattern is not anchored, so '.obo' anywhere
	# in the name is accepted, although the message speaks of a
	# suffix.  Left unchanged so that names accepted so far keep
	# working - confirm before tightening it.

	die "$file must have a .obo suffix";

    }

    if (!defined $aspect) {

	die "You have to pass the GO aspect [".join("\|", sort keys %kAspects) ."] to the ", ref($self);

    }

    if (!exists $kAspects{$aspect}) {

	die "Unknown aspect name: $aspect. The allowable GO aspects are ". join(", ", sort keys %kAspects)."\n";

    }

    $self->{$kFile}   = $file;

    $self->{$kAspect} = $aspect;

}
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
############################################################################ |
242
|
|
|
|
|
|
|
sub __file {
############################################################################
# Private accessor, which returns the name of the ontology file that
# is stored in the object's hash under the $kFile key.

    my ($self) = @_;

    return $self->{$kFile};

}
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
############################################################################ |
251
|
|
|
|
|
|
|
sub __aspect {
############################################################################
# Private accessor, which returns the aspect code that is stored in
# the object's hash under the $kAspect key.

    my ($self) = @_;

    return $self->{$kAspect};

}
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
############################################################################ |
260
|
|
|
|
|
|
|
sub __init {
############################################################################
# This method initializes the OboParser object, by parsing the
# ontology file, and storing the structures represented therein, in
# memory.
#
# The file is read one line at a time.  The lines of each [Term]
# stanza are collected, and when the stanza ends they are handed to
# __processNode, provided that the stanza had a namespace line for the
# requested aspect, and was not flagged as obsolete.  The namespace
# and is_obsolete lines themselves are not passed on.  Reading stops
# at the first [Typedef] line.  Once the file has been closed,
# __populatePaths is called to connect the nodes.
#
# Dies if the file cannot be opened or closed.

    my $self = shift;

    my $ontologyFh = IO::File->new($self->__file, q{<} )
	|| die "$PACKAGE can't open file ". $self->__file ." : $!";

    my $aspect = $kAspects{$self->__aspect};

    # state for the stanza that is currently being read

    my @entryLine;        # the lines collected so far
    my $isInAspect = 0;   # true once the requested namespace line was seen
    my $isObsolete = 0;   # true once an 'is_obsolete: true' line was seen

    # go through the ontology one line at a time, using a lexical so
    # that the caller's $_ is left alone

    while (my $line = <$ontologyFh>){

	chomp $line;

	# files with CRLF line endings leave a carriage return behind,
	# which would make every exact comparison below fail

	$line =~ s/\r\z//;

	# finish parsing the obo file if we reach the typedef line.

	last if ($line =~ /^\[Typedef\]/);

	if ($line eq '[Term]') {

	    # we reached a new term - so process the previous entry

	    if ($isInAspect && !$isObsolete) {

		$self->__processNode(\@entryLine);

	    }

	    # reset our variables

	    @entryLine  = ();
	    $isInAspect = 0;
	    $isObsolete = 0;

	}elsif ($line eq "namespace: $aspect"){

	    # term is in the requested namespace

	    $isInAspect = 1;

	}elsif ($line eq 'is_obsolete: true'){

	    # we don't want obsolete nodes - DO NOT COMMENT THIS OUT -
	    # infinite recursion will result!
	    #
	    # The flag is kept separately from the namespace flag, so
	    # that it does not matter which of the two lines comes
	    # first within a stanza.

	    $isObsolete = 1;

	}else {

	    # build up the information for this node

	    push(@entryLine, $line);

	}

    }

    # process the final node

    if ($isInAspect && !$isObsolete) {

	$self->__processNode(\@entryLine);

    }

    $ontologyFh->close || die "Can't close ". $self->__file ." : $!";

    # now populate ancestor paths for each node.

    $self->__populatePaths;

}
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
############################################################################ |
349
|
|
|
|
|
|
|
sub __processNode{
############################################################################
# This private method turns the lines of a single term entry into a
# node.  __getNodeInfoFromLine is used to extract from the entry :
#
#    1. The name of the node.
#    2. The primary GOID of the node, and any secondary GOIDs.
#    3. The GOIDs of the parents of the node.
#
# A node is created for the term with __createNode.  The list of
# parent GOIDs is recorded in the $kParent hash under the primary
# GOID, and each secondary GOID is recorded in the $kSecondaryIds hash
# as pointing to the primary GOID.
#
# Usage :
#
#    $self->__processNode($entryLineArrayRef);

    my $self  = shift;
    my $entry = shift;

    my ($term, $primaryId, $alternateIds, $parentIds)
	= $self->__getNodeInfoFromLine($entry);

    $self->__createNode($primaryId, $term);

    unless (@{$parentIds}) {

	# A term without a parent is hung off the 'Gene_Ontology'
	# term.  The GOA has obsoleted that term, but for now it is
	# needed to make the graph work, so the root is recreated here
	# with its original id and name.  This needs to be fixed in
	# future.

	my ($rootGoid, $rootTerm) = ('GO:0003673', 'Gene_Ontology');

	$self->{$kRootNode} = $self->__createNode($rootGoid, $rootTerm);

	push(@{$parentIds}, $rootGoid);

    }

    ## point every secondary id at the primary id

    $self->{$kSecondaryIds}{$_} = $primaryId for @{$alternateIds};

    $self->{$kParent}{$primaryId} = $parentIds;

}
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
############################################################################ |
400
|
|
|
|
|
|
|
sub __getNodeInfoFromLine {
############################################################################

# This private method takes an array reference to the lines for a
# given GO term node entry and returns, in this order :
#
#    - the term name, with any backslashes removed (undef if the entry
#      has no 'name:' line)
#    - the primary goid, taken from the 'id:' line
#    - a reference to an array of the secondary goids, taken from the
#      'alt_id:' lines
#    - a reference to an array of the direct parent goids, taken from
#      the 'is_a:' and 'relationship: part_of' lines
#
# Lines of any other kind are ignored.  It dies if no primary goid
# can be found in the entry.
#
# Usage:
#
#    my ($termName, $goid, $secondaryGoidArrayRef, $parentGoidArrayRef)
#        = $self->__getNodeInfoFromLine($entryLineArrayRef);

    my ($self, $entryLineArrayRef) = @_;

    my ($nodeName, $goid, @secondaryGoid, @parentGoid);

    foreach my $line (@{$entryLineArrayRef}) {

	if ($line =~ /^id: *(GO:0*[0-9]+)$/) {

	    $goid = $1;

	}elsif ($line =~ /^name: *(.+)$/) {

	    $nodeName = $1;

	}elsif ($line =~ /^alt_id: *(GO:0*[0-9]+)$/) {

	    push(@secondaryGoid, $1);

	}elsif ($line =~ /^(?:is_a:|relationship: part_of) *(GO:0*[0-9]+)/) {

	    # anything after the goid, such as the '! term name'
	    # remark, is deliberately not matched

	    push(@parentGoid, $1);

	}

    }

    # check that we can actually get some goids.  Added this in to
    # deal with when a broken file that appeared on the GO site, it
    # caused me to get email saying my code was broken...

    if (!$goid){

	# an entry that is broken enough to lack an id may lack a name
	# as well, so do not interpolate an undefined value

	my $label = defined $nodeName ? $nodeName : '';

	die "There appears to be a problem with the ontology file.\n".
	    "No GOIDs could be extracted from '$label'.\n\n";

    }

    # remove \'s from nodeName

    $nodeName =~ s/\\//g if defined $nodeName;

    return ($nodeName, $goid, \@secondaryGoid, \@parentGoid);

}
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
############################################################################### |
461
|
|
|
|
|
|
|
sub __createNode {
###############################################################################
# This private method returns the node for a given goid, creating it
# if need be.
#
# If __nodeIsAlreadyCreated reports that a node exists for the goid,
# that node is fetched with nodeFromId and returned.  Otherwise, a new
# GO::Node is constructed from the goid and the term name, stored in
# the $kNodes hash under the goid, and returned.
#
# Usage :
#
#    my $node = $self->__createNode($goid, $nodeName);

    my ($self, $goid, $nodeName) = @_;

    if ($self->__nodeIsAlreadyCreated($goid)){

	# hand back the node we already have

	my $existingNode = $self->nodeFromId($goid);

	return $existingNode;

    }

    # otherwise make a new node, and remember it

    my $newNode = GO::Node->new(goid => $goid,
				term => $nodeName);

    $self->{$kNodes}{$goid} = $newNode;

    return $newNode;

}
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
############################################################################### |
490
|
|
|
|
|
|
|
sub __populatePaths {
###############################################################################
# in this method, we populate all the paths to the root for each node
# in the ontology.  To do this, we have to call the recursive method,
# __findAncestors(), which will build up each path from a node to the
# root, and when it reaches the end of the path (the root itself),
# will add that path via the Node method addPathToRoot.
#
# It also connects each child node to its parent nodes, using the
# Node methods addParentNodes and addChildNodes.
#
# Dies if a node lists a parent for which no node was created.

# POSSIBLE ALTERNATIVE APPROACH
#
# Profiling of the OboParser reveals that when building the ontology,
# ~77% of the time is spent in the recursive __findAncestors().  Thus,
# if a way could be found to decrease the number of recursive calls to
# that method, it might significantly positively impact the runtime
# performance.
#
# A possible alternative approach to the current method, might be to
# simply populate paths for every leaf node (we would need to know who
# they are), and as their paths are populated, also populate the paths
# for their ancestors as well, as the paths from the ancestors are
# subparts of the paths from leaves to the root.  However, care would
# have to be taken to not add the same path twice, as there would be
# issues with when a leaf has two or more paths to a particular node,
# whose paths are then being added.  Note also, if you encounter a
# node for whom you have already added paths, you don't need to add
# them again, so this might significantly save the number of recursive
# calls required.

    my $self = shift;

    # go through each GO node in the $kParent hash, the keys of which
    # are the goids of the child nodes, and the values of which are
    # references to arrays of the goids of their parents.

    foreach my $childGoid ( keys %{$self->{$kParent}} ) {

	# note, we directly access the kNodes hash here, rather than
	# use nodeFromId().  This is for performance reasons only -
	# accessing the kNodes hash directly in this method, and the
	# __findAncestors method shaves about 40% of the runtime off of
	# the time taken to populate all the paths.

	my $childNode = $self->{$kNodes}{$childGoid};

	# now go through each of this child's parents

	foreach my $parentGoid (@{$self->{$kParent}{$childGoid}}) {

	    ### Note, there has been a case in the obo file where
	    ### there was an error, and a node was listed as having a
	    ### parent in a different aspect.  As the parser only
	    ### keeps nodes of a given aspect when it reads the file,
	    ### it is then left with a dangling reference, and
	    ### parentNode will be undef.  We throw an exception with
	    ### an explanation, rather than printing to STDOUT and
	    ### calling exit, which would end the whole program with a
	    ### status of success, and could not be caught by the
	    ### caller.

	    my $parentNode = $self->{$kNodes}{$parentGoid};

	    unless ($parentNode) {

		die "There is an error in the obo file, where the relationship between ",
		    $childNode->goid,
		    " and one or more of its parents is not correctly defined.\n",
		    "Please check the obo file.\n",
		    "The program is unable to continue.\n\n";

	    }

	    ### create connections between child node and its parent

	    $childNode->addParentNodes($parentNode);

	    $parentNode->addChildNodes($childNode);

	    # begin to build the ancestor path, starting with this
	    # parent

	    my @path = ($parentNode);

	    if (exists $self->{$kParent}{$parentGoid}){

		# if this parent has parents, then we continue to
		# build the path upwards to the root.  We pass in the
		# child node, so that each path which reaches the root
		# can be added during the recursive calls to find
		# ancestor

		$self->__findAncestors($childNode,
				       $parentGoid,
				       \@path);

	    }else{

		# otherwise, the path only contains the root, and we add it.

		$childNode->addPathToRoot(@path);

	    }

	}

    }

}
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
####################################################################### |
599
|
|
|
|
|
|
|
sub __findAncestors {
#######################################################################
# Usage:
#
#    $self->__findAncestors($childNode,
#                           $parentGoid,
#                           $pathArrayRef);
#
# This recursive method extends the path held in @{$pathArrayRef},
# which leads upwards from $childNode, through each parent of
# $parentGoid in turn, as listed in the $kParent hash.  When a node is
# reached that has no entry in the $kParent hash, the path is
# complete, and it is added to $childNode, in reverse order, using the
# Node method addPathToRoot.
#
# @{$pathArrayRef} is used as a stack, and holds the same elements on
# return as it did when it was passed in.

    my ($self, $childNode, $parentGoid, $pathArrayRef) = @_;

    my $parentsOf = $self->{$kParent};
    my $nodeFor   = $self->{$kNodes};

    # visit each immediate parent of the goid that was passed in

    for my $ancestorGoid (@{$parentsOf->{$parentGoid}}) {

	# extend the path that is being built with this ancestor

	push(@{$pathArrayRef}, $nodeFor->{$ancestorGoid});

	if (!exists $parentsOf->{$ancestorGoid}){

	    # nothing lies above this ancestor, so the path is done

	    $childNode->addPathToRoot(reverse @{$pathArrayRef});

	}else {

	    # keep climbing up the DAG from this ancestor

	    $self->__findAncestors($childNode,
				   $ancestorGoid,
				   $pathArrayRef);

	}

	# most nodes have several paths to the root, so take this
	# ancestor off again, which leaves the path as it was before
	# this iteration, ready for the next parent

	pop(@{$pathArrayRef});

    }

}
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
############################################################################ |
650
|
|
|
|
|
|
|
sub __nodeIsAlreadyCreated {
############################################################################
# Private predicate telling the caller whether an entry for the given GO ID
# is already present in this object's node table.
#
# Usage (internal only):
#
#    if ($self->__nodeIsAlreadyCreated($goid)) { ... }
#
# The return value is whatever exists() yields for that table entry.

    my ($self, $goid) = @_;

    return exists $self->{$kNodes}{$goid};

}
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
############################################################################ |
661
|
|
|
|
|
|
|
sub printOntology {
############################################################################
# Prints the ontology, with redundancies: output starts at the root node
# with an indentation level of 0, and __printNode then walks down through
# the child nodes.  No de-duplication is attempted here.
#
# Usage:
#
#    $ontologyParser->printOntology;

    my ($self) = @_;

    my $root = $self->rootNode;

    $self->__printNode($root, 0);

}
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
############################################################################ |
672
|
|
|
|
|
|
|
sub __printNode {
############################################################################
# Recursive helper for printOntology.  Prints one line for the given node
# (its term and GO ID, preceded by one space per indentation level) and
# then calls itself for each child node, one level deeper.  Children are
# visited in string order of their terms.
#
# Arguments:
#
#    $node             : the node to print; it must answer term, goid and
#                        childNodes
#    $indentationLevel : how many levels below the starting node we are

    my ($self, $node, $indentationLevel) = @_;

    my $indent = " " x $indentationLevel;

    print $indent, $node->term, " ; ", $node->goid, "\n";

    # order the children by term so that the output is stable
    my @childrenByTerm = sort { $a->term cmp $b->term } $node->childNodes;

    my $childLevel = $indentationLevel + 1;

    foreach my $childNode (@childrenByTerm) {

        $self->__printNode($childNode, $childLevel);

    }

}
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
############################################################################ |
691
|
|
|
|
|
|
|
sub allNodes {
############################################################################
# Returns all of the nodes currently held in this object's node table, as
# a list.  The nodes come back in Perl's hash order, so no particular
# ordering should be assumed by the caller.
#
# Usage:
#
#    my @nodes = $ontologyParser->allNodes;

    my ($self) = @_;

    return values %{ $self->{$kNodes} };

}
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
############################################################################ |
704
|
|
|
|
|
|
|
sub rootNode {
############################################################################
# Accessor for the root node of the ontology, i.e. whatever is stored
# under the root node attribute of this object.
#
# Usage:
#
#    my $rootNode = $ontologyParser->rootNode;

    my ($self) = @_;

    return $self->{$kRootNode};

}
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
############################################################################ |
717
|
|
|
|
|
|
|
sub nodeFromId {
############################################################################
# Public lookup from a GOID to the node stored for it.  Both primary and
# secondary GOIDs are accepted: a secondary GOID is first translated to
# the primary GOID recorded for it, and the node for that primary GOID is
# returned.  When the GOID is known in neither table, undef is returned.
#
# Usage :
#
#    my $node = $ontologyParser->nodeFromId($goid);

    my ($self, $goid) = @_;

    # a primary GOID maps straight onto its node

    return $self->{$kNodes}{$goid} if exists $self->{$kNodes}{$goid};

    # a secondary GOID is resolved through its primary GOID

    if (exists $self->{$kSecondaryIds}{$goid}) {

        my $primaryGoid = $self->{$kSecondaryIds}{$goid};

        return $self->{$kNodes}{$primaryGoid};

    }

    # unknown GOID: an explicit undef is returned (also in list context),
    # which is what existing callers receive today

    return undef;

}
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
############################################################################ |
745
|
|
|
|
|
|
|
sub numNodes {
############################################################################
# Public method giving the number of nodes held in this object's node
# table, i.e. the number of keys in that table.
#
# Usage :
#
#    my $numNodes = $ontologyParser->numNodes;

    my ($self) = @_;

    # keys in scalar context yields the number of entries
    my $nodeCount = keys %{ $self->{$kNodes} };

    return $nodeCount;

}
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
############################################################################ |
759
|
|
|
|
|
|
|
sub serializeToDisk {
############################################################################
# Saves the current state of the Ontology Parser Object to a file,
# using the Storable package.  Saves in network order for portability,
# just in case.  Returns the name of the file.  If no filename is
# provided, then the name of the file (and its directory, if one was
# provided) used for object construction, will be used, with .obj
# appended.  If the object was instantiated from a file with a .obj
# suffix, then the same filename would be used, if none were provided.
#
# A 'filename' argument that is undefined or an empty string is treated
# in the same way as no filename at all, so that a caller passing on an
# unset variable gets the documented default rather than an attempt to
# write to a nameless file.
#
# Dies if nstore reports failure.
#
# This method currently causes a segfault on MacOSX (at least 10.1.5
# -> 10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store
# the process ontology.  This failure occurs using either store, or
# nstore, and is manifested by a segmentation fault.  It has not been
# investigated whether this is a perl problem, or a Storable problem
# (which has large amounts of C-code).  This does not cause a
# segmentation fault on Solaris, using perl 5.6.1 and Storable 1.0.13.
# This doesn't make it clear whether it's a MacOSX problem or a perl
# problem or not.  It should be noted that newer versions of both perl
# and Storable exist, and the code should be tested with those as
# well.
#
# Usage:
#
#    my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename);

    my ($self, %args) = @_;

    my $fileName = $args{'filename'};

    if (!defined $fileName || $fileName eq '') {

        # nothing usable was supplied, so we build a name from the file
        # used to instantiate ourselves

        $fileName = $self->__file;

        if ($fileName !~ /\.obj$/) { # we weren't instantiated from an object

            $fileName .= ".obj"; # add a .obj suffix to the name

        }

    }

    nstore($self, $fileName) or die "$PACKAGE could not serialize itself to $fileName : $!";

    return ($fileName);

}
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
1; # to keep perl happy |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
# P O D D O C U M E N T A T I O N # |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=pod |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=head1 Instance Constructor |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head2 new |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
This is the constructor for an OboParser object. The constructor |
823
|
|
|
|
|
|
|
expects one of two arguments, either an 'ontologyFile' argument, or an |
824
|
|
|
|
|
|
|
'objectFile' argument. When instantiated with an ontologyFile |
825
|
|
|
|
|
|
|
argument, it expects it to correspond to an obo file created by the GO |
826
|
|
|
|
|
|
|
consortium, according to their file format, and in addition, also |
827
|
|
|
|
|
|
|
requires an 'aspect' argument. When instantiated with an objectFile |
828
|
|
|
|
|
|
|
argument, it expects to open a previously created ontologyParser |
829
|
|
|
|
|
|
|
object that has been serialized to disk (see serializeToDisk). |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
Usage: |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => $ontologyFile, |
834
|
|
|
|
|
|
|
aspect => $aspect); |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
my $ontology = GO::OntologyProvider::OboParser->new(objectFile => $objectFile); |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=head1 Instance Methods |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 printOntology |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
This prints out the ontology, with redundancies, to STDOUT. It does |
843
|
|
|
|
|
|
|
not yet print out all of the ontology information (like relationship |
844
|
|
|
|
|
|
|
type etc). This method will likely be removed in a future version, |
845
|
|
|
|
|
|
|
so should not be relied upon. |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
Usage: |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
$ontologyParser->printOntology; |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=head2 allNodes |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
This method returns an array of all the GO::Nodes that have been |
854
|
|
|
|
|
|
|
created. |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Usage: |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
my @nodes = $ontologyParser->allNodes; |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=head2 rootNode |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
This returns the root node in the ontology. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
my $rootNode = $ontologyParser->rootNode; |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=head2 nodeFromId |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
This public method takes a GOID and returns the GO::Node that |
869
|
|
|
|
|
|
|
it corresponds to. Secondary GOIDs are also resolved to their node. |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
Usage : |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
my $node = $ontologyParser->nodeFromId($goid); |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
If the GOID does not correspond to a GO node, then undef will be |
876
|
|
|
|
|
|
|
returned. Note if you try to call any methods on an undef, you will |
877
|
|
|
|
|
|
|
get a fatal runtime error, so if you can't guarantee all GOIDs that |
878
|
|
|
|
|
|
|
you supply are good, you should check that the return value from this |
879
|
|
|
|
|
|
|
method is defined. |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
=head2 numNodes |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
This public method returns the number of nodes that exist within the |
884
|
|
|
|
|
|
|
ontology |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
Usage : |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
my $numNodes = $ontologyParser->numNodes; |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=head2 serializeToDisk |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
Saves the current state of the Ontology Parser Object to a file, using |
893
|
|
|
|
|
|
|
the Storable package. Saves in network order for portability, just in |
894
|
|
|
|
|
|
|
case. Returns the name of the file. If no filename is provided, then |
895
|
|
|
|
|
|
|
the name of the file (and its directory, if one was provided) used for |
896
|
|
|
|
|
|
|
object construction, will be used, with .obj appended. If the object |
897
|
|
|
|
|
|
|
was instantiated from a file with a .obj suffix, then the same |
898
|
|
|
|
|
|
|
filename would be used, if none were provided. |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
This method currently causes a segfault on MacOSX (at least 10.1.5 -> |
901
|
|
|
|
|
|
|
10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store the |
902
|
|
|
|
|
|
|
process ontology. This failure occurs using either store, or nstore, |
903
|
|
|
|
|
|
|
and is manifested by a segmentation fault. It has not been |
904
|
|
|
|
|
|
|
investigated whether this is a perl problem, or a Storable problem |
905
|
|
|
|
|
|
|
(which has large amounts of C-code). This does not cause a |
906
|
|
|
|
|
|
|
segmentation fault on Solaris, using perl 5.6.1 and Storable 1.0.13. This |
907
|
|
|
|
|
|
|
does not make it clear whether it is a MacOSX problem or a perl |
908
|
|
|
|
|
|
|
problem or not. It should be noted that newer versions of both perl |
909
|
|
|
|
|
|
|
and Storable exist, and the code should be tested with those as well. |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
Usage: |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename); |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=head1 Authors |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
Gavin Sherlock; sherlock@genome.stanford.edu |
918
|
|
|
|
|
|
|
Elizabeth Boyle; ell@mit.edu |
919
|
|
|
|
|
|
|
Shuai Weng; shuai@genome.stanford.edu |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=cut |