line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: Parser.pm,v 1.15 2006/04/20 22:48:23 cmungall Exp $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# see also - http://www.geneontology.org |
5
|
|
|
|
|
|
|
# - http://www.godatabase.org/dev |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
GO::Parser - parses all GO file formats and types |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
fetch L<GO::Model::Graph> objects using a parser: |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Scenario 1: Getting objects from a file |
18
|
|
|
|
|
|
|
use GO::Parser; |
19
|
|
|
|
|
|
|
my $parser = new GO::Parser({handler=>'obj',use_cache=>1}); |
20
|
|
|
|
|
|
|
$parser->parse("function.ontology"); # ontology |
21
|
|
|
|
|
|
|
$parser->parse("GO.defs"); # definitions |
22
|
|
|
|
|
|
|
$parser->parse("ec2go"); # external refs |
23
|
|
|
|
|
|
|
$parser->parse("gene-associations.sgd"); # gene assocs |
24
|
|
|
|
|
|
|
# get GO::Model::Graph object |
25
|
|
|
|
|
|
|
my $graph = $parser->handler->graph; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Scenario 2: Getting OBO XML from a file |
28
|
|
|
|
|
|
|
use GO::Parser; |
29
|
|
|
|
|
|
|
my $parser = new GO::Parser({handler=>'xml'}); |
30
|
|
|
|
|
|
|
$parser->handler->file("output.xml"); |
31
|
|
|
|
|
|
|
$parser->parse("gene_ontology.obo"); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Scenario 3: Using an XSL stylesheet to convert the OBO XML |
34
|
|
|
|
|
|
|
use GO::Parser; |
35
|
|
|
|
|
|
|
my $parser = new GO::Parser({handler=>'xml'}); |
36
|
|
|
|
|
|
|
# xslt files are kept in $ENV{GO_ROOT}/xml/xsl |
37
|
|
|
|
|
|
|
# (if $GO_ROOT is not set, defaults to install directory) |
38
|
|
|
|
|
|
|
$parser->xslt("oboxml_to_owl"); |
39
|
|
|
|
|
|
|
$parser->handler->file("output.owl-xml"); |
40
|
|
|
|
|
|
|
$parser->parse("gene_ontology.obo"); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Scenario 4: via scripts |
43
|
|
|
|
|
|
|
my $cmd = "go2xml gene_ontology.obo | xsltproc my-transform.xsl -"; |
44
|
|
|
|
|
|
|
my $fh = FileHandle->new("$cmd |") || die("problem initiating $cmd"); |
45
|
|
|
|
|
|
|
while(<$fh>) { print $_ } |
46
|
|
|
|
|
|
|
$fh->close || die("problem running $cmd"); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 DESCRIPTION |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Module for parsing GO flat files; for examples of GO/OBO flatfile |
53
|
|
|
|
|
|
|
formats see: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
L |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
L |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
For a description of the various file formats, see: |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
L |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
L |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
This module will generate XML events from a correctly formatted GO/OBO |
66
|
|
|
|
|
|
|
file |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head1 SEE ALSO |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
This module is a part of go-dev, see: |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
L<http://www.godatabase.org/dev> |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
for more details |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 PUBLIC METHODS |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 new |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Title : new |
81
|
|
|
|
|
|
|
Usage : my $p = GO::Parser->new({format=>'obo_xml',handler=>'obj'}); |
82
|
|
|
|
|
|
|
$p->parse("go.obo-xml"); |
83
|
|
|
|
|
|
|
my $g = $p->handler->graph; |
84
|
|
|
|
|
|
|
Synonyms: |
85
|
|
|
|
|
|
|
Function: creates a parser object |
86
|
|
|
|
|
|
|
Example : |
87
|
|
|
|
|
|
|
Returns : GO::Parser |
88
|
|
|
|
|
|
|
Args : a hashref of arguments: |
89
|
|
|
|
|
|
|
format: a format for which a parser exists |
90
|
|
|
|
|
|
|
handler: a format for which a perl handler exists |
91
|
|
|
|
|
|
|
use_cache: (boolean) see caching below |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head2 parse |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Title : parse |
96
|
|
|
|
|
|
|
Usage : $p->parse($file); |
97
|
|
|
|
|
|
|
Synonyms: |
98
|
|
|
|
|
|
|
Function: parses a file |
99
|
|
|
|
|
|
|
Example : |
100
|
|
|
|
|
|
|
Returns : |
101
|
|
|
|
|
|
|
Args : str filename |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 handler |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Title : handler |
106
|
|
|
|
|
|
|
Usage : my $handler = $p->handler; |
107
|
|
|
|
|
|
|
Synonyms: |
108
|
|
|
|
|
|
|
Function: gets/sets a GO::Handler object |
109
|
|
|
|
|
|
|
Example : |
110
|
|
|
|
|
|
|
Returns : L |
111
|
|
|
|
|
|
|
Args : L |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 FORMATS |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
This module is a front end wrapper for a number of different GO/OBO |
116
|
|
|
|
|
|
|
formats - see the relevant module documentation below for details. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The full list of parsers can be found in the go-perl/GO/Parsers/ |
119
|
|
|
|
|
|
|
directory |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=over |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item obo_text |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Files with suffix ".obo" |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
This is a new file format replacement for the existing GO flat file |
128
|
|
|
|
|
|
|
formats. It handles ontologies, definitions and xrefs (but not |
129
|
|
|
|
|
|
|
associations) |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item go_ont |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Files with suffix ".ontology" |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
These store the ontology DAGs |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=item go_def |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
Files with suffix ".defs" |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item go_xref |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
External database references for GO terms |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Files with suffix "2go" (eg ec2go, metacyc2go) |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item go_assoc |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Annotations of genes or gene products using GO |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Files with prefix "gene-association." |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item obo_xml |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Files with suffix ".obo.xml" or ".obo-xml" |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
This is the XML version of the OBO flat file format above |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
See L |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item obj_yaml |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
A YAML dump of the perl L<GO::Model::Graph> object. You need L<YAML> |
164
|
|
|
|
|
|
|
from CPAN for this to work |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item obj_storable |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
A dump of the perl L<GO::Model::Graph> object. You need L<Storable> |
169
|
|
|
|
|
|
|
from CPAN for this to work. This is intended to cache objects on the |
170
|
|
|
|
|
|
|
filesystem, for fast access. The obj_storable representation may not |
171
|
|
|
|
|
|
|
be portable |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 PARSING ARCHITECTURE |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Each parser fires XML B<events>. The XML events are known as |
176
|
|
|
|
|
|
|
B<Obo-XML>. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
These XML events can be B<caught> by a handler written in perl, or |
179
|
|
|
|
|
|
|
they can be caught by an XML parser written in some other language, or |
180
|
|
|
|
|
|
|
by using XSL stylesheets. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
go-dev comes with a number of stylesheets in the |
183
|
|
|
|
|
|
|
go-dev/xml/xsl |
184
|
|
|
|
|
|
|
directory |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Anything that catches these XML events is known as a B<handler> |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
go-perl comes with some standard perl XML handlers, in addition to |
189
|
|
|
|
|
|
|
some standard XSL stylesheets. These can be found in the |
190
|
|
|
|
|
|
|
B directory |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
If you are interested in getting perl B<objects> from files then you |
193
|
|
|
|
|
|
|
will want the B<obj> handler, which gives back L<GO::Model::Graph> |
194
|
|
|
|
|
|
|
objects |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
The parsing architecture gives you the option of using the go-perl |
197
|
|
|
|
|
|
|
object model, or just parsing the XML events directly |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
If you are using the go-db-perl library, the load-go-into-db.pl script |
200
|
|
|
|
|
|
|
will perform the following processes when loading files into the |
201
|
|
|
|
|
|
|
database |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=over |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=item Obo-XML events fired using GO::Parser::* classes |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item Obo-XML transformed into godb xml using oboxml_to_godb_prestore.xsl |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item godb_prestore.xml stored in database using generic loader |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=back |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=head2 Obo-XML |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
The Obo-XML format DTD is stored in the go-dev/xml/dtd directory |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 HOW IT WORKS |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Currently the various parsers and perl event handlers use the B<Data::Stag> |
220
|
|
|
|
|
|
|
module for this - see L<Data::Stag> for more details, or |
221
|
|
|
|
|
|
|
http://stag.sourceforge.net |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 NESTED EVENTS |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
nested events can be thought of as xml, without attributes; nested |
226
|
|
|
|
|
|
|
events can easily be turned into xml |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
events have a start, a body and an end |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
event handlers can *catch* these events and do something with them. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
an object handler can turn the events into objects, centred around the |
233
|
|
|
|
|
|
|
GO::Model::Graph object; see GO::Handlers::obj |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
other handlers can catch the events and convert them into other |
236
|
|
|
|
|
|
|
formats, eg OWL or OBO |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Or you can bypass the handler and get output as an XML stream - to do |
239
|
|
|
|
|
|
|
this, just run the go2xml script |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
a database loading event handler can catch the events and turn them |
242
|
|
|
|
|
|
|
into SQL statements, loading a MySQL or postgres database (see the |
243
|
|
|
|
|
|
|
go-db-perl library) |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
the advantage of an event based parsing architecture is that it is |
246
|
|
|
|
|
|
|
easy to build lightweight parsers, and heavy weight object models can |
247
|
|
|
|
|
|
|
be bypassed if preferred. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 EXAMPLES |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
To see examples of the events generated by the GO::Parser class, run |
252
|
|
|
|
|
|
|
the script go2xml; for example |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
go2xml function.ontology |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
on any GO-formatted flatfile |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
This also works on OBO-formatted files: |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
go2xml gene_ontology.obo |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
You can also use the script "stag-parse.pl" which comes with the |
263
|
|
|
|
|
|
|
L<Data::Stag> distribution. For example |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
stag-parse.pl -p GO::Parsers::go_assoc_parser gene-association.fb |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 XSLT HANDLERS |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
The full list can be found in the go-dev/xml/xsl directory |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head2 PERL HANDLERS |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
see GO::Handlers::* for all the different handlers possible; |
274
|
|
|
|
|
|
|
more can be added dynamically. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
you can either create the handler object yourself, and pass it as an argument, |
277
|
|
|
|
|
|
|
e.g. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
my $apph = new GO::AppHandle(-db=>"go"); |
280
|
|
|
|
|
|
|
my $handler = new GO::Handlers::godb({apph=>$apph}); |
281
|
|
|
|
|
|
|
my $parser = new GO::Parser({handler=>$handler}); |
282
|
|
|
|
|
|
|
$parser->parse(@files); |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
or you can use one of the registered handlers: |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my $parser = new GO::Parser({handler=>'db', |
287
|
|
|
|
|
|
|
handler_args=>{apph=>$apph}}); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
or you can just do things from the command line |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
go2fmt.pl -w oboxml function.ontology |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
the registered perl handlers are as follows: |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=over |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item obo_xml |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
writes out OBO-XML (which is basically a straightforward conversion of |
301
|
|
|
|
|
|
|
the event stream into XML) |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item obo_text |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=item go_ont |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
legacy GO-ontology file format |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item go_xref |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
GO xref file, for linking GO terms to terms and dbxrefs in other ontologies |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=item go_defs |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
legacy GO-definitions file format |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=item go_assoc |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
GO association file format |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=item rdf |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
GO XML-RDF file format |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=item owl |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
OWL format (default: OWL-DL) |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
OWL is a W3C standard format for ontologies |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
You will need the XSL files from the full go-dev distribution to run |
332
|
|
|
|
|
|
|
this; see the XML section in L |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=item prolog |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
prolog facts - you will need a prolog compiler/interpreter to use |
337
|
|
|
|
|
|
|
these. You can reason over these facts using Obol or the forthcoming |
338
|
|
|
|
|
|
|
Bio-LP project |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item sxpr |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
lisp style S-Expressions, conforming to the OBO-XML schema; you will |
343
|
|
|
|
|
|
|
need lisp to make full use of these. you can also do some nice stuff |
344
|
|
|
|
|
|
|
just within emacs (use lisp-mode and load an sxpr file into your |
345
|
|
|
|
|
|
|
buffer) |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item godb |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
this is actually part of the go-db-perl library, not the go-perl library |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
catches events and loads them into a database conforming to the GO |
352
|
|
|
|
|
|
|
database schema; see the directory go-dev/sql, as part of the whole |
353
|
|
|
|
|
|
|
go-dev distribution; or www.godatabase.org/dev/database |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=item obj_yaml |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
A YAML dump of the perl L<GO::Model::Graph> object. You need L<YAML> |
358
|
|
|
|
|
|
|
from CPAN for this to work |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item obj_storable |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
A dump of the perl L<GO::Model::Graph> object. You need L<Storable> |
363
|
|
|
|
|
|
|
from CPAN for this to work. This is intended to cache objects on the |
364
|
|
|
|
|
|
|
filesystem, for fast access. The obj_storable representation may not |
365
|
|
|
|
|
|
|
be portable |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=back |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 EXAMPLES OF DATATYPE TEXT FORMATS |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=head2 go_ont format |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
eg format: go_ont for storing graphs and metadata; for example: |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
!version: $Revision: 1.15 $ |
376
|
|
|
|
|
|
|
!date: $Date: 2006/04/20 22:48:23 $ |
377
|
|
|
|
|
|
|
!editors: Michael Ashburner (FlyBase), Midori Harris (SGD), Judy Blake (MGD) |
378
|
|
|
|
|
|
|
$Gene_Ontology ; GO:0003673 |
379
|
|
|
|
|
|
|
$cellular_component ; GO:0005575 |
380
|
|
|
|
|
|
|
%extracellular ; GO:0005576 |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
See GO::Parsers::go_ont_parser for more details |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
this is the file above, parsed with events turned directly into OBO-XML: |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
file |
393
|
|
|
|
|
|
|
z.ontology |
394
|
|
|
|
|
|
|
1075164285 |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
GO:0003673 |
398
|
|
|
|
|
|
|
Gene_Ontology |
399
|
|
|
|
|
|
|
root |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
GO:0005575 |
403
|
|
|
|
|
|
|
cellular_component |
404
|
|
|
|
|
|
|
root |
405
|
|
|
|
|
|
|
GO:0003673 |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
GO:0005576 |
409
|
|
|
|
|
|
|
extracellular |
410
|
|
|
|
|
|
|
root |
411
|
|
|
|
|
|
|
GO:0005575 |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
GO:0005577 |
415
|
|
|
|
|
|
|
fibrinogen |
416
|
|
|
|
|
|
|
root |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
part_of |
419
|
|
|
|
|
|
|
GO:0005576 |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
GO:0005972 |
424
|
|
|
|
|
|
|
fibrinogen alpha chain |
425
|
|
|
|
|
|
|
root |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
part_of |
428
|
|
|
|
|
|
|
GO:0005577 |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
GO:0005973 |
433
|
|
|
|
|
|
|
fibrinogen beta chain |
434
|
|
|
|
|
|
|
root |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
part_of |
437
|
|
|
|
|
|
|
GO:0005577 |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=head2 go_def format |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
eg format: go_defs for storing definitions: |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
!Gene Ontology definitions |
447
|
|
|
|
|
|
|
! |
448
|
|
|
|
|
|
|
term: 'de novo' protein folding |
449
|
|
|
|
|
|
|
goid: GO:0006458 |
450
|
|
|
|
|
|
|
definition: Processes that assist the folding of a nascent peptide chain into its correct tertiary structure. |
451
|
|
|
|
|
|
|
definition_reference: Sanger:mb |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
See GO::Parsers::go_def_parser for more details |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 go_xref format |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
eg format: go_xrefs for storing links between GO IDs and IDs for terms |
458
|
|
|
|
|
|
|
in other DBs: |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
EC:1.-.-.- > GO:oxidoreductase ; GO:0016491 |
461
|
|
|
|
|
|
|
EC:1.1.-.- > GO:1-phenylethanol dehydrogenase ; GO:0018449 |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
See GO::Parsers::go_xref_parser for more details |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head2 go_assoc format |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
eg format: go-assocs for storing gene-associations: |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
SGD S0004660 AAC1 GO:0005743 SGD:12031|PMID:2167309 TAS C ADP/ATP translocator YMR056C gene taxon:4932 20010118 |
470
|
|
|
|
|
|
|
SGD S0004660 AAC1 GO:0006854 SGD:12031|PMID:2167309 IDA P ADP/ATP translocator YMR056C gene taxon:4932 20010118 |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
See GO::Parsers::go_assoc_parser for more details |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head2 obo_text format |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
L |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
package GO::Parser; |
481
|
|
|
|
|
|
|
|
482
|
23
|
|
|
23
|
|
342399
|
use Exporter; |
|
23
|
|
|
|
|
64
|
|
|
23
|
|
|
|
|
1219
|
|
483
|
|
|
|
|
|
|
|
484
|
23
|
|
|
23
|
|
125
|
use Carp; |
|
23
|
|
|
|
|
50
|
|
|
23
|
|
|
|
|
1520
|
|
485
|
23
|
|
|
23
|
|
17075
|
use GO::Model::Term; |
|
23
|
|
|
|
|
77
|
|
|
23
|
|
|
|
|
1746
|
|
486
|
23
|
|
|
23
|
|
45884
|
use FileHandle; |
|
23
|
|
|
|
|
385792
|
|
|
23
|
|
|
|
|
166
|
|
487
|
23
|
|
|
23
|
|
11985
|
use strict qw(subs vars refs); |
|
23
|
|
|
|
|
57
|
|
|
23
|
|
|
|
|
893
|
|
488
|
23
|
|
|
23
|
|
133
|
use base qw(GO::Model::Root); |
|
23
|
|
|
|
|
47
|
|
|
23
|
|
|
|
|
28120
|
|
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Exceptions |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# Constructor |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head2 new |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
Usage - my $parser = GO::Parser->new() |
499
|
|
|
|
|
|
|
Returns - GO::Parser |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
creates a new parser |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=cut |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
sub new { |
506
|
20
|
|
|
20
|
1
|
2404
|
my $proto = shift; |
507
|
20
|
|
33
|
|
|
166
|
my $class = ref($proto) || $proto;; |
508
|
20
|
|
|
|
|
60
|
my $self = {}; |
509
|
20
|
|
|
|
|
59
|
bless $self, $class; |
510
|
|
|
|
|
|
|
|
511
|
20
|
|
100
|
|
|
575
|
my $init_h = $_[0] || {}; |
512
|
20
|
50
|
|
|
|
130
|
if (!ref($init_h)) { |
513
|
0
|
|
|
|
|
0
|
$init_h = {@_}; |
514
|
|
|
|
|
|
|
} |
515
|
20
|
|
100
|
|
|
175
|
my $fmt = $init_h->{format} || $init_h->{fmt} || ''; |
516
|
20
|
|
|
|
|
54
|
my $use_cache = $init_h->{use_cache}; |
517
|
20
|
50
|
|
|
|
119
|
$fmt = lc($fmt) unless $fmt =~ /::/; |
518
|
|
|
|
|
|
|
# $fmt = 'gotext' unless $fmt; |
519
|
20
|
100
|
|
|
|
73
|
if (!$fmt) { |
520
|
|
|
|
|
|
|
# this parser guesses/defers on what type it is parsing |
521
|
6
|
|
|
|
|
14
|
$fmt = "unknown_format"; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
20
|
|
|
|
|
99
|
my $p = $self->get_parser_impl($fmt); |
525
|
20
|
50
|
|
|
|
99
|
if ($init_h) { |
526
|
20
|
|
|
|
|
97
|
map {$p->{$_} = $init_h->{$_}} keys %$init_h; |
|
30
|
|
|
|
|
374
|
|
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
20
|
|
100
|
|
|
126
|
my $handler = $init_h->{handler} || "base"; |
530
|
20
|
50
|
|
|
|
183
|
if (UNIVERSAL::isa($handler, "GO::AppHandle")) { |
531
|
0
|
|
|
|
|
0
|
require "GO/Handlers/DbStoreHandler.pm"; |
532
|
0
|
|
|
|
|
0
|
$handler = GO::Handlers::DbStoreHandler->new({apph=>$handler}); |
533
|
|
|
|
|
|
|
} |
534
|
20
|
100
|
|
|
|
81
|
unless (ref($handler)) { |
535
|
19
|
|
|
|
|
47
|
my $hclass = $handler; |
536
|
19
|
50
|
|
|
|
262
|
if ($handler !~ /::/) { |
537
|
19
|
50
|
|
|
|
97
|
if ($handler =~ /^::/) { |
538
|
0
|
|
|
|
|
0
|
$hclass = $handler; |
539
|
0
|
|
|
|
|
0
|
$hclass =~ s/^:://; |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
else { |
542
|
19
|
|
|
|
|
74
|
$hclass = "GO::Handlers::$handler"; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
} |
545
|
19
|
|
|
|
|
40
|
eval { |
546
|
19
|
|
|
|
|
162
|
$class->load_module($hclass); |
547
|
|
|
|
|
|
|
}; |
548
|
19
|
50
|
|
|
|
108
|
if ($@) { |
549
|
0
|
|
|
|
|
0
|
print STDERR $@, "\n\n\n"; |
550
|
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
0
|
$self->throw("No such handler: $handler"); |
552
|
|
|
|
|
|
|
} |
553
|
19
|
|
|
|
|
404
|
$handler = $hclass->new($init_h->{handler_args}); |
554
|
|
|
|
|
|
|
} |
555
|
20
|
|
|
|
|
403
|
$p->handler($handler); |
556
|
20
|
|
|
|
|
871
|
$p->use_cache($use_cache); |
557
|
|
|
|
|
|
|
|
558
|
20
|
|
|
|
|
54
|
delete $init_h->{parser}; |
559
|
20
|
|
|
|
|
55
|
delete $init_h->{handler}; |
560
|
|
|
|
|
|
|
|
561
|
20
|
|
|
|
|
262
|
return $p; |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
sub get_parser_impl { |
566
|
26
|
|
|
26
|
0
|
56
|
my $self = shift; |
567
|
26
|
|
|
|
|
52
|
my $fmt = shift; |
568
|
26
|
|
|
|
|
44
|
my $mod; |
569
|
26
|
50
|
|
|
|
151
|
if ($fmt =~ /::/) { |
570
|
0
|
|
|
|
|
0
|
$mod = $fmt; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
else { |
573
|
26
|
|
|
|
|
119
|
$mod = "GO::Parsers::$fmt"."_parser"; |
574
|
|
|
|
|
|
|
} |
575
|
26
|
|
|
|
|
114
|
$self->load_module($mod); |
576
|
26
|
|
|
|
|
498
|
my $p = $mod->new(); |
577
|
26
|
|
|
|
|
160
|
return $p; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=head2 create_handler |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Usage - my $handler = GO::Parser->create_handler('obj'); |
584
|
|
|
|
|
|
|
Returns - L |
585
|
|
|
|
|
|
|
Args - handler type [str] |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=cut |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub create_handler { |
590
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
591
|
1
|
|
50
|
|
|
6
|
my $type = shift || 'obj'; |
592
|
1
|
|
|
|
|
6
|
my $p = $self->new({handler=>$type}); |
593
|
1
|
|
|
|
|
14
|
return $p->handler; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
sub load_module { |
597
|
|
|
|
|
|
|
|
598
|
45
|
|
|
45
|
0
|
97
|
my $self = shift; |
599
|
45
|
|
|
|
|
91
|
my $classname = shift; |
600
|
45
|
|
|
|
|
87
|
my $mod = $classname; |
601
|
45
|
|
|
|
|
257
|
$mod =~ s/::/\//g; |
602
|
|
|
|
|
|
|
|
603
|
45
|
100
|
|
|
|
241
|
if ($main::{"_<$mod.pm"}) { |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
else { |
606
|
43
|
|
|
|
|
71
|
eval { |
607
|
43
|
|
|
|
|
25644
|
require "$mod.pm"; |
608
|
|
|
|
|
|
|
}; |
609
|
43
|
50
|
|
|
|
416
|
if ($@) { |
610
|
0
|
|
|
|
|
|
$self->throw("No such module: $classname;;\n$@"); |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
1; |