| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# $Id: obo_text_parser.pm,v 1.52 2010/03/11 22:40:27 cmungall Exp $ |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# see also - http://www.geneontology.org |
|
5
|
|
|
|
|
|
|
# - http://www.godatabase.org/dev |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package GO::Parsers::obo_text_parser; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
GO::Parsers::obo_text_parser - OBO Flat file parser object |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
do not use this class directly; use GO::Parser |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
|
25
|
|
|
|
|
|
|
|
|
26
|
10
|
|
|
10
|
|
66
|
use Exporter; |
|
|
10
|
|
|
|
|
22
|
|
|
|
10
|
|
|
|
|
520
|
|
|
27
|
10
|
|
|
10
|
|
12029
|
use Text::Balanced qw(extract_quotelike extract_bracketed); |
|
|
10
|
|
|
|
|
423477
|
|
|
|
10
|
|
|
|
|
1229
|
|
|
28
|
10
|
|
|
10
|
|
121
|
use base qw(GO::Parsers::base_parser); |
|
|
10
|
|
|
|
|
20
|
|
|
|
10
|
|
|
|
|
4351
|
|
|
29
|
10
|
|
|
10
|
|
5319
|
use GO::Parsers::ParserEventNames; |
|
|
10
|
|
|
|
|
27
|
|
|
|
10
|
|
|
|
|
2815
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
10
|
|
|
10
|
|
62
|
use Carp; |
|
|
10
|
|
|
|
|
15
|
|
|
|
10
|
|
|
|
|
586
|
|
|
32
|
10
|
|
|
10
|
|
57
|
use FileHandle; |
|
|
10
|
|
|
|
|
19
|
|
|
|
10
|
|
|
|
|
90
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
10
|
|
|
10
|
|
5192
|
use strict qw(vars refs); |
|
|
10
|
|
|
|
|
18
|
|
|
|
10
|
|
|
|
|
119395
|
|
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub dtd { |
|
37
|
0
|
|
|
0
|
0
|
0
|
'obo-parser-events.dtd'; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub parse_fh { |
|
41
|
10
|
|
|
10
|
0
|
2157
|
my ($self, $fh) = @_; |
|
42
|
|
|
|
|
|
|
|
|
43
|
10
|
|
|
|
|
110
|
$self->start_event(OBO); |
|
44
|
10
|
|
|
|
|
739
|
$self->parse_fh_inner($fh); |
|
45
|
|
|
|
|
|
|
|
|
46
|
10
|
|
|
|
|
160
|
$self->pop_stack_to_depth(0); |
|
47
|
10
|
|
|
|
|
674
|
$self->parsed_ontology(1); |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub parse_fh_inner { |
|
52
|
|
|
|
|
|
|
|
|
53
|
10
|
|
|
10
|
0
|
28
|
my ($self, $fh) = @_; |
|
54
|
10
|
|
|
|
|
40
|
my $file = $self->file; |
|
55
|
10
|
|
|
|
|
121
|
my $litemode = $self->litemode; |
|
56
|
10
|
|
|
|
|
25
|
my $is_go; |
|
57
|
10
|
|
|
|
|
18
|
local($_); # latest perl is more strict about modification of $_ |
|
58
|
|
|
|
|
|
|
|
|
59
|
10
|
|
|
|
|
131
|
$self->fire_source_event($file); |
|
60
|
10
|
|
|
|
|
153
|
$self->start_event(HEADER); |
|
61
|
10
|
|
|
|
|
297
|
my $stanza_count; |
|
62
|
10
|
|
|
|
|
26
|
my $in_hdr = 1; |
|
63
|
10
|
|
|
|
|
25
|
my $is_root = 1; # default |
|
64
|
10
|
|
|
|
|
20
|
my $namespace_set; |
|
65
|
|
|
|
|
|
|
my $id; |
|
66
|
10
|
|
|
|
|
82
|
my $namespace = $self->force_namespace; # default |
|
67
|
10
|
|
|
|
|
58
|
my $force_namespace = $self->force_namespace; |
|
68
|
10
|
|
|
|
|
79
|
my $usc = $self->replace_underscore; |
|
69
|
10
|
|
|
|
|
29
|
my %id_remap_h = (); |
|
70
|
10
|
|
|
|
|
22
|
my @imports = (); |
|
71
|
|
|
|
|
|
|
|
|
72
|
10
|
|
|
|
|
19
|
my $is_utf8; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# temporary hack... |
|
75
|
10
|
50
|
|
|
|
76
|
if ($ENV{OBO_IDMAP}) { |
|
76
|
0
|
|
|
|
|
0
|
my @parts = split(/\;/,$ENV{OBO_IDMAP}); |
|
77
|
0
|
|
|
|
|
0
|
foreach (@parts) { |
|
78
|
0
|
0
|
|
|
|
0
|
if (/(.*)=(.*)/) { |
|
79
|
0
|
|
|
|
|
0
|
$id_remap_h{$1} = $2; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
10
|
|
|
|
|
23
|
my $default_id_prefix; |
|
85
|
|
|
|
|
|
|
|
|
86
|
10
|
|
|
|
|
4014
|
while(<$fh>) { |
|
87
|
3715
|
|
|
|
|
5365
|
chomp; |
|
88
|
|
|
|
|
|
|
|
|
89
|
3715
|
50
|
|
|
|
7450
|
if (/^encoding:\s*utf/) { |
|
90
|
0
|
|
|
|
|
0
|
$is_utf8 = 1; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
3715
|
50
|
|
|
|
7091
|
if (!$is_utf8) { |
|
94
|
3715
|
|
|
|
|
4720
|
tr [\200-\377] |
|
95
|
|
|
|
|
|
|
[\000-\177]; # see 'man perlop', section on tr/ |
|
96
|
|
|
|
|
|
|
# weird ascii characters should be excluded |
|
97
|
3715
|
|
|
|
|
4618
|
tr/\0-\10//d; # remove weird characters; ascii 0-8 |
|
98
|
|
|
|
|
|
|
# preserve \11 (9 - tab) and \12 (10-linefeed) |
|
99
|
3715
|
|
|
|
|
4267
|
tr/\13\14//d; # remove weird characters; 11,12 |
|
100
|
|
|
|
|
|
|
# preserve \15 (13 - carriage return) |
|
101
|
3715
|
|
|
|
|
4399
|
tr/\16-\37//d; # remove 14-31 (all rest before space) |
|
102
|
3715
|
|
|
|
|
4640
|
tr/\177//d; # remove DEL character |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
3715
|
|
|
|
|
4712
|
s/^\!.*//; |
|
106
|
3715
|
|
|
|
|
4661
|
s/[^\\]\!.*//; |
|
107
|
|
|
|
|
|
|
#s/[^\\]\#.*//; |
|
108
|
3715
|
|
|
|
|
6684
|
s/^\s+//; |
|
109
|
3715
|
|
|
|
|
7602
|
s/\s+$//; |
|
110
|
3715
|
100
|
|
|
|
7641
|
next unless $_; |
|
111
|
3213
|
0
|
33
|
|
|
6788
|
next if ($litemode && $_ !~ /^(\[|id:|name:|is_a:|relationship:|namespace:|is_obsolete:)/ && !$in_hdr); |
|
|
|
|
33
|
|
|
|
|
|
112
|
3213
|
100
|
|
|
|
23298
|
if (/^\[(\w+)\]\s*(.*)/) { # new stanza |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# we are at the beginning of a new stanza |
|
115
|
|
|
|
|
|
|
# reset everything and make sure everything from |
|
116
|
|
|
|
|
|
|
# previous stanza is exported |
|
117
|
|
|
|
|
|
|
|
|
118
|
494
|
|
|
|
|
1251
|
my $stanza = lc($1); |
|
119
|
494
|
|
|
|
|
1091
|
my $rest = $2; |
|
120
|
494
|
100
|
|
|
|
939
|
if ($in_hdr) { |
|
121
|
10
|
|
|
|
|
23
|
$in_hdr = 0; |
|
122
|
10
|
|
|
|
|
195
|
$self->end_event(HEADER); |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
else { |
|
125
|
484
|
100
|
|
|
|
1225
|
if (!$namespace_set) { |
|
126
|
149
|
50
|
|
|
|
302
|
if (!$namespace) { |
|
127
|
0
|
0
|
|
|
|
0
|
if ($stanza ne 'instance') { |
|
128
|
|
|
|
|
|
|
#$self->parse_err("missing namespace for ID: $id"); |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
else { |
|
132
|
149
|
|
|
|
|
519
|
$self->event(NAMESPACE, $namespace); |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} |
|
135
|
484
|
100
|
|
|
|
12533
|
$self->event(IS_ROOT,1) if $is_root; |
|
136
|
484
|
|
|
|
|
1850
|
$is_root = 1; # assume root by default; override if parents found |
|
137
|
484
|
|
|
|
|
702
|
$namespace_set = 0; |
|
138
|
484
|
|
|
|
|
1674
|
$self->end_event; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
494
|
100
|
|
|
|
18205
|
$is_root = 0 unless $stanza eq 'term'; |
|
141
|
494
|
|
|
|
|
1510
|
$self->start_event($stanza); |
|
142
|
494
|
|
|
|
|
13823
|
$id = undef; |
|
143
|
494
|
|
|
|
|
2876
|
$stanza_count++; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
elsif ($in_hdr) { |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# we are in the header section |
|
148
|
|
|
|
|
|
|
|
|
149
|
94
|
50
|
|
|
|
424
|
if (/^([\w\-]+)\:\s*(.*)/) { # tag-val pair |
|
150
|
94
|
|
|
|
|
472
|
my ($tag, $val) = ($1,$2); |
|
151
|
94
|
100
|
|
|
|
227
|
if ($tag eq 'subsetdef') { |
|
152
|
12
|
50
|
|
|
|
40
|
if ($val =~ /(\S+)\s+(.*)/) { |
|
153
|
12
|
|
|
|
|
20
|
my $subset_id = $1; |
|
154
|
12
|
|
|
|
|
22
|
$val = $2; |
|
155
|
12
|
|
|
|
|
26
|
my ($subset_name, $parts) = |
|
156
|
|
|
|
|
|
|
extract_qstr($val); |
|
157
|
0
|
|
|
|
|
0
|
$val = |
|
158
|
|
|
|
|
|
|
[[ID,$subset_id], |
|
159
|
|
|
|
|
|
|
[NAME,$subset_name], |
|
160
|
12
|
|
|
|
|
63
|
map {dbxref($_)} @$parts]; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
else { |
|
163
|
0
|
|
|
|
|
0
|
$self->parse_err("subsetdef: expect ID \"NAME\", got: $val"); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
94
|
100
|
|
|
|
185
|
if ($tag eq 'synonymtypedef') { |
|
167
|
18
|
50
|
|
|
|
150
|
if ($val =~ /(\S+)\s+\"(.*)\"\s*(.*)/) { |
|
168
|
18
|
|
|
|
|
34
|
my $stname = $1; |
|
169
|
18
|
|
|
|
|
64
|
my $stdef = $2; |
|
170
|
18
|
|
|
|
|
35
|
my $scope = $3; |
|
171
|
18
|
50
|
|
|
|
108
|
$val = |
|
172
|
|
|
|
|
|
|
[[ID,$stname], |
|
173
|
|
|
|
|
|
|
[NAME,$stdef], |
|
174
|
|
|
|
|
|
|
($scope ? ['scope', $scope] : ())]; |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
else { |
|
178
|
0
|
|
|
|
|
0
|
$self->parse_err("synonymtypedef: expect ID \"NAME\", got: $val"); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
} |
|
181
|
94
|
50
|
|
|
|
190
|
if ($tag eq 'idspace') { |
|
182
|
0
|
|
|
|
|
0
|
my ($idspace,$global,@rest) = split(' ',$val); |
|
183
|
0
|
0
|
|
|
|
0
|
if (!$global) { |
|
184
|
0
|
|
|
|
|
0
|
$self->parse_err("idspace requires two columns"); |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
$val = |
|
187
|
0
|
0
|
|
|
|
0
|
[['local',$idspace], |
|
188
|
|
|
|
|
|
|
['global',$global], |
|
189
|
|
|
|
|
|
|
(@rest ? [COMMENT,join(' ',@rest)] : ()), |
|
190
|
|
|
|
|
|
|
]; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
94
|
50
|
|
|
|
169
|
if ($tag eq 'local-id-mapping') { |
|
193
|
0
|
0
|
|
|
|
0
|
if ($val =~ /(\S+)\s+(.*)/) { |
|
194
|
|
|
|
|
|
|
# with a local ID mapping we delay binding |
|
195
|
0
|
|
|
|
|
0
|
$val = |
|
196
|
|
|
|
|
|
|
[['local',$1], |
|
197
|
|
|
|
|
|
|
['to',$2]]; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
else { |
|
200
|
0
|
|
|
|
|
0
|
$self->parse_err("id-mapping requires two columns"); |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
} |
|
203
|
94
|
50
|
|
|
|
153
|
if ($tag eq 'import') { |
|
204
|
0
|
0
|
|
|
|
0
|
if ($ENV{OBO_FOLLOW_IMPORTS}) { |
|
205
|
0
|
|
|
|
|
0
|
push(@imports, $val); |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
else { |
|
208
|
|
|
|
|
|
|
# handled below |
|
209
|
|
|
|
|
|
|
#$self->event(import=>$val); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
94
|
|
|
|
|
296
|
$self->event($tag=>$val); |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# post-processing |
|
216
|
94
|
100
|
|
|
|
14606
|
if ($tag eq 'default-namespace') { |
|
217
|
10
|
50
|
|
|
|
47
|
$namespace = $val |
|
218
|
|
|
|
|
|
|
unless $namespace; |
|
219
|
|
|
|
|
|
|
} |
|
220
|
94
|
100
|
|
|
|
216
|
if ($tag eq 'id-mapping') { |
|
221
|
13
|
50
|
|
|
|
62
|
if ($val =~ /(\S+)\s+(.*)/) { |
|
222
|
|
|
|
|
|
|
# bind at parse time |
|
223
|
13
|
50
|
|
|
|
46
|
if ($id_remap_h{$1}) { |
|
224
|
0
|
|
|
|
|
0
|
$self->parse_err("remapping $1 to $2"); |
|
225
|
|
|
|
|
|
|
} |
|
226
|
13
|
|
|
|
|
48
|
$id_remap_h{$1} = $2; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
else { |
|
229
|
0
|
|
|
|
|
0
|
$self->parse_err("id-mapping requires two columns"); |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
} |
|
232
|
94
|
50
|
|
|
|
600
|
if ($tag eq 'default-id-prefix') { |
|
233
|
0
|
|
|
|
|
0
|
$default_id_prefix = $val; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
else { |
|
237
|
0
|
|
|
|
|
0
|
$self->parse_err("illegal header entry: $_"); |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
} # END OF IN-HEADER |
|
240
|
|
|
|
|
|
|
elsif (/^([\w\-]+)\:\s*(.*)/) { # tag-val pair |
|
241
|
2625
|
|
|
|
|
7179
|
my ($tag, $val) = ($1,$2); |
|
242
|
2625
|
|
|
|
|
2758
|
my $qh; |
|
243
|
2625
|
|
|
|
|
4519
|
($val, $qh) = extract_quals($val); |
|
244
|
|
|
|
|
|
|
#$val =~ s/\\//g; |
|
245
|
2625
|
|
|
|
|
4659
|
my $val2 = $val; |
|
246
|
2625
|
|
|
|
|
3983
|
$val2 =~ s/\\,/,/g; |
|
247
|
2625
|
|
|
|
|
3433
|
$val2 =~ s/\\//g; |
|
248
|
2625
|
100
|
66
|
|
|
24969
|
if ($tag eq ID) { |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
249
|
494
|
100
|
|
|
|
1172
|
if ($id_remap_h{$val}) { |
|
250
|
6
|
|
|
|
|
14
|
$val = $id_remap_h{$val}; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
494
|
100
|
|
|
|
1446
|
if ($val !~ /:/) { |
|
253
|
13
|
50
|
|
|
|
42
|
if ($default_id_prefix) { |
|
254
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
|
255
|
|
|
|
|
|
|
} |
|
256
|
|
|
|
|
|
|
} |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
elsif ($tag eq NAME) { |
|
259
|
|
|
|
|
|
|
# replace underscore in name |
|
260
|
494
|
|
|
|
|
714
|
$val = $val2; |
|
261
|
494
|
50
|
|
|
|
956
|
if ($usc) { |
|
262
|
0
|
|
|
|
|
0
|
$val =~ s/_/$usc/g; |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
elsif ($tag eq RELATIONSHIP) { |
|
266
|
97
|
|
|
|
|
413
|
my ($type, @ids) = split(' ', $val2); |
|
267
|
97
|
|
|
|
|
158
|
my $id = shift @ids; |
|
268
|
97
|
100
|
|
|
|
256
|
if ($id_remap_h{$type}) { |
|
269
|
75
|
|
|
|
|
133
|
$type = $id_remap_h{$type}; |
|
270
|
|
|
|
|
|
|
} |
|
271
|
97
|
100
|
|
|
|
267
|
if ($type !~ /:/) { |
|
272
|
22
|
50
|
|
|
|
47
|
if ($default_id_prefix) { |
|
273
|
0
|
|
|
|
|
0
|
$type = "$default_id_prefix:$type"; |
|
274
|
|
|
|
|
|
|
} |
|
275
|
|
|
|
|
|
|
} |
|
276
|
97
|
|
|
|
|
385
|
$val = [[TYPE,$type],[TO,$id]]; |
|
277
|
97
|
|
|
|
|
238
|
push(@$val,map {['additional_argument',$_]} @ids); |
|
|
0
|
|
|
|
|
0
|
|
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
elsif ($tag eq INTERSECTION_OF || $tag eq UNION_OF) { |
|
280
|
4
|
|
|
|
|
13
|
my ($type, $id) = split(' ', $val2); |
|
281
|
4
|
100
|
|
|
|
15
|
if ($id_remap_h{$type}) { |
|
282
|
2
|
|
|
|
|
5
|
$type = $id_remap_h{$type}; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
4
|
50
|
|
|
|
15
|
if ($type !~ /:/) { |
|
285
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
|
286
|
0
|
|
|
|
|
0
|
$type = "$default_id_prefix:$type"; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
} |
|
289
|
4
|
100
|
|
|
|
10
|
if (defined $id) { |
|
290
|
2
|
|
|
|
|
11
|
$val = [[TYPE,$type],[TO,$id]]; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
else { |
|
293
|
2
|
|
|
|
|
5
|
$id = $type; |
|
294
|
2
|
|
|
|
|
9
|
$val = [[TO,$id]]; |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
} |
|
297
|
|
|
|
|
|
|
elsif ($tag eq INVERSE_OF || $tag eq TRANSITIVE_OVER || $tag eq IS_A) { |
|
298
|
476
|
100
|
|
|
|
1113
|
if ($id_remap_h{$val}) { |
|
299
|
3
|
|
|
|
|
7
|
$val = $id_remap_h{$val}; |
|
300
|
|
|
|
|
|
|
} |
|
301
|
476
|
100
|
|
|
|
1429
|
if ($val !~ /:/) { |
|
302
|
23
|
50
|
|
|
|
62
|
if ($default_id_prefix) { |
|
303
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
|
304
|
|
|
|
|
|
|
} |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
elsif ($tag eq DISJOINT_FROM) { |
|
308
|
2
|
50
|
|
|
|
6
|
if ($id_remap_h{$val}) { |
|
309
|
0
|
|
|
|
|
0
|
$val = $id_remap_h{$val}; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
2
|
50
|
|
|
|
30
|
if ($val !~ /:/) { |
|
312
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
|
313
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
|
314
|
|
|
|
|
|
|
} |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
elsif ($tag eq XREF) { |
|
318
|
118
|
|
|
|
|
412
|
$tag = XREF_ANALOG; |
|
319
|
118
|
|
|
|
|
323
|
my $dbxref = dbxref($val); |
|
320
|
118
|
|
|
|
|
348
|
$val = $dbxref->[1]; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
elsif ($tag eq XREF_ANALOG) { |
|
323
|
1
|
|
|
|
|
5
|
my $dbxref = dbxref($val); |
|
324
|
1
|
|
|
|
|
3
|
$val = $dbxref->[1]; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
elsif ($tag eq XREF_UNKNOWN) { |
|
327
|
0
|
|
|
|
|
0
|
my $dbxref = dbxref($val); |
|
328
|
0
|
|
|
|
|
0
|
$val = $dbxref->[1]; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
elsif ($tag eq PROPERTY_VALUE) { |
|
331
|
116
|
50
|
|
|
|
442
|
if ($val =~ /^(\S+)\s+(\".*)/) { |
|
332
|
|
|
|
|
|
|
# first form |
|
333
|
|
|
|
|
|
|
# property_value: relation "literal value" xsd:datatype |
|
334
|
0
|
|
|
|
|
0
|
my $type = $1; |
|
335
|
0
|
|
|
|
|
0
|
my $rest = $2; |
|
336
|
0
|
|
|
|
|
0
|
my ($to, $datatype) = extract_quotelike($rest); |
|
337
|
0
|
|
|
|
|
0
|
$to =~ s/^\"//; |
|
338
|
0
|
|
|
|
|
0
|
$to =~ s/\"$//; |
|
339
|
0
|
|
|
|
|
0
|
$datatype =~ s/^\s+//; |
|
340
|
0
|
|
|
|
|
0
|
$val = [[TYPE,$type], |
|
341
|
|
|
|
|
|
|
[VALUE,$to], |
|
342
|
|
|
|
|
|
|
[DATATYPE,$datatype]]; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
else { |
|
345
|
|
|
|
|
|
|
# second form |
|
346
|
|
|
|
|
|
|
# property_value: relation ToID |
|
347
|
116
|
|
|
|
|
336
|
my ($type,$to) = split(' ',$val); |
|
348
|
116
|
|
|
|
|
498
|
$val = [[TYPE,$type], |
|
349
|
|
|
|
|
|
|
[TO,$to]]; |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
elsif ($tag eq NAMESPACE) { |
|
353
|
338
|
50
|
|
|
|
898
|
if ($force_namespace) { |
|
354
|
|
|
|
|
|
|
# override whatever namespace was provided |
|
355
|
0
|
|
|
|
|
0
|
$val = $force_namespace; |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
else { |
|
358
|
|
|
|
|
|
|
# do nothing - we will export later |
|
359
|
|
|
|
|
|
|
} |
|
360
|
338
|
|
|
|
|
436
|
$namespace_set = $val; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
elsif ($tag eq DEF) { |
|
363
|
43
|
|
|
|
|
148
|
my ($defstr, $parts) = |
|
364
|
|
|
|
|
|
|
extract_qstr($val); |
|
365
|
53
|
|
|
|
|
125
|
$val = |
|
366
|
|
|
|
|
|
|
[[DEFSTR,$defstr], |
|
367
|
43
|
|
|
|
|
177
|
map {dbxref($_)} @$parts]; |
|
368
|
|
|
|
|
|
|
} |
|
369
|
|
|
|
|
|
|
elsif ($tag =~ /(\w*)synonym/) { |
|
370
|
380
|
|
100
|
|
|
1652
|
my $scope = $1 || ''; |
|
371
|
380
|
100
|
|
|
|
679
|
if ($scope) { |
|
372
|
1
|
|
|
|
|
2
|
$tag = SYNONYM; |
|
373
|
1
|
50
|
|
|
|
5
|
if ($scope =~ /(\w+)_$/) { |
|
374
|
1
|
|
|
|
|
3
|
$scope = $1; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
|
|
|
|
|
|
else { |
|
377
|
0
|
|
|
|
|
0
|
$self->parse_err("bad synonym type: $scope"); |
|
378
|
0
|
|
|
|
|
0
|
$scope = ''; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
} |
|
381
|
380
|
|
|
|
|
755
|
my ($syn, $parts, $extra_quals) = |
|
382
|
|
|
|
|
|
|
extract_qstr($val2); |
|
383
|
380
|
100
|
|
|
|
964
|
if (@$extra_quals) { |
|
384
|
297
|
|
|
|
|
494
|
$scope = shift @$extra_quals; |
|
385
|
297
|
|
|
|
|
519
|
$scope = lc($scope); |
|
386
|
297
|
100
|
|
|
|
993
|
$qh->{synonym_type} = shift @$extra_quals if @$extra_quals; |
|
387
|
|
|
|
|
|
|
} |
|
388
|
380
|
50
|
|
|
|
795
|
if ($qh->{scope}) { |
|
389
|
0
|
0
|
|
|
|
0
|
if ($scope) { |
|
390
|
0
|
0
|
|
|
|
0
|
if ($scope ne $qh->{scope}) { |
|
391
|
0
|
|
|
|
|
0
|
$self->parse_err("inconsistent scope: $scope/$qh->{scope}"); |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
else { |
|
394
|
0
|
|
|
|
|
0
|
$self->parse_err("redundant scope: $scope"); |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
} |
|
398
|
|
|
|
|
|
|
else { |
|
399
|
380
|
|
|
|
|
753
|
$qh->{scope} = $scope; |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
56
|
|
|
|
|
121
|
$val = |
|
403
|
|
|
|
|
|
|
[[SYNONYM_TEXT,$syn], |
|
404
|
380
|
|
|
|
|
1425
|
(map {dbxref($_)} @$parts)]; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
elsif ($tag =~ /formula/) { |
|
407
|
0
|
|
|
|
|
0
|
my ($formula, $parts, $extra_quals) = |
|
408
|
|
|
|
|
|
|
extract_qstr($val2); |
|
409
|
0
|
|
|
|
|
0
|
my $lang = 'CLIF'; |
|
410
|
0
|
0
|
|
|
|
0
|
if (@$extra_quals) { |
|
411
|
0
|
|
|
|
|
0
|
$lang = shift @$extra_quals; |
|
412
|
|
|
|
|
|
|
} |
|
413
|
0
|
|
|
|
|
0
|
$qh->{format} = $lang; |
|
414
|
|
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
0
|
$val = |
|
416
|
|
|
|
|
|
|
[['formula_text',$formula], |
|
417
|
0
|
|
|
|
|
0
|
(map {dbxref($_)} @$parts)]; |
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
elsif ($tag eq 'holds_temporally_between' || # experimental support for obof1.3 |
|
420
|
|
|
|
|
|
|
$tag eq 'holds_atemporally_between' || |
|
421
|
|
|
|
|
|
|
$tag eq 'holds_on_class_level_between') { |
|
422
|
0
|
|
|
|
|
0
|
my ($sub, $ob) = split(' ', $val2); |
|
423
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$sub}) { |
|
424
|
0
|
|
|
|
|
0
|
$sub = $id_remap_h{$sub}; |
|
425
|
|
|
|
|
|
|
} |
|
426
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$ob}) { |
|
427
|
0
|
|
|
|
|
0
|
$ob = $id_remap_h{$ob}; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
0
|
|
|
|
|
0
|
$val = [[subject=>$sub],[object=>$ob]]; |
|
430
|
|
|
|
|
|
|
} |
|
431
|
|
|
|
|
|
|
elsif ($tag eq 'holds_over_chain' || $tag eq 'equivalent_to_chain') { # obof1.3 |
|
432
|
0
|
|
|
|
|
0
|
my @rels = split(' ', $val2); |
|
433
|
0
|
|
|
|
|
0
|
@rels = map { |
|
434
|
0
|
|
|
|
|
0
|
my $rel = $_; |
|
435
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$_}) { |
|
436
|
0
|
|
|
|
|
0
|
$rel = $id_remap_h{$_} |
|
437
|
|
|
|
|
|
|
} |
|
438
|
0
|
0
|
|
|
|
0
|
if ($rel !~ /:/) { |
|
439
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
|
440
|
0
|
|
|
|
|
0
|
$rel = "$default_id_prefix:$rel"; |
|
441
|
|
|
|
|
|
|
} |
|
442
|
|
|
|
|
|
|
} |
|
443
|
0
|
|
|
|
|
0
|
$rel; |
|
444
|
|
|
|
|
|
|
} @rels; |
|
445
|
0
|
|
|
|
|
0
|
$val = [map {[relation=>$_]} @rels]; |
|
|
0
|
|
|
|
|
0
|
|
|
446
|
|
|
|
|
|
|
} |
|
447
|
|
|
|
|
|
|
elsif ($tag =~ /^expand/) { |
|
448
|
0
|
|
|
|
|
0
|
my ($template, $parts) = |
|
449
|
|
|
|
|
|
|
extract_qstr($val); |
|
450
|
0
|
|
|
|
|
0
|
$val = $template; |
|
451
|
|
|
|
|
|
|
} |
|
452
|
|
|
|
|
|
|
else { |
|
453
|
62
|
|
|
|
|
106
|
$val = $val2; |
|
454
|
|
|
|
|
|
|
# normal tag:val |
|
455
|
|
|
|
|
|
|
} |
|
456
|
2625
|
100
|
100
|
|
|
13862
|
if (!ref($val) && $val eq 'true') { |
|
457
|
29
|
|
|
|
|
35
|
$val = 1; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
2625
|
50
|
66
|
|
|
8914
|
if (!ref($val) && $val eq 'false') { |
|
460
|
0
|
|
|
|
|
0
|
$val = 0; |
|
461
|
|
|
|
|
|
|
} |
|
462
|
2625
|
100
|
|
|
|
4657
|
if (%$qh) { |
|
463
|
|
|
|
|
|
|
# note that if attributes are used for |
|
464
|
|
|
|
|
|
|
# terminal nodes then we effectively have |
|
465
|
|
|
|
|
|
|
# to 'push the node down' a level; |
|
466
|
|
|
|
|
|
|
# eg |
|
467
|
|
|
|
|
|
|
# x |
|
468
|
|
|
|
|
|
|
# ==> [is_a=>'x'] |
|
469
|
|
|
|
|
|
|
# x |
|
470
|
|
|
|
|
|
|
# ==> [is_a=>[[@=>[[t=>v]]],[.=>x]]] |
|
471
|
390
|
100
|
|
|
|
767
|
my $data = ref $val ? $val : [['.'=>$val]]; |
|
472
|
390
|
|
|
|
|
1049
|
my @quals = map {[$_=>$qh->{$_}]} keys %$qh; |
|
|
668
|
|
|
|
|
2432
|
|
|
473
|
390
|
|
|
|
|
2200
|
$self->event($tag=>[['@'=>[@quals]], |
|
474
|
|
|
|
|
|
|
@$data, |
|
475
|
|
|
|
|
|
|
]); |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
else { |
|
478
|
2235
|
|
|
|
|
6800
|
$self->event($tag=>$val); |
|
479
|
|
|
|
|
|
|
} |
|
480
|
|
|
|
|
|
|
|
|
481
|
2625
|
100
|
100
|
|
|
417414
|
if ($tag eq IS_A || $tag eq RELATIONSHIP) { |
|
482
|
553
|
|
|
|
|
797
|
$is_root = 0; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
2625
|
50
|
33
|
|
|
6598
|
if ($tag eq IS_OBSOLETE && $val) { |
|
485
|
0
|
|
|
|
|
0
|
$is_root = 0; |
|
486
|
|
|
|
|
|
|
} |
|
487
|
2625
|
100
|
|
|
|
4879
|
if ($tag eq ID) { |
|
488
|
494
|
|
|
|
|
1512
|
$id = $val; |
|
489
|
|
|
|
|
|
|
} |
|
490
|
2625
|
100
|
|
|
|
14983
|
if ($tag eq NAME) { |
|
491
|
494
|
50
|
|
|
|
1186
|
if (!$id) { |
|
492
|
0
|
|
|
|
|
0
|
$self->parse_err("missing id!") |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
else { |
|
495
|
494
|
|
|
|
|
2246
|
$self->acc2name_h->{$id} = $val; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
else { |
|
500
|
0
|
|
|
|
|
0
|
$self->throw("uh oh: $_"); |
|
501
|
|
|
|
|
|
|
} |
|
502
|
|
|
|
|
|
|
} |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# duplicated code! check final event |
|
505
|
10
|
100
|
|
|
|
66
|
if (!$namespace_set) { |
|
506
|
7
|
50
|
33
|
|
|
193
|
if (!$namespace && $stanza_count) { |
|
507
|
|
|
|
|
|
|
#$self->parse_err("missing namespace for ID: $id"); |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
else { |
|
510
|
7
|
|
|
|
|
37
|
$self->event(NAMESPACE, $namespace); |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
} |
|
513
|
10
|
50
|
|
|
|
864
|
$self->event(IS_ROOT,1) if $is_root; |
|
514
|
|
|
|
|
|
|
|
|
515
|
10
|
|
|
|
|
33
|
foreach my $import_file (@imports) { |
|
516
|
0
|
|
|
|
|
0
|
$import_file = $self->download_file_if_required($import_file); |
|
517
|
0
|
|
|
|
|
0
|
$self->file($import_file); |
|
518
|
0
|
|
|
|
|
0
|
$self->pop_stack_to_depth(1); |
|
519
|
|
|
|
|
|
|
#$self->end_event(HEADER); |
|
520
|
0
|
|
|
|
|
0
|
my $ifh = FileHandle->new($import_file); |
|
521
|
0
|
|
|
|
|
0
|
$self->parse_fh_inner($ifh); |
|
522
|
|
|
|
|
|
|
#$self->pop_stack_to_depth(1); |
|
523
|
0
|
|
|
|
|
0
|
$ifh->close(); |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
|
|
526
|
10
|
|
|
|
|
209
|
return; |
|
527
|
|
|
|
|
|
|
} |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# each tag line can have trailing qualifiers in {}s at the end |
|
530
|
|
|
|
|
|
|
sub extract_quals { |
|
531
|
2625
|
|
|
2625
|
0
|
3520
|
my $str = shift; |
|
532
|
|
|
|
|
|
|
|
|
533
|
2625
|
|
|
|
|
9290
|
my %q = (); |
|
534
|
2625
|
100
|
|
|
|
5850
|
if ($str =~ /(.*[^\s])\s+(\{.*)\}\s*$/) { |
|
535
|
10
|
|
|
|
|
376
|
my $return_str = $1; |
|
536
|
10
|
|
|
|
|
28
|
my $extr = $2; |
|
537
|
10
|
50
|
|
|
|
26
|
if ($extr) { |
|
538
|
10
|
|
|
|
|
28
|
my @qparts = split_on_comma($extr); |
|
539
|
10
|
|
|
|
|
24
|
foreach (@qparts) { |
|
540
|
10
|
50
|
|
|
|
147
|
if (/(\w+)=\"(.*)\"/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
541
|
0
|
|
|
|
|
0
|
$q{$1} = $2; |
|
542
|
|
|
|
|
|
|
} |
|
543
|
|
|
|
|
|
|
elsif (/(\w+)=\'(.*)\'/) { |
|
544
|
0
|
|
|
|
|
0
|
$q{$1} = $2; |
|
545
|
|
|
|
|
|
|
} |
|
546
|
|
|
|
|
|
|
elsif (/(\w+)=(\S+)/) { # current 1.2 standard; non-quoted |
|
547
|
10
|
|
|
|
|
54
|
$q{$1} = $2; |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
else { |
|
550
|
0
|
|
|
|
|
0
|
warn("$_ in $str"); |
|
551
|
|
|
|
|
|
|
} |
|
552
|
|
|
|
|
|
|
} |
|
553
|
|
|
|
|
|
|
} |
|
554
|
10
|
|
|
|
|
118
|
return ($return_str, \%q); |
|
555
|
|
|
|
|
|
|
} |
|
556
|
|
|
|
|
|
|
else { |
|
557
|
2615
|
|
|
|
|
9292
|
return ($str, {}); |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
sub extract_qstr { |
|
562
|
435
|
|
|
435
|
0
|
657
|
my $str = shift; |
|
563
|
|
|
|
|
|
|
|
|
564
|
435
|
|
|
|
|
1426
|
my ($extr, $rem, $prefix) = extract_quotelike($str); |
|
565
|
435
|
|
|
|
|
43084
|
my $txt = $extr; |
|
566
|
435
|
100
|
|
|
|
2123
|
$txt =~ s/^\"// if $txt; |
|
567
|
435
|
100
|
|
|
|
2857
|
$txt =~ s/\"$// if $txt; |
|
568
|
435
|
50
|
|
|
|
1113
|
if ($prefix) { |
|
569
|
0
|
|
|
|
|
0
|
warn("illegal prefix: $prefix in: $str"); |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
435
|
|
|
|
|
606
|
my @extra = (); |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# synonyms can have two words following quoted part |
|
575
|
|
|
|
|
|
|
# before dbxref section |
|
576
|
|
|
|
|
|
|
# - two |
|
577
|
435
|
100
|
|
|
|
1852
|
if ($rem =~ /(\w+)\s+(\w+)\s+(\[.*)/) { |
|
|
|
100
|
|
|
|
|
|
|
578
|
278
|
|
|
|
|
617
|
$rem = $3; |
|
579
|
278
|
|
|
|
|
762
|
push(@extra,$1,$2); |
|
580
|
|
|
|
|
|
|
} |
|
581
|
|
|
|
|
|
|
elsif ($rem =~ /(\w+)\s+(\[.*)/) { |
|
582
|
19
|
|
|
|
|
36
|
$rem = $2; |
|
583
|
19
|
|
|
|
|
45
|
push(@extra,$1); |
|
584
|
|
|
|
|
|
|
} |
|
585
|
|
|
|
|
|
|
else { |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
435
|
|
|
|
|
577
|
my @parts = (); |
|
589
|
435
|
|
|
|
|
1315
|
while (($extr, $rem, $prefix) = extract_bracketed($rem, '[]')) { |
|
590
|
788
|
100
|
|
|
|
73304
|
last unless $extr; |
|
591
|
353
|
|
|
|
|
1148
|
$extr =~ s/^\[//; |
|
592
|
353
|
|
|
|
|
1066
|
$extr =~ s/\]$//; |
|
593
|
353
|
100
|
|
|
|
1545
|
push(@parts, $extr) if $extr; |
|
594
|
|
|
|
|
|
|
} |
|
595
|
|
|
|
|
|
|
@parts = |
|
596
|
435
|
|
|
|
|
836
|
map {split_on_comma($_)} @parts; |
|
|
95
|
|
|
|
|
248
|
|
|
597
|
|
|
|
|
|
|
|
|
598
|
435
|
100
|
|
|
|
1100
|
$txt =~ s/\\//g if $txt; |
|
599
|
435
|
|
|
|
|
1620
|
return ($txt, \@parts, \@extra); |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub split_on_comma { |
|
603
|
105
|
|
|
105
|
0
|
167
|
my $str = shift; |
|
604
|
105
|
|
|
|
|
192
|
my @parts = (); |
|
605
|
105
|
|
|
|
|
422
|
while ($str =~ /(.*[^\\],\s*)(.*)/) { |
|
606
|
14
|
|
|
|
|
39
|
$str = $1; |
|
607
|
14
|
|
|
|
|
27
|
my $part = $2; |
|
608
|
14
|
|
|
|
|
42
|
unshift(@parts, $part); |
|
609
|
14
|
|
|
|
|
106
|
$str =~ s/,\s*$//; |
|
610
|
|
|
|
|
|
|
} |
|
611
|
105
|
|
|
|
|
243
|
unshift(@parts, $str); |
|
612
|
105
|
|
|
|
|
257
|
return map {s/\\//g;$_} @parts; |
|
|
119
|
|
|
|
|
219
|
|
|
|
119
|
|
|
|
|
479
|
|
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# turns a DB:ACC string into an obo-xml dbxref element |
|
616
|
|
|
|
|
|
|
sub dbxref { |
|
617
|
228
|
|
|
228
|
0
|
314
|
my $str = shift; |
|
618
|
228
|
|
|
|
|
388
|
$str =~ s/\\//g; |
|
619
|
228
|
|
|
|
|
235
|
my $name; |
|
620
|
228
|
50
|
|
|
|
574
|
if ($str =~ /(.*)\s+\"(.*)\"$/) { |
|
621
|
0
|
|
|
|
|
0
|
$str = $1; |
|
622
|
0
|
|
|
|
|
0
|
$name = $2; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
228
|
|
|
|
|
877
|
my ($db, @rest) = split(/:/, $str); |
|
625
|
228
|
|
|
|
|
524
|
my $acc = join(':',@rest); |
|
626
|
228
|
|
|
|
|
677
|
$db =~ s/^\s+//; |
|
627
|
228
|
50
|
33
|
|
|
751
|
if ($db eq 'http' && $acc =~ /^\/\//) { |
|
628
|
|
|
|
|
|
|
# dbxref is actually a URI |
|
629
|
0
|
|
|
|
|
0
|
$db = 'URL'; |
|
630
|
0
|
|
|
|
|
0
|
$acc = simple_escape($acc); |
|
631
|
0
|
|
|
|
|
0
|
$acc =~ s/\s/\%20/g; |
|
632
|
0
|
|
|
|
|
0
|
$acc = "http:$acc"; |
|
633
|
|
|
|
|
|
|
} |
|
634
|
|
|
|
|
|
|
else { |
|
635
|
|
|
|
|
|
|
# $db=escape($db); |
|
636
|
|
|
|
|
|
|
# $acc=escape($acc); |
|
637
|
|
|
|
|
|
|
} |
|
638
|
228
|
|
|
|
|
422
|
$db =~ s/\s+/_/g; # HumanDO.obo has spaces in xref |
|
639
|
228
|
|
|
|
|
371
|
$acc =~ s/\s+/_/g; |
|
640
|
228
|
50
|
|
|
|
490
|
$db = 'NULL' unless $db; |
|
641
|
228
|
50
|
|
|
|
487
|
$acc = 'NULL' unless $acc; |
|
642
|
228
|
50
|
|
|
|
1846
|
[DBXREF,[[ACC,$acc], |
|
643
|
|
|
|
|
|
|
[DBNAME,$db], |
|
644
|
|
|
|
|
|
|
defined $name ? [NAME,$name] : () |
|
645
|
|
|
|
|
|
|
]]; |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
sub parse_term_expression { |
|
649
|
4
|
|
|
4
|
0
|
12735
|
my $self = shift; |
|
650
|
4
|
|
|
|
|
8
|
my $expr = shift; |
|
651
|
4
|
|
|
|
|
18
|
my ($te,$rest) = $self->parse_term_expression_with_rest($expr); |
|
652
|
4
|
50
|
|
|
|
14
|
if ($rest) { |
|
653
|
0
|
|
|
|
|
0
|
$self->parse_err("trailing: $rest"); |
|
654
|
|
|
|
|
|
|
} |
|
655
|
4
|
|
|
|
|
44
|
return Data::Stag->nodify($te); |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub parse_term_expression_with_rest { |
|
659
|
15
|
|
|
15
|
0
|
20
|
my $self = shift; |
|
660
|
15
|
|
|
|
|
20
|
my $expr = shift; |
|
661
|
15
|
100
|
|
|
|
99
|
if ($expr =~ /^\((.*)/) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
662
|
1
|
|
|
|
|
8
|
my $genus_expr = $1; |
|
663
|
1
|
|
|
|
|
5
|
my ($genus,$diff_expr) = $self->parse_term_expression_with_rest($genus_expr); |
|
664
|
1
|
|
|
|
|
5
|
my $next_c = substr($diff_expr,0,1,''); |
|
665
|
1
|
50
|
|
|
|
5
|
if ($next_c eq ')') { |
|
666
|
1
|
|
|
|
|
5
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr); |
|
667
|
1
|
|
|
|
|
5
|
my $stag = [intersection=>[ |
|
668
|
|
|
|
|
|
|
[link=>[[to=>[$genus]]]], |
|
669
|
|
|
|
|
|
|
@$diffs]]; |
|
670
|
1
|
|
|
|
|
5
|
return ($stag,$rest); |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
else { |
|
674
|
0
|
|
|
|
|
0
|
$self->parse_err("expected ) at end of genus. Got: $next_c followed by $diff_expr"); |
|
675
|
|
|
|
|
|
|
} |
|
676
|
|
|
|
|
|
|
} |
|
677
|
|
|
|
|
|
|
elsif ($expr =~ /^([\w\:\.\-]+)\^(.*)/) { |
|
678
|
7
|
|
|
|
|
21
|
my $genus = $1; |
|
679
|
7
|
|
|
|
|
19
|
my $diff_expr = $2; |
|
680
|
7
|
|
|
|
|
22
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr); |
|
681
|
7
|
|
|
|
|
30
|
my $stag = [intersection=>[ |
|
682
|
|
|
|
|
|
|
[link=>[[to=>$genus]]], |
|
683
|
|
|
|
|
|
|
@$diffs]]; |
|
684
|
7
|
|
|
|
|
24
|
return ($stag,$rest); |
|
685
|
|
|
|
|
|
|
} |
|
686
|
|
|
|
|
|
|
elsif ($expr =~ /^([\w\:\.\-]+)(.*)/) { |
|
687
|
7
|
|
|
|
|
28
|
return ($1,$2); |
|
688
|
|
|
|
|
|
|
} |
|
689
|
|
|
|
|
|
|
else { |
|
690
|
0
|
|
|
|
|
0
|
$self->parse_err("could not parse: $expr"); |
|
691
|
|
|
|
|
|
|
} |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub parse_differentia { |
|
695
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
696
|
0
|
|
|
|
|
0
|
my $expr = shift; |
|
697
|
0
|
|
|
|
|
0
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($expr); |
|
698
|
0
|
0
|
|
|
|
0
|
if ($rest) { |
|
699
|
0
|
|
|
|
|
0
|
$self->parse_err("trailing: $rest"); |
|
700
|
|
|
|
|
|
|
} |
|
701
|
|
|
|
|
|
|
|
|
702
|
0
|
|
|
|
|
0
|
Data::Stag->nodify($_) foreach @$diffs; |
|
703
|
0
|
|
|
|
|
0
|
return $diffs; |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub parse_differentia_with_rest { |
|
707
|
10
|
|
|
10
|
0
|
16
|
my $self = shift; |
|
708
|
10
|
|
|
|
|
20
|
my $expr = shift; |
|
709
|
10
|
50
|
|
|
|
42
|
if ($expr =~ /^(.+?)\((.*)/) { |
|
710
|
10
|
|
|
|
|
20
|
my $rel = $1; |
|
711
|
10
|
|
|
|
|
17
|
my $term_expr = $2; |
|
712
|
10
|
|
|
|
|
28
|
my ($term,$rest) = $self->parse_term_expression_with_rest($term_expr); |
|
713
|
10
|
100
|
|
|
|
56
|
my $diff = [link=>[[type=>$rel], |
|
714
|
|
|
|
|
|
|
[to=>(ref($term) ? [$term] : $term)]]]; |
|
715
|
10
|
50
|
|
|
|
24
|
if ($rest) { |
|
716
|
10
|
|
|
|
|
25
|
my $next_c = substr($rest,0,1,''); |
|
717
|
10
|
50
|
|
|
|
24
|
if ($next_c eq ')') { |
|
718
|
10
|
|
|
|
|
19
|
$next_c = substr($rest,0,1); |
|
719
|
10
|
100
|
66
|
|
|
64
|
if ($next_c eq '^' || $next_c eq ',') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
720
|
2
|
|
|
|
|
12
|
my ($next_diffs,$next_rest) = $self->parse_differentia_with_rest(substr($rest,1)); |
|
721
|
2
|
50
|
|
|
|
8
|
if (!$next_diffs) { |
|
722
|
0
|
|
|
|
|
0
|
$self->parse_err("problem parsing differentia: $rest. Expr: $term_expr"); |
|
723
|
0
|
|
|
|
|
0
|
return ([$diff],$rest); |
|
724
|
|
|
|
|
|
|
} |
|
725
|
2
|
|
|
|
|
12
|
return ([$diff,@$next_diffs],$next_rest); |
|
726
|
|
|
|
|
|
|
} |
|
727
|
|
|
|
|
|
|
elsif ($next_c eq '') { |
|
728
|
4
|
|
|
|
|
16
|
return ([$diff],$rest); |
|
729
|
|
|
|
|
|
|
} |
|
730
|
|
|
|
|
|
|
elsif ($next_c eq ')') { |
|
731
|
4
|
|
|
|
|
16
|
return ([$diff],$rest); |
|
732
|
|
|
|
|
|
|
} |
|
733
|
|
|
|
|
|
|
else { |
|
734
|
0
|
|
|
|
|
|
$self->parse_err("expected ^ or ) in differentium. Got: $next_c followed_by: $rest. Expr: $term_expr"); |
|
735
|
|
|
|
|
|
|
} |
|
736
|
|
|
|
|
|
|
} |
|
737
|
|
|
|
|
|
|
else { |
|
738
|
0
|
|
|
|
|
|
$self->parse_err("expected ) to close differentium. Got: $next_c followed by: $rest. Expr: $term_expr"); |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
} |
|
741
|
|
|
|
|
|
|
else { |
|
742
|
0
|
|
|
|
|
|
$self->parse_err("expected ). Got: \"\". Expr: $term_expr"); |
|
743
|
|
|
|
|
|
|
} |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
else { |
|
746
|
0
|
|
|
|
|
|
$self->parse_err("expect relation(...). Got: $expr. "); |
|
747
|
|
|
|
|
|
|
} |
|
748
|
|
|
|
|
|
|
} |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# lifted from CGI::Util |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
our $EBCDIC = "\t" ne "\011"; |
|
753
|
|
|
|
|
|
|
# (ord('^') == 95) for codepage 1047 as on os390, vmesa |
|
754
|
|
|
|
|
|
|
our @E2A = ( |
|
755
|
|
|
|
|
|
|
0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15, |
|
756
|
|
|
|
|
|
|
16, 17, 18, 19,157, 10, 8,135, 24, 25,146,143, 28, 29, 30, 31, |
|
757
|
|
|
|
|
|
|
128,129,130,131,132,133, 23, 27,136,137,138,139,140, 5, 6, 7, |
|
758
|
|
|
|
|
|
|
144,145, 22,147,148,149,150, 4,152,153,154,155, 20, 21,158, 26, |
|
759
|
|
|
|
|
|
|
32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124, |
|
760
|
|
|
|
|
|
|
38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94, |
|
761
|
|
|
|
|
|
|
45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63, |
|
762
|
|
|
|
|
|
|
248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34, |
|
763
|
|
|
|
|
|
|
216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177, |
|
764
|
|
|
|
|
|
|
176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164, |
|
765
|
|
|
|
|
|
|
181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174, |
|
766
|
|
|
|
|
|
|
172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215, |
|
767
|
|
|
|
|
|
|
123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245, |
|
768
|
|
|
|
|
|
|
125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255, |
|
769
|
|
|
|
|
|
|
92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, |
|
770
|
|
|
|
|
|
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159 |
|
771
|
|
|
|
|
|
|
); |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub escape { |
|
774
|
0
|
0
|
0
|
0
|
0
|
|
shift() if @_ > 1 and ( ref($_[0]) || (defined $_[1] && $_[0] eq $CGI::DefaultClass)); |
|
|
|
|
0
|
|
|
|
|
|
775
|
0
|
|
|
|
|
|
my $toencode = shift; |
|
776
|
0
|
0
|
|
|
|
|
return undef unless defined($toencode); |
|
777
|
|
|
|
|
|
|
# force bytes while preserving backward compatibility -- dankogai |
|
778
|
0
|
|
|
|
|
|
$toencode = pack("C*", unpack("C*", $toencode)); |
|
779
|
0
|
0
|
|
|
|
|
if ($EBCDIC) { |
|
780
|
0
|
|
|
|
|
|
$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg; |
|
|
0
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
} else { |
|
782
|
0
|
|
|
|
|
|
$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; |
|
|
0
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
} |
|
784
|
0
|
|
|
|
|
|
return $toencode; |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub simple_escape { |
|
788
|
0
|
0
|
|
0
|
0
|
|
return unless defined(my $toencode = shift); |
|
789
|
0
|
|
|
|
|
|
$toencode =~ s{&}{&}gso; |
|
790
|
0
|
|
|
|
|
|
$toencode =~ s{<}{<}gso; |
|
791
|
0
|
|
|
|
|
|
$toencode =~ s{>}{>}gso; |
|
792
|
0
|
|
|
|
|
|
$toencode =~ s{\"}{"}gso; |
|
793
|
|
|
|
|
|
|
# Doesn't work. Can't work. forget it. |
|
794
|
|
|
|
|
|
|
# $toencode =~ s{\x8b}{}gso; |
|
795
|
|
|
|
|
|
|
# $toencode =~ s{\x9b}{}gso; |
|
796
|
0
|
|
|
|
|
|
$toencode; |
|
797
|
|
|
|
|
|
|
} |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
1; |