line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: obo_text_parser.pm,v 1.52 2010/03/11 22:40:27 cmungall Exp $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# see also - http://www.geneontology.org |
5
|
|
|
|
|
|
|
# - http://www.godatabase.org/dev |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package GO::Parsers::obo_text_parser; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
GO::Parsers::obo_text_parser - OBO Flat file parser object |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
do not use this class directly; use GO::Parser |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 DESCRIPTION |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
10
|
|
|
10
|
|
66
|
use Exporter; |
|
10
|
|
|
|
|
22
|
|
|
10
|
|
|
|
|
520
|
|
27
|
10
|
|
|
10
|
|
12029
|
use Text::Balanced qw(extract_quotelike extract_bracketed); |
|
10
|
|
|
|
|
423477
|
|
|
10
|
|
|
|
|
1229
|
|
28
|
10
|
|
|
10
|
|
121
|
use base qw(GO::Parsers::base_parser); |
|
10
|
|
|
|
|
20
|
|
|
10
|
|
|
|
|
4351
|
|
29
|
10
|
|
|
10
|
|
5319
|
use GO::Parsers::ParserEventNames; |
|
10
|
|
|
|
|
27
|
|
|
10
|
|
|
|
|
2815
|
|
30
|
|
|
|
|
|
|
|
31
|
10
|
|
|
10
|
|
62
|
use Carp; |
|
10
|
|
|
|
|
15
|
|
|
10
|
|
|
|
|
586
|
|
32
|
10
|
|
|
10
|
|
57
|
use FileHandle; |
|
10
|
|
|
|
|
19
|
|
|
10
|
|
|
|
|
90
|
|
33
|
|
|
|
|
|
|
|
34
|
10
|
|
|
10
|
|
5192
|
use strict qw(vars refs); |
|
10
|
|
|
|
|
18
|
|
|
10
|
|
|
|
|
119395
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub dtd { |
37
|
0
|
|
|
0
|
0
|
0
|
'obo-parser-events.dtd'; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub parse_fh { |
41
|
10
|
|
|
10
|
0
|
2157
|
my ($self, $fh) = @_; |
42
|
|
|
|
|
|
|
|
43
|
10
|
|
|
|
|
110
|
$self->start_event(OBO); |
44
|
10
|
|
|
|
|
739
|
$self->parse_fh_inner($fh); |
45
|
|
|
|
|
|
|
|
46
|
10
|
|
|
|
|
160
|
$self->pop_stack_to_depth(0); |
47
|
10
|
|
|
|
|
674
|
$self->parsed_ontology(1); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub parse_fh_inner { |
52
|
|
|
|
|
|
|
|
53
|
10
|
|
|
10
|
0
|
28
|
my ($self, $fh) = @_; |
54
|
10
|
|
|
|
|
40
|
my $file = $self->file; |
55
|
10
|
|
|
|
|
121
|
my $litemode = $self->litemode; |
56
|
10
|
|
|
|
|
25
|
my $is_go; |
57
|
10
|
|
|
|
|
18
|
local($_); # latest perl is more strict about modification of $_ |
58
|
|
|
|
|
|
|
|
59
|
10
|
|
|
|
|
131
|
$self->fire_source_event($file); |
60
|
10
|
|
|
|
|
153
|
$self->start_event(HEADER); |
61
|
10
|
|
|
|
|
297
|
my $stanza_count; |
62
|
10
|
|
|
|
|
26
|
my $in_hdr = 1; |
63
|
10
|
|
|
|
|
25
|
my $is_root = 1; # default |
64
|
10
|
|
|
|
|
20
|
my $namespace_set; |
65
|
|
|
|
|
|
|
my $id; |
66
|
10
|
|
|
|
|
82
|
my $namespace = $self->force_namespace; # default |
67
|
10
|
|
|
|
|
58
|
my $force_namespace = $self->force_namespace; |
68
|
10
|
|
|
|
|
79
|
my $usc = $self->replace_underscore; |
69
|
10
|
|
|
|
|
29
|
my %id_remap_h = (); |
70
|
10
|
|
|
|
|
22
|
my @imports = (); |
71
|
|
|
|
|
|
|
|
72
|
10
|
|
|
|
|
19
|
my $is_utf8; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# temporary hack... |
75
|
10
|
50
|
|
|
|
76
|
if ($ENV{OBO_IDMAP}) { |
76
|
0
|
|
|
|
|
0
|
my @parts = split(/\;/,$ENV{OBO_IDMAP}); |
77
|
0
|
|
|
|
|
0
|
foreach (@parts) { |
78
|
0
|
0
|
|
|
|
0
|
if (/(.*)=(.*)/) { |
79
|
0
|
|
|
|
|
0
|
$id_remap_h{$1} = $2; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
10
|
|
|
|
|
23
|
my $default_id_prefix; |
85
|
|
|
|
|
|
|
|
86
|
10
|
|
|
|
|
4014
|
while(<$fh>) { |
87
|
3715
|
|
|
|
|
5365
|
chomp; |
88
|
|
|
|
|
|
|
|
89
|
3715
|
50
|
|
|
|
7450
|
if (/^encoding:\s*utf/) { |
90
|
0
|
|
|
|
|
0
|
$is_utf8 = 1; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
3715
|
50
|
|
|
|
7091
|
if (!$is_utf8) { |
94
|
3715
|
|
|
|
|
4720
|
tr [\200-\377] |
95
|
|
|
|
|
|
|
[\000-\177]; # see 'man perlop', section on tr/ |
96
|
|
|
|
|
|
|
# weird ascii characters should be excluded |
97
|
3715
|
|
|
|
|
4618
|
tr/\0-\10//d; # remove weird characters; ascii 0-8 |
98
|
|
|
|
|
|
|
# preserve \11 (9 - tab) and \12 (10-linefeed) |
99
|
3715
|
|
|
|
|
4267
|
tr/\13\14//d; # remove weird characters; 11,12 |
100
|
|
|
|
|
|
|
# preserve \15 (13 - carriage return) |
101
|
3715
|
|
|
|
|
4399
|
tr/\16-\37//d; # remove 14-31 (all rest before space) |
102
|
3715
|
|
|
|
|
4640
|
tr/\177//d; # remove DEL character |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
3715
|
|
|
|
|
4712
|
s/^\!.*//; |
106
|
3715
|
|
|
|
|
4661
|
s/[^\\]\!.*//; |
107
|
|
|
|
|
|
|
#s/[^\\]\#.*//; |
108
|
3715
|
|
|
|
|
6684
|
s/^\s+//; |
109
|
3715
|
|
|
|
|
7602
|
s/\s+$//; |
110
|
3715
|
100
|
|
|
|
7641
|
next unless $_; |
111
|
3213
|
0
|
33
|
|
|
6788
|
next if ($litemode && $_ !~ /^(\[|id:|name:|is_a:|relationship:|namespace:|is_obsolete:)/ && !$in_hdr); |
|
|
|
33
|
|
|
|
|
112
|
3213
|
100
|
|
|
|
23298
|
if (/^\[(\w+)\]\s*(.*)/) { # new stanza |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# we are at the beginning of a new stanza |
115
|
|
|
|
|
|
|
# reset everything and make sure everything from |
116
|
|
|
|
|
|
|
# previous stanza is exported |
117
|
|
|
|
|
|
|
|
118
|
494
|
|
|
|
|
1251
|
my $stanza = lc($1); |
119
|
494
|
|
|
|
|
1091
|
my $rest = $2; |
120
|
494
|
100
|
|
|
|
939
|
if ($in_hdr) { |
121
|
10
|
|
|
|
|
23
|
$in_hdr = 0; |
122
|
10
|
|
|
|
|
195
|
$self->end_event(HEADER); |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
else { |
125
|
484
|
100
|
|
|
|
1225
|
if (!$namespace_set) { |
126
|
149
|
50
|
|
|
|
302
|
if (!$namespace) { |
127
|
0
|
0
|
|
|
|
0
|
if ($stanza ne 'instance') { |
128
|
|
|
|
|
|
|
#$self->parse_err("missing namespace for ID: $id"); |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
else { |
132
|
149
|
|
|
|
|
519
|
$self->event(NAMESPACE, $namespace); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
} |
135
|
484
|
100
|
|
|
|
12533
|
$self->event(IS_ROOT,1) if $is_root; |
136
|
484
|
|
|
|
|
1850
|
$is_root = 1; # assume root by default; override if parents found |
137
|
484
|
|
|
|
|
702
|
$namespace_set = 0; |
138
|
484
|
|
|
|
|
1674
|
$self->end_event; |
139
|
|
|
|
|
|
|
} |
140
|
494
|
100
|
|
|
|
18205
|
$is_root = 0 unless $stanza eq 'term'; |
141
|
494
|
|
|
|
|
1510
|
$self->start_event($stanza); |
142
|
494
|
|
|
|
|
13823
|
$id = undef; |
143
|
494
|
|
|
|
|
2876
|
$stanza_count++; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
elsif ($in_hdr) { |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# we are in the header section |
148
|
|
|
|
|
|
|
|
149
|
94
|
50
|
|
|
|
424
|
if (/^([\w\-]+)\:\s*(.*)/) { # tag-val pair |
150
|
94
|
|
|
|
|
472
|
my ($tag, $val) = ($1,$2); |
151
|
94
|
100
|
|
|
|
227
|
if ($tag eq 'subsetdef') { |
152
|
12
|
50
|
|
|
|
40
|
if ($val =~ /(\S+)\s+(.*)/) { |
153
|
12
|
|
|
|
|
20
|
my $subset_id = $1; |
154
|
12
|
|
|
|
|
22
|
$val = $2; |
155
|
12
|
|
|
|
|
26
|
my ($subset_name, $parts) = |
156
|
|
|
|
|
|
|
extract_qstr($val); |
157
|
0
|
|
|
|
|
0
|
$val = |
158
|
|
|
|
|
|
|
[[ID,$subset_id], |
159
|
|
|
|
|
|
|
[NAME,$subset_name], |
160
|
12
|
|
|
|
|
63
|
map {dbxref($_)} @$parts]; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
else { |
163
|
0
|
|
|
|
|
0
|
$self->parse_err("subsetdef: expect ID \"NAME\", got: $val"); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
94
|
100
|
|
|
|
185
|
if ($tag eq 'synonymtypedef') { |
167
|
18
|
50
|
|
|
|
150
|
if ($val =~ /(\S+)\s+\"(.*)\"\s*(.*)/) { |
168
|
18
|
|
|
|
|
34
|
my $stname = $1; |
169
|
18
|
|
|
|
|
64
|
my $stdef = $2; |
170
|
18
|
|
|
|
|
35
|
my $scope = $3; |
171
|
18
|
50
|
|
|
|
108
|
$val = |
172
|
|
|
|
|
|
|
[[ID,$stname], |
173
|
|
|
|
|
|
|
[NAME,$stdef], |
174
|
|
|
|
|
|
|
($scope ? ['scope', $scope] : ())]; |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
else { |
178
|
0
|
|
|
|
|
0
|
$self->parse_err("synonymtypedef: expect ID \"NAME\", got: $val"); |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} |
181
|
94
|
50
|
|
|
|
190
|
if ($tag eq 'idspace') { |
182
|
0
|
|
|
|
|
0
|
my ($idspace,$global,@rest) = split(' ',$val); |
183
|
0
|
0
|
|
|
|
0
|
if (!$global) { |
184
|
0
|
|
|
|
|
0
|
$self->parse_err("idspace requires two columns"); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
$val = |
187
|
0
|
0
|
|
|
|
0
|
[['local',$idspace], |
188
|
|
|
|
|
|
|
['global',$global], |
189
|
|
|
|
|
|
|
(@rest ? [COMMENT,join(' ',@rest)] : ()), |
190
|
|
|
|
|
|
|
]; |
191
|
|
|
|
|
|
|
} |
192
|
94
|
50
|
|
|
|
169
|
if ($tag eq 'local-id-mapping') { |
193
|
0
|
0
|
|
|
|
0
|
if ($val =~ /(\S+)\s+(.*)/) { |
194
|
|
|
|
|
|
|
# with a local ID mapping we delay binding |
195
|
0
|
|
|
|
|
0
|
$val = |
196
|
|
|
|
|
|
|
[['local',$1], |
197
|
|
|
|
|
|
|
['to',$2]]; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
else { |
200
|
0
|
|
|
|
|
0
|
$self->parse_err("id-mapping requires two columns"); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
} |
203
|
94
|
50
|
|
|
|
153
|
if ($tag eq 'import') { |
204
|
0
|
0
|
|
|
|
0
|
if ($ENV{OBO_FOLLOW_IMPORTS}) { |
205
|
0
|
|
|
|
|
0
|
push(@imports, $val); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
else { |
208
|
|
|
|
|
|
|
# handled below |
209
|
|
|
|
|
|
|
#$self->event(import=>$val); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
94
|
|
|
|
|
296
|
$self->event($tag=>$val); |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# post-processing |
216
|
94
|
100
|
|
|
|
14606
|
if ($tag eq 'default-namespace') { |
217
|
10
|
50
|
|
|
|
47
|
$namespace = $val |
218
|
|
|
|
|
|
|
unless $namespace; |
219
|
|
|
|
|
|
|
} |
220
|
94
|
100
|
|
|
|
216
|
if ($tag eq 'id-mapping') { |
221
|
13
|
50
|
|
|
|
62
|
if ($val =~ /(\S+)\s+(.*)/) { |
222
|
|
|
|
|
|
|
# bind at parse time |
223
|
13
|
50
|
|
|
|
46
|
if ($id_remap_h{$1}) { |
224
|
0
|
|
|
|
|
0
|
$self->parse_err("remapping $1 to $2"); |
225
|
|
|
|
|
|
|
} |
226
|
13
|
|
|
|
|
48
|
$id_remap_h{$1} = $2; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
else { |
229
|
0
|
|
|
|
|
0
|
$self->parse_err("id-mapping requires two columns"); |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
} |
232
|
94
|
50
|
|
|
|
600
|
if ($tag eq 'default-id-prefix') { |
233
|
0
|
|
|
|
|
0
|
$default_id_prefix = $val; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
else { |
237
|
0
|
|
|
|
|
0
|
$self->parse_err("illegal header entry: $_"); |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} # END OF IN-HEADER |
240
|
|
|
|
|
|
|
elsif (/^([\w\-]+)\:\s*(.*)/) { # tag-val pair |
241
|
2625
|
|
|
|
|
7179
|
my ($tag, $val) = ($1,$2); |
242
|
2625
|
|
|
|
|
2758
|
my $qh; |
243
|
2625
|
|
|
|
|
4519
|
($val, $qh) = extract_quals($val); |
244
|
|
|
|
|
|
|
#$val =~ s/\\//g; |
245
|
2625
|
|
|
|
|
4659
|
my $val2 = $val; |
246
|
2625
|
|
|
|
|
3983
|
$val2 =~ s/\\,/,/g; |
247
|
2625
|
|
|
|
|
3433
|
$val2 =~ s/\\//g; |
248
|
2625
|
100
|
66
|
|
|
24969
|
if ($tag eq ID) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
249
|
494
|
100
|
|
|
|
1172
|
if ($id_remap_h{$val}) { |
250
|
6
|
|
|
|
|
14
|
$val = $id_remap_h{$val}; |
251
|
|
|
|
|
|
|
} |
252
|
494
|
100
|
|
|
|
1446
|
if ($val !~ /:/) { |
253
|
13
|
50
|
|
|
|
42
|
if ($default_id_prefix) { |
254
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
elsif ($tag eq NAME) { |
259
|
|
|
|
|
|
|
# replace underscore in name |
260
|
494
|
|
|
|
|
714
|
$val = $val2; |
261
|
494
|
50
|
|
|
|
956
|
if ($usc) { |
262
|
0
|
|
|
|
|
0
|
$val =~ s/_/$usc/g; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
elsif ($tag eq RELATIONSHIP) { |
266
|
97
|
|
|
|
|
413
|
my ($type, @ids) = split(' ', $val2); |
267
|
97
|
|
|
|
|
158
|
my $id = shift @ids; |
268
|
97
|
100
|
|
|
|
256
|
if ($id_remap_h{$type}) { |
269
|
75
|
|
|
|
|
133
|
$type = $id_remap_h{$type}; |
270
|
|
|
|
|
|
|
} |
271
|
97
|
100
|
|
|
|
267
|
if ($type !~ /:/) { |
272
|
22
|
50
|
|
|
|
47
|
if ($default_id_prefix) { |
273
|
0
|
|
|
|
|
0
|
$type = "$default_id_prefix:$type"; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
97
|
|
|
|
|
385
|
$val = [[TYPE,$type],[TO,$id]]; |
277
|
97
|
|
|
|
|
238
|
push(@$val,map {['additional_argument',$_]} @ids); |
|
0
|
|
|
|
|
0
|
|
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
elsif ($tag eq INTERSECTION_OF || $tag eq UNION_OF) { |
280
|
4
|
|
|
|
|
13
|
my ($type, $id) = split(' ', $val2); |
281
|
4
|
100
|
|
|
|
15
|
if ($id_remap_h{$type}) { |
282
|
2
|
|
|
|
|
5
|
$type = $id_remap_h{$type}; |
283
|
|
|
|
|
|
|
} |
284
|
4
|
50
|
|
|
|
15
|
if ($type !~ /:/) { |
285
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
286
|
0
|
|
|
|
|
0
|
$type = "$default_id_prefix:$type"; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} |
289
|
4
|
100
|
|
|
|
10
|
if (defined $id) { |
290
|
2
|
|
|
|
|
11
|
$val = [[TYPE,$type],[TO,$id]]; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
else { |
293
|
2
|
|
|
|
|
5
|
$id = $type; |
294
|
2
|
|
|
|
|
9
|
$val = [[TO,$id]]; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
elsif ($tag eq INVERSE_OF || $tag eq TRANSITIVE_OVER || $tag eq IS_A) { |
298
|
476
|
100
|
|
|
|
1113
|
if ($id_remap_h{$val}) { |
299
|
3
|
|
|
|
|
7
|
$val = $id_remap_h{$val}; |
300
|
|
|
|
|
|
|
} |
301
|
476
|
100
|
|
|
|
1429
|
if ($val !~ /:/) { |
302
|
23
|
50
|
|
|
|
62
|
if ($default_id_prefix) { |
303
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
elsif ($tag eq DISJOINT_FROM) { |
308
|
2
|
50
|
|
|
|
6
|
if ($id_remap_h{$val}) { |
309
|
0
|
|
|
|
|
0
|
$val = $id_remap_h{$val}; |
310
|
|
|
|
|
|
|
} |
311
|
2
|
50
|
|
|
|
30
|
if ($val !~ /:/) { |
312
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
313
|
0
|
|
|
|
|
0
|
$val = "$default_id_prefix:$val"; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
elsif ($tag eq XREF) { |
318
|
118
|
|
|
|
|
412
|
$tag = XREF_ANALOG; |
319
|
118
|
|
|
|
|
323
|
my $dbxref = dbxref($val); |
320
|
118
|
|
|
|
|
348
|
$val = $dbxref->[1]; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
elsif ($tag eq XREF_ANALOG) { |
323
|
1
|
|
|
|
|
5
|
my $dbxref = dbxref($val); |
324
|
1
|
|
|
|
|
3
|
$val = $dbxref->[1]; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
elsif ($tag eq XREF_UNKNOWN) { |
327
|
0
|
|
|
|
|
0
|
my $dbxref = dbxref($val); |
328
|
0
|
|
|
|
|
0
|
$val = $dbxref->[1]; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
elsif ($tag eq PROPERTY_VALUE) { |
331
|
116
|
50
|
|
|
|
442
|
if ($val =~ /^(\S+)\s+(\".*)/) { |
332
|
|
|
|
|
|
|
# first form |
333
|
|
|
|
|
|
|
# property_value: relation "literal value" xsd:datatype |
334
|
0
|
|
|
|
|
0
|
my $type = $1; |
335
|
0
|
|
|
|
|
0
|
my $rest = $2; |
336
|
0
|
|
|
|
|
0
|
my ($to, $datatype) = extract_quotelike($rest); |
337
|
0
|
|
|
|
|
0
|
$to =~ s/^\"//; |
338
|
0
|
|
|
|
|
0
|
$to =~ s/\"$//; |
339
|
0
|
|
|
|
|
0
|
$datatype =~ s/^\s+//; |
340
|
0
|
|
|
|
|
0
|
$val = [[TYPE,$type], |
341
|
|
|
|
|
|
|
[VALUE,$to], |
342
|
|
|
|
|
|
|
[DATATYPE,$datatype]]; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
else { |
345
|
|
|
|
|
|
|
# second form |
346
|
|
|
|
|
|
|
# property_value: relation ToID |
347
|
116
|
|
|
|
|
336
|
my ($type,$to) = split(' ',$val); |
348
|
116
|
|
|
|
|
498
|
$val = [[TYPE,$type], |
349
|
|
|
|
|
|
|
[TO,$to]]; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
elsif ($tag eq NAMESPACE) { |
353
|
338
|
50
|
|
|
|
898
|
if ($force_namespace) { |
354
|
|
|
|
|
|
|
# override whatever namespace was provided |
355
|
0
|
|
|
|
|
0
|
$val = $force_namespace; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
else { |
358
|
|
|
|
|
|
|
# do nothing - we will export later |
359
|
|
|
|
|
|
|
} |
360
|
338
|
|
|
|
|
436
|
$namespace_set = $val; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
elsif ($tag eq DEF) { |
363
|
43
|
|
|
|
|
148
|
my ($defstr, $parts) = |
364
|
|
|
|
|
|
|
extract_qstr($val); |
365
|
53
|
|
|
|
|
125
|
$val = |
366
|
|
|
|
|
|
|
[[DEFSTR,$defstr], |
367
|
43
|
|
|
|
|
177
|
map {dbxref($_)} @$parts]; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
elsif ($tag =~ /(\w*)synonym/) { |
370
|
380
|
|
100
|
|
|
1652
|
my $scope = $1 || ''; |
371
|
380
|
100
|
|
|
|
679
|
if ($scope) { |
372
|
1
|
|
|
|
|
2
|
$tag = SYNONYM; |
373
|
1
|
50
|
|
|
|
5
|
if ($scope =~ /(\w+)_$/) { |
374
|
1
|
|
|
|
|
3
|
$scope = $1; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
else { |
377
|
0
|
|
|
|
|
0
|
$self->parse_err("bad synonym type: $scope"); |
378
|
0
|
|
|
|
|
0
|
$scope = ''; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
380
|
|
|
|
|
755
|
my ($syn, $parts, $extra_quals) = |
382
|
|
|
|
|
|
|
extract_qstr($val2); |
383
|
380
|
100
|
|
|
|
964
|
if (@$extra_quals) { |
384
|
297
|
|
|
|
|
494
|
$scope = shift @$extra_quals; |
385
|
297
|
|
|
|
|
519
|
$scope = lc($scope); |
386
|
297
|
100
|
|
|
|
993
|
$qh->{synonym_type} = shift @$extra_quals if @$extra_quals; |
387
|
|
|
|
|
|
|
} |
388
|
380
|
50
|
|
|
|
795
|
if ($qh->{scope}) { |
389
|
0
|
0
|
|
|
|
0
|
if ($scope) { |
390
|
0
|
0
|
|
|
|
0
|
if ($scope ne $qh->{scope}) { |
391
|
0
|
|
|
|
|
0
|
$self->parse_err("inconsistent scope: $scope/$qh->{scope}"); |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
else { |
394
|
0
|
|
|
|
|
0
|
$self->parse_err("redundant scope: $scope"); |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
else { |
399
|
380
|
|
|
|
|
753
|
$qh->{scope} = $scope; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
56
|
|
|
|
|
121
|
$val = |
403
|
|
|
|
|
|
|
[[SYNONYM_TEXT,$syn], |
404
|
380
|
|
|
|
|
1425
|
(map {dbxref($_)} @$parts)]; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
elsif ($tag =~ /formula/) { |
407
|
0
|
|
|
|
|
0
|
my ($formula, $parts, $extra_quals) = |
408
|
|
|
|
|
|
|
extract_qstr($val2); |
409
|
0
|
|
|
|
|
0
|
my $lang = 'CLIF'; |
410
|
0
|
0
|
|
|
|
0
|
if (@$extra_quals) { |
411
|
0
|
|
|
|
|
0
|
$lang = shift @$extra_quals; |
412
|
|
|
|
|
|
|
} |
413
|
0
|
|
|
|
|
0
|
$qh->{format} = $lang; |
414
|
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
0
|
$val = |
416
|
|
|
|
|
|
|
[['formula_text',$formula], |
417
|
0
|
|
|
|
|
0
|
(map {dbxref($_)} @$parts)]; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
elsif ($tag eq 'holds_temporally_between' || # experimental support for obof1.3 |
420
|
|
|
|
|
|
|
$tag eq 'holds_atemporally_between' || |
421
|
|
|
|
|
|
|
$tag eq 'holds_on_class_level_between') { |
422
|
0
|
|
|
|
|
0
|
my ($sub, $ob) = split(' ', $val2); |
423
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$sub}) { |
424
|
0
|
|
|
|
|
0
|
$sub = $id_remap_h{$sub}; |
425
|
|
|
|
|
|
|
} |
426
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$ob}) { |
427
|
0
|
|
|
|
|
0
|
$ob = $id_remap_h{$ob}; |
428
|
|
|
|
|
|
|
} |
429
|
0
|
|
|
|
|
0
|
$val = [[subject=>$sub],[object=>$ob]]; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
elsif ($tag eq 'holds_over_chain' || $tag eq 'equivalent_to_chain') { # obof1.3 |
432
|
0
|
|
|
|
|
0
|
my @rels = split(' ', $val2); |
433
|
0
|
|
|
|
|
0
|
@rels = map { |
434
|
0
|
|
|
|
|
0
|
my $rel = $_; |
435
|
0
|
0
|
|
|
|
0
|
if ($id_remap_h{$_}) { |
436
|
0
|
|
|
|
|
0
|
$rel = $id_remap_h{$_} |
437
|
|
|
|
|
|
|
} |
438
|
0
|
0
|
|
|
|
0
|
if ($rel !~ /:/) { |
439
|
0
|
0
|
|
|
|
0
|
if ($default_id_prefix) { |
440
|
0
|
|
|
|
|
0
|
$rel = "$default_id_prefix:$rel"; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
} |
443
|
0
|
|
|
|
|
0
|
$rel; |
444
|
|
|
|
|
|
|
} @rels; |
445
|
0
|
|
|
|
|
0
|
$val = [map {[relation=>$_]} @rels]; |
|
0
|
|
|
|
|
0
|
|
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
elsif ($tag =~ /^expand/) { |
448
|
0
|
|
|
|
|
0
|
my ($template, $parts) = |
449
|
|
|
|
|
|
|
extract_qstr($val); |
450
|
0
|
|
|
|
|
0
|
$val = $template; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
else { |
453
|
62
|
|
|
|
|
106
|
$val = $val2; |
454
|
|
|
|
|
|
|
# normal tag:val |
455
|
|
|
|
|
|
|
} |
456
|
2625
|
100
|
100
|
|
|
13862
|
if (!ref($val) && $val eq 'true') { |
457
|
29
|
|
|
|
|
35
|
$val = 1; |
458
|
|
|
|
|
|
|
} |
459
|
2625
|
50
|
66
|
|
|
8914
|
if (!ref($val) && $val eq 'false') { |
460
|
0
|
|
|
|
|
0
|
$val = 0; |
461
|
|
|
|
|
|
|
} |
462
|
2625
|
100
|
|
|
|
4657
|
if (%$qh) { |
463
|
|
|
|
|
|
|
# note that if attributes are used for |
464
|
|
|
|
|
|
|
# terminal nodes then we effectively have |
465
|
|
|
|
|
|
|
# to 'push the node down' a level; |
466
|
|
|
|
|
|
|
# eg |
467
|
|
|
|
|
|
|
# x |
468
|
|
|
|
|
|
|
# ==> [is_a=>'x'] |
469
|
|
|
|
|
|
|
# x |
470
|
|
|
|
|
|
|
# ==> [is_a=>[[@=>[[t=>v]]],[.=>x]]] |
471
|
390
|
100
|
|
|
|
767
|
my $data = ref $val ? $val : [['.'=>$val]]; |
472
|
390
|
|
|
|
|
1049
|
my @quals = map {[$_=>$qh->{$_}]} keys %$qh; |
|
668
|
|
|
|
|
2432
|
|
473
|
390
|
|
|
|
|
2200
|
$self->event($tag=>[['@'=>[@quals]], |
474
|
|
|
|
|
|
|
@$data, |
475
|
|
|
|
|
|
|
]); |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
else { |
478
|
2235
|
|
|
|
|
6800
|
$self->event($tag=>$val); |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
2625
|
100
|
100
|
|
|
417414
|
if ($tag eq IS_A || $tag eq RELATIONSHIP) { |
482
|
553
|
|
|
|
|
797
|
$is_root = 0; |
483
|
|
|
|
|
|
|
} |
484
|
2625
|
50
|
33
|
|
|
6598
|
if ($tag eq IS_OBSOLETE && $val) { |
485
|
0
|
|
|
|
|
0
|
$is_root = 0; |
486
|
|
|
|
|
|
|
} |
487
|
2625
|
100
|
|
|
|
4879
|
if ($tag eq ID) { |
488
|
494
|
|
|
|
|
1512
|
$id = $val; |
489
|
|
|
|
|
|
|
} |
490
|
2625
|
100
|
|
|
|
14983
|
if ($tag eq NAME) { |
491
|
494
|
50
|
|
|
|
1186
|
if (!$id) { |
492
|
0
|
|
|
|
|
0
|
$self->parse_err("missing id!") |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
else { |
495
|
494
|
|
|
|
|
2246
|
$self->acc2name_h->{$id} = $val; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
else { |
500
|
0
|
|
|
|
|
0
|
$self->throw("uh oh: $_"); |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# duplicated code! check final event |
505
|
10
|
100
|
|
|
|
66
|
if (!$namespace_set) { |
506
|
7
|
50
|
33
|
|
|
193
|
if (!$namespace && $stanza_count) { |
507
|
|
|
|
|
|
|
#$self->parse_err("missing namespace for ID: $id"); |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
else { |
510
|
7
|
|
|
|
|
37
|
$self->event(NAMESPACE, $namespace); |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} |
513
|
10
|
50
|
|
|
|
864
|
$self->event(IS_ROOT,1) if $is_root; |
514
|
|
|
|
|
|
|
|
515
|
10
|
|
|
|
|
33
|
foreach my $import_file (@imports) { |
516
|
0
|
|
|
|
|
0
|
$import_file = $self->download_file_if_required($import_file); |
517
|
0
|
|
|
|
|
0
|
$self->file($import_file); |
518
|
0
|
|
|
|
|
0
|
$self->pop_stack_to_depth(1); |
519
|
|
|
|
|
|
|
#$self->end_event(HEADER); |
520
|
0
|
|
|
|
|
0
|
my $ifh = FileHandle->new($import_file); |
521
|
0
|
|
|
|
|
0
|
$self->parse_fh_inner($ifh); |
522
|
|
|
|
|
|
|
#$self->pop_stack_to_depth(1); |
523
|
0
|
|
|
|
|
0
|
$ifh->close(); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
10
|
|
|
|
|
209
|
return; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# each tag line can have trailing qualifiers in {}s at the end |
530
|
|
|
|
|
|
|
sub extract_quals { |
531
|
2625
|
|
|
2625
|
0
|
3520
|
my $str = shift; |
532
|
|
|
|
|
|
|
|
533
|
2625
|
|
|
|
|
9290
|
my %q = (); |
534
|
2625
|
100
|
|
|
|
5850
|
if ($str =~ /(.*[^\s])\s+(\{.*)\}\s*$/) { |
535
|
10
|
|
|
|
|
376
|
my $return_str = $1; |
536
|
10
|
|
|
|
|
28
|
my $extr = $2; |
537
|
10
|
50
|
|
|
|
26
|
if ($extr) { |
538
|
10
|
|
|
|
|
28
|
my @qparts = split_on_comma($extr); |
539
|
10
|
|
|
|
|
24
|
foreach (@qparts) { |
540
|
10
|
50
|
|
|
|
147
|
if (/(\w+)=\"(.*)\"/) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
541
|
0
|
|
|
|
|
0
|
$q{$1} = $2; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
elsif (/(\w+)=\'(.*)\'/) { |
544
|
0
|
|
|
|
|
0
|
$q{$1} = $2; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
elsif (/(\w+)=(\S+)/) { # current 1.2 standard; non-quoted |
547
|
10
|
|
|
|
|
54
|
$q{$1} = $2; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
else { |
550
|
0
|
|
|
|
|
0
|
warn("$_ in $str"); |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
} |
554
|
10
|
|
|
|
|
118
|
return ($return_str, \%q); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
else { |
557
|
2615
|
|
|
|
|
9292
|
return ($str, {}); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
sub extract_qstr { |
562
|
435
|
|
|
435
|
0
|
657
|
my $str = shift; |
563
|
|
|
|
|
|
|
|
564
|
435
|
|
|
|
|
1426
|
my ($extr, $rem, $prefix) = extract_quotelike($str); |
565
|
435
|
|
|
|
|
43084
|
my $txt = $extr; |
566
|
435
|
100
|
|
|
|
2123
|
$txt =~ s/^\"// if $txt; |
567
|
435
|
100
|
|
|
|
2857
|
$txt =~ s/\"$// if $txt; |
568
|
435
|
50
|
|
|
|
1113
|
if ($prefix) { |
569
|
0
|
|
|
|
|
0
|
warn("illegal prefix: $prefix in: $str"); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
435
|
|
|
|
|
606
|
my @extra = (); |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# synonyms can have two words following quoted part |
575
|
|
|
|
|
|
|
# before dbxref section |
576
|
|
|
|
|
|
|
# - two |
577
|
435
|
100
|
|
|
|
1852
|
if ($rem =~ /(\w+)\s+(\w+)\s+(\[.*)/) { |
|
|
100
|
|
|
|
|
|
578
|
278
|
|
|
|
|
617
|
$rem = $3; |
579
|
278
|
|
|
|
|
762
|
push(@extra,$1,$2); |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
elsif ($rem =~ /(\w+)\s+(\[.*)/) { |
582
|
19
|
|
|
|
|
36
|
$rem = $2; |
583
|
19
|
|
|
|
|
45
|
push(@extra,$1); |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
else { |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
435
|
|
|
|
|
577
|
my @parts = (); |
589
|
435
|
|
|
|
|
1315
|
while (($extr, $rem, $prefix) = extract_bracketed($rem, '[]')) { |
590
|
788
|
100
|
|
|
|
73304
|
last unless $extr; |
591
|
353
|
|
|
|
|
1148
|
$extr =~ s/^\[//; |
592
|
353
|
|
|
|
|
1066
|
$extr =~ s/\]$//; |
593
|
353
|
100
|
|
|
|
1545
|
push(@parts, $extr) if $extr; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
@parts = |
596
|
435
|
|
|
|
|
836
|
map {split_on_comma($_)} @parts; |
|
95
|
|
|
|
|
248
|
|
597
|
|
|
|
|
|
|
|
598
|
435
|
100
|
|
|
|
1100
|
$txt =~ s/\\//g if $txt; |
599
|
435
|
|
|
|
|
1620
|
return ($txt, \@parts, \@extra); |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub split_on_comma { |
603
|
105
|
|
|
105
|
0
|
167
|
my $str = shift; |
604
|
105
|
|
|
|
|
192
|
my @parts = (); |
605
|
105
|
|
|
|
|
422
|
while ($str =~ /(.*[^\\],\s*)(.*)/) { |
606
|
14
|
|
|
|
|
39
|
$str = $1; |
607
|
14
|
|
|
|
|
27
|
my $part = $2; |
608
|
14
|
|
|
|
|
42
|
unshift(@parts, $part); |
609
|
14
|
|
|
|
|
106
|
$str =~ s/,\s*$//; |
610
|
|
|
|
|
|
|
} |
611
|
105
|
|
|
|
|
243
|
unshift(@parts, $str); |
612
|
105
|
|
|
|
|
257
|
return map {s/\\//g;$_} @parts; |
|
119
|
|
|
|
|
219
|
|
|
119
|
|
|
|
|
479
|
|
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# turns a DB:ACC string into an obo-xml dbxref element |
616
|
|
|
|
|
|
|
sub dbxref { |
617
|
228
|
|
|
228
|
0
|
314
|
my $str = shift; |
618
|
228
|
|
|
|
|
388
|
$str =~ s/\\//g; |
619
|
228
|
|
|
|
|
235
|
my $name; |
620
|
228
|
50
|
|
|
|
574
|
if ($str =~ /(.*)\s+\"(.*)\"$/) { |
621
|
0
|
|
|
|
|
0
|
$str = $1; |
622
|
0
|
|
|
|
|
0
|
$name = $2; |
623
|
|
|
|
|
|
|
} |
624
|
228
|
|
|
|
|
877
|
my ($db, @rest) = split(/:/, $str); |
625
|
228
|
|
|
|
|
524
|
my $acc = join(':',@rest); |
626
|
228
|
|
|
|
|
677
|
$db =~ s/^\s+//; |
627
|
228
|
50
|
33
|
|
|
751
|
if ($db eq 'http' && $acc =~ /^\/\//) { |
628
|
|
|
|
|
|
|
# dbxref is actually a URI |
629
|
0
|
|
|
|
|
0
|
$db = 'URL'; |
630
|
0
|
|
|
|
|
0
|
$acc = simple_escape($acc); |
631
|
0
|
|
|
|
|
0
|
$acc =~ s/\s/\%20/g; |
632
|
0
|
|
|
|
|
0
|
$acc = "http:$acc"; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
else { |
635
|
|
|
|
|
|
|
# $db=escape($db); |
636
|
|
|
|
|
|
|
# $acc=escape($acc); |
637
|
|
|
|
|
|
|
} |
638
|
228
|
|
|
|
|
422
|
$db =~ s/\s+/_/g; # HumanDO.obo has spaces in xref |
639
|
228
|
|
|
|
|
371
|
$acc =~ s/\s+/_/g; |
640
|
228
|
50
|
|
|
|
490
|
$db = 'NULL' unless $db; |
641
|
228
|
50
|
|
|
|
487
|
$acc = 'NULL' unless $acc; |
642
|
228
|
50
|
|
|
|
1846
|
[DBXREF,[[ACC,$acc], |
643
|
|
|
|
|
|
|
[DBNAME,$db], |
644
|
|
|
|
|
|
|
defined $name ? [NAME,$name] : () |
645
|
|
|
|
|
|
|
]]; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
sub parse_term_expression { |
649
|
4
|
|
|
4
|
0
|
12735
|
my $self = shift; |
650
|
4
|
|
|
|
|
8
|
my $expr = shift; |
651
|
4
|
|
|
|
|
18
|
my ($te,$rest) = $self->parse_term_expression_with_rest($expr); |
652
|
4
|
50
|
|
|
|
14
|
if ($rest) { |
653
|
0
|
|
|
|
|
0
|
$self->parse_err("trailing: $rest"); |
654
|
|
|
|
|
|
|
} |
655
|
4
|
|
|
|
|
44
|
return Data::Stag->nodify($te); |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub parse_term_expression_with_rest { |
659
|
15
|
|
|
15
|
0
|
20
|
my $self = shift; |
660
|
15
|
|
|
|
|
20
|
my $expr = shift; |
661
|
15
|
100
|
|
|
|
99
|
if ($expr =~ /^\((.*)/) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
662
|
1
|
|
|
|
|
8
|
my $genus_expr = $1; |
663
|
1
|
|
|
|
|
5
|
my ($genus,$diff_expr) = $self->parse_term_expression_with_rest($genus_expr); |
664
|
1
|
|
|
|
|
5
|
my $next_c = substr($diff_expr,0,1,''); |
665
|
1
|
50
|
|
|
|
5
|
if ($next_c eq ')') { |
666
|
1
|
|
|
|
|
5
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr); |
667
|
1
|
|
|
|
|
5
|
my $stag = [intersection=>[ |
668
|
|
|
|
|
|
|
[link=>[[to=>[$genus]]]], |
669
|
|
|
|
|
|
|
@$diffs]]; |
670
|
1
|
|
|
|
|
5
|
return ($stag,$rest); |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
else { |
674
|
0
|
|
|
|
|
0
|
$self->parse_err("expected ) at end of genus. Got: $next_c followed by $diff_expr"); |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
elsif ($expr =~ /^([\w\:\.\-]+)\^(.*)/) { |
678
|
7
|
|
|
|
|
21
|
my $genus = $1; |
679
|
7
|
|
|
|
|
19
|
my $diff_expr = $2; |
680
|
7
|
|
|
|
|
22
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr); |
681
|
7
|
|
|
|
|
30
|
my $stag = [intersection=>[ |
682
|
|
|
|
|
|
|
[link=>[[to=>$genus]]], |
683
|
|
|
|
|
|
|
@$diffs]]; |
684
|
7
|
|
|
|
|
24
|
return ($stag,$rest); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
elsif ($expr =~ /^([\w\:\.\-]+)(.*)/) { |
687
|
7
|
|
|
|
|
28
|
return ($1,$2); |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
else { |
690
|
0
|
|
|
|
|
0
|
$self->parse_err("could not parse: $expr"); |
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub parse_differentia { |
695
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
696
|
0
|
|
|
|
|
0
|
my $expr = shift; |
697
|
0
|
|
|
|
|
0
|
my ($diffs,$rest) = $self->parse_differentia_with_rest($expr); |
698
|
0
|
0
|
|
|
|
0
|
if ($rest) { |
699
|
0
|
|
|
|
|
0
|
$self->parse_err("trailing: $rest"); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
0
|
|
|
|
|
0
|
Data::Stag->nodify($_) foreach @$diffs; |
703
|
0
|
|
|
|
|
0
|
return $diffs; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub parse_differentia_with_rest { |
707
|
10
|
|
|
10
|
0
|
16
|
my $self = shift; |
708
|
10
|
|
|
|
|
20
|
my $expr = shift; |
709
|
10
|
50
|
|
|
|
42
|
if ($expr =~ /^(.+?)\((.*)/) { |
710
|
10
|
|
|
|
|
20
|
my $rel = $1; |
711
|
10
|
|
|
|
|
17
|
my $term_expr = $2; |
712
|
10
|
|
|
|
|
28
|
my ($term,$rest) = $self->parse_term_expression_with_rest($term_expr); |
713
|
10
|
100
|
|
|
|
56
|
my $diff = [link=>[[type=>$rel], |
714
|
|
|
|
|
|
|
[to=>(ref($term) ? [$term] : $term)]]]; |
715
|
10
|
50
|
|
|
|
24
|
if ($rest) { |
716
|
10
|
|
|
|
|
25
|
my $next_c = substr($rest,0,1,''); |
717
|
10
|
50
|
|
|
|
24
|
if ($next_c eq ')') { |
718
|
10
|
|
|
|
|
19
|
$next_c = substr($rest,0,1); |
719
|
10
|
100
|
66
|
|
|
64
|
if ($next_c eq '^' || $next_c eq ',') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
720
|
2
|
|
|
|
|
12
|
my ($next_diffs,$next_rest) = $self->parse_differentia_with_rest(substr($rest,1)); |
721
|
2
|
50
|
|
|
|
8
|
if (!$next_diffs) { |
722
|
0
|
|
|
|
|
0
|
$self->parse_err("problem parsing differentia: $rest. Expr: $term_expr"); |
723
|
0
|
|
|
|
|
0
|
return ([$diff],$rest); |
724
|
|
|
|
|
|
|
} |
725
|
2
|
|
|
|
|
12
|
return ([$diff,@$next_diffs],$next_rest); |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
elsif ($next_c eq '') { |
728
|
4
|
|
|
|
|
16
|
return ([$diff],$rest); |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
elsif ($next_c eq ')') { |
731
|
4
|
|
|
|
|
16
|
return ([$diff],$rest); |
732
|
|
|
|
|
|
|
} |
733
|
|
|
|
|
|
|
else { |
734
|
0
|
|
|
|
|
|
$self->parse_err("expected ^ or ) in differentium. Got: $next_c followed_by: $rest. Expr: $term_expr"); |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
else { |
738
|
0
|
|
|
|
|
|
$self->parse_err("expected ) to close differentium. Got: $next_c followed by: $rest. Expr: $term_expr"); |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
else { |
742
|
0
|
|
|
|
|
|
$self->parse_err("expected ). Got: \"\". Expr: $term_expr"); |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
else { |
746
|
0
|
|
|
|
|
|
$self->parse_err("expect relation(...). Got: $expr. "); |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# lifted from CGI::Util |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
our $EBCDIC = "\t" ne "\011"; |
753
|
|
|
|
|
|
|
# (ord('^') == 95) for codepage 1047 as on os390, vmesa |
754
|
|
|
|
|
|
|
our @E2A = ( |
755
|
|
|
|
|
|
|
0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15, |
756
|
|
|
|
|
|
|
16, 17, 18, 19,157, 10, 8,135, 24, 25,146,143, 28, 29, 30, 31, |
757
|
|
|
|
|
|
|
128,129,130,131,132,133, 23, 27,136,137,138,139,140, 5, 6, 7, |
758
|
|
|
|
|
|
|
144,145, 22,147,148,149,150, 4,152,153,154,155, 20, 21,158, 26, |
759
|
|
|
|
|
|
|
32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124, |
760
|
|
|
|
|
|
|
38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94, |
761
|
|
|
|
|
|
|
45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63, |
762
|
|
|
|
|
|
|
248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34, |
763
|
|
|
|
|
|
|
216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177, |
764
|
|
|
|
|
|
|
176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164, |
765
|
|
|
|
|
|
|
181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174, |
766
|
|
|
|
|
|
|
172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215, |
767
|
|
|
|
|
|
|
123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245, |
768
|
|
|
|
|
|
|
125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255, |
769
|
|
|
|
|
|
|
92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, |
770
|
|
|
|
|
|
|
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159 |
771
|
|
|
|
|
|
|
); |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
sub escape { |
774
|
0
|
0
|
0
|
0
|
0
|
|
shift() if @_ > 1 and ( ref($_[0]) || (defined $_[1] && $_[0] eq $CGI::DefaultClass)); |
|
|
|
0
|
|
|
|
|
775
|
0
|
|
|
|
|
|
my $toencode = shift; |
776
|
0
|
0
|
|
|
|
|
return undef unless defined($toencode); |
777
|
|
|
|
|
|
|
# force bytes while preserving backward compatibility -- dankogai |
778
|
0
|
|
|
|
|
|
$toencode = pack("C*", unpack("C*", $toencode)); |
779
|
0
|
0
|
|
|
|
|
if ($EBCDIC) { |
780
|
0
|
|
|
|
|
|
$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg; |
|
0
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
} else { |
782
|
0
|
|
|
|
|
|
$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; |
|
0
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
} |
784
|
0
|
|
|
|
|
|
return $toencode; |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub simple_escape { |
788
|
0
|
0
|
|
0
|
0
|
|
return unless defined(my $toencode = shift); |
789
|
0
|
|
|
|
|
|
$toencode =~ s{&}{&}gso; |
790
|
0
|
|
|
|
|
|
$toencode =~ s{<}{<}gso; |
791
|
0
|
|
|
|
|
|
$toencode =~ s{>}{>}gso; |
792
|
0
|
|
|
|
|
|
$toencode =~ s{\"}{"}gso; |
793
|
|
|
|
|
|
|
# Doesn't work. Can't work. forget it. |
794
|
|
|
|
|
|
|
# $toencode =~ s{\x8b}{}gso; |
795
|
|
|
|
|
|
|
# $toencode =~ s{\x9b}{}gso; |
796
|
0
|
|
|
|
|
|
$toencode; |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
1; |