line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: go_assoc_parser.pm,v 1.22 2009/08/17 00:46:16 cmungall Exp $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# see also - http://www.geneontology.org |
5
|
|
|
|
|
|
|
# - http://www.godatabase.org/dev |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package GO::Parsers::go_assoc_parser; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
GO::Parsers::go_assoc_parser - syntax parsing of GO gene-association flat files |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
do not use this class directly; use L |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
This generates Stag/XML event streams from GO association files. |
23
|
|
|
|
|
|
|
Examples of these files can be found at http://www.geneontology.org, |
24
|
|
|
|
|
|
|
an example of lines from an association file: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
SGD S0004660 AAC1 GO:0005743 SGD:12031|PMID:2167309 TAS C ADP/ATP translocator YMR056C gene taxon:4932 20010118 |
27
|
|
|
|
|
|
|
SGD S0004660 AAC1 GO:0006854 SGD:12031|PMID:2167309 IDA P ADP/ATP translocator YMR056C gene taxon:4932 20010118 |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
See L |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
See |
32
|
|
|
|
|
|
|
L |
33
|
|
|
|
|
|
|
For the DTD of the event stream that is generated |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
The following stag-schema describes the events that are generated in |
36
|
|
|
|
|
|
|
parsing an assoc file: |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
(assocs |
39
|
|
|
|
|
|
|
(dbset+ |
40
|
|
|
|
|
|
|
(proddb "s") |
41
|
|
|
|
|
|
|
(prod+ |
42
|
|
|
|
|
|
|
(prodacc "s") |
43
|
|
|
|
|
|
|
(prodsymbol "s") |
44
|
|
|
|
|
|
|
(prodtype "s") |
45
|
|
|
|
|
|
|
(prodtaxa "i") |
46
|
|
|
|
|
|
|
(assoc+ |
47
|
|
|
|
|
|
|
(assocdate "i") |
48
|
|
|
|
|
|
|
(source_db "s") |
49
|
|
|
|
|
|
|
(termacc "s") |
50
|
|
|
|
|
|
|
(is_not "i") |
51
|
|
|
|
|
|
|
(aspect "s") |
52
|
|
|
|
|
|
|
(evidence+ |
53
|
|
|
|
|
|
|
(evcode "s") |
54
|
|
|
|
|
|
|
(ref "s")))))) |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
2
|
|
|
2
|
|
13
|
use Exporter; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
126
|
|
59
|
2
|
|
|
2
|
|
12
|
use base qw(GO::Parsers::base_parser Exporter); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
610
|
|
60
|
|
|
|
|
|
|
#use Text::Balanced qw(extract_bracketed); |
61
|
2
|
|
|
2
|
|
551
|
use GO::Parsers::ParserEventNames; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
572
|
|
62
|
2
|
|
|
2
|
|
12
|
use GO::Parser; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
43
|
|
63
|
|
|
|
|
|
|
|
64
|
2
|
|
|
2
|
|
16
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
122
|
|
65
|
2
|
|
|
2
|
|
10
|
use FileHandle; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
16
|
|
66
|
2
|
|
|
2
|
|
1137
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
6683
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub dtd { |
69
|
0
|
|
|
0
|
0
|
0
|
'go_assoc-parser-events.dtd'; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub ev_filter { |
73
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
74
|
2
|
50
|
|
|
|
7
|
$self->{_ev_filter} = shift if @_; |
75
|
2
|
|
|
|
|
4
|
return $self->{_ev_filter}; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
sub skip_uncurated { |
81
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
82
|
2
|
50
|
|
|
|
6
|
$self->{_skip_uncurated} = shift if @_; |
83
|
2
|
|
|
|
|
6
|
return $self->{_skip_uncurated}; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub parse_fh { |
87
|
2
|
|
|
2
|
0
|
321
|
my ($self, $fh) = @_; |
88
|
2
|
|
|
|
|
6
|
my $file = $self->file; |
89
|
|
|
|
|
|
|
|
90
|
2
|
|
|
|
|
12
|
my $product; |
91
|
|
|
|
|
|
|
my $term; |
92
|
0
|
|
|
|
|
0
|
my $assoc; |
93
|
2
|
|
|
|
|
5
|
my $line_no = 0; |
94
|
|
|
|
|
|
|
|
95
|
2
|
|
|
|
|
3
|
my $obo_parser; # an OBO parser may be required for parsing the PROPERTIES column |
96
|
|
|
|
|
|
|
|
97
|
2
|
|
|
|
|
6
|
my @COLS = (0..16); |
98
|
2
|
|
|
|
|
7
|
my ($PRODDB, |
99
|
|
|
|
|
|
|
$PRODACC, |
100
|
|
|
|
|
|
|
$PRODSYMBOL, |
101
|
|
|
|
|
|
|
$QUALIFIER, |
102
|
|
|
|
|
|
|
$TERMACC, |
103
|
|
|
|
|
|
|
$REF, |
104
|
|
|
|
|
|
|
$EVCODE, |
105
|
|
|
|
|
|
|
$WITH, |
106
|
|
|
|
|
|
|
$ASPECT, |
107
|
|
|
|
|
|
|
$PRODNAME, |
108
|
|
|
|
|
|
|
$PRODSYN, |
109
|
|
|
|
|
|
|
$PRODTYPE, |
110
|
|
|
|
|
|
|
$PRODTAXA, |
111
|
|
|
|
|
|
|
$ASSOCDATE, |
112
|
|
|
|
|
|
|
$SOURCE_DB, |
113
|
|
|
|
|
|
|
$PROPERTIES, # GAF2.0 |
114
|
|
|
|
|
|
|
$ISOFORM, # GAF2.0 |
115
|
|
|
|
|
|
|
) = @COLS; |
116
|
|
|
|
|
|
|
|
117
|
2
|
|
|
|
|
4
|
my @mandatory_cols = ($PRODDB, $PRODACC, $TERMACC, $EVCODE); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# |
120
|
|
|
|
|
|
|
# |
121
|
|
|
|
|
|
|
# fb |
122
|
|
|
|
|
|
|
# |
123
|
|
|
|
|
|
|
# FBgn0027087> |
124
|
|
|
|
|
|
|
# Aats-his> |
125
|
|
|
|
|
|
|
# gene> |
126
|
|
|
|
|
|
|
# 7227> |
127
|
|
|
|
|
|
|
# ...> |
128
|
|
|
|
|
|
|
# |
129
|
|
|
|
|
|
|
# GO:0004821 |
130
|
|
|
|
|
|
|
# |
131
|
|
|
|
|
|
|
# NAS |
132
|
|
|
|
|
|
|
# [FB:FBrf0105495] |
133
|
|
|
|
|
|
|
# ... |
134
|
|
|
|
|
|
|
# |
135
|
|
|
|
|
|
|
# |
136
|
|
|
|
|
|
|
# |
137
|
|
|
|
|
|
|
# |
138
|
|
|
|
|
|
|
# |
139
|
|
|
|
|
|
|
|
140
|
2
|
|
|
|
|
20
|
$self->start_event(ASSOCS); |
141
|
2
|
|
|
|
|
118
|
$self->fire_source_event($file); |
142
|
|
|
|
|
|
|
|
143
|
2
|
|
|
|
|
4
|
my @last = map {''} @COLS; |
|
34
|
|
|
|
|
39
|
|
144
|
|
|
|
|
|
|
|
145
|
2
|
|
|
|
|
7
|
my $skip_uncurated = $self->skip_uncurated; |
146
|
2
|
|
|
|
|
14
|
my $ev = $self->ev_filter; |
147
|
2
|
|
|
|
|
2
|
my %evyes = (); |
148
|
2
|
|
|
|
|
4
|
my %evno = (); |
149
|
2
|
50
|
|
|
|
5
|
if ($ev) { |
150
|
0
|
0
|
|
|
|
0
|
if ($ev =~ /\!(.*)/) { |
151
|
0
|
|
|
|
|
0
|
$evno{$1} = 1; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
else { |
154
|
0
|
|
|
|
|
0
|
$evyes{$ev} = 1; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
2
|
|
|
|
|
3
|
my $taxa_warning; |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
my $line; |
161
|
0
|
|
|
|
|
0
|
my @vals; |
162
|
2
|
|
|
|
|
2
|
my @stack = (); |
163
|
2
|
|
|
|
|
67
|
while (<$fh>) { |
164
|
|
|
|
|
|
|
# UNICODE causes problems for XML and DB |
165
|
|
|
|
|
|
|
# delete 8th bit |
166
|
204
|
|
|
|
|
6222
|
tr [\200-\377] |
167
|
|
|
|
|
|
|
[\000-\177]; # see 'man perlop', section on tr/ |
168
|
|
|
|
|
|
|
# weird ascii characters should be excluded |
169
|
204
|
|
|
|
|
454
|
tr/\0-\10//d; # remove weird characters; ascii 0-8 |
170
|
|
|
|
|
|
|
# preserve \11 (9 - tab) and \12 (10-linefeed) |
171
|
204
|
|
|
|
|
463
|
tr/\13\14//d; # remove weird characters; 11,12 |
172
|
|
|
|
|
|
|
# preserve \15 (13 - carriage return) |
173
|
204
|
|
|
|
|
426
|
tr/\16-\37//d; # remove 14-31 (all rest before space) |
174
|
204
|
|
|
|
|
872
|
tr/\177//d; # remove DEL character |
175
|
|
|
|
|
|
|
|
176
|
204
|
|
|
|
|
243
|
$line_no++; |
177
|
204
|
|
|
|
|
335
|
chomp; |
178
|
204
|
50
|
|
|
|
609
|
if (/^\!/) { |
179
|
0
|
|
|
|
|
0
|
next; |
180
|
|
|
|
|
|
|
} |
181
|
204
|
100
|
|
|
|
485
|
if (!$_) { |
182
|
2
|
|
|
|
|
43
|
next; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
# some files use string NULL - we just use empty string as null |
185
|
202
|
|
|
|
|
424
|
s/\\NULL//g; |
186
|
202
|
|
|
|
|
306
|
$line = $_; |
187
|
|
|
|
|
|
|
|
188
|
202
|
|
|
|
|
673
|
$self->line($line); |
189
|
202
|
|
|
|
|
1419
|
$self->line_no($line_no); |
190
|
|
|
|
|
|
|
|
191
|
202
|
|
|
|
|
3908
|
@vals = split(/\t/, $line); |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# normalise columns, and set $h |
194
|
202
|
|
|
|
|
763
|
for (my $i=0; $i<@COLS;$i++) { |
195
|
3434
|
100
|
|
|
|
8186
|
if (defined($vals[$i])) { |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# remove trailing and |
198
|
|
|
|
|
|
|
# leading blanks |
199
|
3030
|
|
|
|
|
8850
|
$vals[$i] =~ s/^\s*//; |
200
|
3030
|
|
|
|
|
15300
|
$vals[$i] =~ s/\s*$//; |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# sometimes - is used for null |
203
|
3030
|
|
|
|
|
3971
|
$vals[$i] =~ s/^\-$//; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# TAIR seem to be |
206
|
|
|
|
|
|
|
# doing a mysql dump... |
207
|
3030
|
|
|
|
|
11256
|
$vals[$i] =~ s/\\NULL//; |
208
|
|
|
|
|
|
|
} |
209
|
3434
|
100
|
100
|
|
|
17142
|
if (!defined($vals[$i]) || |
210
|
|
|
|
|
|
|
length ($vals[$i]) == 0) { |
211
|
|
|
|
|
|
|
|
212
|
784
|
50
|
|
|
|
1044
|
if ( grep {$i == $_} @mandatory_cols) { |
|
3136
|
|
|
|
|
6286
|
|
213
|
0
|
|
|
|
|
0
|
$self->parse_err("no value defined for col ".($i+1)." in line_no $line_no line\n$line\n"); |
214
|
0
|
|
|
|
|
0
|
next; |
215
|
|
|
|
|
|
|
} |
216
|
784
|
|
|
|
|
2243
|
$vals[$i] = ''; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
202
|
|
|
|
|
766
|
my ($proddb, |
221
|
|
|
|
|
|
|
$prodacc, |
222
|
|
|
|
|
|
|
$prodsymbol, |
223
|
|
|
|
|
|
|
$qualifier, |
224
|
|
|
|
|
|
|
$termacc, |
225
|
|
|
|
|
|
|
$ref, |
226
|
|
|
|
|
|
|
$evcode, |
227
|
|
|
|
|
|
|
$with, |
228
|
|
|
|
|
|
|
$aspect, |
229
|
|
|
|
|
|
|
$prodname, |
230
|
|
|
|
|
|
|
$prodsyn, |
231
|
|
|
|
|
|
|
$prodtype, |
232
|
|
|
|
|
|
|
$prodtaxa, |
233
|
|
|
|
|
|
|
$assocdate, |
234
|
|
|
|
|
|
|
$source_db, |
235
|
|
|
|
|
|
|
$properties, # GAF2.0 |
236
|
|
|
|
|
|
|
$isoform) = @vals; # GAF2.0 |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# backward compatibility GAF2.0 -> GAF1.0 |
239
|
202
|
50
|
|
|
|
398
|
$properties = '' unless defined $properties; |
240
|
202
|
50
|
|
|
|
530
|
$isoform = '' unless defined $isoform; |
241
|
|
|
|
|
|
|
|
242
|
202
|
50
|
|
|
|
347
|
$assocdate = '' unless defined $assocdate; |
243
|
202
|
50
|
|
|
|
343
|
$source_db = '' unless defined $source_db; |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# if (!grep {$aspect eq $_} qw(P C F)) { |
246
|
|
|
|
|
|
|
# $self->parse_err("Aspect column says: \"$aspect\" - aspect must be P/C/F"); |
247
|
|
|
|
|
|
|
# next; |
248
|
|
|
|
|
|
|
# } |
249
|
202
|
100
|
|
|
|
755
|
if ($self->acc_not_found($termacc)) { |
250
|
1
|
|
|
|
|
17
|
$self->parse_err("No such ID: $termacc"); |
251
|
1
|
|
|
|
|
646
|
next; |
252
|
|
|
|
|
|
|
} |
253
|
201
|
50
|
|
|
|
727
|
if (!($ref =~ /:/)) { |
254
|
|
|
|
|
|
|
# ref does not have a prefix - we assume it is medline |
255
|
0
|
|
|
|
|
0
|
$ref = "medline:$ref"; |
256
|
|
|
|
|
|
|
} |
257
|
201
|
50
|
33
|
|
|
465
|
if ($skip_uncurated && $evcode eq "IEA") { |
258
|
0
|
|
|
|
|
0
|
next; |
259
|
|
|
|
|
|
|
} |
260
|
201
|
50
|
33
|
|
|
442
|
if (%evyes && !$evyes{$evcode}) { |
261
|
0
|
|
|
|
|
0
|
next; |
262
|
|
|
|
|
|
|
} |
263
|
201
|
50
|
33
|
|
|
510
|
if (%evno && $evno{$evcode}) { |
264
|
0
|
|
|
|
|
0
|
next; |
265
|
|
|
|
|
|
|
} |
266
|
201
|
|
|
|
|
638
|
my @prodtaxa_ids = split(/\|/,$prodtaxa); |
267
|
201
|
|
|
|
|
426
|
@prodtaxa_ids = |
268
|
|
|
|
|
|
|
map { |
269
|
201
|
|
|
|
|
429
|
s/taxonid://gi; |
270
|
201
|
|
|
|
|
585
|
s/taxon://gi; |
271
|
201
|
50
|
|
|
|
661
|
if ($_ !~ /\d+/) { |
272
|
0
|
0
|
|
|
|
0
|
if (!$taxa_warning) { |
273
|
0
|
|
|
|
|
0
|
$taxa_warning = 1; |
274
|
0
|
|
|
|
|
0
|
$self->parse_err("No NCBI TAXON wrong fmt: $_"); |
275
|
0
|
|
|
|
|
0
|
$_ = ""; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
201
|
|
|
|
|
678
|
$_; |
279
|
|
|
|
|
|
|
} @prodtaxa_ids; |
280
|
201
|
|
|
|
|
310
|
@prodtaxa_ids = grep {$_} @prodtaxa_ids; |
|
201
|
|
|
|
|
389
|
|
281
|
201
|
|
|
|
|
317
|
my $main_taxon_id = shift @prodtaxa_ids; |
282
|
201
|
50
|
|
|
|
372
|
if (!$main_taxon_id) { |
283
|
0
|
0
|
|
|
|
0
|
if (!$taxa_warning) { |
284
|
0
|
|
|
|
|
0
|
$taxa_warning = 1; |
285
|
0
|
|
|
|
|
0
|
$self->parse_err("No NCBI TAXON specified; ignoring"); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# check for new element; shift a level |
291
|
201
|
|
|
|
|
461
|
my $new_dbset = $proddb ne $last[$PRODDB]; |
292
|
201
|
|
66
|
|
|
828
|
my $new_prodacc = |
293
|
|
|
|
|
|
|
$prodacc ne $last[$PRODACC] || $new_dbset; |
294
|
201
|
|
33
|
|
|
1276
|
my $new_assoc = |
295
|
|
|
|
|
|
|
($termacc ne $last[$TERMACC]) || |
296
|
|
|
|
|
|
|
$new_prodacc || |
297
|
|
|
|
|
|
|
($qualifier ne $last[$QUALIFIER]) || |
298
|
|
|
|
|
|
|
($source_db ne $last[$SOURCE_DB]) || |
299
|
|
|
|
|
|
|
($assocdate ne $last[$ASSOCDATE]) || |
300
|
|
|
|
|
|
|
($isoform ne $last[$ISOFORM]); |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#if (!$new_prodacc && ($prodtaxa ne $last[$PRODTAXA])) { |
303
|
|
|
|
|
|
|
## Before we declare an error, let's make sure that we're not |
304
|
|
|
|
|
|
|
## talking about secondary taxons... |
305
|
201
|
|
|
|
|
219
|
my $chopped_taxa = $prodtaxa; |
306
|
201
|
|
|
|
|
291
|
my $chopped_prev_taxa = $last[$PRODTAXA]; |
307
|
201
|
|
|
|
|
322
|
$chopped_taxa =~ s/\|.+//; |
308
|
201
|
|
|
|
|
252
|
$chopped_prev_taxa =~ s/\|.+//; |
309
|
201
|
50
|
66
|
|
|
784
|
if (!$new_prodacc && ($chopped_taxa ne $chopped_prev_taxa)) { |
310
|
|
|
|
|
|
|
# two identical gene products with the same taxon |
311
|
|
|
|
|
|
|
# IGNORE! |
312
|
0
|
|
|
|
|
0
|
$self->parse_err("different taxa ($prodtaxa, $last[$PRODTAXA]) for same product $prodacc"); |
313
|
0
|
|
|
|
|
0
|
next; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# close finished events |
317
|
201
|
100
|
|
|
|
402
|
if ($new_assoc) { |
318
|
173
|
100
|
|
|
|
847
|
$self->pop_stack_to_depth(3) if $last[$TERMACC]; |
319
|
|
|
|
|
|
|
# $self->end_event("assoc") if $last[$TERMACC]; |
320
|
|
|
|
|
|
|
} |
321
|
201
|
100
|
|
|
|
14320
|
if ($new_prodacc) { |
322
|
33
|
100
|
|
|
|
144
|
$self->pop_stack_to_depth(2) if $last[$PRODACC]; |
323
|
|
|
|
|
|
|
# $self->end_event("prod") if $last[$PRODACC]; |
324
|
|
|
|
|
|
|
} |
325
|
201
|
100
|
|
|
|
2206
|
if ($new_dbset) { |
326
|
2
|
50
|
|
|
|
7
|
$self->pop_stack_to_depth(1) if $last[$PRODDB]; |
327
|
|
|
|
|
|
|
# $self->end_event("dbset") if $last[$PRODDB]; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
# open new events |
330
|
201
|
100
|
|
|
|
424
|
if ($new_dbset) { |
331
|
2
|
|
|
|
|
9
|
$self->start_event(DBSET); |
332
|
2
|
|
|
|
|
76
|
$self->event(PRODDB, $proddb); |
333
|
|
|
|
|
|
|
} |
334
|
201
|
100
|
|
|
|
503
|
if ($new_prodacc) { |
335
|
33
|
|
|
|
|
131
|
$self->start_event(PROD); |
336
|
33
|
|
|
|
|
2268
|
$self->event(PRODACC, $prodacc); |
337
|
33
|
|
|
|
|
4466
|
$self->event(PRODSYMBOL, $prodsymbol); |
338
|
33
|
50
|
|
|
|
3666
|
$self->event(PRODNAME, $prodname) if $prodname; |
339
|
33
|
50
|
|
|
|
2958
|
$self->event(PRODTYPE, $prodtype) if $prodtype; |
340
|
33
|
50
|
|
|
|
2449
|
if ($main_taxon_id) { |
341
|
33
|
|
|
|
|
109
|
$self->event(PRODTAXA, $main_taxon_id); |
342
|
|
|
|
|
|
|
} |
343
|
33
|
|
|
|
|
2614
|
my $syn = $prodsyn; |
344
|
33
|
50
|
|
|
|
89
|
if ($syn) { |
345
|
33
|
|
|
|
|
233
|
my @syns = split(/\|/, $syn); |
346
|
33
|
|
|
|
|
88
|
my %ucheck = (); |
347
|
|
|
|
|
|
|
@syns = grep { |
348
|
33
|
100
|
|
|
|
74
|
if ($ucheck{lc($_)}) { |
|
214
|
|
|
|
|
428
|
|
349
|
31
|
|
|
|
|
92
|
0; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
else { |
352
|
183
|
|
|
|
|
378
|
$ucheck{lc($_)} = 1; |
353
|
183
|
|
|
|
|
304
|
1; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
} @syns; |
356
|
183
|
|
|
|
|
11813
|
map { |
357
|
33
|
|
|
|
|
69
|
$self->event(PRODSYN, $_); |
358
|
|
|
|
|
|
|
} @syns; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
201
|
100
|
|
|
|
3045
|
if ($new_assoc) { |
362
|
173
|
|
|
|
|
224
|
my $assocdate = $assocdate; |
363
|
173
|
|
|
|
|
497
|
$self->start_event(ASSOC); |
364
|
173
|
50
|
|
|
|
8845
|
if ($assocdate) { |
365
|
173
|
50
|
33
|
|
|
773
|
if ($assocdate && length($assocdate) == 8) { |
366
|
173
|
|
|
|
|
552
|
$self->event(ASSOCDATE, $assocdate); |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
else { |
369
|
0
|
|
|
|
|
0
|
$self->parse_err("ASSOCDATE wrong format (must be YYYYMMDD): $assocdate"); |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} |
372
|
173
|
50
|
|
|
|
16111
|
$self->event(SOURCE_DB, $source_db) |
373
|
|
|
|
|
|
|
if $source_db; |
374
|
173
|
|
|
|
|
13653
|
$self->event(TERMACC, $termacc); |
375
|
173
|
|
50
|
|
|
13583
|
my @quals = map lc,split(/[\|]\s*/,$qualifier || ''); |
376
|
173
|
|
|
|
|
336
|
my $is_not = grep {/^not$/i} @quals; |
|
0
|
|
|
|
|
0
|
|
377
|
173
|
|
50
|
|
|
910
|
$self->event(IS_NOT, $is_not || '0'); |
378
|
173
|
|
|
|
|
14196
|
$self->event(QUALIFIER, $_) foreach @quals; |
379
|
173
|
|
|
|
|
323
|
$self->event(SPECIES_QUALIFIER, $_) foreach @prodtaxa_ids; # all REMAINING (after "|') tax ids are qualifiers |
380
|
173
|
|
|
|
|
605
|
$self->event(ASPECT, $aspect); |
381
|
173
|
50
|
|
|
|
13846
|
if ($isoform) { |
382
|
0
|
|
|
|
|
0
|
$self->event(ISOFORM, $isoform); |
383
|
|
|
|
|
|
|
} |
384
|
173
|
50
|
|
|
|
852
|
if ($properties) { |
385
|
0
|
|
|
|
|
0
|
my @properties_list = split(/\|/,$properties); |
386
|
0
|
0
|
|
|
|
0
|
if (!$obo_parser) { |
387
|
0
|
|
|
|
|
0
|
$obo_parser = GO::Parser->new({format=>'obo_text'}); |
388
|
|
|
|
|
|
|
} |
389
|
0
|
|
|
|
|
0
|
foreach my $p (@properties_list) { |
390
|
0
|
|
|
|
|
0
|
my $diffs = $obo_parser->parse_differentia($p); |
391
|
0
|
|
|
|
|
0
|
$self->event(PROPERTIES, $diffs); |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
} |
395
|
201
|
|
|
|
|
537
|
$self->start_event(EVIDENCE); |
396
|
201
|
|
|
|
|
5117
|
$self->event(EVCODE, $evcode); |
397
|
201
|
100
|
|
|
|
15630
|
if ($with) { |
398
|
|
|
|
|
|
|
# TODO: discriminate between pipes and commas |
399
|
|
|
|
|
|
|
# (semicolon is there for legacy reasons - check if this can be removed) |
400
|
24
|
|
|
|
|
173
|
my @with_accs = split(/\s*[\|\;\,]\s*/, $with); |
401
|
|
|
|
|
|
|
$self->event(WITH, $_) |
402
|
24
|
|
|
|
|
183
|
foreach (grep (/:/, @with_accs)); |
403
|
|
|
|
|
|
|
# we have found errors where the : was left out, this just skips |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# no longer checks for cardinality errors |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
} |
408
|
201
|
|
|
|
|
3761
|
my @refs = split(/\|/, $ref); |
409
|
273
|
|
|
|
|
5773
|
map { |
410
|
201
|
|
|
|
|
642
|
$self->event(REF, $_) |
411
|
|
|
|
|
|
|
} @refs; |
412
|
201
|
|
|
|
|
17031
|
$self->end_event(EVIDENCE); |
413
|
|
|
|
|
|
|
#@last = @vals; |
414
|
201
|
|
|
|
|
12278
|
@last = |
415
|
|
|
|
|
|
|
( |
416
|
|
|
|
|
|
|
$proddb, |
417
|
|
|
|
|
|
|
$prodacc, |
418
|
|
|
|
|
|
|
$prodsymbol, |
419
|
|
|
|
|
|
|
$qualifier, |
420
|
|
|
|
|
|
|
$termacc, |
421
|
|
|
|
|
|
|
$ref, |
422
|
|
|
|
|
|
|
$evcode, |
423
|
|
|
|
|
|
|
$with, |
424
|
|
|
|
|
|
|
$aspect, |
425
|
|
|
|
|
|
|
$prodname, |
426
|
|
|
|
|
|
|
$prodsyn, |
427
|
|
|
|
|
|
|
$prodtype, |
428
|
|
|
|
|
|
|
$prodtaxa, |
429
|
|
|
|
|
|
|
$assocdate, |
430
|
|
|
|
|
|
|
$source_db, |
431
|
|
|
|
|
|
|
$properties, |
432
|
|
|
|
|
|
|
$isoform, |
433
|
|
|
|
|
|
|
); |
434
|
|
|
|
|
|
|
} |
435
|
2
|
|
|
|
|
18
|
$fh->close; |
436
|
|
|
|
|
|
|
|
437
|
2
|
|
|
|
|
111
|
$self->pop_stack_to_depth(0); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
1; |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# 2.864 orig/handler |
444
|
|
|
|
|
|
|
# 2.849 opt/handler |
445
|
|
|
|
|
|
|
# 1.986 orig/xml |
446
|
|
|
|
|
|
|
# 1.310 opt/xml |