File Coverage

blib/lib/GOBO/Parsers/OBOParser.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				package GOBO::Parsers::OBOParser;
2	1	1	55749	use Moose;
	0
	0
3				use strict;
4				extends 'GOBO::Parsers::Parser';
5				with 'GOBO::Parsers::GraphParser';
6
7				use GOBO::Graph;
8				=cut
9				use GOBO::Node;
10				use GOBO::InstanceNode;
11				use GOBO::Synonym;
12				use GOBO::Subset;
13				use GOBO::Formula;
14				use GOBO::LinkStatement;
15				use GOBO::LiteralStatement;
16				use GOBO::ClassExpression;
17				use GOBO::ClassExpression::Union;
18				=cut
19				use Data::Dumper;
20
				# Namespace read from the header's 'default-namespace' tag; parse_body assigns it to any node that has no namespace of its own.
21				has default_namespace => (is=>'rw', isa=>'Str');
				# Value of the header's 'format-version' tag, stored by parse_header.
22				has format_version => (is=>'rw', isa=>'Str');
23
24
# Parse the header (everything before the first "[Stanza]" line) of an OBO
# file.
#
# Lines are pulled from $self->next_line. Each "tag: value" pair is filtered
# through the header parser options (ignore_all / ignore / parse_only), if any
# were set, and then stored either on the parser (default-namespace,
# format-version) or on $self->graph (subsetdef, date, remark, data-version,
# and any other tag as a generic property value).
#
# When the first stanza line is met it is pushed back with unshift_line,
# parsed_header is set to 1 and the sub returns. Returns nothing.
sub parse_header {
    my $self = shift;
    my $g    = $self->graph;

    # Default filter: every header tag is accepted.
    my $header_check = sub { return 1; };

    if ($self->has_header_parser_options) {
        my $opts = $self->header_parser_options;
        if ($opts->{ignore_all}) {
            $header_check = sub { return; };
        }
        elsif ($opts->{ignore}) {
            my $arr = $opts->{ignore};
            $header_check = sub {
                my $t = shift;
                return 1 unless grep { $t eq $_ } @$arr;
                return;
            };
        }
        elsif ($opts->{parse_only}) {
            # Guarded explicitly: dereferencing a missing parse_only list
            # would die under strict on the first header line.
            my $arr = $opts->{parse_only};
            $header_check = sub {
                my $t = shift;
                return 1 if grep { $t eq $_ } @$arr;
                return;
            };
        }
    }

    # NOTE: deliberately a global assignment (not local), as in the original
    # code; the setting therefore stays in force after this sub returns.
    $/ = "\n";
    while ($_ = $self->next_line) {
        next unless /\S/;

        if (/^\[/) {
            # First stanza reached: hand the line back and flag the header
            # as done.
            $self->unshift_line($_);
            $self->parsed_header(1);
            return;
        }

        if (/^(\S+):\s*(.*?)$/) {
            # Copy the captures before calling any other code.
            my ($t, $v) = ($1, $2);
            next unless $header_check->($t);

            if ($t eq 'default-namespace') {
                $self->default_namespace($v);
            }
            elsif ($t eq 'subsetdef') {
                # subsetdef: gosubset_prok "Prokaryotic GO subset"
                if ($v =~ /^(\S+)\s+\"(.*)\"/) {
                    my ($id, $label) = ($1, $2);
                    my $ss = GOBO::Subset->new(id    => $id,
                                               label => $label);
                    $g->subset_index->{$id} = $ss;
                }
                else {
                    warn $v;
                }
            }
            elsif ($t eq 'date') {
                $g->date($v);
            }
            elsif ($t eq 'remark') {
                $g->comment($v);
            }
            elsif ($t eq 'format-version') {
                $self->format_version($v);
            }
            elsif ($t eq 'data-version') {
                $g->version($v);
            }
            else {
                $g->set_property_value($t, $v);
            }
        }
    }
    return;
}
102
103
# Parse the stanza section of an OBO file and populate $self->graph.
#
# Lines are pulled from $self->next_line. A "[Type]" line starts a new stanza;
# "id:" creates (or fetches) the current node on the graph according to the
# stanza type (term / typedef / instance; annotation nodes are created at the
# stanza line and added to the graph in one batch at the end). Every following
# "tag: value" line is applied to that current node.
#
# Body parser options, when set, filter what is read:
#   ignore_all - return immediately, nothing is parsed
#   ignore     - { stanza_type => ['*'] | [tags] }: skip whole stanzas or tags
#   parse_only - { stanza_type => ['*'] | [tags] }: read only these
#
# Dies if an "id:" line is met in a stanza type for which no node can be made.
# Returns nothing.
sub parse_body {
    my $self = shift;

    # Default filters: accept every stanza and every tag.
    my $stanza_check = sub { return 1; };
    my $tag_check    = sub { return 1; };

    if ($self->has_body_parser_options) {
        my $opts = $self->body_parser_options;

        if ($opts->{ignore_all}) {
            # ignore the whole thing: no body parsing required
            return;
        }
        elsif ($opts->{ignore}) {
            my $h = $opts->{ignore};

            # stanza types that are to be skipped wholesale
            my @ignore_all = grep { $h->{$_}[0] eq '*' } keys %$h;

            if (@ignore_all) {
                # skip forward past every stanza whose type is in the
                # ignore-all set
                $stanza_check = sub {
                    my $s_type = shift;
                    if (grep { $s_type eq $_ } @ignore_all) {
                        $self->next_stanza(\@ignore_all, 'ignore');
                        return;
                    }
                    return 1;
                };
            }

            # skip the line if its tag is listed for this stanza type, or if
            # the full stanza is to be ignored
            $tag_check = sub {
                my ($s_type, $t) = @_;
                return 1 if ! $h->{$s_type};
                # \Q..\E: the tag comes from the file and must be matched
                # literally, not as a pattern
                return if ( $h->{$s_type}[0] eq '*' || grep { /^\Q$t\E$/i } @{$h->{$s_type}} );
                return 1;
            };
        }
        elsif ($opts->{parse_only}) {
            my $h = $opts->{parse_only};

            # parse this stanza if its type is in the parse_only set;
            # otherwise jump to the next wanted stanza
            $stanza_check = sub {
                my $s_type = shift;
                return 1 if $h->{$s_type};
                $self->next_stanza([ keys %$h ]);
                return;
            };

            # if the stanza type exists and the tag is listed, we're good
            $tag_check = sub {
                my ($s_type, $t) = @_;
                if ($h->{$s_type}) {
                    if ( $h->{$s_type}[0] eq '*' || grep { $t eq $_ } @{$h->{$s_type}} ) {
                        return 1;
                    }
                    return;
                }
                # we should have already caught incorrect stanzas, but n'mind...
                warn "Incorrect stanza type!\n";
                $self->next_stanza([ keys %$h ]);
                return;
            };
        }
    }

    my $stanzaclass;    # lower-cased type of the stanza being read
    my $id;
    my $n;              # node (or annotation) the current tags apply to
    my @anns = ();
    my $g = $self->graph;

    while ($_ = $self->next_line) {
        next unless /\S/;

        if (/^\[(\S+)\]/) {
            undef $n;
            $stanzaclass = lc($1);
            next unless $stanza_check->($stanzaclass);
            if ($stanzaclass eq 'annotation') {
                $n = GOBO::Annotation->new;
                push(@anns, $n);
            }
            next;
        }

        if (/^(.*?):\s*/) {
            next unless $tag_check->($stanzaclass, $1);
        }

        chomp;
        s/\!.*//; # TODO
        s/\s+$//;
        if (/^id:\s*(.*)\s*$/) {
            $id = $1;
            if ($stanzaclass eq 'term') {
                $n = $g->add_term($id);
            }
            elsif ($stanzaclass eq 'typedef') {
                $n = $g->add_relation($id);
            }
            elsif ($stanzaclass eq 'instance') {
                $n = $g->instance_noderef($id);
                $g->add_instance($n);
            }
            elsif ($stanzaclass eq 'annotation') {
                # TODO
            }
            else {
            }

            if (!$n) {
                die "cannot parse: $_";
            }

            $n->namespace($self->default_namespace)
                if (!$n->namespace &&
                    $self->default_namespace);
            next;
        }

        my $vals = [];
        if (/^name:\s*(.*)/) {
            $n->label($1);
        }
        elsif (/^namespace:\s*(.*)/) {
            $n->namespace($1);
        }
        elsif (/^alt_id:\s*(.*)/) {
            $n->add_alt_ids($1);
        }
        elsif (/^def:\s*(.*)/) {
            _parse_vals($1, $vals);
            $n->definition($vals->[0]); # TODO
            if ($vals->[1] && @{$vals->[1]}) {
                $n->definition_xrefs( [ map { GOBO::Node->new({ id => $_ }) } @{$vals->[1]} ]);
            }
        }
        elsif (/^property_value:\s*(.*)/) {
            _parse_vals($1, $vals);
            $n->add_property_value($vals->[0], $vals->[1]); # TODO
        }
        elsif (/^comment:\s*(.*)/) {
            $n->comment($1);
        }
        elsif (/^subset:\s*(\S+)/) {
            my $ss_id = $1;
            my $ss = $g->subset_noderef($ss_id);
            $n->add_subsets($ss);

            if ($self->liberal_mode && ! $g->subset_index->{$ss->id}) {
                print STDERR "$ss_id was not in the subset index. Crap!\n";
                $g->subset_index->{$ss_id} = $ss;
            }
        }
        elsif (/^consider:\s*(\S+)/) {
            $n->add_considers($1);
        }
        elsif (/^replaced_by:\s*(\S+)/) {
            $n->add_replaced_bys($1);
        }
        elsif (/^created_by:\s*(\S+)/) {
            $n->created_by($1);
        }
        elsif (/^creation_date:\s*(\S+)/) {
            $n->creation_date($1);
        }
        elsif (/^synonym:\s*(.*)/) {
            # synonym: "label" SCOPE [type] [xrefs]
            _parse_vals($1, $vals);
            my $syn = GOBO::Synonym->new(label => shift @$vals);
            $n->add_synonym($syn);
            my $xrefs = pop @$vals;
            if (@$vals) {
                $syn->scope(shift @$vals);
            }
            else {
                warn "no scope specified: $_";
            }
            if ($vals->[0] && !ref($vals->[0])) {
                $syn->type(shift @$vals);
            }
            $syn->xrefs($xrefs);
        }
        elsif (/^xref:\s*(\S+)/) {
            $n->add_xrefs($1);
        }
        elsif (/^is_a:\s*(\S+)(.*)/) {
            my ($target, $rest) = ($1, $2);
            my $tn = $self->getnode($target, $stanzaclass eq 'typedef' ? 'r' : 'c');
            my $s = GOBO::LinkStatement->new(node=>$n, relation=>'is_a', target=>$tn);
            $self->add_metadata($s, $rest);
            $g->add_link($s);
            if ($stanzaclass eq 'typedef') {
                $n->add_subrelation_of($tn);
            }
        }
        elsif (/^relationship:\s*(\S+)\s+(\S+)(.*)/) {
            my ($rel, $target, $rest) = ($1, $2, $3);
            my $rn = $g->relation_noderef($rel);
            my $tn = $self->getnode($target, $stanzaclass eq 'typedef' ? 'r' : 'c');
            my $s = GOBO::LinkStatement->new(node=>$n, relation=>$rn, target=>$tn);
            $self->add_metadata($s, $rest);
            $g->add_link($s);
        }
        elsif (/^complement_of:\s*(\S+)/) {
            my $tn = $self->getnode($1, $stanzaclass eq 'typedef' ? 'r' : 'c');
            $n->complement_of($tn);
        }
        elsif (/^disjoint_from:\s*(\S+)/) {
            my $tn = $self->getnode($1, $stanzaclass eq 'typedef' ? 'r' : 'c');
            $n->add_disjoint_from($tn);
        }
        elsif (/^domain:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'c');
            $n->domain($tn);
        }
        elsif (/^range:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'c');
            $n->range($tn);
        }
        elsif (/^disjoint_over:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'r');
            $n->add_disjoint_over($tn);
        }
        elsif (/^inverse_of:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'r');
            $n->add_inverse_of($tn);
        }
        elsif (/^inverse_of_on_instance_level:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'r');
            $n->add_inverse_of_on_instance_level($tn);
        }
        elsif (/^instance_of:\s*(\S+)/) {
            my $tn = $self->getnode($1, 'c');
            $n->add_type($tn);
        }
        elsif (/^equivalent_to:\s*(\S+)/) {
            my $tn = $self->getnode($1, $stanzaclass eq 'typedef' ? 'r' : 'c');
            $n->add_equivalent_to($tn);
        }
        elsif (/^intersection_of:/) {
            # TODO: generalize
            if (/^intersection_of:\s*(\S+)\s+(\S+)/) {
                # "relation target" form
                my ($rel, $target) = ($1, $2);
                my $rn = $g->relation_noderef($rel);
                my $tn = $self->getnode($target, $stanzaclass eq 'typedef' ? 'r' : 'c');
                my $s = GOBO::LinkStatement->new(node=>$n, relation=>$rn, target=>$tn, is_intersection=>1);
                $g->add_link($s);
            }
            elsif (/^intersection_of:\s*(\S+)/) {
                # bare target form: recorded as an is_a intersection link
                my $tn = $self->getnode($1, $stanzaclass eq 'typedef' ? 'r' : 'c');
                my $s = GOBO::LinkStatement->new(node=>$n, relation=>'is_a', target=>$tn, is_intersection=>1);
                $g->add_link($s);
            }
            else {
                $self->throw("badly formatted intersection: $_");
            }
        }
        elsif (/^union_of:\s*(\S+)/) {
            my $u = $self->getnode($1, $stanzaclass eq 'typedef' ? 'r' : 'c');
            my $ud = $n->union_definition;
            if (!$ud) {
                $ud = GOBO::ClassExpression::Union->new;
                $n->union_definition($ud);
            }
            $ud->add_argument($u);
        }
        elsif (/^is_(\w+):\s*(\w+)/) {
            # boolean flags such as is_obsolete: the part after "is_" is used
            # as the method name on the node
            my ($att, $flag) = ($1, $2);
            $n->$att(1) if $flag eq 'true';
        }
        elsif (/^transitive_over:\s*(\w+)/) {
            my $rn = $g->relation_noderef($1);
            $n->transitive_over($rn);
        }
        elsif (/^(holds_over_chain|equivalent_to_chain):\s*(.*)/) {
            my ($ct, $chain) = ($1, $2);
            my @rels = map { $self->getnode($_, 'r') } split(' ', $chain);
            $ct eq 'holds_over_chain' ? $n->add_holds_over_chain(\@rels) : $n->add_equivalent_to_chain(\@rels);
        }
        # following for annotation stanzas only
        elsif (/^subject:\s*(.*)/) {
            $n->node($self->getnode($1));
        }
        elsif (/^relation:\s*(.*)/) {
            $n->relation($self->getnode($1, 'r'));
        }
        elsif (/^object:\s*(.*)/) {
            $n->target($self->getnode($1));
        }
        elsif (/^description:\s*(.*)/) {
            $n->description($1);
        }
        elsif (/^source:\s*(.*)/) {
            $n->provenance($self->getnode($1));
        }
        elsif (/^assigned_by:\s*(.*)/) {
            $n->source($self->getnode($1));
        }
        elsif (/^formula:\s*(.*)/) {
            _parse_vals($1, $vals);
            my $f = GOBO::Formula->new(text     => $vals->[0],
                                       language => $vals->[1]);
            $f->associated_with($n);
            $g->add_formula($f);
        }
        else {
            # unrecognised tags are silently ignored
        }
    }
    if (@anns) {
        $g->add_annotations(\@anns);
    }
    return;
}
437
438
439
# Fetch a node reference for $id from the graph.
#
# $metatype selects which graph accessor is used:
#   'c' -> term_noderef, 'r' -> relation_noderef, 'i' -> instance_noderef;
# anything else (including no argument) falls back to the generic noderef.
# Returns whatever the chosen accessor returns, evaluated in scalar context.
sub getnode {
    my $self     = shift;
    my $id       = shift;
    my $metatype = shift || '';
    my $g        = $self->graph;

    my %accessor_for = (
        c => 'term_noderef',
        r => 'relation_noderef',
        i => 'instance_noderef',
    );
    my $accessor = $accessor_for{$metatype} || 'noderef';

    # Assigned to a scalar first so the accessor always runs in scalar
    # context, whatever context getnode itself was called in.
    my $n = $g->$accessor($id);
    return $n;
}
460
# Attach trailing-qualifier metadata to a statement.
#
# $s is the statement object; $v is the text that followed the target on an
# is_a / relationship line. If $v starts (after optional whitespace) with a
# "{key=value, key="quoted value", ...}" block, each pair becomes a
# GOBO::LiteralStatement (relation => key, target => value) and the list is
# stored via $s->sub_statements. If there is no such block nothing happens.
#
# Calls $self->throw with the original text when the block cannot be parsed.
# Returns nothing.
sub add_metadata {
    my $self = shift;
    my $s    = shift;
    my $v    = shift;

    if ($v =~ /^\s*\{(.*)\}/) {
        my $tq  = $1;    # unparsed remainder of the qualifier block
        my @tvs = ();
        while ($tq) {
            # key="quoted value" is tried first, then key=bareword
            if ($tq =~ /(\w+)=\"([^\"]*)\"(.*)/) {
                push(@tvs, [$1, $2]);
                $tq = $3;
            }
            elsif ($tq =~ /(\w+)=(\w+)(.*)/) {
                push(@tvs, [$1, $2]);
                $tq = $3;
            }
            else {
                $self->throw($v);
            }

            # pairs are comma separated; whitespace is allowed on either
            # side of the comma
            if ($tq =~ /^\s*,\s*(.*)/) {
                $tq = $1;
            }
            elsif ($tq =~ /^\s*$/) {
                # ok: nothing left
            }
            else {
                $self->throw($v);
            }
        }

        my @sub_statements = ();
        foreach my $pair (@tvs) {
            my ($tag, $val) = @$pair;
            my $ss = GOBO::LiteralStatement->new(relation => $tag, target => $val);
            push(@sub_statements, $ss);
        }
        $s->sub_statements(\@sub_statements);
    }
    return;
}
500
# Split the value part of a tag line into its components and append them to
# the array referenced by $vals:
#   1. an optional leading double-quoted string (via _parse_quoted),
#   2. any number of whitespace-separated atoms, stopping at the first token
#      that starts with '{' or '[',
#   3. an optional "[...]" xref list (via _parse_xrefs), appended as an
#      array ref.
# Results are delivered through $vals only; returns nothing.
sub _parse_vals {
    my $s    = shift;
    my $vals = shift;

    # optionally leads with a quoted section
    if ($s =~ /^(\".*)/) {
        $s = _parse_quoted($s, $vals);
    }

    # follows with an optional list of atoms; each pass consumes one atom and
    # the whitespace after it
    while ($s =~ /^([^\{\[]\S*)\s*(.*)/) {
        push(@$vals, $1);
        $s = $2;
    }

    # optional xrefs
    if ($s =~ /^(\[)/) {
        $s = _parse_xrefs($s, $vals);
    }
    return;
}
525
# Consume a leading double-quoted string from $s.
#
# The text between the quotes is pushed onto @$vals exactly as written
# (backslash escapes such as \" are honoured when finding the closing quote
# but are not unescaped). Returns the remainder of $s after the closing quote
# and any whitespace that follows it. Dies with $s as the message if $s does
# not start with a properly terminated quoted string.
sub _parse_quoted {
    my $s    = shift;
    my $vals = shift;

    if ($s =~ /^\"((?:[^\"\\]|\\.)*)\"\s*(.*)/) {
        push(@$vals, $1);
        return $2;
    }
    die "$s";
}
537
# Consume a leading "[xref, xref, ...]" list from $s.
#
# The text between the brackets is split on commas (with optional following
# whitespace) and the resulting list is pushed onto @$vals as ONE array ref.
# Returns the remainder of $s after the closing bracket and any whitespace
# that follows it. Dies with $s as the message if $s does not start with a
# terminated bracket list.
sub _parse_xrefs {
    my $s    = shift;
    my $vals = shift;

    if ($s =~ /^\[((?:[^\]\\]|\\.)*)\]\s*(.*)/) {
        my ($list, $rest) = ($1, $2);
        push(@$vals, [split(/,\s*/, $list)]); # TODO
        return $rest;
    }
    die "$s";
}
549
550
551				## validate the options that we have
552
# Validate the user-supplied options ($self->options) and translate them into
# the internal header/body parser options used by parse_header / parse_body.
#
# Header: parse_only => [tags] | ignore => [tags] | ignore => '*'
# Body:   parse_only => { stanza => '*' | [tags] }
#         ignore     => { stanza => '*' | [tags] } | ignore => '*'
#
# parse_only wins when both parse_only and ignore are given (a warning is
# issued). Badly shaped values produce a warning and are dropped. Any
# previously set header/body parser options are cleared first, but only when
# a non-empty options hash is present. Always finishes by setting
# checked_options(1), whose return value is returned.
sub check_options {
    my $self    = shift;
    my $options = $self->options;

    if ($options && values %$options) {
        # get rid of any existing options
        $self->clear_header_parser_options;
        $self->clear_body_parser_options;

        ## see if we have any settings for parsing the header
        my $header = $options->{header};
        if ($header && keys %$header) {
            if ($header->{ignore} && $header->{parse_only}) {
                warn "Warning: both ignore and parse_only specified in header parsing options; using setting in parse_only";
            }

            # parse_only takes priority
            if ($header->{parse_only}) {
                if (ref $header->{parse_only} && ref $header->{parse_only} eq 'ARRAY') {
                    $self->set_header_parser_options({ parse_only => $header->{parse_only} });
                }
                else {
                    warn "wrong header options format";
                }
            }
            elsif ($header->{ignore}) {
                if (! ref $header->{ignore} && $header->{ignore} eq '*') {
                    $self->set_header_parser_options({ ignore_all => 1 });
                }
                elsif (ref $header->{ignore} && ref $header->{ignore} eq 'ARRAY') {
                    $self->set_header_parser_options({ ignore => $header->{ignore} });
                }
                else {
                    warn "wrong header options format";
                }
            }
        }

        ## check the body parsing options
        my $body = $options->{body};
        if ($body && keys %$body) {
            if ($body->{ignore} && $body->{parse_only}) {
                warn "Warning: both ignore and parse_only specified in body parsing options; using setting in parse_only";
            }

            # parse_only takes priority
            if ($body->{parse_only}) {
                if (ref $body->{parse_only} && ref $body->{parse_only} eq 'HASH') {
                    my $b_hash = _normalise_stanza_spec($body->{parse_only});
                    $self->set_body_parser_options({ parse_only => $b_hash }) if $b_hash;
                }
                else {
                    warn "wrong body options format";
                }
            }
            elsif ($body->{ignore}) {
                if (ref $body->{ignore} && ref $body->{ignore} eq 'HASH') {
                    my $b_hash = _normalise_stanza_spec($body->{ignore});
                    $self->set_body_parser_options({ ignore => $b_hash }) if $b_hash;
                }
                elsif (! ref $body->{ignore} && $body->{ignore} eq '*') {
                    $self->set_body_parser_options({ ignore_all => 1 });
                }
                else {
                    warn "wrong body options format";
                }
            }
        }
    }
    return $self->checked_options(1);
}

# Normalise a { stanza_type => '*' | [tags] } hash ref: '*' becomes ['*'],
# array refs are kept as they are, anything else is dropped. Returns the
# resulting hash ref, or undef when no entry was usable.
sub _normalise_stanza_spec {
    my $spec = shift;
    my $b_hash;

    foreach my $s_type (keys %$spec) {
        my $tags = $spec->{$s_type};
        # s_type = '*'
        if (! ref $tags && $tags eq '*') {
            $b_hash->{$s_type} = ['*'];
        }
        # s_type = [ tag, tag, tag ]
        elsif (ref $tags && ref $tags eq 'ARRAY') {
            $b_hash->{$s_type} = $tags;
        }
    }
    return $b_hash;
}
645
646				=head2 next_stanza
647
648				Skip the rest of this stanza and go to the next
649
650				input: self, optional arrayref of stanza types, optional 'ignore' flag
651
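652				if the arrayref is specified, will continue to skip stanzas until the stanza type
653				matches one of those in the list; if the 'ignore' flag is also true, the test is inverted and skipping stops at the first stanza whose type is NOT in the list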
654
655				=cut
656
# Skip forward through the input to the start of a later stanza.
#
# Arguments: $s_types - optional array ref of (lower-case) stanza types
#            $ignore  - optional flag, only meaningful with $s_types
#
# Lines are consumed via $self->next_line until a "[Type]" line is found that
# satisfies the request:
#   no $s_types          - any stanza line
#   $s_types             - a stanza whose type IS in the list
#   $s_types and $ignore - a stanza whose type is NOT in the list
# That line is pushed back with unshift_line so the caller reads it next.
# If the input runs out first, the sub simply returns. Returns nothing.
sub next_stanza {
    my $self    = shift;
    my $s_types = shift;
    my $ignore  = shift || undef;

    # Truthiness (not definedness) ends the loop, matching how the rest of
    # the parser reads from next_line.
    while (my $line = $self->next_line) {
        next unless $line =~ /^\[(\S+)\]/;

        if ($s_types) {
            my $type   = lc($1);
            my $listed = grep { $type eq $_ } @$s_types;
            # keep skipping while the stanza is listed (ignore mode) or
            # while it is not listed (normal mode)
            next if $ignore ? $listed : !$listed;
        }

        $self->unshift_line($line);
        return;
    }
    return;
}
688
689
690
691
692
693				1;
694
695				=head1 NAME
696
697				GOBO::Parsers::OBOParser
698
699				=head1 SYNOPSIS
700
701				my $parser = new GOBO::Parsers::OBOParser(file => "t/data/cell.obo");
702				$parser->parse;
703				print $parser->graph;
704
705				my $writer = new GOBO::Writers::OBOWriter;
706				$writer->graph($parser->graph);
707				$writer->write();
708
709				=head1 DESCRIPTION
710
711				A GOBO::Parsers::Parser subclass that parses OBO files.
712
713				The goal is to be obof1.3 compliant:
714
715				http://www.geneontology.org/GO.format.obo-1_3.shtml
716
717				however, obof1.2 and obof1.0 are also supported
718
719				=head2 Term stanzas
720
721				These are converted to GOBO::TermNode objects
722
723				=head2 Typedef stanzas
724
725				These are converted to GOBO::RelationNode objects
726
727				=head2 Instance stanzas
728
729				These are converted to GOBO::InstanceNode objects
730
731				=head2 Statements
732
733				is_a and relationship tags are converted to GOBO::LinkStatement objects and added to the graph
734
735				=head2 intersection_of tags
736
737				These are added to the graph as GOBO::LinkStatement objects, with is_intersection=>1
738
739				You can call
740
741				$g->convert_intersection_links_to_logical_definitions
742
743				To move these links from the graph to $term->logical_definition
744
745				TBD: do this as the default?
746				TBD: generalize for all links? sometimes it is convenient to have the links available in the Node object...?
747
748				=cut
749
750				=head2 Parser options
751
752				The default behaviour of the parser is to parse everything it comes across.
753				Customized parsing can be achieved by giving the parser a hash ref of options
754				encoding the parsing preferences:
755
756				$parser->set_options($options);
757
758				To set parser options, use the following structures:
759
760				=head3 Header-related parsing options
761
762				Header parsing instructions should be contained in the options hash with the key
763				'header':
764
765				$options->{header} = ...
766
767				# parse only tag_1, tag_2 and tag_3, and ignore any other tags in the header
768				$options->{header} = {
769				parse_only => [ 'tag_1', 'tag_2', 'tag_3' ],
770				}
771
772
773				# parse everything apart from tag_4, tag_5 and tag_6
774				$options->{header} = {
775				ignore => [ 'tag_4', 'tag_5', 'tag_6' ],
776				}
777
778
779				# ignore all information in the header
780				$options->{header}{ignore} = '*';
781
782				There is no need to specify $options->{header}{parse_only} = '*' : this is the
783				default behaviour. There is also no need to specify both 'ignore' and 'parse_only'.
784
785
786				=head3 Body parsing options
787
788				Body parsing instructions should be contained in the options hash with the key
789				'body':
790
791				$options->{body} = ...
792
793
794				## parsing or ignore tags
795
796				# parse only tag_1, tag_2 and tag_3 from $stanza_type stanzas
797				$options->{body}{parse_only}{$stanza_type} = [ 'tag_1', 'tag_2', 'tag_3' ],
798
799
800				# ignore 'tag_4', 'tag_5', 'tag_6' from $stanza_type stanzas
801				$options->{body}{ignore}{$stanza_type} = [ 'tag_4', 'tag_5', 'tag_6' ],
802
803
804				## parsing or ignoring stanzas
805
806				# parse only stanzas where the type matches the key $stanza_type
807				$options->{body}{parse_only}{ $stanza_type } = '*'
808
809
810				# ignore stanzas where the type matches the key $stanza_type
811				$options->{body}{ignore}{ $stanza_type } = '*'
812
813				# ignore all information in the body
814				$options->{body}{ignore} = '*';
815
816				There is no need to specify $options->{body}{parse_only} = '*' : this is the
817				default behaviour. There is also no need to specify both 'ignore' and 'parse_only'.
818
819
820				=head3 Examples
821
822				# parse everything from the header; parse only instance stanzas and the id, name and namespace tags from term stanzas
823				$parser->set_options({ body => { parse_only => { term => [ qw(id name namespace) ], instance => '*' } } });
824
825
826				# ignore the header; parse everything in the body
827				$parser->set_options({ header => { ignore => '*' } });
828
829
830				# parse the date from the header; ignore instance and annotation stanzas
831				$parser->set_options({
832				header => { parse_only => [ 'date' ] },
833				body => {
834				ignore => { instance => '*', annotation => '*' },
835				},
836				});
837
838				=cut