File Coverage

blib/lib/Treex/Core/Node/EffectiveRelations.pm
Criterion Covered Total %
statement 100 142 70.4
branch 44 72 61.1
condition 10 16 62.5
subroutine 16 21 76.1
pod 3 5 60.0
total 173 256 67.5


line stmt bran cond sub pod time code
1             package Treex::Core::Node::EffectiveRelations;
2             $Treex::Core::Node::EffectiveRelations::VERSION = '2.20210102';
3 24     24   18937 use Moose::Role;
  24         68  
  24         208  
4              
5             # with Moose >= 2.00, this must be present also in roles
6 24     24   131812 use MooseX::SemiAffordanceAccessor;
  24         69  
  24         229  
7 24     24   65155 use Treex::Core::Log;
  24         70  
  24         39215  
8              
9             has is_member => (
10             is => 'rw',
11             isa => 'Bool',
12             documentation => 'Is this node a member of a coordination (i.e. conjunct) or apposition?',
13             );
14              
15             # Shared modifiers of coordinations can be distinguished in PDT style
16             # just based on the fact they are hanged on the conjunction (coord. head).
17             # However, in other styles (e.g. Stanford) this attribute might be useful.
18             has is_shared_modifier => (
19             is => 'rw',
20             isa => 'Bool',
21             documentation => 'Is this node a shared modifier of a coordination?',
22             );
23              
24             requires 'is_coap_root';
25              
26             # Implementation details:
27             # Members of a coordination can be distinguished from shared modifiers
28             # according to the attribute is_member. The only problem is when some
29             # members are hanged on prepositions(afun=AuxP) or subord. conjunctions(AuxC).
30             # The PDT style is that Aux[CP] nodes can never have is_member=1, resulting in
31             # e.g. "It was in(parent=and) Prague(parent=in,is_member=1)
32             # and(parent=was) in(parent=and) London(parent=in,is_member=1)."
33             # The style adpoted in Treex is that members have always is_member=1
34             # no matter what afun they have. This results in:
35             # "It was in(parent=and,is_member=1) Prague(parent=in)
36             # and(parent=was) in(parent=and,is_member=1) London(parent=in)."
37             # Both annotation styles have their pros and cons.
38              
39             sub get_echildren {
40 12     12 1 5566 my ( $self, $arg_ref ) = @_;
41 12 50       63 if ( !defined $arg_ref ) {
42 0         0 $arg_ref = {};
43             }
44 12 50       38 log_fatal('Incorrect number of arguments') if @_ > 2;
45              
46 12         24 my @echildren;
47 12 100       40 if ( $self->_can_apply_eff($arg_ref) ) {
48             # 1) Get my own effective children (i.e. I am their only eff. parent).
49             # These are in my subtree.
50 9         32 @echildren = $self->_get_my_own_echildren($arg_ref);
51              
52             # 2) Add shared effective children
53             # (i.e. I am their eff. parent, but not the only one).
54             # This can happen only if I am member of a coordination
55             # and these eff. children are shared modifiers of the coordination.
56 9         36 push @echildren, $self->_get_shared_echildren($arg_ref);
57             } else {
58 3         15 @echildren = $self->get_children();
59             }
60              
61             # 3) Process eventual switches (ordered=>1, add_self=>1,...)
62             #return @echildren if !$arg_ref; TODO this cannot happen now, see $arg_ref = {} if !defined $arg_ref;
63 12         47 delete $arg_ref->{dive};
64 12         26 delete $arg_ref->{or_topological};
65 12         22 delete $arg_ref->{ignore_incorrect_tree_structure};
66 12         61 return $self->_process_switches( $arg_ref, @echildren );
67             }
68              
69             sub get_eparents {
70 10     10 1 5396 my ( $self, $arg_ref ) = @_;
71 10 50       34 if ( !defined $arg_ref ) {
72 0         0 $arg_ref = {};
73             }
74 10 50       33 log_fatal('Incorrect number of arguments') if @_ > 2;
75              
76             # Get effective parents
77 10 50       34 my @eparents = $self->_can_apply_eff($arg_ref) ?
78             $self->_get_eparents($arg_ref) : ($self->get_parent());
79              
80             # Process eventual switches (ordered=>1, add_self=>1,...)
81 10         33 delete $arg_ref->{dive};
82 10         21 delete $arg_ref->{or_topological};
83 10         19 delete $arg_ref->{ignore_incorrect_tree_structure};
84 10         41 return $self->_process_switches( $arg_ref, @eparents );
85             }
86              
87             sub _get_eparents {
88 10     10   27 my ( $self, $arg_ref ) = @_;
89              
90             # 0) Check if there is a topological parent.
91             # Otherwise, there is no chance getting effective parents.
92 10 50       40 if ( !$self->get_parent() ) {
93 0         0 my $id = $self->id;
94              
95             #TODO: log_fatal if !$robust
96 0         0 log_warn( "The node $id has no effective nor topological parent, using the root", 1 );
97 0         0 return $self->get_root();
98             }
99              
100             # 1) If $self is a member of a coordination/aposition,
101             # get the highest node representing $self -- i.e. the coord/apos root.
102             # Otherwise, let $node be $self.
103 10   66     74 my $node = $self->_get_transitive_coap_root($arg_ref) || $self;
104              
105             # 2) Get the parent
106 10 50       31 $node = $node->get_parent() or return $self->_fallback_parent($arg_ref);
107              
108             # 3) If it is a node to be dived, look above for the first non-dive ancestor.
109 10         56 while ( $arg_ref->{dive}->($node) ) {
110 0 0       0 $node = $node->get_parent() or return $self->_fallback_parent($arg_ref);
111             }
112              
113             # If $node is not a head of a coordination/aposition,
114             # it is the effective parent we are looking for.
115 10 100       33 return $node if !$node->is_coap_root();
116              
117             # Otherwise, there can be more than one effective parent.
118             # All effective parents (of $self) are shared modifiers
119             # of the coordination rooted in $node.
120 2         11 my @eff = $node->get_coap_members($arg_ref);
121 2 50       24 return @eff if @eff;
122              
123 0         0 return $self->_fallback_parent($arg_ref);
124             }
125              
126             # --- Utility methods for get_echildren and get_eparents
127              
128             sub _is_auxCP {
129 58     58   102 my ($self) = @_;
130 58   100     1631 my $afun = $self->afun || '';
131 58         259 return $afun =~ /^Aux[CP]$/;
132             }
133              
134             sub _get_direct_coap_root {
135 37     37   76 my ( $self, $arg_ref ) = @_;
136 37 50       91 my $parent = $self->get_parent() or return;
137 37 100       193 return $parent if $self->get_attr('is_member');
138 13 50 33     61 return if !$arg_ref->{dive} || $arg_ref->{dive}->($self);
139 13         33 while ( $arg_ref->{dive}->($parent) ) {
140 2 50       8 return $parent->get_parent() if $parent->get_attr('is_member');
141 0 0       0 $parent = $parent->get_parent() or return;
142             }
143 11         79 return;
144             }
145              
146             sub _get_transitive_coap_root {
147 10     10   23 my ( $self, $arg_ref ) = @_;
148 10 100       25 my $root = $self->_get_direct_coap_root($arg_ref) or return;
149 8         32 while ( $root->get_attr('is_member') ) {
150 6 50       21 $root = $root->_get_direct_coap_root($arg_ref) or return;
151             }
152 8         22 return $root;
153             }
154              
155             sub _can_apply_eff {
156 22     22   52 my ( $self, $arg_ref ) = @_;
157 22 100       85 if ( !$arg_ref->{dive} ) {
    50          
158 11     45   62 $arg_ref->{dive} = sub {0};
  45         216  
159             }
160             elsif ( $arg_ref->{dive} eq 'AuxCP' ) {
161 11         30 $arg_ref->{dive} = \&_is_auxCP;
162             }
163 22 100       62 my $error = $arg_ref->{dive}->($self)
    100          
164             ? 'a node that is "to be dived"'
165             : $self->is_coap_root() ? 'coap root' : 0;
166 22 100       103 return 1 if !$error;
167 3 50       17 return 0 if $arg_ref->{or_topological}; #TODO: document
168 0         0 my $method_name = ( caller 1 )[3];
169 0         0 my $id = $self->id;
170 0         0 log_warn( "$method_name called on $error ($id). Fallback to topological one.", 1 );
171 0         0 return 0;
172             }
173              
174             # used only if there is an error in the tree structure,
175             # eg. there is a coordination with no members or a non-root node with no parent
176             sub _fallback_parent {
177 0     0   0 my ( $self, $arg_ref ) = @_;
178 0         0 my $id = $self->get_attr('id');
179             log_warn "The node $id has no effective parent, using the topological one."
180 0 0 0     0 if ( !$arg_ref || !$arg_ref->{ignore_incorrect_tree_structure} );
181 0         0 return $self->get_parent();
182             }
183              
184             # Get my own effective children (i.e. I am their only eff. parent).
185             sub _get_my_own_echildren {
186 9     9   23 my ( $self, $arg_ref ) = @_;
187 9         17 my @members = ();
188 9         49 my @queue = $self->get_children();
189 9         32 while (@queue) {
190 6         14 my $node = shift @queue;
191 6 50       18 if ( $arg_ref->{dive}->($node) ) {
    100          
192 0         0 push @queue, $node->get_children();
193             }
194             elsif ( $node->is_coap_root() ) {
195 3         16 push @members, $node->get_coap_members($arg_ref);
196              
197             #push @queue, grep { $_->get_attr('is_member') } $node->get_children();
198             }
199             else {
200 3         14 push @members, $node;
201             }
202             }
203 9         26 return @members;
204             }
205              
206             # Get shared effective children
207             # (i.e. I am their eff. parent but not the only one).
208             sub _get_shared_echildren {
209 9     9   22 my ( $self, $arg_ref ) = @_;
210              
211             # Only members of coord/apos can have shared eff. children
212 9 100       28 my $coap_root = $self->_get_direct_coap_root($arg_ref) or return ();
213 6         19 my @shared_echildren = ();
214              
215             # All shared modifiers of $coap_root are eff. children of $self.
216             # We must process all possibly nested coap_roots.
217             # In the first iteration, $self is one of children of $coap_root.
218             # (In case of "diving", it's not $self, but its governing Aux[CP].)
219             # However, it has is_member==1, so it won't get into @shared_echildren.
220             # Similarly for other iterations.
221 6         20 while ($coap_root) {
222             push @shared_echildren,
223 6         21 map { $_->get_coap_members($arg_ref) }
224 12         41 grep { !$_->get_attr('is_member') }
  36         82  
225             $coap_root->get_children();
226 12         37 $coap_root = $coap_root->_get_direct_coap_root($arg_ref);
227             }
228 6         16 return @shared_echildren;
229             }
230              
231             sub get_coap_members {
232 19     19 1 1663 my ( $self, $arg_ref ) = @_;
233 19 50       54 log_fatal('Incorrect number of arguments') if @_ > 2;
234 19 100       63 return $self if !$self->is_coap_root();
235 13         32 my $direct_only = $arg_ref->{direct_only};
236 13   100 13   59 my $dive = $arg_ref->{dive} || sub {0};
  13         44  
237 13 100       43 if ( $dive eq 'AuxCP' ) { $dive = \&_is_auxCP; }
  4         13  
238 13         28 my @members = ();
239              
240 13         56 my @queue = grep { $_->is_member } $self->get_children();
  38         1123  
241 13         48 while (@queue) {
242 52         123 my $node = shift @queue;
243 52 100 100     131 if ( $dive->($node) ) {
    100          
244 5         24 push @queue, $node->get_children();
245             }
246             elsif ( !$direct_only && $node->is_coap_root() ) {
247 5         23 push @queue, grep { $_->is_member } $node->get_children();
  20         561  
248             }
249             else {
250 42         124 push @members, $node;
251             }
252             }
253 13         97 return @members;
254             }
255              
256 24     24   266 use List::MoreUtils "any";
  24         70  
  24         318  
257             sub is_echild_of {
258 0     0 0   my ($potential_echild, $eparent, $arg_ref) = @_;
259              
260 0           my @echildren = $eparent->get_echildren( $arg_ref );
261 0     0     return any { $_ == $potential_echild } @echildren;
  0            
262             }
263              
264             my @eff_args = qw(dive or_topological ignore_incorrect_tree_structure);
265             # Remove args relevant only to eff relations from arg_ref and return them
266             sub _extract_eff_args {
267 0     0     my ($self, $arg_ref) = @_;
268              
269 0           my $eff_arg_ref = {};
270 0           foreach my $eff_arg (@eff_args) {
271 0 0         if (defined $arg_ref->{$eff_arg}) {
272 0           $eff_arg_ref->{$eff_arg} = $arg_ref->{$eff_arg};
273 0           delete $arg_ref->{$eff_arg};
274             }
275             }
276              
277 0           return $eff_arg_ref;
278             }
279              
280             sub get_esiblings {
281 0     0 0   my ( $self, $arg_ref ) = @_;
282 0 0         log_fatal('Incorrect number of arguments') if @_ > 2;
283 0 0         if ( !defined $arg_ref ) {
284 0           $arg_ref = {};
285             }
286              
287 0           my $eff_arg_ref = $self->_extract_eff_args($arg_ref);
288              
289 0           my @eparents = $self->get_eparents($eff_arg_ref);
290            
291 0           my %esiblings_hash;
292 0           foreach my $eparent (@eparents) {
293 0           my @esiblings = grep { $_ ne $self } $eparent->get_echildren($eff_arg_ref);
  0            
294 0           foreach my $esibling (@esiblings) {
295 0           $esiblings_hash{$esibling->id} = $esibling;
296             }
297             }
298 0           my @esiblings = values %esiblings_hash;
299              
300 0           return $self->_process_switches($arg_ref, @esiblings);
301             }
302              
303             1;
304              
305             __END__
306              
307             =encoding utf-8
308              
309             =head1 NAME
310              
311             Treex::Core::Node::EffectiveRelations
312              
313             =head1 VERSION
314              
315             version 2.20210102
316              
317             =head1 DESCRIPTION
318              
319             Moose role for nodes with a notion of so called
320             I<effective> parents and I<effective> children.
321             This notion is used both
322             on the a-layer (L<Treex::Core::Node::A>) and
323             on the t-layer (L<Treex::Core::Node::T>).
324              
325             TODO: explain it, some examples, reference to PDT manual
326              
327             Eg. in the sentence "Martin and Rudolph came", the tree structure is
328              
329             came(and(Martin,Rudolph))
330              
331             where "and" is a head of a coordination with members "Martin" and "Rudolph".
332             See what the methods C<get_children>, C<get_echildren>, C<get_parent>
333             and C<get_eparents> return when called on various nodes
334             (some pseudocode used for better readability):
335              
336              
337             # CHILDREN AND EFFECTIVE CHILDREN
338              
339             "came"->get_children()
340             # returns "and"
341              
342             "came"->get_echildren()
343             # returns ("Martin", "Rudolph")
344              
345             "and"->get_children()
346             # returns ("Martin", "Rudolph")
347              
348             "and"->get_echildren()
349             # returns ("Martin", "Rudolph") and issues a warning:
350             # get_echildren called on coap root ([id_of_and]). Fallback to topological one.
351              
352             "and"->get_echildren({or_topological => 1})
353             # returns ("Martin", "Rudolph") with no warning
354              
355              
356             # PARENTS AND EFFECTIVE PARENTS
357              
358             "Rudolph"->get_parent()
359             # returns "and"
360              
361             "Rudolph"->get_eparents()
362             # returns "came"
363              
364             "and"->get_parent()
365             # returns "came"
366              
367             "and"->get_eparents()
368             # returns "came" and issues a warning:
369             # get_eparents called on coap root ([id_of_and]). Fallback to topological one.
370              
371             "and"->get_eparents({or_topological => 1})
372             # returns "came" with no warning
373              
374              
375             Note that to skip prepositions and subordinating conjunctions on the a-layer,
376             you must use option C<dive>, e.g.:
377              
378             my $eff_children = $node->get_echildren({dive=>'AuxCP'});
379              
380             Methods C<get_eparents> and C<get_echildren> produce a warning
381             "C<called on coap root ($id). Fallback to topological one.>"
382             when called on a root of coordination or apposition,
383             because effective children/parents are not properly defined in this case.
384             This warning can be supressed by option C<or_topological>.
385              
386             =head1 METHODS
387              
388             =over
389              
390             =item my @effective_children = $node->get_echildren($arg_ref?)
391              
392             Returns a list of effective children of the C<$node>. It means that
393             a) instead of coordination/aposition heads, their members are returned
394             b) shared modifiers of a coord/apos (technically hanged on the head of coord/apos)
395             count as effective children of the members.
396              
397             OPTIONS:
398              
399             =over
400              
401             =item dive=>$sub_ref
402              
403             Using C<dive>, you can define nodes to be skipped (or I<dived>).
404             C<dive> is a reference to a subroutine that decides
405             whether the given node should be skipped or not.
406             Typically this is used for prepositions and subord. conjunctions on a-layer.
407             You can set C<dive> to the string C<AuxCP> which is a shortcut
408             for C<sub {my $self=shift;return $self->afun =~ /^Aux[CP]$/;}>.
409              
410             =item or_topological
411              
412             If the notion of effective child is not defined
413             (if C<$node> is a head of coordination),
414             return the topological children without warnings.
415              
416             =item ordered, add_self, following_only, preceding_only, first_only, last_only
417              
418             You can specify the same options as in
419             L<Treex::Core::Node::get_children()|Treex::Core::Node/get_children>.
420              
421             =back
422              
423              
424             =item my @effective_parents = $node->get_eparents($arg_ref?)
425              
426             Returns a list of effective parents of the C<$node>.
427              
428             OPTIONS
429              
430             =over
431              
432             =item dive
433              
434             see L<get_echildren>
435              
436             =item or_topological
437              
438             If the notion of effective parent is not defined
439             (if C<$node> is a head of coordination),
440             return the topological parent without warnings.
441              
442             =item ignore_incorrect_tree_structure
443              
444             If there is an error in the tree structure,
445             eg. there is a coordination with no members or a non-root node with no parent,
446             a warning is issued
447             (C<The node [node_id] has no effective parent, using the topological one.>).
448             This option supresses the warning.
449             Use thoughtfully (preferably do not use at all).
450              
451             =item ordered, add_self, following_only, preceding_only, first_only, last_only
452              
453             You can specify the same options as in
454             L<Treex::Core::Node::get_children()|Treex::Core::Node/get_children>.
455              
456             =back
457              
458              
459              
460             =item $node->get_coap_members($arg_ref?)
461              
462             If the node is a coordination/apposition head (see
463             L<Node::A::is_coap_root()|Node::A/is_coap_root> and
464             L<Node::T::is_coap_root()|Node::T/is_coap_root>) a list of all coordinated
465             members is returned. Otherwise, the node itself is returned.
466              
467             OPTIONS
468              
469             =over
470              
471             =item direct_only
472              
473             In case of nested coordinations return only "first-level" members.
474             The default is to return I<transitive> members.
475             For example "(A and B) or C":
476              
477             $or->get_coap_members(); # returns A,B,C
478             $or->get_coap_members({direct_only=>1}); # returns and,C
479              
480             =item dive
481              
482             see L<get_echildren>
483              
484             =back
485              
486             =back
487              
488              
489             =head1 AUTHOR
490              
491             Martin Popel <popel@ufal.mff.cuni.cz>
492              
493             =head1 COPYRIGHT AND LICENSE
494              
495             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
496              
497             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.