File Coverage

blib/lib/Treex/Core/Node/Ordered.pm
Criterion Covered Total %
statement 90 212 42.4
branch 32 116 27.5
condition 2 24 8.3
subroutine 12 21 57.1
pod 9 12 75.0
total 145 385 37.6


line stmt bran cond sub pod time code
1             package Treex::Core::Node::Ordered;
2             $Treex::Core::Node::Ordered::VERSION = '2.20210102';
3 24     24   19345 use Moose::Role;
  24         61  
  24         237  
4              
5             # with Moose >= 2.00, this must be present also in roles
6 24     24   135781 use MooseX::SemiAffordanceAccessor;
  24         61  
  24         286  
7 24     24   66414 use Treex::Core::Log;
  24         69  
  24         2155  
8 24     24   201 use List::Util qw(first); # TODO: this wouldn't be needed if there was Treex::Core::Common for roles
  24         57  
  24         1752  
9 24     24   181 use Treex::Core::Types;
  24         75  
  24         60847  
10              
11             has ord => (
12             is => 'ro',
13             isa => 'Treex::Type::NonNegativeInt',
14             writer => '_set_ord',
15             reader => 'ord',
16             );
17              
18             sub precedes {
19 2 50   2 1 11399 log_fatal 'Incorrect number of arguments' if @_ != 2;
20 2         11 my ( $self, $another_node ) = @_;
21 2         76 return $self->ord() < $another_node->ord();
22             }
23             sub follows {
24 0 0   0 0 0 log_fatal 'Incorrect number of arguments' if @_ != 2;
25 0         0 my ( $self, $another_node ) = @_;
26 0         0 return $self->ord() > $another_node->ord();
27             }
28              
29             # Methods get_next_node and get_prev_node are implemented
30             # so they can handle deprecated fractional ords.
31             # When no "fract-ords" will be used in the whole TectoMT nor Treex
32             # this could be reimplemented a bit more effectively.
33             # TODO
34             sub get_next_node {
35 0 0   0 1 0 log_fatal 'Incorrect number of arguments' if @_ != 1;
36 0         0 my $self = shift;
37 0         0 my $my_ord = $self->ord();
38 0 0       0 log_fatal('Undefined ordering value') if !defined $my_ord;
39              
40             # Find closest higher ord
41 0         0 my ( $next_node, $next_ord ) = ( undef, undef );
42 0         0 foreach my $node ( $self->get_root()->get_descendants() ) {
43 0         0 my $ord = $node->ord();
44 0 0       0 next if $ord <= $my_ord;
45 0 0 0     0 next if defined $next_ord && $ord > $next_ord;
46 0         0 ( $next_node, $next_ord ) = ( $node, $ord );
47             }
48 0         0 return $next_node;
49             }
50              
51             sub get_prev_node {
52 0 0   0 1 0 log_fatal 'Incorrect number of arguments' if @_ != 1;
53 0         0 my $self = shift;
54 0         0 my $my_ord = $self->ord();
55 0 0       0 log_fatal('Undefined ordering value') if !defined $my_ord;
56              
57             # Find closest lower ord
58 0         0 my ( $prev_node, $prev_ord ) = ( undef, undef );
59 0         0 foreach my $node ( $self->get_root()->get_descendants() ) {
60 0         0 my $ord = $node->ord();
61 0 0       0 next if $ord >= $my_ord;
62 0 0 0     0 next if defined $prev_ord && $ord < $prev_ord;
63 0         0 ( $prev_node, $prev_ord ) = ( $node, $ord );
64             }
65 0         0 return $prev_node;
66             }
67              
68             sub get_nodes_between {
69 4 50   4 1 17 log_fatal 'Incorrect number of arguments' if @_ != 2;
70 4         11 my ($self, $other) = @_;
71              
72             # want $self preceding $other
73 4 100       142 if ( $self->ord > $other->ord ) {
74 2         4 my $temp = $other;
75 2         4 $other = $self;
76 2         5 $self = $temp;
77             }
78              
79 4         18 my @all_nodes = $self->get_root->get_descendants({ordered => 1});
80             my @nodes_between = grep {
81 4 100       17 $_->ord > $self->ord && $_->ord < $other->ord
  32         795  
82             } @all_nodes;
83              
84 4         19 return @nodes_between;
85             }
86              
87             sub _normalize_node_ordering {
88 10 50   10   41 log_fatal 'Incorrect number of arguments' if @_ != 1;
89 10         24 my $self = shift;
90 10 50       35 log_fatal('Ordering normalization can be applied only on root nodes!') if $self->get_parent();
91             # Do not use add_self => 1. Unshift myself as the first element instead.
92             # Otherwise normalization will not work as expected if the current ord of the root is nonzero and/or a non-root node has zero.
93 10         81 my @nodes = $self->get_descendants( { ordered => 1 } );
94 10         40 unshift(@nodes, $self);
95 10         23 my $new_ord = 0;
96 10         25 foreach my $node (@nodes) {
97 48         1459 $node->_set_ord($new_ord);
98 48         92 $new_ord++
99             }
100 10         29 return;
101             }
102              
103             sub _check_shifting_method_args {
104 22     22   52 my ( $self, $reference_node, $arg_ref ) = @_;
105 22         200 my @c = caller 1;
106 22         93 my $stack = "$c[3] called from $c[1], line $c[2]";
107 22 50 33     123 log_fatal( 'Incorrect number of arguments for ' . $stack ) if @_ < 2 || @_ > 3;
108 22 50       60 log_fatal( 'Undefined reference node for ' . $stack ) if !$reference_node;
109 22 50       74 log_fatal( 'Reference node must be from the same tree. In ' . $stack )
110             if $reference_node->get_root() != $self->get_root();
111              
112             log_fatal '$reference_node is a descendant of $self.'
113             . ' Maybe you have forgotten {without_children=>1}. ' . "\n" . $stack
114 22 50 33     281 if !$arg_ref->{without_children} && $reference_node->is_descendant_of($self);
115              
116 22 50       65 return if !defined $arg_ref;
117              
118 22 50       68 log_fatal(
119             'Second argument for shifting methods can be only options hash reference. In ' . $stack
120             ) if ref $arg_ref ne 'HASH';
121 22     0   109 my $unknown = first { $_ ne 'without_children' } keys %{$arg_ref};
  0         0  
  22         96  
122 22 50       100 log_warn("Unknown switch '$unknown' for $stack") if defined $unknown;
123 22         75 return;
124             }
125              
126             sub shift_after_node {
127 16     16 1 108 my ( $self, $reference_node, $arg_ref ) = @_;
128 16 50       58 return if $self == $reference_node;
129 16         60 _check_shifting_method_args(@_);
130 16 50       39 return $self->_shift_to_node( $reference_node, 1, $arg_ref->{without_children} ) if $arg_ref;
131 16         63 return $self->_shift_to_node( $reference_node, 1, 0 );
132             }
133              
134             sub shift_before_node {
135 6     6 1 55 my ( $self, $reference_node, $arg_ref ) = @_;
136 6 50       19 return if $self == $reference_node;
137 6         28 _check_shifting_method_args(@_);
138 6 50       16 return $self->_shift_to_node( $reference_node, 0, $arg_ref->{without_children} ) if $arg_ref;
139 6         25 return $self->_shift_to_node( $reference_node, 0, 0 );
140             }
141              
142             sub shift_after_subtree {
143 0     0 1 0 my ( $self, $reference_node, $arg_ref ) = @_;
144 0         0 _check_shifting_method_args(@_);
145              
146 0         0 my $last_node;
147 0 0       0 if ( $arg_ref->{without_children} ) {
148 0         0 ($last_node) = reverse grep { $_ != $self } $reference_node->get_descendants( { ordered => 1, add_self => 1 } );
  0         0  
149             }
150             else {
151 0         0 $last_node = $reference_node->get_descendants( { except => $self, last_only => 1, add_self => 1 } );
152             }
153 0 0       0 return if !defined $last_node;
154 0 0       0 return $self->_shift_to_node( $last_node, 1, $arg_ref->{without_children} ) if $arg_ref;
155 0         0 return $self->_shift_to_node( $last_node, 1, 0 );
156             }
157              
158             sub shift_before_subtree {
159 0     0 1 0 my ( $self, $reference_node, $arg_ref ) = @_;
160 0         0 _check_shifting_method_args(@_);
161              
162 0         0 my $first_node;
163 0 0       0 if ( $arg_ref->{without_children} ) {
164 0         0 ($first_node) = grep { $_ != $self } $reference_node->get_descendants( { ordered => 1, add_self => 1 } );
  0         0  
165             }
166             else {
167 0         0 $first_node = $reference_node->get_descendants( { except => $self, first_only => 1, add_self => 1 } );
168             }
169 0 0       0 return if !defined $first_node;
170 0 0       0 return $self->_shift_to_node( $first_node, 0, $arg_ref->{without_children} ) if $arg_ref;
171 0         0 return $self->_shift_to_node( $first_node, 0, 0 );
172             }
173              
174             # This method does the real work for all shift_* methods.
175             # However, due to unfriendly name and arguments it's not public.
176             sub _shift_to_node {
177 22     22   51 my ( $self, $reference_node, $after, $without_children ) = @_;
178 22         66 my @all_nodes = $self->get_root()->get_descendants();
179              
180             # Make sure that ord of all nodes is defined
181             #my $maximal_ord = @all_nodes; -this does not work, since there may be gaps in ordering
182 22         46 my $maximal_ord = 10000;
183 22         51 foreach my $d (@all_nodes) {
184 146 100       3860 if ( !defined $d->ord() ) {
185 22         653 $d->_set_ord( $maximal_ord++ );
186             }
187             }
188              
189             # Which nodes are to be moved?
190             # $self only (the {without_children=>1} switch)
191             # or $self and all its descendants (the default)?
192 22         37 my @nodes_to_move;
193 22 50       56 if ($without_children) {
194 0         0 @nodes_to_move = ($self);
195             }
196             else {
197 22         121 @nodes_to_move = $self->get_descendants( { ordered => 1, add_self => 1 } );
198             }
199              
200             # Let's make a hash, so we can easily recognize which nodes are to be moved.
201 22         416 my %is_moving = map { $_ => 1 } @nodes_to_move;
  22         120  
202              
203             # Recompute ord of all nodes.
204             # The technical root has ord=0 and the first node will have ord=1.
205 22         48 my $counter = 1;
206 22         35 my $nodes_moved = 0;
207 22         86 @all_nodes = sort { $a->ord() <=> $b->ord() } @all_nodes;
  302         7447  
208 22         58 foreach my $node (@all_nodes) {
209              
210             # We skip nodes that are being moved.
211             # Their ord is recomuted elsewhere (look 8 lines down).
212 146 100       388 next if $is_moving{$node};
213              
214             # If moving "after" a reference node
215             # then ord of the $node can be recomputed now
216             # even if it is actually the $reference_node.
217 124 100       232 if ($after) {
218 88         2532 $node->_set_ord( $counter++ );
219             }
220              
221             # Now we insert (i.e. recompute ord of) all nodes which are being moved.
222             # The nodes are inserted in the original order.
223 124 100       277 if ( $node == $reference_node ) {
224 16         45 foreach my $moving_node (@nodes_to_move) {
225 16         467 $moving_node->_set_ord( $counter++ );
226             }
227 16         33 $nodes_moved = 1;
228             }
229              
230             # If moving "before" a node, then now it is the right moment
231             # for recomputing ord of the $node
232 124 100       273 if ( !$after ) {
233 36         1034 $node->_set_ord( $counter++ );
234             }
235             }
236              
237             # If $is_moving{$reference_node}, e.g. when there is just one node in the tree,
238             # we need to do the reordering now (otherwise the ord would be still 10000).
239 22 100       58 if ( !$nodes_moved ) {
240 6         12 foreach my $moving_node (@nodes_to_move) {
241 6         187 $moving_node->_set_ord( $counter++ );
242             }
243             }
244 22         94 return;
245             }
246              
247             ###!!! DZ: reverting to 7183, 2011-11-03.
248             # This function by Martin is faster than my own below but it does not always return correct results.
249             # The key problem could be that it relies on ord values forming a contiguous sequence which might not always be true.
250             # Go to devel/hamledt/nonprojectivity and test that make yields the same numbers for the old and the new method.
251             sub is_nonprojective_does_not_work {
252 0     0 0   my ($self) = @_;
253              
254             # Root and its children are always projective
255 0           my $parent = $self->get_parent();
256 0 0 0       return 0 if !$parent || $parent->is_root();
257              
258             # Edges between neighbouring nodes are always projective.
259             # Check it now to make it a bit faster.
260 0           my ( $ordA, $ordB ) = ( $self->ord, $parent->ord );
261 0 0         if ( $ordA > $ordB ) {
262 0           ( $ordA, $ordB ) = ( $ordB, $ordA );
263             }
264 0           my $distance = $ordB - $ordA;
265 0 0         return 0 if $distance == 1;
266              
267             # Get all the descendants of $parent that are in the span of the edge.
268 0 0         my @span = grep { $_->ord > $ordA && $_->ord < $ordB } $parent->get_descendants();
  0            
269              
270             # For projective edges @span must include all the nodes between $parent and $self.
271 0           return @span != $distance - 1;
272             }
273              
274             #------------------------------------------------------------------------------
275             # Tells whether the node is attached to its parent nonprojectively, i.e. there
276             # is at least one node between this node and its parent that is not dominated
277             # by the parent.
278             #------------------------------------------------------------------------------
279             sub is_nonprojective
280             {
281 0 0   0 1   log_fatal('Incorrect number of arguments') if(scalar(@_)!=1);
282 0           my $self = shift;
283 0           my $parent = $self->parent();
284             # A node that does not have a parent cannot be nonprojective.
285 0 0         return 0 if(!$parent);
286             # Get a hash of all descendants of the parent.
287 0           my @pdesc = $parent->get_descendants({add_self=>1});
288 0           my %pdesc; map {$pdesc{$_}++} (@pdesc);
  0            
  0            
289             # Figure out whether the node is to the left or to the right from its parent.
290 0           my $nord = $self->ord();
291 0           my $pord = $parent->ord();
292 0           my ($x, $y);
293 0 0         if($pord>$nord)
294             {
295 0           $x = $self;
296 0           $y = $parent;
297             }
298             else
299             {
300 0           $x = $parent;
301 0           $y = $self;
302             }
303             # Get the ordered list of all nodes between $x and $y.
304 0           my $xord = $x->ord();
305 0           my $yord = $y->ord();
306 0 0         my @between = grep {$_->ord()>$xord && $_->ord()<$yord} ($parent->root()->get_descendants({ordered=>1}));
  0            
307             # This node is nonprojective if @between contains anything that is not in %pdesc.
308 0           foreach my $b (@between)
309             {
310 0 0         if(!$pdesc{$b})
311             {
312 0           return 1;
313             }
314             }
315 0           return 0;
316             }
317              
318             #------------------------------------------------------------------------------
319             # If the node is in a gap that causes nonprojectivity of a dependency, this
320             # method returns the ordered list of all nodes in the same gap. (The gap is
321             # contiguous; there may be multiple gaps within one nonprojectivity.) If the
322             # node is member of multiple overlapping gaps (corresponding to different
323             # nonprojective edges going over the node), this method returns the smallest
324             # gap containing the node. If the node is not part of any gap, the method
325             # returns an empty list.
326             #------------------------------------------------------------------------------
327             sub get_gap
328             {
329 0 0   0 0   log_fatal('Incorrect number of arguments') if(scalar(@_)!=1);
330 0           my $self = shift;
331             # We need access to all nodes in the tree.
332 0           my $root = $self->get_root();
333 0           my @nodes = $root->get_descendants({'ordered' => 1});
334             # Normally the index of a node in @nodes will be the node's ord() - 1.
335             # But it is not guaranteed. We will thus work only with the indices of
336             # the elements of @nodes. To be able to ask a node about its index, we will
337             # temporarily store the indices as wild attributes.
338 0           for(my $i = 0; $i <= $#nodes; $i++)
339             {
340 0           $nodes[$i]->wild()->{i} = $i;
341             }
342             # Find all dependencies that go nonprojectively over this node.
343 0           my $is = $self->wild()->{i};
344 0           my @over;
345 0           foreach my $node (@nodes)
346             {
347 0 0         next if($node == $self);
348 0           my $parent = $node->parent();
349             # Nodes that depend directly on the root cannot be nonprojective.
350 0 0         next if($parent->is_root());
351 0           my $i = $node->wild()->{i};
352 0           my $j = $parent->wild()->{i};
353 0 0 0       if($i<$is && $j>$is || $j<$is && $i>$is)
      0        
      0        
354             {
355 0 0         if(!$self->is_descendant_of($parent))
356             {
357 0 0         my %record =
    0          
358             (
359             'child' => $node,
360             'parent' => $parent,
361             'lord' => $i<$j ? $i : $j,
362             'rord' => $j>$i ? $j : $i
363             );
364 0           push(@over, \%record);
365             }
366             }
367             }
368             # For every nonprojectivity where this node is in the gap, get the nodes
369             # in the gap.
370 0           my @gaps;
371 0           foreach my $nprj (@over)
372             {
373 0           my @gap = ($self);
374             # Look for other gap members on the left.
375 0           for(my $i = $is-1; $i > $nprj->{lord}; $i--)
376             {
377 0 0         if(!$nodes[$i]->is_descendant_of($nprj->{parent}))
378             {
379 0           unshift(@gap, $nodes[$i]);
380             }
381             else
382             {
383 0           last;
384             }
385             }
386             # Look for other gap members on the right.
387 0           for(my $i = $is+1; $i < $nprj->{rord}; $i++)
388             {
389 0 0         if(!$nodes[$i]->is_descendant_of($nprj->{parent}))
390             {
391 0           push(@gap, $nodes[$i]);
392             }
393             else
394             {
395 0           last;
396             }
397             }
398 0           push(@gaps, \@gap);
399             }
400             # Clean up. Remove the temporary wild attributes.
401 0           foreach my $node (@nodes)
402             {
403 0           delete($node->wild()->{i});
404             }
405 0 0         if(scalar(@gaps)>=1)
406             {
407             # If there are multiple overlapping gaps, return the shortest one.
408             # If two gaps have the same shortest length, the choice is arbitrary.
409 0           @gaps = sort {scalar(@{$a}) <=> scalar(@{$b})} (@gaps);
  0            
  0            
  0            
410 0           return @{$gaps[0]};
  0            
411             }
412             else
413             {
414 0           return ();
415             }
416             }
417              
418             1;
419              
420             __END__
421              
422             =encoding utf-8
423              
424             =head1 NAME
425              
426             Treex::Core::Node::Ordered
427              
428             =head1 VERSION
429              
430             version 2.20210102
431              
432             =head1 DESCRIPTION
433              
434             Moose role for nodes which can/should be ordered by the attribute C<ord>
435             (usually following the word order).
436              
437             =head1 ATTRIBUTES
438              
439             =over
440              
441             =item ord
442              
443             The ordering attribute, ordinal number of a node.
444             The ordering should be without gaps, so
445              
446             print join ' ', map {$_->ord} $root->get_descendants({ordered=>1});
447             # should print
448             # 1 2 3 ... number_of_descendants
449              
450             =back
451              
452             =head1 METHODS
453              
454             =head2 Access to nodes ordering
455              
456             =over
457              
458             =item my $boolean = $node->precedes($another_node);
459              
460             Does this node precede C<$another_node>?
461              
462             =item my $following_node = $node->get_next_node();
463              
464             Return the closest following node (according to the ordering attribute)
465             or C<undef> if C<$node> is the last one in the tree.
466              
467             =item my $preceding_node = $node->get_prev_node();
468              
469             Return the closest preceding node (according to the ordering attribute)
470             or C<undef> if C<$node> is the first one in the tree.
471              
472             =item my @nodes_in_between = $node->get_nodes_between($other_node);
473              
474             Return nodes that lie between C<$node> and C<$other_node>, ordered, exclusive.
475             (The returned array may be empty.)
476             C<$node> and C<$other_node> can be given in any order,
477             i.e. the result of C<$node-&gt;get_nodes_between($other_node)>
478             is the same as the result of C<$other_node-&gt;get_nodes_between($node)>.
479              
480             =back
481              
482             =head2 Reordering nodes
483              
484             Next four methods for changing the order of nodes
485             (the word order defined by the attribute C<ord>)
486             have an optional argument C<$arg_ref> for specifying switches.
487             So far there is only one switch - C<without_children>
488             which is by default set to 0.
489             It means that the default behavior is to shift the node
490             with all its descendants.
491             Only if you want to leave the position of the descendants unchanged
492             and shift just the node, use e.g.
493              
494             $node->shift_after_node($reference_node, {without_children=>1});
495              
496             Shifting involves only changing the ordering attribute (C<ord>) of nodes.
497             There is no rehanging (changing parents). The node which is
498             going to be shifted must be already added to the tree
499             and the reference node must be in the same tree.
500              
501             For languages with left-to-right script: C<after> means "to the right of"
502             and C<before> means "to the left of".
503              
504             =over
505              
506             =item $node->shift_after_node($reference_node);
507              
508             Shifts (changes the C<ord> of) the node just behind the reference node.
509              
510             =item $node->shift_after_subtree($reference_node);
511              
512             Shifts (changes the C<ord> of) the node behind the subtree of the reference node.
513              
514             =item $node->shift_before_node($reference_node);
515              
516             Shifts (changes the C<ord> of) the node just in front of the reference node.
517              
518             =item $node->shift_before_subtree($reference_node);
519              
520             Shifts (changes the C<ord> of) the node in front of the subtree of the reference node.
521              
522             =back
523              
524             =head2 Nonprojectivity
525              
526             =over
527              
528             =item my $nonproj = $node->is_nonprojective();
529              
530             Return 1 if the node is attached to its parent nonprojectively, i.e. there is
531             at least one node between this node and its parent that is not descendant of
532             the parent. Return 0 otherwise.
533              
534             =back
535              
536              
537             =head1 AUTHOR
538              
539             Martin Popel <popel@ufal.mff.cuni.cz>
540              
541             =head1 COPYRIGHT AND LICENSE
542              
543             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
544              
545             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.