File Coverage

blib/lib/Treex/Core/Phrase/BaseNTerm.pm
Criterion Covered Total %
statement 15 144 10.4
branch 0 62 0.0
condition 0 21 0.0
subroutine 5 25 20.0
pod n/a
total 20 252 7.9


line stmt bran cond sub pod time code
1             package Treex::Core::Phrase::BaseNTerm;
2             $Treex::Core::Phrase::BaseNTerm::VERSION = '2.20150928';
3 1     1   961 use utf8;
  1         2  
  1         22  
4 1     1   28 use namespace::autoclean;
  1         2  
  1         8  
5              
6 1     1   65 use Moose;
  1         1  
  1         8  
7 1     1   6173 use List::MoreUtils qw(any);
  1         3  
  1         15  
8 1     1   416 use Treex::Core::Log;
  1         3  
  1         1817  
9              
10             extends 'Treex::Core::Phrase';
11              
12              
13              
14             has '_dependents_ref' =>
15             (
16             is => 'ro',
17             isa => 'ArrayRef[Treex::Core::Phrase]',
18             default => sub { [] },
19             documentation => 'The public should not access directly the array reference. '.
20             'They may use the public method dependents() to get the list.'
21             );
22              
23             has 'dead' =>
24             (
25             is => 'rw',
26             isa => 'Bool',
27             writer => '_set_dead',
28             reader => 'dead',
29             default => 0,
30             documentation => 'Most non-terminal phrases cannot exist without children. '.
31             'If we want to change the class of a non-terminal phrase, we construct '.
32             'an object of the new class and move the children there from the old '.
33             'one. But the old object will not be physically destroyed until it '.
34             'gets out of scope. So we will mark it as “dead”. If anyone tries to '.
35             'use the dead object, an exception will be thrown.'
36             );
37              
38              
39              
40             #------------------------------------------------------------------------------
41             # Tells whether this phrase is terminal. We could probably use Moose's own
42             # methods to query the class name, but this is more convenient.
43             #------------------------------------------------------------------------------
44             sub is_terminal
45             {
46 0     0     my $self = shift;
47 0           return 0;
48             }
49              
50              
51              
52             #------------------------------------------------------------------------------
53             # Returns the head child of the phrase. This is an abstract method that must be
54             # defined in every derived class.
55             #------------------------------------------------------------------------------
56             sub head
57             {
58 0     0     my $self = shift;
59 0           log_fatal("The head() method is not implemented");
60             }
61              
62              
63              
64             #------------------------------------------------------------------------------
65             # Figures out whether an ordered list of children is required. Allows both hash
66             # and non-hash notations, i.e.
67             # my @c = $p->dependents({'ordered' => 1});
68             # my @c = $p->dependents('ordered' => 1);
69             # my @c = $p->dependents('ordered');
70             #------------------------------------------------------------------------------
71             sub _order_required
72             {
73 0     0     my $self = shift;
74 0           my @parray = @_;
75 0 0         return 0 unless(@parray);
76 0 0         return $parray[0]->{ordered} if(ref($parray[0]) eq 'HASH');
77 0           my %phash = @_;
78 0 0         if(exists($phash{ordered}))
79             {
80             # To accommodate the $p->dependents('ordered') calling style, even an undefined value counts as true.
81 0 0 0       if(defined($phash{ordered}) && $phash{ordered}==0)
82             {
83 0           return 0;
84             }
85 0           return 1;
86             }
87 0           return 0;
88             }
89              
90              
91              
92             #------------------------------------------------------------------------------
93             # Sorts a list of phrases according to the word order of their head nodes.
94             #------------------------------------------------------------------------------
95             sub order_phrases
96             {
97 0     0     my $self = shift;
98 0           return sort {$a->ord() <=> $b->ord()} (@_);
  0            
99             }
100              
101              
102              
103             #------------------------------------------------------------------------------
104             # Returns the list of dependents of the phrase. Unlike the getter
105             # _dependents_ref(), which returns a reference to the array of dependents,
106             # this method returns a list (sorted by word order if 'ordered' is
107             # requested), like the other methods that return lists of children.
108             #------------------------------------------------------------------------------
109             sub dependents
110             {
111 0     0     my $self = shift;
112 0 0         log_fatal('Dead') if($self->dead());
113 0           my @dependents = @{$self->_dependents_ref()};
  0            
114 0 0         return $self->_order_required(@_) ? $self->order_phrases(@dependents) : @dependents;
115             }
116              
117              
118              
119             #------------------------------------------------------------------------------
120             # Returns the list of non-head children of the phrase. By default these are the
121             # dependents. However, in special nonterminal phrases there may be children
122             # that are neither head nor dependents.
123             #------------------------------------------------------------------------------
124             sub nonhead_children
125             {
126 0     0     my $self = shift;
127 0 0         log_fatal('Dead') if($self->dead());
128 0           return $self->dependents(@_);
129             }
130              
131              
132              
133             #------------------------------------------------------------------------------
134             # Returns the list of the children of the phrase that are not dependents. By
135             # default this is just the head child. However, in special nonterminal phrases
136             # there may be other children that have a special status but are not the
137             # current head.
138             #------------------------------------------------------------------------------
139             sub core_children
140             {
141 0     0     my $self = shift;
142 0 0         log_fatal('Dead') if($self->dead());
143 0           my @children = ($self->head());
144 0           return @children;
145             }
146              
147              
148              
149             #------------------------------------------------------------------------------
150             # Returns the list of all children of the phrase, i.e. core children and
151             # dependents.
152             #------------------------------------------------------------------------------
153             sub children
154             {
155 0     0     my $self = shift;
156 0 0         log_fatal('Dead') if($self->dead());
157 0           my @children = ($self->core_children(), $self->dependents());
158 0 0         return $self->_order_required(@_) ? $self->order_phrases(@children) : @children;
159             }
160              
161              
162              
163             #------------------------------------------------------------------------------
164             # Returns the head node of the phrase. For nonterminal phrases this recursively
165             # returns head node of their head child.
166             #------------------------------------------------------------------------------
167             sub node
168             {
169 0     0     my $self = shift;
170 0 0         log_fatal('Dead') if($self->dead());
171 0           return $self->head()->node();
172             }
173              
174              
175              
176             #------------------------------------------------------------------------------
177             # Returns the type of the dependency relation of the phrase to the governing
178             # phrase. A general nonterminal phrase has the same deprel as its head child.
179             #------------------------------------------------------------------------------
180             sub deprel
181             {
182 0     0     my $self = shift;
183 0 0         log_fatal('Dead') if($self->dead());
184 0           return $self->head()->deprel();
185             }
186              
187              
188              
189             #------------------------------------------------------------------------------
190             # Sets a new type of the dependency relation of the phrase to the governing
191             # phrase. For nonterminal phrases the label is propagated to one (or several)
192             # of their children. It is not propagated to the underlying dependency tree
193             # (the project_dependencies() method would have to be called to achieve that).
194             #------------------------------------------------------------------------------
195             sub set_deprel
196             {
197 0     0     my $self = shift;
198 0 0         log_fatal('Dead') if($self->dead());
199 0           $self->head()->set_deprel(@_);
200             }
201              
202              
203              
204             #------------------------------------------------------------------------------
205             # Adds a child phrase (subphrase). By default, the new child will not be head,
206             # it will be an ordinary modifier. This is a private method that should be
207             # called only from the public method Phrase::set_parent().
208             #------------------------------------------------------------------------------
209             sub _add_child
210             {
211 0 0   0     log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
212 0           my $self = shift;
213 0           my $new_child = shift; # Treex::Core::Phrase
214 0 0         log_fatal('Dead') if($self->dead());
215             # If we are called correctly from Phrase::set_parent(), then the child already knows about us.
216 0 0 0       if(!defined($new_child) || !defined($new_child->parent()) || $new_child->parent() != $self)
      0        
217             {
218 0           log_fatal("The child must point to the parent first. This private method must be called only from Phrase::set_parent()");
219             }
220 0           my $nhc = $self->_dependents_ref();
221 0           push(@{$nhc}, $new_child);
  0            
222             }
223              
224              
225              
226             #------------------------------------------------------------------------------
227             # Removes a child phrase (subphrase). Only non-head children can be removed
228             # this way. If the head is to be removed, it must be first replaced by another
229             # child; or the whole nonterminal phrase must be destroyed. This is a private
230             # method that should be called only from the public method Phrase::set_parent().
231             #------------------------------------------------------------------------------
232             sub _remove_child
233             {
234 0 0   0     log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
235 0           my $self = shift;
236 0           my $child = shift; # Treex::Core::Phrase
237 0 0         log_fatal('Dead') if($self->dead());
238 0 0 0       if(!defined($child) || !defined($child->parent()) || $child->parent() != $self)
      0        
239             {
240 0           log_fatal("The child does not think I'm its parent");
241             }
242 0 0   0     if(any {$_ == $child} ($self->core_children()))
  0            
243             {
244 0           log_fatal("Cannot remove the head child or any other core child");
245             }
246 0           my $nhc = $self->_dependents_ref();
247 0           my $found = 0;
248 0           for(my $i = 0; $i <= $#{$nhc}; $i++)
  0            
249             {
250 0 0         if($nhc->[$i] == $child)
251             {
252 0           $found = 1;
253 0           splice(@{$nhc}, $i, 1);
  0            
254 0           last;
255             }
256             }
257 0 0         if(!$found)
258             {
259 0           log_fatal("Could not find the phrase among my non-head children");
260             }
261             }
262              
263              
264              
265             #------------------------------------------------------------------------------
266             # Common validation for replace_child() and replace_core_child(). May throw
267             # exceptions.
268             #------------------------------------------------------------------------------
269             sub _check_old_new_child
270             {
271 0     0     my $self = shift;
272 0           my $old_child = shift; # Treex::Core::Phrase
273 0           my $new_child = shift; # Treex::Core::Phrase
274 0 0         log_fatal('Dead') if($self->dead());
275 0 0 0       if(!defined($old_child) || !defined($old_child->parent()) || $old_child->parent() != $self)
      0        
276             {
277 0           log_fatal("The child to be replaced does not think I'm its parent");
278             }
279 0 0         if(!defined($new_child))
280             {
281 0           log_fatal("The replacement child is not defined");
282             }
283 0 0         if(defined($new_child->parent()))
284             {
285 0 0         if($new_child->parent() == $self)
286             {
287 0           log_fatal("The replacement already is my child");
288             }
289             else
290             {
291 0           log_fatal("The replacement child already has a parent");
292             }
293             }
294             }
295              
296              
297              
298             #------------------------------------------------------------------------------
299             # Replaces a child by another phrase. This method will work with any child,
300             # including the core children. The core children cannot be undefined but if we
301             # immediately replace them by a new child, the phrase will remain valid.
302             #------------------------------------------------------------------------------
303             sub replace_child
304             {
305 0     0     my $self = shift;
306 0           my $old_child = shift; # Treex::Core::Phrase
307 0           my $new_child = shift; # Treex::Core::Phrase
308 0 0         log_fatal('Dead') if($self->dead());
309 0           $self->_check_old_new_child($old_child, $new_child);
310             # If the child is dependent, we can do it here. If it is a core child,
311             # we need a subclass to decide what to do.
312 0           my $nhc = $self->_dependents_ref();
313 0           for(my $i = 0; $i <= $#{$nhc}; $i++)
  0            
314             {
315 0 0         if($nhc->[$i] == $old_child)
316             {
317 0           splice(@{$nhc}, $i, 1, $new_child);
  0            
318 0           $old_child->_set_parent(undef);
319 0           $new_child->_set_parent($self);
320 0           return;
321             }
322             }
323             # If we are here, we did not find the old child among the dependents.
324             # Thus it has to be a core child.
325 0           $self->replace_core_child($old_child, $new_child);
326             }
327              
328              
329              
330             #------------------------------------------------------------------------------
331             # Replaces a core child by another phrase. This is an abstract method that must
332             # be defined in every derived class.
333             #------------------------------------------------------------------------------
334             sub replace_core_child
335             {
336 0     0     my $self = shift;
337 0           log_fatal("The replace_core_child() method is not implemented");
338             }
339              
340              
341              
342             #------------------------------------------------------------------------------
343             # Detaches all children (including core children) and then marks itself as dead
344             # so that it cannot be used any more. This method should be called when we want
345             # to replace a non-terminal phrase by a new phrase of a different class. The
346             # method will not detach the dying phrase from its parent! That could kill the
347             # parent too (if the dying phrase is a core child) but we probably want the
348             # parent to survive and to replace the dying child by a new phrase we create.
349             # However, it is the caller's responsibility to modify the parent immediately.
350             #------------------------------------------------------------------------------
351             sub detach_children_and_die
352             {
353 0     0     my $self = shift;
354             # Visit all children and tell them they have no parent now. We cannot use
355             # the public method set_parent() because it will call our method _remove_child()
356             # and that only works for non-core children. (Besides, we want to destroy
357             # our links to children all at once. The _remove_child() method would be
358             # unnecessarily slow for that purpose, as it works with only one child and
359             # has to find it first.) Thus we will directly modify the one-way link via
360             # _set_parent().
361 0           my @children = $self->children();
362 0           foreach my $child (@children)
363             {
364 0           $child->_set_parent(undef);
365             }
366             # Remove the references leading from this phrase to its dependents.
367 0           splice(@{$self->_dependents_ref()});
  0            
368             # We cannot remove the references to the core children because we do not
369             # know how many core children there are and how they are accessed, and
370             # they cannot be undefined anyway. However, we will mark this phrase as
371             # dead, so it can no longer be used while it waits to be destroyed by Perl.
372 0           $self->_set_dead(1);
373 0           return @children;
374             }
375              
376              
377              
378             #------------------------------------------------------------------------------
379             # Projects dependencies between the head and the dependents back to the
380             # underlying dependency structure.
381             #------------------------------------------------------------------------------
382             sub project_dependencies
383             {
384 0     0     my $self = shift;
385 0 0         log_fatal('Dead') if($self->dead());
386             # Recursion first, we work bottom-up.
387 0           my @children = $self->children();
388 0           foreach my $child (@children)
389             {
390 0           $child->project_dependencies();
391             }
392 0           my $head_node = $self->node();
393 0           my @dependents = $self->nonhead_children();
394 0           foreach my $dependent (@dependents)
395             {
396 0           my $dep_node = $dependent->node();
397 0           $dep_node->set_parent($head_node);
398 0           $dep_node->set_deprel($dependent->deprel());
399             }
400             }
401              
402              
403              
404             #------------------------------------------------------------------------------
405             # Returns a textual representation of the phrase and all subphrases. Useful for
406             # debugging.
407             #------------------------------------------------------------------------------
408             sub as_string
409             {
410 0     0     my $self = shift;
411 0           my @core_children = $self->core_children('ordered' => 1);
412 0           my $core = 'CORE '.join(', ', map {$_->as_string()} (@core_children));
  0            
413 0           my @dependents = $self->dependents('ordered' => 1);
414 0           my $deps = join(', ', map {$_->as_string()} (@dependents));
  0            
415 0 0         $deps = 'DEPS '.$deps if($deps);
416 0           my $subtree = join(' ', ($core, $deps));
417 0           return "(BNT $subtree)";
418             }
419              
420              
421              
422             __PACKAGE__->meta->make_immutable();
423              
424             1;
425              
426              
427              
428             =for Pod::Coverage BUILD
429              
430             =encoding utf-8
431              
432             =head1 NAME
433              
434             Treex::Core::Phrase::BaseNTerm
435              
436             =head1 VERSION
437              
438             version 2.20150928
439              
440             =head1 DESCRIPTION
441              
442             C<BaseNTerm> is an abstract class that defines the basic interface of
443             nonterminal phrases. The general nonterminal phrase, C<NTerm>, is derived from
444             C<BaseNTerm>. So are some special cases of nonterminals, such as C<PP>.
445             (They cannot be derived from C<NTerm> because they implement certain parts
446             of the interface differently.)
447              
448             See also L<Treex::Core::Phrase> and L<Treex::Core::Phrase::NTerm>.
449              
450             =head1 ATTRIBUTES
451              
452             =over
453              
454             =item _dependents_ref
455              
456             Reference to array of sub-C<Phrase>s (children) of this phrase that do not belong to the
457             core of the phrase. By default the core contains only the head child. However,
458             some specialized subclasses may define a larger core where two or more
459             children have a special status, but only one of them can be the head.
460              
461             =item dead
462              
463             Most non-terminal phrases cannot exist without children.
464             If we want to change the class of a non-terminal phrase, we construct
465             an object of the new class and move the children there from the old
466             one. But the old object will not be physically destroyed until it
467             gets out of scope. So we will mark it as “dead”. If anyone tries to
468             use the dead object, an exception will be thrown.
469              
470             =back
471              
472             =head1 METHODS
473              
474             =over
475              
476             =item head
477              
478             A sub-C<Phrase> of this phrase that is at the moment considered the head phrase (in the sense of dependency syntax).
479             A general C<NTerm> phrase just has a C<head> attribute.
480             Special cases of nonterminals may have multiple children with special behavior,
481             and they may choose which one of these children shall be head under the current
482             annotation style.
483              
484             =item dependents
485              
486             Returns the list of dependents of the phrase. Unlike the getter
487             C<_dependents_ref()>, which returns a reference to the array of dependents,
488             this method returns a list, optionally sorted by word order (see
489             C<order_phrases>). Hence it resembles the other methods that return lists of children.
490              
491             =item nonhead_children
492              
493             Returns the list of non-head children of the phrase. By default these are the
494             dependents. However, in special nonterminal phrases there may be children
495             that are neither head nor dependents.
496              
497             =item core_children
498              
499             Returns the list of the children of the phrase that are not dependents. By default this
500             is just the head child. However, in specialized nonterminal phrases there may be
501             other children that have a special status but are not the current head.
502              
503             =item children
504              
505             Returns the list of all children of the phrase, i.e. core children and
506             dependents.
507              
508             =item order_phrases
509              
510             Sorts a list of phrases according to the word order of their head nodes.
511             All methods that return lists of children (C<dependents()>, C<nonhead_children()>,
512             C<core_children()>, C<children()>) can be asked to sort the list using this
513             method. The following calling styles are possible:
514              
515             my @ordered_children = $phrase->children({'ordered' => 1});
516             my @ordered_children = $phrase->children('ordered' => 1);
517             my @ordered_children = $phrase->children('ordered');
518              
519             =item deprel
520              
521             Returns the type of the dependency relation of the phrase to the governing
522             phrase. A general nonterminal phrase has the same deprel as its head child.
523              
524             =item set_deprel
525              
526             Sets a new type of the dependency relation of the phrase to the governing
527             phrase. For nonterminal phrases the label is propagated to one (or several)
528             of their children. It is not propagated to the underlying dependency tree
529             (the C<project_dependencies()> method would have to be called to achieve that).
530              
531             =item replace_child
532              
533             $nonterminal->replace_child ($old_child, $new_child);
534              
535             Replaces a child by another phrase. This method will work with any child,
536             including the core children. The core children cannot be undefined but if we
537             immediately replace them by a new child, the phrase will remain valid.
538              
539             =item replace_core_child
540              
541             Same as C<replace_child()> but used with core children only. If we know that we
542             are replacing a core child, it is more efficient to call this method directly.
543             If we do not know what type of child we have, we can call the more general
544             C<replace_child()> and it will decide.
545              
546             C<BaseNTerm::replace_core_child()> is an abstract method that must be defined
547             in every derived class.
548              
549             =item detach_children_and_die
550              
551             my $parent = $phrase->parent();
552             my $replacement = Treex::Core::Phrase::PP->new(...);
553             my @children = $phrase->detach_children_and_die();
554             $parent->replace_child ($phrase, $replacement);
555              
556             Detaches all children (including core children) and then marks itself as dead
557             so that it cannot be used any more. This method should be called when we want
558             to replace a non-terminal phrase by a new phrase of a different class. The
559             method will not detach the dying phrase from its parent! That could kill the
560             parent too (if the dying phrase is a core child) but we probably want the
561             parent to survive and to replace the dying child by a new phrase we create.
562             However, it is the caller's responsibility to modify the parent immediately.
563              
564             =item project_dependencies
565              
566             Recursively projects dependencies between the head and the dependents back to the
567             underlying dependency structure.
568              
569             =item as_string
570              
571             Returns a textual representation of the phrase and all subphrases. Useful for
572             debugging.
573              
574             =back
575              
576             =head1 AUTHORS
577              
578             Daniel Zeman <zeman@ufal.mff.cuni.cz>
579              
580             =head1 COPYRIGHT AND LICENSE
581              
582             Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
583             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.