File Coverage

blib/lib/Treex/Core/Phrase.pm
Criterion Covered Total %
statement 39 73 53.4
branch 7 14 50.0
condition 1 3 33.3
subroutine 9 24 37.5
pod 16 16 100.0
total 72 130 55.3


line stmt bran cond sub pod time code
1             package Treex::Core::Phrase;
2             $Treex::Core::Phrase::VERSION = '2.20210102';
3 1     1   105084 use utf8;
  1         30  
  1         5  
4 1     1   513 use namespace::autoclean;
  1         18362  
  1         4  
5              
6 1     1   619 use Moose;
  1         471293  
  1         5  
7 1     1   8148 use MooseX::SemiAffordanceAccessor; # attribute x is written using set_x($value) and read using x()
  1         13088  
  1         5  
8 1     1   9349 use List::MoreUtils qw(any);
  1         13633  
  1         7  
9 1     1   1607 use Treex::Core::Log;
  1         5  
  1         88  
10 1     1   689 use Treex::Core::Node;
  1         5  
  1         917  
11              
12              
13              
14             has 'parent' =>
15             (
16             is => 'rw',
17             isa => 'Maybe[Treex::Core::Phrase]',
18             writer => '_set_parent',
19             reader => 'parent',
20             default => undef
21             );
22              
23             has 'is_member' =>
24             (
25             is => 'rw',
26             isa => 'Bool',
27             documentation => 'Is this phrase a member of a coordination (i.e. conjunct) or apposition?',
28             );
29              
30              
31              
32             #------------------------------------------------------------------------------
33             # Sets a new parent for this phrase. Unlike the bare setter _set_parent(),
34             # this public method also takes care of the reverse links from the parent to
35             # the children. The method returns the old parent, if any.
36             #------------------------------------------------------------------------------
37             sub set_parent
38             {
39 1 50   1 1 14 log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
40 1         3 my $self = shift;
41 1         3 my $new_parent = shift; # Treex::Core::Phrase::NTerm or undef
42 1 50 33     13 if(defined($new_parent) && $new_parent->is_descendant_of($self))
43             {
44 0         0 log_info($self->as_string());
45 0         0 log_fatal('Cannot set parent phrase because it would create a cycle');
46             }
47 1         33 my $old_parent = $self->parent();
48             # Say the old parent good bye.
49 1 50       4 if(defined($old_parent))
50             {
51 0         0 $old_parent->_remove_child($self);
52             }
53             # Set the new parent before we call its _add_child() method so that it can verify it has been called from here.
54 1         30 $self->_set_parent($new_parent);
55             # Say the new parent hello.
56 1 50       8 if(defined($new_parent))
57             {
58 1         26 $new_parent->_add_child($self);
59             }
60 1         4 return $old_parent;
61             }
62              
63              
64              
65             #------------------------------------------------------------------------------
66             # Returns the list of dependents of the phrase. This is an abstract method that
67             # must be implemented in every derived class. Nonterminal phrases have a list
68             # of dependents (possibly empty) as their attribute. Terminal phrases return an
69             # empty list by definition.
70             #------------------------------------------------------------------------------
71             sub dependents
72             {
73 0     0 1 0 my $self = shift;
74 0         0 log_fatal("The dependents() method is not implemented");
75             }
76              
77              
78              
79             #------------------------------------------------------------------------------
80             # Returns the list of children of the phrase. This is an abstract method that
81             # must be implemented in every derived class. Nonterminal phrases distinguish
82             # between core children and dependents, and this method should return both.
83             # Terminal phrases return an empty list by definition.
84             #------------------------------------------------------------------------------
85             sub children
86             {
87 0     0 1 0 my $self = shift;
88 0         0 log_fatal("The children() method is not implemented");
89             }
90              
91              
92              
93             #------------------------------------------------------------------------------
94             # Tests whether this phrase depends on another phrase via the parent links.
95             # This method is used to prevent cycles when setting a new parent.
96             #------------------------------------------------------------------------------
97             sub is_descendant_of
98             {
99 7 50   7 1 29 log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
100 7         14 my $self = shift;
101 7         12 my $on_phrase = shift; # Treex::Core::Phrase
102 7         234 my $parent = $self->parent();
103 7         21 while(defined($parent))
104             {
105 9 100       44 return 1 if($parent == $on_phrase);
106 3         80 $parent = $parent->parent();
107             }
108 1         8 return 0;
109             }
110              
111              
112              
113             #------------------------------------------------------------------------------
114             # Tells whether this phrase is terminal. We could probably use the Moose's
115             # methods to query the class name but this will be more convenient.
116             #------------------------------------------------------------------------------
117             sub is_terminal
118             {
119 0     0 1   my $self = shift;
120 0           log_fatal("The is_terminal() method is not implemented");
121             }
122              
123              
124              
125             #------------------------------------------------------------------------------
126             # Tells whether this phrase is coordination. We could probably use the Moose's
127             # methods to query the class name but this will be more convenient.
128             #------------------------------------------------------------------------------
129             sub is_coordination
130             {
131 0     0 1   my $self = shift;
132             # Default is FALSE, to be overridden in Coordination.
133 0           return 0;
134             }
135              
136              
137              
138             #------------------------------------------------------------------------------
139             # Tells whether this phrase is core child of another phrase. That is sometimes
140             # important to know because core children cannot be easily moved around.
141             #------------------------------------------------------------------------------
142             sub is_core_child
143             {
144 0     0 1   my $self = shift;
145 0           my $parent = $self->parent();
146 0 0         return 0 if(!defined($parent));
147 0     0     return any {$_ == $self} ($parent->core_children())
  0            
148             }
149              
150              
151              
152             #------------------------------------------------------------------------------
153             # Returns the head node of the phrase. For terminal phrases this should just
154             # return their node attribute. For nonterminal phrases this should return the
155             # node of their head child. This is an abstract method that must be defined in
156             # every derived class.
157             #------------------------------------------------------------------------------
158             sub node
159             {
160 0     0 1   my $self = shift;
161 0           log_fatal("The node() method is not implemented");
162             }
163              
164              
165              
166             #------------------------------------------------------------------------------
167             # Returns the list of all nodes covered by the phrase, i.e. the head node of
168             # this phrase and of all its descendants.
169             #------------------------------------------------------------------------------
170             sub nodes
171             {
172 0     0 1   my $self = shift;
173 0           log_fatal("The nodes() method is not implemented");
174             }
175              
176              
177              
178             #------------------------------------------------------------------------------
179             # Returns the list of all terminal descendants of this phrase. Similar to
180             # nodes(), but instead of Node objects returns the Phrase::Term objects, in
181             # which the nodes are wrapped.
182             #------------------------------------------------------------------------------
183             sub terminals
184             {
185 0     0 1   my $self = shift;
186 0           log_fatal("The terminals() method is not implemented");
187             }
188              
189              
190              
191             #------------------------------------------------------------------------------
192             # Returns the type of the dependency relation of the phrase to the governing
193             # phrase. This is an abstract method that must be defined in every derived
194             # class. When the phrase structure is built around a dependency tree, the
195             # relations will be probably taken from (or based on) the deprels of the
196             # underlying nodes. When the phrase tree is transformed to the desired style,
197             # the relations may be modified; at the end, they can be projected to the
198             # dependency tree again. A general nonterminal phrase typically has the same
199             # deprel as its head child. Terminal phrases store deprels as attributes.
200             #------------------------------------------------------------------------------
201             sub deprel
202             {
203 0     0 1   my $self = shift;
204 0           log_fatal("The deprel() method is not implemented");
205             }
206              
207              
208              
209             #------------------------------------------------------------------------------
210             # Returns the deprel that should be used when the phrase tree is projected back
211             # to a dependency tree (see the method project_dependencies()). In most cases
212             # this is identical to what deprel() returns. However, for instance
213             # prepositional phrases in Prague treebanks are attached using AuxP. Their
214             # relation to the parent (returned by deprel()) is projected to the argument of
215             # the preposition.
216             #------------------------------------------------------------------------------
217             sub project_deprel
218             {
219 0     0 1   my $self = shift;
220 0           log_fatal("The project_deprel() method is not implemented");
221             }
222              
223              
224              
225             #------------------------------------------------------------------------------
226             # Returns the node's ord attribute. This means that nodes that do not implement
227             # the Ordered role cannot be wrapped in phrases. We sometimes need to order
228             # child phrases according to the word order of their head nodes.
229             #------------------------------------------------------------------------------
230             sub ord
231             {
232 0     0 1   my $self = shift;
233 0           return $self->node()->ord();
234             }
235              
236              
237              
238             #------------------------------------------------------------------------------
239             # Returns the lowest and the highest ord values of the nodes covered by this
240             # phrase (always a pair of scalar values; they will be identical for terminal
241             # phrases). Note that there is no guarantee that all nodes within the span are
242             # covered by this phrase. There may be gaps!
243             #------------------------------------------------------------------------------
244             sub span
245             {
246 0     0 1   my $self = shift;
247 0           log_fatal("The span() method is not implemented");
248             }
249              
250              
251              
252             #------------------------------------------------------------------------------
253             # Projects dependencies between the head and the dependents back to the
254             # underlying dependency structure. This is an abstract method that must be
255             # implemented in the derived classes.
256             #------------------------------------------------------------------------------
257             sub project_dependencies
258             {
259 0     0 1   my $self = shift;
260 0           log_fatal("The project_dependencies() method is not implemented");
261             }
262              
263              
264              
265             #------------------------------------------------------------------------------
266             # Returns a textual representation of the phrase and all subphrases. Useful for
267             # debugging. This is an abstract method that must be implemented in the derived
268             # classes.
269             #------------------------------------------------------------------------------
270             sub as_string
271             {
272 0     0 1   my $self = shift;
273 0           log_fatal("The as_string() method is not implemented");
274             }
275              
276              
277              
278             __PACKAGE__->meta->make_immutable();
279              
280             1;
281              
282              
283              
284             =for Pod::Coverage BUILD
285              
286             =encoding utf-8
287              
288             =head1 NAME
289              
290             Treex::Core::Phrase
291              
292             =head1 VERSION
293              
294             version 2.20210102
295              
296             =head1 DESCRIPTION
297              
298             A C<Phrase> is a concept defined on top of dependency trees and subtrees
299             (where a subtree contains a node and all its descendants, not just any arbitrary subset of nodes).
300             Similarly to Chomsky's hierarchy of formal grammars, there are two main types of phrases:
301             I<terminal> and I<nonterminal>.
302             Furthermore, there may be subtypes of the nonterminal type with special behavior.
303              
304             A B<terminal phrase> contains just one C<Node> (which typically corresponds to a surface token).
305              
306             A B<nonterminal phrase> does not directly contain any C<Node> but it contains
307             one or more (usually at least two) sub-phrases.
308             The hierarchy of phrases and their sub-phrases is also a tree structure.
309             In the typical case there is a relation between the tree of phrases and the underlying dependency
310             tree, but the rules governing this relation are not fixed.
311              
312             Phrases help us model situations that are difficult to model in the dependency tree alone.
313             We can encode multiple levels of “tightness” of relations between governors and dependents.
314             In particular we can distinguish between dependents that modify the whole phrase (shared modifiers)
315             and those that modify only the head of the phrase (private modifiers).
316              
317             This is particularly useful for various tree transformations and conversions between annotation
318             styles (such as in the HamleDT blocks).
319             The idea is that we will first construct a phrase tree based on the existing dependency tree,
320             then we will perform transformations on the phrase tree
321             and finally we will create new dependency relations based on the phrase tree and
322             on the rules defined by the desired annotation style.
323             Phrase is a temporary internal structure that will not be saved in the Treex format on the disk.
324              
325             Every phrase knows its parent (superphrase) and, if it is nonterminal, its children (subphrases).
326             It also knows which of the children is the I<head> (as long as there are children, there is always
327             one and only one head child).
328             The phrase can also return its head node. For terminal phrases, this is the node they enwrap.
329             For nonterminal phrases, this is defined recursively as the head node of their head child phrase.
330              
331             Every phrase also has a dependency relation label I<(deprel)>.
332             These labels are analogous to deprels of nodes in dependency trees.
333             Most of them are just taken from the underlying dependency tree and they are propagated back when
334             a new dependency structure is shaped after the phrases; however, some labels may have special
335             meaning even for the C<Phrase> objects. They help recognize special types of nonterminal phrases,
336             such as coordinations.
337             If the phrase is the head of its parent phrase, its deprel is identical to the deprel of its parent.
338             Otherwise, the deprel represents the dependency relation between the phrase and the head of its parent.
339              
340             =head1 ATTRIBUTES
341              
342             =over
343              
344             =item parent
345              
346             Refers to the parent C<Phrase>, if any.
347              
348             =item is_member
349              
350             Is this phrase a member of a paratactic structure such as coordination (where
351             members are known as conjuncts) or apposition? We need this attribute because
352             of the Prague-style dependency trees. We need it only during the building phase
353             of the phrase tree.
354              
355             We could encode this attribute in C<deprel> but it would not be practical
356             because it acts independently of C<deprel>. Unlike C<deprel>, C<is_member> is
357             less tied to the underlying nodes; it is really an attribute of the whole
358             phrase. If we decide to change the C<deprel> of the phrase (which is propagated
359             to selected core children), we do not necessarily want to change C<is_member>
360             too. And we do not want to decode C<is_member> from C<deprel>, shuffle and
361             encode elsewhere again.
362              
363             When a terminal phrase is created around a C<Node>, it takes its C<is_member>
364             value from the node. When the phrase receives a parent, the C<is_member> flag
365             will be typically moved to the parent (and erased at the child). However, this
366             does not happen automatically and the C<Builder> has to do that when desired.
367             Similarly, when the type of the phrase is changed (e.g. a new C<Phrase::PP> is
368             created, the contents of the old C<Phrase::NTerm> is moved to it and the old
369             phrase is destroyed), the surrounding code should make sure that the
370             C<is_member> flag is carried over, too. Finally, the value will be used when
371             a C<Phrase::Coordination> is recognized. At that point the C<is_member> flag
372             can be erased for all newly identified conjuncts because now they can be
373             recognized without the flag. However, if the C<Phrase::Coordination> itself (or its
374             C<Phrase::NTerm> predecessor) is a member of a larger paratactic structure, then it
375             must keep the flag for its parent to see and use.
376              
377             =back
378              
379             =head1 METHODS
380              
381             =over
382              
383             =item $phrase->set_parent ($nonterminal_phrase);
384              
385             Sets a new parent for this phrase. The parent phrase must be a L<nonterminal|Treex::Core::Phrase::NTerm>.
386             This phrase will become its new I<non-head> child.
387             The new parent may also be undefined, which means that the current phrase will
388             be disconnected from the phrase structure (but it will keep its own children,
389             if any).
390             The method returns the old parent.
391              
392             =item my @dependents = $phrase->dependents();
393              
394             Returns the list of dependents of the phrase. This is an abstract method that
395             must be implemented in every derived class. Nonterminal phrases have a list
396             of dependents (possibly empty) as their attribute. Terminal phrases return an
397             empty list by definition.
398              
399             =item my @children = $phrase->children();
400              
401             Returns the list of children of the phrase. This is an abstract method that
402             must be implemented in every derived class. Nonterminal phrases distinguish
403             between core children and dependents, and this method should return both.
404             Terminal phrases return an empty list by definition.
405              
406             =item if( $phrase->is_descendant_of ($another_phrase) ) {...}
407              
408             Tests whether this phrase depends on another phrase via the parent links.
409             This method is used to prevent cycles when setting a new parent.
410              
411             =item my $ist = $phrase->is_terminal();
412              
413             Tells whether this phrase is terminal, that is, it does not have children
414             (subphrases).
415              
416             =item my $isc = $phrase->is_coordination();
417              
418             Tells whether this phrase is L<Treex::Core::Phrase::Coordination> or its
419             descendant.
420              
421             =item my $iscc = $phrase->is_core_child();
422              
423             Tells whether this phrase is a core child of another phrase. That is sometimes
424             important to know because core children cannot be easily moved around.
425              
426             =item my $node = $phrase->node();
427              
428             Returns the head node of the phrase. For terminal phrases this should just
429             return their node attribute. For nonterminal phrases this should return the
430             node of their head child. This is an abstract method that must be defined in
431             every derived class.
432              
433             =item my @nodes = $phrase->nodes();
434              
435             Returns the list of all nodes covered by the phrase, i.e. the head node of
436             this phrase and of all its descendants.
437              
438             =item my @phrases = $phrase->terminals();
439              
440             Returns the list of all terminal descendants of this phrase. Similar to
441             C<nodes()>, but instead of C<Node> objects returns the C<Phrase::Term> objects, in
442             which the nodes are wrapped.
443              
444             =item my $deprel = $phrase->deprel();
445              
446             Returns the type of the dependency relation of the phrase to the governing
447             phrase. This is an abstract method that must be defined in every derived
448             class. When the phrase structure is built around a dependency tree, the
449             relations will be probably taken from (or based on) the deprels of the
450             underlying nodes. When the phrase tree is transformed to the desired style,
451             the relations may be modified; at the end, they can be projected to the
452             dependency tree again. A general nonterminal phrase typically has the same
453             deprel as its head child. Terminal phrases store deprels as attributes.
454              
455             =item my $deprel = $phrase->project_deprel();
456              
457             Returns the deprel that should be used when the phrase tree is projected back
458             to a dependency tree (see the method project_dependencies()). In most cases
459             this is identical to what deprel() returns. However, for instance
460             prepositional phrases in Prague treebanks are attached using C<AuxP>. Their
461             relation to the parent (returned by deprel()) is projected as the label of
462             the dependency between the preposition and its argument.
463              
464             =item my $ord = $phrase->ord();
465              
466             Returns the head node's ord attribute. This means that nodes that do not implement
467             the L<Ordered|Treex::Core::Node::Ordered> role cannot be wrapped in phrases. We sometimes need to order
468             child phrases according to the word order of their head nodes.
469              
470             =item my ($left, $right) = $phrase->span();
471              
472             Returns the lowest and the highest ord values of the nodes covered by this
473             phrase (always a pair of scalar values; they will be identical for terminal
474             phrases). Note that there is no guarantee that all nodes within the span are
475             covered by this phrase. There may be gaps!
476              
477             =item $phrase->project_dependencies();
478              
479             Recursively projects dependencies between the head and the dependents back to the
480             underlying dependency structure.
481              
482             =item my $phrase_string = $phrase->as_string();
483              
484             Returns a textual representation of the phrase and all subphrases. Useful for
485             debugging.
486              
487             =back
488              
489             =head1 AUTHORS
490              
491             Daniel Zeman <zeman@ufal.mff.cuni.cz>
492              
493             =head1 COPYRIGHT AND LICENSE
494              
495             Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
496             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.