File Coverage

blib/lib/Treex/Core/Phrase.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package Treex::Core::Phrase;
2             $Treex::Core::Phrase::VERSION = '2.20150928';
3 1     1   20319 use utf8;
  1         9  
  1         5  
4 1     1   679 use namespace::autoclean;
  1         20186  
  1         4  
5              
6 1     1   852 use Moose;
  1         435766  
  1         8  
7 1     1   7445 use MooseX::SemiAffordanceAccessor; # attribute x is written using set_x($value) and read using x()
  1         11542  
  1         5  
8 1     1   7636 use Treex::Core::Log;
  1         3  
  1         118  
9 1     1   893 use Treex::Core::Node;
  0            
  0            
10              
11              
12              
13             has 'parent' =>
14             (
15             is => 'rw',
16             isa => 'Maybe[Treex::Core::Phrase]',
17             writer => '_set_parent',
18             reader => 'parent',
19             default => undef
20             );
21              
22             has 'is_member' =>
23             (
24             is => 'rw',
25             isa => 'Bool',
26             documentation => 'Is this phrase a member of a coordination (i.e. conjunct) or apposition?',
27             );
28              
29              
30              
31             #------------------------------------------------------------------------------
32             # Sets a new parent for this phrase. Unlike the bare setter _set_parent(),
33             # this public method also takes care of the reverse links from the parent to
34             # the children. The method returns the old parent, if any.
35             #------------------------------------------------------------------------------
36             sub set_parent
37             {
38             log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
39             my $self = shift;
40             my $new_parent = shift; # Treex::Core::Phrase::NTerm or undef
41             if(defined($new_parent) && $new_parent->is_descendant_of($self))
42             {
43             log_info($self->as_string());
44             log_fatal('Cannot set parent phrase because it would create a cycle');
45             }
46             my $old_parent = $self->parent();
47             # Say the old parent good bye.
48             if(defined($old_parent))
49             {
50             $old_parent->_remove_child($self);
51             }
52             # Set the new parent before we call its _add_child() method so that it can verify it has been called from here.
53             $self->_set_parent($new_parent);
54             # Say the new parent hello.
55             if(defined($new_parent))
56             {
57             $new_parent->_add_child($self);
58             }
59             return $old_parent;
60             }
61              
62              
63              
64             #------------------------------------------------------------------------------
65             # Returns the list of dependents of the phrase. This is an abstract method that
66             # must be implemented in every derived class. Nonterminal phrases have a list
67             # of dependents (possibly empty) as their attribute. Terminal phrases return an
68             # empty list by definition.
69             #------------------------------------------------------------------------------
70             sub dependents
71             {
72             my $self = shift;
73             log_fatal("The dependents() method is not implemented");
74             }
75              
76              
77              
78             #------------------------------------------------------------------------------
79             # Returns the list of children of the phrase. This is an abstract method that
80             # must be implemented in every derived class. Nonterminal phrases distinguish
81             # between core children and dependents, and this method should return both.
82             # Terminal phrases return an empty list by definition.
83             #------------------------------------------------------------------------------
84             sub children
85             {
86             my $self = shift;
87             log_fatal("The children() method is not implemented");
88             }
89              
90              
91              
92             #------------------------------------------------------------------------------
93             # Tests whether this phrase depends on another phrase via the parent links.
94             # This method is used to prevent cycles when setting a new parent.
95             #------------------------------------------------------------------------------
96             sub is_descendant_of
97             {
98             log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
99             my $self = shift;
100             my $on_phrase = shift; # Treex::Core::Phrase
101             my $parent = $self->parent();
102             while(defined($parent))
103             {
104             return 1 if($parent == $on_phrase);
105             $parent = $parent->parent();
106             }
107             return 0;
108             }
109              
110              
111              
112             #------------------------------------------------------------------------------
113             # Tells whether this phrase is terminal. We could probably use Moose's
114             # methods to query the class name but this will be more convenient.
115             #------------------------------------------------------------------------------
116             sub is_terminal
117             {
118             my $self = shift;
119             log_fatal("The is_terminal() method is not implemented");
120             }
121              
122              
123              
124             #------------------------------------------------------------------------------
125             # Returns the head node of the phrase. For terminal phrases this should just
126             # return their node attribute. For nonterminal phrases this should return the
127             # node of their head child. This is an abstract method that must be defined in
128             # every derived class.
129             #------------------------------------------------------------------------------
130             sub node
131             {
132             my $self = shift;
133             log_fatal("The node() method is not implemented");
134             }
135              
136              
137              
138             #------------------------------------------------------------------------------
139             # Returns the type of the dependency relation of the phrase to the governing
140             # phrase. This is an abstract method that must be defined in every derived
141             # class. When the phrase structure is built around a dependency tree, the
142             # relations will probably be taken from (or based on) the deprels of the
143             # underlying nodes. When the phrase tree is transformed to the desired style,
144             # the relations may be modified; at the end, they can be projected to the
145             # dependency tree again. A general nonterminal phrase typically has the same
146             # deprel as its head child. Terminal phrases store deprels as attributes.
147             #------------------------------------------------------------------------------
148             sub deprel
149             {
150             my $self = shift;
151             log_fatal("The deprel() method is not implemented");
152             }
153              
154              
155              
156             #------------------------------------------------------------------------------
157             # Returns the node's ord attribute. This means that nodes that do not implement
158             # the Ordered role cannot be wrapped in phrases. We sometimes need to order
159             # child phrases according to the word order of their head nodes.
160             #------------------------------------------------------------------------------
161             sub ord
162             {
163             my $self = shift;
164             return $self->node()->ord();
165             }
166              
167              
168              
169             #------------------------------------------------------------------------------
170             # Projects dependencies between the head and the dependents back to the
171             # underlying dependency structure. This is an abstract method that must be
172             # implemented in the derived classes.
173             #------------------------------------------------------------------------------
174             sub project_dependencies
175             {
176             my $self = shift;
177             log_fatal("The project_dependencies() method is not implemented");
178             }
179              
180              
181              
182             #------------------------------------------------------------------------------
183             # Returns a textual representation of the phrase and all subphrases. Useful for
184             # debugging. This is an abstract method that must be implemented in the derived
185             # classes.
186             #------------------------------------------------------------------------------
187             sub as_string
188             {
189             my $self = shift;
190             log_fatal("The as_string() method is not implemented");
191             }
192              
193              
194              
195             __PACKAGE__->meta->make_immutable();
196              
197             1;
198              
199              
200              
201             =for Pod::Coverage BUILD
202              
203             =encoding utf-8
204              
205             =head1 NAME
206              
207             Treex::Core::Phrase
208              
209             =head1 VERSION
210              
211             version 2.20150928
212              
213             =head1 DESCRIPTION
214              
215             A C<Phrase> is a concept defined on top of dependency trees and subtrees
216             (where a subtree contains a node and all its descendants, not just any arbitrary subset of nodes).
217             Similarly to Chomsky's hierarchy of formal grammars, there are two main types of phrases:
218             I<terminal> and I<nonterminal>.
219             Furthermore, there may be subtypes of the nonterminal type with special behavior.
220              
221             A B<terminal phrase> contains just one C<Node> (which typically corresponds to a surface token).
222              
223             A B<nonterminal phrase> does not directly contain any C<Node> but it contains
224             one or more (usually at least two) sub-phrases.
225             The hierarchy of phrases and their sub-phrases is also a tree structure.
226             In the typical case there is a relation between the tree of phrases and the underlying dependency
227             tree, but the rules governing this relation are not fixed.
228              
229             Phrases help us model situations that are difficult to model in the dependency tree alone.
230             We can encode multiple levels of “tightness” of relations between governors and dependents.
231             In particular we can distinguish between dependents that modify the whole phrase (shared modifiers)
232             and those that modify only the head of the phrase (private modifiers).
233              
234             This is particularly useful for various tree transformations and conversions between annotation
235             styles (such as in the HamleDT blocks).
236             The idea is that we will first construct a phrase tree based on the existing dependency tree,
237             then we will perform transformations on the phrase tree
238             and finally we will create new dependency relations based on the phrase tree and
239             on the rules defined by the desired annotation style.
240             Phrase is a temporary internal structure that will not be saved in the Treex format on the disk.
241              
242             Every phrase knows its parent (superphrase) and, if it is nonterminal, its children (subphrases).
243             It also knows which of the children is the I<head> (as long as there are children, there is always
244             one and only one head child).
245             The phrase can also return its head node. For terminal phrases, this is the node they enwrap.
246             For nonterminal phrases, this is defined recursively as the head node of their head child phrase.
247              
248             Every phrase also has a dependency relation label I<(deprel)>.
249             These labels are analogous to deprels of nodes in dependency trees.
250             Most of them are just taken from the underlying dependency tree and they are propagated back when
251             the new dependency structure is shaped after the phrases; however, some labels may have special
252             meaning even for the C<Phrase> objects. They help recognize special types of nonterminal phrases,
253             such as coordinations.
254             If the phrase is the head of its parent phrase, its deprel is identical to the deprel of its parent.
255             Otherwise, the deprel represents the dependency relation between the phrase and the head of its parent.
256              
257             =head1 ATTRIBUTES
258              
259             =over
260              
261             =item parent
262              
263             Refers to the parent C<Phrase>, if any.
264              
265             =item is_member
266              
267             Is this phrase a member of a paratactic structure such as coordination (where
268             members are known as conjuncts) or apposition? We need this attribute because
269             of the Prague-style dependency trees. We need it only during the building phase
270             of the phrase tree.
271              
272             We could encode this attribute in C<deprel> but it would not be practical
273             because it acts independently of C<deprel>. Unlike C<deprel>, C<is_member> is
274             less tied to the underlying nodes; it is really an attribute of the whole
275             phrase. If we decide to change the C<deprel> of the phrase (which is propagated
276             to selected core children), we do not necessarily want to change C<is_member>
277             too. And we do not want to decode C<is_member> from C<deprel>, shuffle and
278             encode elsewhere again.
279              
280             When a terminal phrase is created around a C<Node>, it takes its C<is_member>
281             value from the node. When the phrase receives a parent, the C<is_member> flag
282             will be typically moved to the parent (and erased at the child). However, this
283             does not happen automatically and the C<Builder> has to do that when desired.
284             Similarly, when the type of the phrase is changed (e.g. a new C<Phrase::PP> is
285             created, the contents of the old C<Phrase::NTerm> is moved to it and the old
286             phrase is destroyed), the surrounding code should make sure that the
287             C<is_member> flag is carried over, too. Finally, the value will be used when
288             a C<Phrase::Coordination> is recognized. At that point the C<is_member> flag
289             can be erased for all newly identified conjuncts because now they can be
290             recognized without the flag. However, if the C<Phrase::Coordination> itself (or its
291             C<Phrase::NTerm> predecessor) is a member of a larger paratactic structure, then it
292             must keep the flag for its parent to see and use.
293              
294             =back
295              
296             =head1 METHODS
297              
298             =over
299              
300             =item $phrase->set_parent ($nonterminal_phrase);
301              
302             Sets a new parent for this phrase. The parent phrase must be a L<nonterminal|Treex::Core::Phrase::NTerm>.
303             This phrase will become its new I<non-head> child.
304             The new parent may also be undefined, which means that the current phrase will
305             be disconnected from the phrase structure (but it will keep its own children,
306             if any).
307             The method returns the old parent.
308              
309             =item my @dependents = $phrase->dependents();
310              
311             Returns the list of dependents of the phrase. This is an abstract method that
312             must be implemented in every derived class. Nonterminal phrases have a list
313             of dependents (possibly empty) as their attribute. Terminal phrases return an
314             empty list by definition.
315              
316             =item my @children = $phrase->children();
317              
318             Returns the list of children of the phrase. This is an abstract method that
319             must be implemented in every derived class. Nonterminal phrases distinguish
320             between core children and dependents, and this method should return both.
321             Terminal phrases return an empty list by definition.
322              
323             =item if( $phrase->is_descendant_of ($another_phrase) ) {...}
324              
325             Tests whether this phrase depends on another phrase via the parent links.
326             This method is used to prevent cycles when setting a new parent.
327              
328             =item my $ist = $phrase->is_terminal();
329              
330             Tells whether this phrase is terminal, that is, it does not have children
331             (subphrases).
332              
333             =item my $node = $phrase->node();
334              
335             Returns the head node of the phrase. For terminal phrases this should just
336             return their node attribute. For nonterminal phrases this should return the
337             node of their head child. This is an abstract method that must be defined in
338             every derived class.
339              
340             =item my $deprel = $phrase->deprel();
341              
342             Returns the type of the dependency relation of the phrase to the governing
343             phrase. This is an abstract method that must be defined in every derived
344             class. When the phrase structure is built around a dependency tree, the
345             relations will probably be taken from (or based on) the deprels of the
346             underlying nodes. When the phrase tree is transformed to the desired style,
347             the relations may be modified; at the end, they can be projected to the
348             dependency tree again. A general nonterminal phrase typically has the same
349             deprel as its head child. Terminal phrases store deprels as attributes.
350              
351             =item my $ord = $phrase->ord();
352              
353             Returns the head node's ord attribute. This means that nodes that do not implement
354             the L<Ordered|Treex::Core::Node::Ordered> role cannot be wrapped in phrases. We sometimes need to order
355             child phrases according to the word order of their head nodes.
356              
357             =item $phrase->project_dependencies();
358              
359             Recursively projects dependencies between the head and the dependents back to the
360             underlying dependency structure.
361              
362             =item my $phrase_string = $phrase->as_string();
363              
364             Returns a textual representation of the phrase and all subphrases. Useful for
365             debugging.
366              
367             =back
368              
369             =head1 AUTHORS
370              
371             Daniel Zeman <zeman@ufal.mff.cuni.cz>
372              
373             =head1 COPYRIGHT AND LICENSE
374              
375             Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
376             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.