line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Phrase; |
2
|
|
|
|
|
|
|
$Treex::Core::Phrase::VERSION = '2.20160630'; |
3
|
1
|
|
|
1
|
|
14200
|
use utf8; |
|
1
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
4
|
|
4
|
1
|
|
|
1
|
|
417
|
use namespace::autoclean; |
|
1
|
|
|
|
|
14398
|
|
|
1
|
|
|
|
|
5
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
464
|
use Moose; |
|
1
|
|
|
|
|
379982
|
|
|
1
|
|
|
|
|
6
|
|
7
|
1
|
|
|
1
|
|
7403
|
use MooseX::SemiAffordanceAccessor; # attribute x is written using set_x($value) and read using x() |
|
1
|
|
|
|
|
11057
|
|
|
1
|
|
|
|
|
4
|
|
8
|
1
|
|
|
1
|
|
8258
|
use List::MoreUtils qw(any); |
|
1
|
|
|
|
|
6542
|
|
|
1
|
|
|
|
|
9
|
|
9
|
1
|
|
|
1
|
|
954
|
use Treex::Core::Log; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
62
|
|
10
|
1
|
|
|
1
|
|
408
|
use Treex::Core::Node; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Link to the parent (governing) phrase; undef while the phrase is not attached
# to any parent. The attribute is read with parent(). The raw writer is the
# private _set_parent(); set_parent() is the public way to re-attach a phrase
# because it also takes care of the reverse links from the parent to its
# children.
has parent => (
    is      => 'rw',
    isa     => 'Maybe[Treex::Core::Phrase]',
    reader  => 'parent',
    writer  => '_set_parent',
    default => undef,
);
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Boolean flag saying that the phrase is a member of a coordination (i.e. a
# conjunct) or of an apposition. With MooseX::SemiAffordanceAccessor the value
# is read using is_member() and written using set_is_member($value).
has is_member => (
    is            => 'rw',
    isa           => 'Bool',
    documentation => 'Is this phrase a member of a coordination (i.e. conjunct) or apposition?',
);
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Sets a new parent for this phrase. Unlike the bare setter _set_parent(),
# this public method also takes care of the reverse links from the parents to
# their children: the old parent (if any) is told to forget this phrase via
# _remove_child() and the new parent (if defined) is told to register it via
# _add_child(). Passing undef as the new parent detaches the phrase.
#
# The call is fatal if it would create a cycle, i.e. if the new parent is this
# phrase itself or one of its descendants, and also if the method does not
# receive exactly one argument besides the invocant.
#
# Returns the old parent, if any (undef otherwise).
#------------------------------------------------------------------------------
sub set_parent
{
    log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
    my $self = shift;
    my $new_parent = shift; # Treex::Core::Phrase::NTerm or undef
    # is_descendant_of() inspects only the ancestors of the new parent, hence
    # the degenerate cycle (a phrase becoming its own parent) has to be tested
    # separately.
    if(defined($new_parent) && ($new_parent == $self || $new_parent->is_descendant_of($self)))
    {
        log_info($self->as_string());
        log_fatal('Cannot set parent phrase because it would create a cycle');
    }
    my $old_parent = $self->parent();
    # Say the old parent good bye.
    if(defined($old_parent))
    {
        $old_parent->_remove_child($self);
    }
    # Set the new parent before we call its _add_child() method so that it can verify it has been called from here.
    $self->_set_parent($new_parent);
    # Say the new parent hello.
    if(defined($new_parent))
    {
        $new_parent->_add_child($self);
    }
    return $old_parent;
}
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the list of dependents of the phrase. Abstract method: every derived
# class must provide its own implementation. Nonterminal phrases keep a
# (possibly empty) list of dependents as an attribute, terminal phrases return
# an empty list by definition. The base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub dependents
{
    my ($self) = @_;
    log_fatal("The dependents() method is not implemented");
}
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the list of children of the phrase. Abstract method: every derived
# class must provide its own implementation. Nonterminal phrases distinguish
# core children from dependents and are expected to return both here; terminal
# phrases return an empty list by definition. The base-class version only
# calls log_fatal().
#------------------------------------------------------------------------------
sub children
{
    my ($self) = @_;
    log_fatal("The children() method is not implemented");
}
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Tests whether this phrase depends (directly or transitively) on another
# phrase via the parent links. Only proper ancestors are examined, so a phrase
# is not reported as a descendant of itself. Returns 1 if the other phrase is
# found among the ancestors, 0 otherwise. Used to prevent cycles when a new
# parent is being set.
#------------------------------------------------------------------------------
sub is_descendant_of
{
    log_fatal('Incorrect number of arguments') if(scalar(@_) != 2);
    my ($self, $on_phrase) = @_; # $on_phrase: Treex::Core::Phrase
    # Climb the chain of ancestors until the sought phrase or the root is reached.
    for(my $ancestor = $self->parent(); defined($ancestor); $ancestor = $ancestor->parent())
    {
        if($ancestor == $on_phrase)
        {
            return 1;
        }
    }
    return 0;
}
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Tells whether this phrase is terminal. Querying the class name through Moose
# would probably work as well but a dedicated method is more convenient. The
# base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub is_terminal
{
    my ($self) = @_;
    log_fatal("The is_terminal() method is not implemented");
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Tells whether this phrase is a coordination. Querying the class name through
# Moose would probably work as well but a dedicated method is more convenient.
# The base class always answers 0 (false); the Coordination class is expected
# to override the method.
#------------------------------------------------------------------------------
sub is_coordination
{
    my ($self) = @_;
    return 0;
}
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Tells whether this phrase is a core child of its parent phrase, i.e. whether
# it occurs in the list returned by the parent's core_children(). A phrase
# without a parent is never a core child (0 is returned). This is sometimes
# important to know because core children cannot be easily moved around.
#------------------------------------------------------------------------------
sub is_core_child
{
    my ($self) = @_;
    my $parent = $self->parent();
    if(!defined($parent))
    {
        return 0;
    }
    my @core_children = $parent->core_children();
    return any {$_ == $self} @core_children;
}
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the head node of the phrase. Abstract method: every derived class
# must define it. Terminal phrases should simply return their node attribute,
# nonterminal phrases should return the node of their head child. The
# base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub node
{
    my ($self) = @_;
    log_fatal("The node() method is not implemented");
}
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the list of all nodes covered by the phrase, that is, the head node
# of this phrase and the head nodes of all its descendants. The base-class
# version only calls log_fatal().
#------------------------------------------------------------------------------
sub nodes
{
    my ($self) = @_;
    log_fatal("The nodes() method is not implemented");
}
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the list of all terminal descendants of this phrase. It is similar
# to nodes() but the result consists of the Phrase::Term objects that wrap the
# nodes rather than of the Node objects themselves. The base-class version
# only calls log_fatal().
#------------------------------------------------------------------------------
sub terminals
{
    my ($self) = @_;
    log_fatal("The terminals() method is not implemented");
}
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the type of the dependency relation between the phrase and its
# governing phrase. Abstract method: every derived class must define it.
# When the phrase structure is built around a dependency tree, the relations
# will probably be taken from (or based on) the deprels of the underlying
# nodes. They may be modified while the phrase tree is transformed to the
# desired style and, at the end, they can be projected to the dependency tree
# again. A general nonterminal phrase typically has the same deprel as its
# head child; terminal phrases store deprels as attributes. The base-class
# version only calls log_fatal().
#------------------------------------------------------------------------------
sub deprel
{
    my ($self) = @_;
    log_fatal("The deprel() method is not implemented");
}
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the deprel that should be used when the phrase tree is projected
# back to a dependency tree (see the method project_dependencies()). In most
# cases the value is identical to what deprel() returns. However, for
# instance prepositional phrases in Prague treebanks are attached using AuxP;
# their relation to the parent (returned by deprel()) is projected as the
# label of the dependency between the preposition and its argument. The
# base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub project_deprel
{
    my ($self) = @_;
    log_fatal("The project_deprel() method is not implemented");
}
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the ord attribute of the head node of the phrase, i.e. the result of
# calling ord() on whatever node() returns. Consequently, nodes that do not
# implement the Ordered role cannot be wrapped in phrases. The value is needed
# when child phrases have to be ordered according to the word order of their
# head nodes.
#------------------------------------------------------------------------------
sub ord
{
    my ($self) = @_;
    my $head_node = $self->node();
    return $head_node->ord();
}
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns the lowest and the highest ord value of the nodes covered by this
# phrase. The result is always a pair of scalar values; for terminal phrases
# the two values are identical. Note that the nodes within the span are not
# guaranteed to be all covered by this phrase. There may be gaps! The
# base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub span
{
    my ($self) = @_;
    log_fatal("The span() method is not implemented");
}
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Projects the dependencies between the head and the dependents back to the
# underlying dependency structure. Abstract method: it must be implemented in
# the derived classes. The base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub project_dependencies
{
    my ($self) = @_;
    log_fatal("The project_dependencies() method is not implemented");
}
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Returns a textual representation of the phrase and of all its subphrases,
# which is useful for debugging. Abstract method: it must be implemented in
# the derived classes. The base-class version only calls log_fatal().
#------------------------------------------------------------------------------
sub as_string
{
    my ($self) = @_;
    log_fatal("The as_string() method is not implemented");
}
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
1; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=for Pod::Coverage BUILD |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=encoding utf-8 |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=head1 NAME |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Treex::Core::Phrase |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head1 VERSION |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
version 2.20160630 |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=head1 DESCRIPTION |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
A C<Phrase> is a concept defined on top of dependency trees and subtrees |
299
|
|
|
|
|
|
|
(where a subtree contains a node and all its descendants, not just any arbitrary subset of nodes). |
300
|
|
|
|
|
|
|
Similarly to Chomsky's hierarchy of formal grammars, there are two main types of phrases: |
301
|
|
|
|
|
|
|
I<terminal> and I<nonterminal>. |
302
|
|
|
|
|
|
|
Furthermore, there may be subtypes of the nonterminal type with special behavior. |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
A B<terminal phrase> contains just one C<Node> (which typically corresponds to a surface token). |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
A B<nonterminal phrase> does not directly contain any C<Node> but it contains |
307
|
|
|
|
|
|
|
one or more (usually at least two) sub-phrases. |
308
|
|
|
|
|
|
|
The hierarchy of phrases and their sub-phrases is also a tree structure. |
309
|
|
|
|
|
|
|
In the typical case there is a relation between the tree of phrases and the underlying dependency |
310
|
|
|
|
|
|
|
tree, but the rules governing this relation are not fixed. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Phrases help us model situations that are difficult to model in the dependency tree alone. |
313
|
|
|
|
|
|
|
We can encode multiple levels of “tightness” of relations between governors and dependents. |
314
|
|
|
|
|
|
|
In particular we can distinguish between dependents that modify the whole phrase (shared modifiers) |
315
|
|
|
|
|
|
|
and those that modify only the head of the phrase (private modifiers). |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
This is particularly useful for various tree transformations and conversions between annotation |
318
|
|
|
|
|
|
|
styles (such as in the HamleDT blocks). |
319
|
|
|
|
|
|
|
The idea is that we will first construct a phrase tree based on the existing dependency tree, |
320
|
|
|
|
|
|
|
then we will perform transformations on the phrase tree |
321
|
|
|
|
|
|
|
and finally we will create new dependency relations based on the phrase tree and |
322
|
|
|
|
|
|
|
on the rules defined by the desired annotation style. |
323
|
|
|
|
|
|
|
Phrase is a temporary internal structure that will not be saved in the Treex format on the disk. |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Every phrase knows its parent (superphrase) and, if it is nonterminal, its children (subphrases). |
326
|
|
|
|
|
|
|
It also knows which of the children is the I<head> (as long as there are children, there is always |
327
|
|
|
|
|
|
|
one and only one head child). |
328
|
|
|
|
|
|
|
The phrase can also return its head node. For terminal phrases, this is the node they enwrap. |
329
|
|
|
|
|
|
|
For nonterminal phrases, this is defined recursively as the head node of their head child phrase. |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Every phrase also has a dependency relation label I<(deprel)>. |
332
|
|
|
|
|
|
|
These labels are analogous to deprels of nodes in dependency trees. |
333
|
|
|
|
|
|
|
Most of them are just taken from the underlying dependency tree and they are propagated back when |
334
|
|
|
|
|
|
|
new dependency structure is shaped after the phrases; however, some labels may have special |
335
|
|
|
|
|
|
|
meaning even for the C<Phrase> objects. They help recognize special types of nonterminal phrases, |
336
|
|
|
|
|
|
|
such as coordinations. |
337
|
|
|
|
|
|
|
If the phrase is the head of its parent phrase, its deprel is identical to the deprel of its parent. |
338
|
|
|
|
|
|
|
Otherwise, the deprel represents the dependency relation between the phrase and the head of its parent. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=over |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=item parent |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Refers to the parent C<Phrase>, if any. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=item is_member |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Is this phrase a member of a paratactic structure such as coordination (where |
351
|
|
|
|
|
|
|
members are known as conjuncts) or apposition? We need this attribute because |
352
|
|
|
|
|
|
|
of the Prague-style dependency trees. We need it only during the building phase |
353
|
|
|
|
|
|
|
of the phrase tree. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
We could encode this attribute in C<deprel> but it would not be practical |
356
|
|
|
|
|
|
|
because it acts independently of C<deprel>. Unlike C<deprel>, C<is_member> is |
357
|
|
|
|
|
|
|
less tied to the underlying nodes; it is really an attribute of the whole |
358
|
|
|
|
|
|
|
phrase. If we decide to change the C<deprel> of the phrase (which is propagated |
359
|
|
|
|
|
|
|
to selected core children), we do not necessarily want to change C<is_member> |
360
|
|
|
|
|
|
|
too. And we do not want to decode C<is_member> from C<deprel>, shuffle and |
361
|
|
|
|
|
|
|
encode elsewhere again. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
When a terminal phrase is created around a C<Node>, it takes its C<is_member> |
364
|
|
|
|
|
|
|
value from the node. When the phrase receives a parent, the C<is_member> flag |
365
|
|
|
|
|
|
|
will be typically moved to the parent (and erased at the child). However, this |
366
|
|
|
|
|
|
|
does not happen automatically and the C<Builder> has to do that when desired. |
367
|
|
|
|
|
|
|
Similarly, when the type of the phrase is changed (e.g. a new C<Phrase::PP> is |
368
|
|
|
|
|
|
|
created, the contents of the old C<Phrase::NTerm> is moved to it and the old |
369
|
|
|
|
|
|
|
phrase is destroyed), the surrounding code should make sure that the |
370
|
|
|
|
|
|
|
C<is_member> flag is carried over, too. Finally, the value will be used when |
371
|
|
|
|
|
|
|
a C<Phrase::Coordination> is recognized. At that point the C<is_member> flag |
372
|
|
|
|
|
|
|
can be erased for all newly identified conjuncts because now they can be |
373
|
|
|
|
|
|
|
recognized without the flag. However, if the C<Phrase::Coordination> itself (or its |
374
|
|
|
|
|
|
|
C<Phrase::NTerm> predecessor) is a member of a larger paratactic structure, then it |
375
|
|
|
|
|
|
|
must keep the flag for its parent to see and use. |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=back |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=head1 METHODS |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=over |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item $phrase->set_parent ($nonterminal_phrase); |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Sets a new parent for this phrase. The parent phrase must be a L<nonterminal|Treex::Core::Phrase::NTerm>. |
386
|
|
|
|
|
|
|
This phrase will become its new I<non-head> child. |
387
|
|
|
|
|
|
|
The new parent may also be undefined, which means that the current phrase will |
388
|
|
|
|
|
|
|
be disconnected from the phrase structure (but it will keep its own children, |
389
|
|
|
|
|
|
|
if any). |
390
|
|
|
|
|
|
|
The method returns the old parent. |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item my @dependents = $phrase->dependents(); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Returns the list of dependents of the phrase. This is an abstract method that |
395
|
|
|
|
|
|
|
must be implemented in every derived class. Nonterminal phrases have a list |
396
|
|
|
|
|
|
|
of dependents (possibly empty) as their attribute. Terminal phrases return an |
397
|
|
|
|
|
|
|
empty list by definition. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item my @children = $phrase->children(); |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Returns the list of children of the phrase. This is an abstract method that |
402
|
|
|
|
|
|
|
must be implemented in every derived class. Nonterminal phrases distinguish |
403
|
|
|
|
|
|
|
between core children and dependents, and this method should return both. |
404
|
|
|
|
|
|
|
Terminal phrases return an empty list by definition. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=item if( $phrase->is_descendant_of ($another_phrase) ) {...} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Tests whether this phrase depends on another phrase via the parent links. |
409
|
|
|
|
|
|
|
This method is used to prevent cycles when setting a new parent. |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item my $ist = $phrase->is_terminal(); |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Tells whether this phrase is terminal, that is, it does not have children |
414
|
|
|
|
|
|
|
(subphrases). |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=item my $isc = $phrase->is_coordination(); |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Tells whether this phrase is L<Treex::Core::Phrase::Coordination> or its |
419
|
|
|
|
|
|
|
descendant. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item my $iscc = $phrase->is_core_child(); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Tells whether this phrase is core child of another phrase. That is sometimes |
424
|
|
|
|
|
|
|
important to know because core children cannot be easily moved around. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item my $node = $phrase->node(); |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Returns the head node of the phrase. For terminal phrases this should just |
429
|
|
|
|
|
|
|
return their node attribute. For nonterminal phrases this should return the |
430
|
|
|
|
|
|
|
node of their head child. This is an abstract method that must be defined in |
431
|
|
|
|
|
|
|
every derived class. |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=item my @nodes = $phrase->nodes(); |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Returns the list of all nodes covered by the phrase, i.e. the head node of |
436
|
|
|
|
|
|
|
this phrase and of all its descendants. |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=item my @phrases = $phrase->terminals(); |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Returns the list of all terminal descendants of this phrase. Similar to |
441
|
|
|
|
|
|
|
C<nodes()>, but instead of C<Node> objects returns the C<Phrase::Term> objects, in |
442
|
|
|
|
|
|
|
which the nodes are wrapped. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=item my $deprel = $phrase->deprel(); |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
Returns the type of the dependency relation of the phrase to the governing |
447
|
|
|
|
|
|
|
phrase. This is an abstract method that must be defined in every derived |
448
|
|
|
|
|
|
|
class. When the phrase structure is built around a dependency tree, the |
449
|
|
|
|
|
|
|
relations will be probably taken from (or based on) the deprels of the |
450
|
|
|
|
|
|
|
underlying nodes. When the phrase tree is transformed to the desired style, |
451
|
|
|
|
|
|
|
the relations may be modified; at the end, they can be projected to the |
452
|
|
|
|
|
|
|
dependency tree again. A general nonterminal phrase typically has the same |
453
|
|
|
|
|
|
|
deprel as its head child. Terminal phrases store deprels as attributes. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=item my $deprel = $phrase->project_deprel(); |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Returns the deprel that should be used when the phrase tree is projected back |
458
|
|
|
|
|
|
|
to a dependency tree (see the method project_dependencies()). In most cases |
459
|
|
|
|
|
|
|
this is identical to what deprel() returns. However, for instance |
460
|
|
|
|
|
|
|
prepositional phrases in Prague treebanks are attached using C<AuxP>. Their |
461
|
|
|
|
|
|
|
relation to the parent (returned by deprel()) is projected as the label of |
462
|
|
|
|
|
|
|
the dependency between the preposition and its argument. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item my $ord = $phrase->ord(); |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Returns the head node's ord attribute. This means that nodes that do not implement |
467
|
|
|
|
|
|
|
the L<Ordered|Treex::Core::Node::Ordered> role cannot be wrapped in phrases. We sometimes need to order |
468
|
|
|
|
|
|
|
child phrases according to the word order of their head nodes. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=item my ($left, $right) = $phrase->span(); |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Returns the lowest and the highest ord values of the nodes covered by this |
473
|
|
|
|
|
|
|
phrase (always a pair of scalar values; they will be identical for terminal |
474
|
|
|
|
|
|
|
phrases). Note that there is no guarantee that all nodes within the span are |
475
|
|
|
|
|
|
|
covered by this phrase. There may be gaps! |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item $phrase->project_dependencies(); |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Recursively projects dependencies between the head and the dependents back to the |
480
|
|
|
|
|
|
|
underlying dependency structure. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item my $phrase_string = $phrase->as_string(); |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Returns a textual representation of the phrase and all subphrases. Useful for |
485
|
|
|
|
|
|
|
debugging. |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=back |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head1 AUTHORS |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
Daniel Zeman <zeman@ufal.mff.cuni.cz> |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
Copyright © 2013, 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague |
496
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |