line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Treebank::Const; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
52
|
use 5.008; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
125
|
|
4
|
3
|
|
|
3
|
|
17
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
103
|
|
5
|
3
|
|
|
3
|
|
13
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
73
|
|
6
|
3
|
|
|
3
|
|
13
|
use Carp; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
164
|
|
7
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
15
|
use Lingua::Treebank; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
276
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.16'; # stay in sync automatically |
10
|
|
|
|
|
|
|
our $VERBOSE = $Lingua::Treebank::VERBOSE; |
11
|
|
|
|
|
|
|
our $BF_TRAVERSAL; |
12
|
|
|
|
|
|
|
################################################################## |
13
|
|
|
|
|
|
|
use constant { |
14
|
3
|
|
|
|
|
545
|
TAG => 1, |
15
|
|
|
|
|
|
|
ANNOT => 2, |
16
|
|
|
|
|
|
|
WORD => 3, |
17
|
|
|
|
|
|
|
PARENT => 4, |
18
|
|
|
|
|
|
|
CHILDREN => 5, |
19
|
|
|
|
|
|
|
NUM => 6, |
20
|
|
|
|
|
|
|
HEADCHILD => 7, # only used after Headfinder |
21
|
3
|
|
|
3
|
|
18
|
}; |
|
3
|
|
|
|
|
4
|
|
22
|
|
|
|
|
|
|
use overload |
23
|
3
|
|
|
|
|
27
|
'""' => \&stringify, |
24
|
|
|
|
|
|
|
'0+' => \&numerify, # find location in memory |
25
|
|
|
|
|
|
|
fallback => 1, # numeric tests measure memory location |
26
|
3
|
|
|
3
|
|
6099
|
; |
|
3
|
|
|
|
|
3525
|
|
27
|
|
|
|
|
|
|
################################################################## |
28
|
|
|
|
|
|
|
our $INDENT_CHAR = ' ' x 4; |
29
|
|
|
|
|
|
|
our $CHILD_PROLOG = "\n"; |
30
|
|
|
|
|
|
|
our $CHILD_EPILOG = ""; |
31
|
|
|
|
|
|
|
our $STRINGIFY = 'as_penn_text'; |
32
|
|
|
|
|
|
|
################################################################## |
33
|
|
|
|
|
|
|
sub numerify { |
34
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
35
|
0
|
|
|
|
|
0
|
my $num = $self->[NUM]; |
36
|
0
|
0
|
|
|
|
0
|
confess "no numeric value!?" unless defined $num; |
37
|
0
|
|
|
|
|
0
|
return $num; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
## the approach below is not portable. assign a new number from |
40
|
|
|
|
|
|
|
## $__NUMID for every constituent at new() instead. |
41
|
|
|
|
|
|
|
# sub numerify { |
42
|
|
|
|
|
|
|
# my $self = shift; |
43
|
|
|
|
|
|
|
# if (not defined $self->[NUM]) { |
44
|
|
|
|
|
|
|
# # fetch out the number indicating the location in memory |
45
|
|
|
|
|
|
|
# my $refstr= overload::StrVal( $self ); |
46
|
|
|
|
|
|
|
# if ($refstr =~ m{\( 0x ([0-9a-fA-F]+) \) $}x) { # |
47
|
|
|
|
|
|
|
# # } |
48
|
|
|
|
|
|
|
# # cache it for later to save the regex |
49
|
|
|
|
|
|
|
# $self->[NUM] = hex $1; |
50
|
|
|
|
|
|
|
# } |
51
|
|
|
|
|
|
|
# else { |
52
|
|
|
|
|
|
|
# confess "numerify wasn't able to extract a numeric ref"; |
53
|
|
|
|
|
|
|
# } |
54
|
|
|
|
|
|
|
# } |
55
|
|
|
|
|
|
|
# return $self->[NUM]; |
56
|
|
|
|
|
|
|
#} |
57
|
|
|
|
|
|
|
our $__NUMID = 100; # never be small, just to be sure |
58
|
|
|
|
|
|
|
sub _next_numid { |
59
|
|
|
|
|
|
|
# for assigning unique numeric values to each new constituent; |
60
|
|
|
|
|
|
|
# invoked from the ->new() method |
61
|
4507
|
|
|
4507
|
|
4703
|
my $class = shift; |
62
|
4507
|
|
|
|
|
4538
|
++$__NUMID; |
63
|
4507
|
|
|
|
|
10623
|
return $__NUMID; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
################################################################## |
66
|
|
|
|
|
|
|
sub stringify { |
67
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
68
|
0
|
0
|
|
|
|
0
|
if ($STRINGIFY eq 'as_penn_text') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
69
|
0
|
|
|
|
|
0
|
return $self->as_penn_text(); |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
elsif ($STRINGIFY eq 'words') { |
72
|
0
|
|
|
|
|
0
|
return join ' ', map { $_->word() } $self->get_all_terminals(); |
|
0
|
|
|
|
|
0
|
|
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
elsif ($STRINGIFY eq 'preterm_tags') { |
75
|
0
|
|
|
|
|
0
|
return join ' ', map { $_->tag() } $self->get_all_terminals(); |
|
0
|
|
|
|
|
0
|
|
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
else { |
78
|
0
|
|
|
|
|
0
|
carp "don't recognize \$", |
79
|
|
|
|
|
|
|
__PACKAGE__, "::STRINGIFY value of $STRINGIFY"; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
################################################################## |
84
|
|
|
|
|
|
|
sub edges { |
85
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
86
|
|
|
|
|
|
|
return |
87
|
0
|
|
|
|
|
0
|
map { join ',', @{$_} } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
88
|
|
|
|
|
|
|
$self->edges_data(@_); |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
sub edges_data { |
91
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
92
|
0
|
|
|
|
|
0
|
my (%args) = @_; |
93
|
0
|
|
|
|
|
0
|
my %ignore; |
94
|
0
|
0
|
|
|
|
0
|
my $do_terminal = |
95
|
|
|
|
|
|
|
(defined $args{'keepterminal'} ? $args{'keepterminal'} : 1); |
96
|
0
|
0
|
|
|
|
0
|
if (defined $args{ignore}) { |
97
|
0
|
|
|
|
|
0
|
%ignore = map {$_ => 1} @{$args{ignore}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
98
|
|
|
|
|
|
|
} |
99
|
0
|
|
|
|
|
0
|
my %coindex; |
100
|
0
|
0
|
|
|
|
0
|
%coindex = %{$args{coindex}} if defined $args{coindex}; |
|
0
|
|
|
|
|
0
|
|
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
0
|
my (@edges) = $self->_edges_driver(0); |
103
|
0
|
|
|
|
|
0
|
my (@returns); |
104
|
0
|
|
|
|
|
0
|
for (@edges) { |
105
|
0
|
0
|
|
|
|
0
|
next if $ignore{$_->[0]}; |
106
|
|
|
|
|
|
|
|
107
|
0
|
0
|
0
|
|
|
0
|
next if (not $do_terminal and $_->[0] =~ /::/); |
108
|
|
|
|
|
|
|
|
109
|
0
|
0
|
|
|
|
0
|
$_->[1] = $coindex{$_->[1]} if defined $coindex{$_->[1]}; |
110
|
0
|
0
|
|
|
|
0
|
$_->[2] = $coindex{$_->[2]} if defined $coindex{$_->[2]}; |
111
|
0
|
|
|
|
|
0
|
push @returns, $_; |
112
|
|
|
|
|
|
|
} |
113
|
0
|
|
|
|
|
0
|
return @returns; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
sub _edges_driver { |
116
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
117
|
0
|
|
|
|
|
0
|
my $start_index = shift; |
118
|
|
|
|
|
|
|
|
119
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal()) { |
120
|
0
|
|
|
|
|
0
|
return ([$self->tag() . '::' . $self->word(), |
121
|
|
|
|
|
|
|
$start_index, |
122
|
|
|
|
|
|
|
$start_index + 1]); |
123
|
|
|
|
|
|
|
} |
124
|
0
|
|
|
|
|
0
|
my @edges; |
125
|
0
|
|
|
|
|
0
|
my $l_idx = $start_index; |
126
|
0
|
|
|
|
|
0
|
for (@{$self->children()}) { |
|
0
|
|
|
|
|
0
|
|
127
|
0
|
|
|
|
|
0
|
push @edges, $_->_edges_driver($l_idx); |
128
|
0
|
|
|
|
|
0
|
$l_idx = $edges[-1][-1]; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# don't forget the edge for myself |
132
|
0
|
|
|
|
|
0
|
push @edges, [$self->tag(), $start_index, $l_idx]; |
133
|
0
|
|
|
|
|
0
|
return @edges; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
################################################################## |
136
|
|
|
|
|
|
|
sub shared_edges { |
137
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
138
|
0
|
|
|
|
|
0
|
my $other = shift; |
139
|
0
|
|
|
|
|
0
|
my %args = @_; |
140
|
|
|
|
|
|
|
|
141
|
0
|
|
|
|
|
0
|
my %is_phantom = map {$_ => 1} @{$args{phantom}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
142
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
0
|
my %coindex; # those trees that need coindexation |
144
|
0
|
0
|
|
|
|
0
|
if (scalar keys %is_phantom) { |
145
|
0
|
|
|
|
|
0
|
for ($self->edges_data(%args)) { |
146
|
0
|
0
|
|
|
|
0
|
if ($is_phantom{$_->[0]}) { |
147
|
0
|
|
|
|
|
0
|
$coindex{$_->[1]} = $_->[2]; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
0
|
|
|
|
|
0
|
my %edges; |
153
|
0
|
|
|
|
|
0
|
for ($self->edges(%args, coindex => \%coindex)) { |
154
|
0
|
|
|
|
|
0
|
$edges{$_}++; |
155
|
|
|
|
|
|
|
} |
156
|
0
|
|
|
|
|
0
|
my %other_edges; |
157
|
0
|
|
|
|
|
0
|
for ($other->edges(%args, coindex => \%coindex)) { |
158
|
0
|
|
|
|
|
0
|
$other_edges{$_}++; |
159
|
|
|
|
|
|
|
} |
160
|
3
|
|
|
3
|
|
3241
|
use List::Util 'min'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
21163
|
|
161
|
0
|
|
|
|
|
0
|
my @to_return; |
162
|
0
|
|
|
|
|
0
|
for (keys %edges) { |
163
|
0
|
|
0
|
|
|
0
|
push @to_return, ($_) x min ($edges{$_} || 0, $other_edges{$_} || 0); |
|
|
|
0
|
|
|
|
|
164
|
|
|
|
|
|
|
} |
165
|
0
|
|
|
|
|
0
|
return @to_return; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
################################################################## |
168
|
|
|
|
|
|
|
sub list_constituents { |
169
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
170
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal()) { |
171
|
0
|
|
|
|
|
0
|
return ($self); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
else { |
174
|
0
|
|
|
|
|
0
|
my @list; |
175
|
0
|
|
|
|
|
0
|
for (@{$self->children()}) { |
|
0
|
|
|
|
|
0
|
|
176
|
0
|
|
|
|
|
0
|
push @list, $_->list_constituents(); |
177
|
|
|
|
|
|
|
} |
178
|
0
|
|
|
|
|
0
|
return $self, @list; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
################################################################## |
182
|
|
|
|
|
|
|
# High-power generic function for crawling the tree. Most of the other |
183
|
|
|
|
|
|
|
# functions could probably be implemented in terms of this one. |
184
|
|
|
|
|
|
|
sub walk { |
185
|
0
|
|
|
0
|
1
|
0
|
my ($self, $action, $stop_criterion, $state, $bf_traversal) = @_; |
186
|
|
|
|
|
|
|
|
187
|
0
|
0
|
|
|
|
0
|
croak "walk()'s first non-self arg not defined" |
188
|
|
|
|
|
|
|
unless (defined $action); |
189
|
0
|
0
|
|
|
|
0
|
if (ref $action eq '') { |
|
|
0
|
|
|
|
|
|
190
|
0
|
|
|
|
|
0
|
$action = $self->can($action); |
191
|
0
|
0
|
|
|
|
0
|
if (not defined $action) { |
192
|
0
|
|
|
|
|
0
|
croak "couldn't find method $action to call from within walk" |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
elsif (not ref $action eq 'CODE') { |
196
|
0
|
|
|
|
|
0
|
croak "walk()'s first non-self arg not CODE-ref" |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
0
|
0
|
|
|
|
0
|
if (defined $stop_criterion) { |
200
|
0
|
0
|
|
|
|
0
|
if (ref $stop_criterion eq '') { |
|
|
0
|
|
|
|
|
|
201
|
0
|
|
|
|
|
0
|
$stop_criterion = $self->can($stop_criterion); |
202
|
0
|
0
|
|
|
|
0
|
croak "couldn't find method $stop_criterion ", |
203
|
|
|
|
|
|
|
" to call from within walk()" |
204
|
|
|
|
|
|
|
if (not defined $stop_criterion); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
elsif (not ref $stop_criterion eq 'CODE') { |
207
|
0
|
|
|
|
|
0
|
croak "walk()'s stop criterion arg not a CODE-ref"; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
0
|
|
|
|
|
0
|
my @children = (); |
212
|
0
|
|
|
|
|
0
|
my @stack = ($self); |
213
|
|
|
|
|
|
|
|
214
|
0
|
0
|
|
|
|
0
|
if (not defined $bf_traversal) { |
215
|
0
|
|
|
|
|
0
|
$bf_traversal = $BF_TRAVERSAL; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
NODE: |
219
|
0
|
|
|
|
|
0
|
while (1) { |
220
|
|
|
|
|
|
|
# take one off the front of the line |
221
|
0
|
|
|
|
|
0
|
my $node = shift @stack; |
222
|
0
|
0
|
|
|
|
0
|
return if not defined $node; |
223
|
|
|
|
|
|
|
|
224
|
0
|
|
|
|
|
0
|
&{$action}($node, $state); |
|
0
|
|
|
|
|
0
|
|
225
|
|
|
|
|
|
|
|
226
|
0
|
0
|
0
|
|
|
0
|
if ( defined $stop_criterion |
|
0
|
|
|
|
|
0
|
|
227
|
|
|
|
|
|
|
and &{$stop_criterion}($node, $state) ) { |
228
|
|
|
|
|
|
|
# don't put the children on the agenda |
229
|
0
|
|
|
|
|
0
|
next NODE; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# else include node's children |
233
|
0
|
0
|
|
|
|
0
|
if ($bf_traversal) { |
234
|
|
|
|
|
|
|
# children go in the back of the line |
235
|
0
|
|
|
|
|
0
|
push @stack, @{$node->children()}; |
|
0
|
|
|
|
|
0
|
|
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
else { # depth-first traversal |
238
|
|
|
|
|
|
|
# children go in the front of the line |
239
|
0
|
|
|
|
|
0
|
unshift @stack, @{$node->children()}; |
|
0
|
|
|
|
|
0
|
|
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
################################################################## |
244
|
|
|
|
|
|
|
sub find_common_ancestor { |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# returns lowest common ancestor, or undef if there is none. |
247
|
|
|
|
|
|
|
|
248
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
249
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $cousin = shift; |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# error checking |
252
|
0
|
0
|
|
|
|
0
|
croak "cousin arg not defined" if not defined $cousin; |
253
|
0
|
0
|
|
|
|
0
|
croak "cousin not a " . __PACKAGE__ |
254
|
|
|
|
|
|
|
unless UNIVERSAL::isa($cousin, __PACKAGE__); |
255
|
|
|
|
|
|
|
|
256
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $matriarch = $self->root(); |
257
|
0
|
0
|
|
|
|
0
|
if ( $cousin->root() != $matriarch ) { |
258
|
0
|
|
|
|
|
0
|
return; # no common ancestor |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
0
|
|
|
|
|
0
|
my @self_lineage = $self->path_up_to( $matriarch ); |
262
|
0
|
|
|
|
|
0
|
my @cousin_lineage = $cousin->path_up_to( $matriarch ); |
263
|
|
|
|
|
|
|
|
264
|
0
|
|
0
|
|
|
0
|
while (@self_lineage and @cousin_lineage) { |
265
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $self_gramma = pop @self_lineage; |
266
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $cousin_gramma = pop @cousin_lineage; |
267
|
0
|
0
|
|
|
|
0
|
if ($self_gramma == $cousin_gramma) { |
268
|
0
|
|
|
|
|
0
|
$matriarch = $self_gramma; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
else { |
271
|
|
|
|
|
|
|
# stop looking -- once unshared, thereafter its a waste to |
272
|
|
|
|
|
|
|
# keep looking. No incestuous trees here, one would hope. |
273
|
0
|
|
|
|
|
0
|
last; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
0
|
|
|
|
|
0
|
return $matriarch; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
################################################################## |
280
|
|
|
|
|
|
|
sub equiv_to { |
281
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
282
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $other = shift; |
283
|
|
|
|
|
|
|
|
284
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal()) { |
285
|
0
|
0
|
|
|
|
0
|
return 0 unless $other->is_terminal(); |
286
|
|
|
|
|
|
|
|
287
|
0
|
0
|
|
|
|
0
|
if ($self->tag() ne $other->tag()) { |
288
|
0
|
|
|
|
|
0
|
return 0; |
289
|
|
|
|
|
|
|
} |
290
|
0
|
0
|
|
|
|
0
|
if ($self->word() ne $other->word()) { |
291
|
0
|
|
|
|
|
0
|
return 0; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# otherwise it all passes: |
295
|
0
|
|
|
|
|
0
|
return 1; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
else { |
298
|
|
|
|
|
|
|
# self non-terminal |
299
|
0
|
0
|
|
|
|
0
|
return 0 if $other->is_terminal(); |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# different number of children |
302
|
0
|
0
|
|
|
|
0
|
return 0 if (@{ $self->children() } != @{ $other->children() }); |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
303
|
|
|
|
|
|
|
|
304
|
0
|
|
|
|
|
0
|
foreach my $idx ( 0 .. $#{ $self->children() } ) { |
|
0
|
|
|
|
|
0
|
|
305
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $lchild = $self->children($idx); |
306
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $rchild = $other->children($idx); |
307
|
0
|
0
|
|
|
|
0
|
if (not $lchild->equiv_to($rchild)) { |
308
|
0
|
|
|
|
|
0
|
return 0; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
# otherwise it all passes |
312
|
0
|
|
|
|
|
0
|
return 1; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
################################################################## |
316
|
|
|
|
|
|
|
sub equiv_tags { |
317
|
0
|
|
|
0
|
1
|
0
|
croak "not implemented\n"; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
sub equiv_words { |
320
|
0
|
|
|
0
|
1
|
0
|
croak "not implemented\n"; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
################################################################## |
323
|
|
|
|
|
|
|
# height/depth functions |
324
|
|
|
|
|
|
|
################################################################## |
325
|
|
|
|
|
|
|
sub depth_from { |
326
|
|
|
|
|
|
|
# return depth from given target. returns undef if $target is not |
327
|
|
|
|
|
|
|
# the ancestor of $self |
328
|
|
|
|
|
|
|
|
329
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
330
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $target = shift; |
331
|
|
|
|
|
|
|
|
332
|
0
|
0
|
|
|
|
0
|
if ($self == $target) { |
|
|
0
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
return 0; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
elsif ($self->is_root()) { |
336
|
0
|
|
|
|
|
0
|
carp "depth_from argument not an ancestor of instance"; |
337
|
|
|
|
|
|
|
# we could check this explicitly, but users may already know |
338
|
|
|
|
|
|
|
# this isn't going to happen, so let's not waste cycles |
339
|
0
|
|
|
|
|
0
|
return (); # not defined |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
else { |
342
|
0
|
|
|
|
|
0
|
return $self->parent->depth_from($target) + 1; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
################################################################## |
346
|
|
|
|
|
|
|
sub depth { |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# returns how many steps from self up to root |
349
|
|
|
|
|
|
|
|
350
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# implemented using more general function -- but it does require |
353
|
|
|
|
|
|
|
# two traversals of the tree... other implementations may be easier |
354
|
0
|
|
|
|
|
0
|
return $self->depth_from( $self->root() ); |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# if benchmarking turns up a problem here, use one of these below |
357
|
|
|
|
|
|
|
# instead (probably the second, since it involves the fewest stack ops |
358
|
|
|
|
|
|
|
# and so is probably the fastest). |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# simple recursive implementation |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
## if ( $self->is_root() ) { |
363
|
|
|
|
|
|
|
## return 0; |
364
|
|
|
|
|
|
|
## } |
365
|
|
|
|
|
|
|
## else { |
366
|
|
|
|
|
|
|
## return $self->parent->depth() + 1; |
367
|
|
|
|
|
|
|
## } |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# non-recursive implementation |
370
|
|
|
|
|
|
|
## my $d = 0; |
371
|
|
|
|
|
|
|
## my __PACKAGE__ $p = $self->parent; |
372
|
|
|
|
|
|
|
## until ( $p->is_root() ) { |
373
|
|
|
|
|
|
|
## $h++; |
374
|
|
|
|
|
|
|
## $p = $p->parent; |
375
|
|
|
|
|
|
|
## } |
376
|
|
|
|
|
|
|
## return $d; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
################################################################## |
380
|
|
|
|
|
|
|
sub height { |
381
|
|
|
|
|
|
|
# returns longest distance from self down to any leaf |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# could be re-implemented with get_all_terminals, path_up_to and |
384
|
|
|
|
|
|
|
# array lengths, but that seems unnecessary |
385
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
386
|
|
|
|
|
|
|
|
387
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal()) { |
388
|
0
|
|
|
|
|
0
|
return 0; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
else { |
391
|
0
|
|
|
|
|
0
|
my ($max) = 0; |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# choose the largest height among the children, return that |
394
|
|
|
|
|
|
|
# (+1) |
395
|
0
|
|
|
|
|
0
|
foreach my __PACKAGE__ $d (@{ $self->children() }) { |
|
0
|
|
|
|
|
0
|
|
396
|
0
|
|
|
|
|
0
|
my $this_height = $d->height(); |
397
|
0
|
0
|
|
|
|
0
|
if ($max < $this_height) { |
398
|
0
|
|
|
|
|
0
|
$max = $this_height; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
} |
401
|
0
|
|
|
|
|
0
|
return $max + 1; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
################################################################## |
405
|
|
|
|
|
|
|
sub get_index { |
406
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
407
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $daughter = shift; |
408
|
|
|
|
|
|
|
|
409
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
410
|
0
|
|
|
|
|
0
|
carp "get_index called on terminal node, can't get_index"; |
411
|
0
|
|
|
|
|
0
|
return; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
0
|
0
|
|
|
|
0
|
if (not $self == $daughter->parent ) { |
415
|
0
|
|
|
|
|
0
|
carp "argument not daughter of instance, can't get index"; |
416
|
0
|
|
|
|
|
0
|
return ; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
0
|
|
|
|
|
0
|
for ( 0 .. $#{$self->children} ) { |
|
0
|
|
|
|
|
0
|
|
420
|
0
|
0
|
|
|
|
0
|
if ( $self->children($_) == $daughter ) { |
421
|
0
|
|
|
|
|
0
|
return $_; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
0
|
|
|
|
|
0
|
carp "malformed tree:", |
426
|
|
|
|
|
|
|
" daughter identifies instance as parent, but parent does ", |
427
|
|
|
|
|
|
|
"not claim daughter"; |
428
|
0
|
|
|
|
|
0
|
return ; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
################################################################## |
431
|
|
|
|
|
|
|
# node retrieval functions |
432
|
|
|
|
|
|
|
################################################################## |
433
|
|
|
|
|
|
|
sub path_up_to { |
434
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
435
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $terminus = shift; |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# could be done non-recursively, but this is grammatical structure |
438
|
|
|
|
|
|
|
# -- very small heights. Besides, recursivity is cooler, and |
439
|
|
|
|
|
|
|
# easier to think about |
440
|
|
|
|
|
|
|
|
441
|
0
|
0
|
|
|
|
0
|
if ($self == $terminus) { |
|
|
0
|
|
|
|
|
|
442
|
0
|
|
|
|
|
0
|
return ($self); |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
elsif ( $self->is_root() ) { |
445
|
0
|
|
|
|
|
0
|
carp "terminus argument not an ancestor of instance!"; |
446
|
0
|
|
|
|
|
0
|
return ; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
else { |
449
|
0
|
|
|
|
|
0
|
my @path = $self->parent->path_up_to( $terminus ); |
450
|
0
|
0
|
|
|
|
0
|
if (not @path) { |
451
|
0
|
|
|
|
|
0
|
return; # not found |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
else { |
454
|
0
|
|
|
|
|
0
|
return ( $self, @path ); |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
################################################################## |
459
|
|
|
|
|
|
|
sub root { |
460
|
|
|
|
|
|
|
# returns the root of a given node |
461
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
462
|
0
|
0
|
|
|
|
0
|
if ($self->is_root()) { |
463
|
0
|
|
|
|
|
0
|
return $self; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
else { |
466
|
0
|
|
|
|
|
0
|
return $self->parent->root(); |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
################################################################## |
470
|
|
|
|
|
|
|
# Return a list of ancestors of a node matching a criteria given in a |
471
|
|
|
|
|
|
|
# function parameter. |
472
|
|
|
|
|
|
|
# my $path = <
|
473
|
|
|
|
|
|
|
# (NP |
474
|
|
|
|
|
|
|
# (NP |
475
|
|
|
|
|
|
|
# (VP |
476
|
|
|
|
|
|
|
# (N dog)))) |
477
|
|
|
|
|
|
|
# EOTREE |
478
|
|
|
|
|
|
|
# |
479
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new->from_penn_string($text); |
480
|
|
|
|
|
|
|
# my @terms = $node->get_all_terminals(); |
481
|
|
|
|
|
|
|
# my $node = shift @terms; |
482
|
|
|
|
|
|
|
# my @ancestors = $node->select_ancestors(sub{$_[0]->tag() eq "NP"}); |
483
|
|
|
|
|
|
|
sub select_ancestors { |
484
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
485
|
0
|
|
|
|
|
0
|
my $criteria = shift; |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
my @ancestors = (); |
488
|
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
0
|
$self = $self->parent(); |
490
|
|
|
|
|
|
|
PARENT: |
491
|
0
|
|
|
|
|
0
|
while (defined $self) { |
492
|
0
|
0
|
|
|
|
0
|
push @ancestors, $self if (&$criteria($self)); |
493
|
0
|
|
|
|
|
0
|
$self = $self->parent(); |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
0
|
return @ancestors; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
################################################################## |
499
|
|
|
|
|
|
|
# Return a list of children of a node matching a criteria given in a |
500
|
|
|
|
|
|
|
# function parameter. The children are searched breadth-first. |
501
|
|
|
|
|
|
|
sub select_children { |
502
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
503
|
0
|
|
|
|
|
0
|
my $criteria = shift; |
504
|
|
|
|
|
|
|
|
505
|
0
|
|
|
|
|
0
|
my @children = (); |
506
|
0
|
|
|
|
|
0
|
my @stack = ($self); |
507
|
|
|
|
|
|
|
CHILD: |
508
|
0
|
|
|
|
|
0
|
while (1) { |
509
|
0
|
|
|
|
|
0
|
my $node = pop @stack; |
510
|
0
|
0
|
|
|
|
0
|
last CHILD if (not $node); |
511
|
|
|
|
|
|
|
|
512
|
0
|
0
|
|
|
|
0
|
push @children, $node if (&$criteria($node)); |
513
|
0
|
|
|
|
|
0
|
push @stack, @{$node->children()}; |
|
0
|
|
|
|
|
0
|
|
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
0
|
|
|
|
|
0
|
return @children; |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
################################################################## |
519
|
|
|
|
|
|
|
sub get_all_terminals { |
520
|
|
|
|
|
|
|
# returns all leaves in a left-right traversal |
521
|
|
|
|
|
|
|
|
522
|
4507
|
|
|
4507
|
1
|
9274
|
my __PACKAGE__ $self = shift; |
523
|
|
|
|
|
|
|
|
524
|
4507
|
|
|
|
|
4307
|
my @terminals; |
525
|
|
|
|
|
|
|
|
526
|
4507
|
100
|
|
|
|
7249
|
if ( $self->is_terminal() ) { |
527
|
2363
|
|
|
|
|
3499
|
@terminals = ( $self ); # parens force list return |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
else { |
530
|
2144
|
|
|
|
|
2116
|
foreach my __PACKAGE__ $d ( @{$self->children} ) { |
|
2144
|
|
|
|
|
3451
|
|
531
|
4158
|
|
|
|
|
7951
|
push @terminals, $d->get_all_terminals; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
4507
|
|
|
|
|
13484
|
return @terminals; |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
################################################################## |
537
|
|
|
|
|
|
|
sub next_sib { |
538
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
539
|
|
|
|
|
|
|
|
540
|
0
|
0
|
|
|
|
0
|
return if $self->is_root; # no sib, return undef |
541
|
|
|
|
|
|
|
|
542
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $parent = $self->parent; |
543
|
|
|
|
|
|
|
|
544
|
0
|
|
|
|
|
0
|
my $index = $parent->get_index($self); |
545
|
|
|
|
|
|
|
|
546
|
0
|
0
|
|
|
|
0
|
if ($index == $#{$parent->children}) { |
|
0
|
|
|
|
|
0
|
|
547
|
|
|
|
|
|
|
# this is the rightmost of the group of siblings |
548
|
0
|
|
|
|
|
0
|
return; # no right sib |
549
|
|
|
|
|
|
|
} |
550
|
0
|
|
|
|
|
0
|
return $parent->children($index + 1); |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
################################################################## |
553
|
|
|
|
|
|
|
sub prev_sib { |
554
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
555
|
|
|
|
|
|
|
|
556
|
0
|
0
|
|
|
|
0
|
return if $self->is_root; # no sib, return undef |
557
|
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $parent = $self->parent; |
559
|
|
|
|
|
|
|
|
560
|
0
|
|
|
|
|
0
|
my $index = $parent->get_index($self); |
561
|
|
|
|
|
|
|
|
562
|
0
|
0
|
|
|
|
0
|
if ($index == 0) { |
563
|
|
|
|
|
|
|
# this is the leftmost of the group of siblings |
564
|
0
|
|
|
|
|
0
|
return; # no left sib |
565
|
|
|
|
|
|
|
} |
566
|
0
|
|
|
|
|
0
|
return $parent->children($index - 1); |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
################################################################## |
569
|
|
|
|
|
|
|
sub right_leaf { |
570
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
571
|
|
|
|
|
|
|
# returns rightmost leaf of current node |
572
|
|
|
|
|
|
|
|
573
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
574
|
0
|
|
|
|
|
0
|
return $self; |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
else { |
577
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $right_daughter = $self->children(-1); |
578
|
0
|
|
|
|
|
0
|
return $right_daughter->right_leaf(); |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
################################################################## |
582
|
|
|
|
|
|
|
sub left_leaf { |
583
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
584
|
|
|
|
|
|
|
# returns leftmost leaf of current node |
585
|
|
|
|
|
|
|
|
586
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
587
|
0
|
|
|
|
|
0
|
return $self; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
else { |
590
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $left_daughter = $self->children(0); |
591
|
0
|
|
|
|
|
0
|
return $left_daughter->left_leaf(); |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
################################################################## |
595
|
|
|
|
|
|
|
sub prev_leaf { |
596
|
|
|
|
|
|
|
# return the next leaf to the left (back in time), not dominated |
597
|
|
|
|
|
|
|
# by the current node |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# should behave correctly even when called on a non-terminal -- |
600
|
|
|
|
|
|
|
# returns the first leaf to the left not-dominated by the current |
601
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
602
|
|
|
|
|
|
|
|
603
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $left_sib = $self->prev_sib; |
604
|
|
|
|
|
|
|
|
605
|
0
|
0
|
|
|
|
0
|
if (defined $left_sib) { |
606
|
0
|
|
|
|
|
0
|
return $left_sib->right_leaf(); |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
else { |
609
|
|
|
|
|
|
|
# no immediate left sib, go up the tree |
610
|
|
|
|
|
|
|
|
611
|
0
|
0
|
|
|
|
0
|
if ( $self->is_root() ) { |
612
|
0
|
|
|
|
|
0
|
return; # no previous leaves |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
else { |
615
|
0
|
|
|
|
|
0
|
return $self->parent->prev_leaf(); |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
################################################################## |
620
|
|
|
|
|
|
|
sub next_leaf { |
621
|
|
|
|
|
|
|
# return the next leaf to the right (forward in time) |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# should behave correctly even when called on a non-terminal -- |
624
|
|
|
|
|
|
|
# returns the first leaf to the right not-dominated by the current |
625
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
626
|
|
|
|
|
|
|
|
627
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $right_sib = $self->next_sib; |
628
|
|
|
|
|
|
|
|
629
|
0
|
0
|
|
|
|
0
|
if (defined $right_sib) { |
630
|
0
|
|
|
|
|
0
|
return $right_sib->left_leaf(); |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
else { |
633
|
|
|
|
|
|
|
# no immediate right sib, go up the tree |
634
|
|
|
|
|
|
|
|
635
|
0
|
0
|
|
|
|
0
|
if ( $self->is_root() ) { |
636
|
0
|
|
|
|
|
0
|
return; # no previous leaves |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
else { |
639
|
0
|
|
|
|
|
0
|
return $self->parent->next_leaf(); |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
################################################################## |
644
|
|
|
|
|
|
|
# boolean requests (one additional argument) |
645
|
|
|
|
|
|
|
################################################################## |
646
|
|
|
|
|
|
|
sub is_descendant_of { |
647
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
648
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $grandma = shift; |
649
|
|
|
|
|
|
|
|
650
|
0
|
0
|
|
|
|
0
|
if ($self == $grandma) { |
651
|
0
|
|
|
|
|
0
|
return 1; # yes, you are your own descendant. :p |
652
|
|
|
|
|
|
|
} |
653
|
0
|
0
|
|
|
|
0
|
if ($self->is_root) { |
654
|
0
|
|
|
|
|
0
|
return 0; # root is descendant of nobody, grandma or otherwise |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
else { |
657
|
0
|
|
|
|
|
0
|
return $self->parent->is_descendant_of($grandma); |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
################################################################## |
661
|
|
|
|
|
|
|
sub is_ancestor_of { |
662
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
663
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $candidate = shift; |
664
|
0
|
|
|
|
|
0
|
return $candidate->is_descendant_of($self); |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
################################################################## |
667
|
|
|
|
|
|
|
# Are the two nodes siblings? |
668
|
|
|
|
|
|
|
# |
669
|
|
|
|
|
|
|
# my $sibling = <
|
670
|
|
|
|
|
|
|
# (S |
671
|
|
|
|
|
|
|
# (NP |
672
|
|
|
|
|
|
|
# (D the) |
673
|
|
|
|
|
|
|
# (N boy)) |
674
|
|
|
|
|
|
|
# (VP |
675
|
|
|
|
|
|
|
# ran)) |
676
|
|
|
|
|
|
|
# EOTREE |
677
|
|
|
|
|
|
|
# |
678
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new()->from_penn_string($sibling); |
679
|
|
|
|
|
|
|
# my @child = @{$node->children()}; |
680
|
|
|
|
|
|
|
# my $np = $child[0]; |
681
|
|
|
|
|
|
|
# my $vp = $child[1]; |
682
|
|
|
|
|
|
|
# print "This is true." if ($np->is_sibling($vp)); |
683
|
|
|
|
|
|
|
sub is_sibling { |
684
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
685
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $other = shift; |
686
|
|
|
|
|
|
|
|
687
|
0
|
0
|
0
|
|
|
0
|
return 0 if ($self->is_root() or $other->is_root()); |
688
|
|
|
|
|
|
|
|
689
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $parent = $self->find_common_ancestor($other); |
690
|
0
|
0
|
|
|
|
0
|
return 0 if (not defined $parent); |
691
|
|
|
|
|
|
|
|
692
|
0
|
|
0
|
|
|
0
|
return ($parent == $self->parent() and $parent == $other->parent()); |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
################################################################## |
695
|
|
|
|
|
|
|
# I/O methods (to/from text) |
696
|
|
|
|
|
|
|
################################################################## |
697
|
|
|
|
|
|
|
sub as_penn_text { |
698
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
699
|
0
|
|
|
|
|
0
|
my $step = shift; |
700
|
0
|
|
|
|
|
0
|
my $indentChar = shift; |
701
|
0
|
|
|
|
|
0
|
my $child_prolog = shift; |
702
|
0
|
|
|
|
|
0
|
my $child_epilog = shift; |
703
|
0
|
|
|
|
|
0
|
my $am_head = shift; |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
# set defaults (in case called without full specification) |
706
|
0
|
0
|
|
|
|
0
|
$step = 0 if not defined $step; |
707
|
0
|
0
|
|
|
|
0
|
$indentChar = $INDENT_CHAR if not defined $indentChar; |
708
|
0
|
0
|
|
|
|
0
|
$child_prolog = $CHILD_PROLOG if not defined $child_prolog; |
709
|
0
|
0
|
|
|
|
0
|
$child_epilog = $CHILD_EPILOG if not defined $child_epilog; |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
# begin composition of text |
712
|
0
|
|
|
|
|
0
|
my $label = $self->tag(); |
713
|
0
|
0
|
|
|
|
0
|
if (defined $am_head) { |
714
|
0
|
0
|
|
|
|
0
|
if ($am_head) { |
715
|
0
|
|
|
|
|
0
|
$label = '*'.$label.'*'; |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
# don't touch if $am_head undef |
719
|
|
|
|
|
|
|
|
720
|
0
|
0
|
|
|
|
0
|
if (defined $self->annot()) { |
721
|
0
|
|
|
|
|
0
|
$label .= '-' . $self->annot(); |
722
|
|
|
|
|
|
|
} |
723
|
|
|
|
|
|
|
|
724
|
0
|
|
|
|
|
0
|
my $text = '(' . $label . ' '; |
725
|
|
|
|
|
|
|
|
726
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
727
|
0
|
|
|
|
|
0
|
$text .= $self->word(); |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
else { |
730
|
|
|
|
|
|
|
# non-terminal |
731
|
0
|
|
|
|
|
0
|
my $head = $self->headchild(); |
732
|
|
|
|
|
|
|
|
733
|
0
|
|
|
|
|
0
|
foreach my __PACKAGE__ $d ( @{$self->children} ) { |
|
0
|
|
|
|
|
0
|
|
734
|
0
|
|
|
|
|
0
|
$text .= $child_prolog; |
735
|
0
|
|
|
|
|
0
|
$text .= ($indentChar x ($step + 1)); |
736
|
0
|
|
|
|
|
0
|
my $child_is_head; |
737
|
0
|
0
|
|
|
|
0
|
if (defined $head) { |
738
|
0
|
0
|
|
|
|
0
|
$child_is_head = ($head == $d ? 1 : 0); |
739
|
|
|
|
|
|
|
} |
740
|
0
|
|
|
|
|
0
|
$text .= $d->as_penn_text($step + 1, $indentChar, $child_prolog, $child_epilog, $child_is_head); |
741
|
0
|
|
|
|
|
0
|
$text .= $child_epilog; |
742
|
|
|
|
|
|
|
} |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
|
745
|
0
|
|
|
|
|
0
|
$text .= ')'; |
746
|
|
|
|
|
|
|
|
747
|
0
|
|
|
|
|
0
|
return $text; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
################################################################## |
750
|
|
|
|
|
|
|
sub from_cnf_string { |
751
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
752
|
0
|
|
|
|
|
0
|
my $class = ref $self; |
753
|
0
|
|
|
|
|
0
|
local $_ = shift; |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
# Strip leading and trailing whitespace. |
756
|
0
|
|
|
|
|
0
|
s/^\s+//; |
757
|
0
|
|
|
|
|
0
|
s/\s+$//; |
758
|
|
|
|
|
|
|
# Remove outermost parenthesis pair. |
759
|
0
|
0
|
|
|
|
0
|
if (s/^ \( \s* (.*) \s* \) $/$1/x) { |
|
|
0
|
|
|
|
|
|
760
|
|
|
|
|
|
|
# This is a non-terminal node. |
761
|
|
|
|
|
|
|
# Extract the non-terminal tag. |
762
|
0
|
|
|
|
|
0
|
s/^(\S+)\s*//; |
763
|
0
|
|
|
|
|
0
|
my $tag = $1; |
764
|
0
|
|
|
|
|
0
|
$self->tag($tag); |
765
|
|
|
|
|
|
|
# Enumerate all the children of this node. |
766
|
0
|
|
|
|
|
0
|
while (length $_) { |
767
|
0
|
|
|
|
|
0
|
my $childtext; |
768
|
0
|
0
|
|
|
|
0
|
if ( /^\(/ ) { |
769
|
|
|
|
|
|
|
# The child is a non-terminal node. |
770
|
0
|
|
|
|
|
0
|
$childtext = $class->find_brackets($_); |
771
|
0
|
|
|
|
|
0
|
substr ($_, 0, length $childtext) = ''; |
772
|
|
|
|
|
|
|
# BUGBUG check for errors here? |
773
|
|
|
|
|
|
|
} |
774
|
|
|
|
|
|
|
else { |
775
|
|
|
|
|
|
|
# The child is a terminal node. |
776
|
0
|
0
|
|
|
|
0
|
s/^(\S+)\s*// or carp "couldn't find text in $_\n"; |
777
|
0
|
|
|
|
|
0
|
$childtext = $1; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
# Create a child node structure. |
780
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $child = $class->new(); |
781
|
0
|
|
|
|
|
0
|
$child->from_cnf_string($childtext); |
782
|
0
|
|
|
|
|
0
|
$self->append($child); |
783
|
|
|
|
|
|
|
# Skip whitespace delimiting children. |
784
|
0
|
|
|
|
|
0
|
s/^\s+//; |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
elsif (/^([^_]+)_(\S+)$/) { |
788
|
|
|
|
|
|
|
# This is a terminal node. |
789
|
0
|
|
|
|
|
0
|
my ($word, $tag) = ($1, $2); |
790
|
0
|
|
|
|
|
0
|
$self->word($word); |
791
|
0
|
|
|
|
|
0
|
$self->tag($tag); |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
else { |
794
|
0
|
|
|
|
|
0
|
croak "can't parse '$_'"; |
795
|
|
|
|
|
|
|
} |
796
|
0
|
|
|
|
|
0
|
return $self; |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
################################################################## |
799
|
|
|
|
|
|
|
sub from_penn_string { |
800
|
4507
|
|
|
4507
|
1
|
4632
|
my __PACKAGE__ $self = shift; |
801
|
4507
|
|
|
|
|
5906
|
my $class = ref $self; |
802
|
4507
|
|
|
|
|
5276
|
my $text = shift; |
803
|
|
|
|
|
|
|
# pass it a complete constituent in text form. |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
# records the tag plus a list of its subconstituents. If |
806
|
|
|
|
|
|
|
# subconstituents themselves have structure, then they will be |
807
|
|
|
|
|
|
|
# arrayrefs |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
# JGK: why @tags? can't remember... |
810
|
|
|
|
|
|
|
# my (@tags) = shift; |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# strip off front and back parens and whitespace |
814
|
4507
|
|
|
|
|
17487
|
$text =~ s/^ \s* \( \s* //x; |
815
|
4507
|
|
|
|
|
56613
|
$text =~ s/ \s* \) \s* $//x; |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
# handle perverse cases where the brackets are the text, like |
818
|
|
|
|
|
|
|
# (NP (-LRB- () (NNP Joe) (-RRB- ))) |
819
|
4507
|
|
|
|
|
6606
|
$text =~ s/\(-LRB- \(\)/__LPRN__/g; |
820
|
4507
|
|
|
|
|
5243
|
$text =~ s/\(-RRB- \)\)/__RPRN__/g; |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
# tag is everything up to the first whitespace or |
823
|
|
|
|
|
|
|
# parenthesis. Children are everything else. |
824
|
4507
|
|
|
|
|
18056
|
my ($tag, $childrentext) = |
825
|
|
|
|
|
|
|
($text =~ /^ ([^\s\(]*) \s* (.*) $/sx); |
826
|
|
|
|
|
|
|
|
827
|
4507
|
50
|
33
|
|
|
16747
|
if (not defined $tag or not defined $childrentext) { |
828
|
0
|
|
|
|
|
0
|
croak "couldn't find a constituent in '$text'"; |
829
|
0
|
|
|
|
|
0
|
return; # undef |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
4507
|
100
|
|
|
|
13484
|
if ($tag =~ m/ ^ ( [^-]+? ) ([-=]) ( .* ) $/x ) { |
833
|
469
|
|
|
|
|
1188
|
my $short_tag = $1; |
834
|
469
|
50
|
|
|
|
1259
|
if ($2 ne '-') { |
835
|
0
|
0
|
|
|
|
0
|
warn "found '$2' (should be '-') separating annotation ". |
836
|
|
|
|
|
|
|
"in tag $tag\n" if $VERBOSE; |
837
|
|
|
|
|
|
|
} |
838
|
469
|
|
|
|
|
995
|
$self->annot( $3 ); |
839
|
469
|
|
|
|
|
833
|
$self->tag( $short_tag ); |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
else { |
842
|
4038
|
|
|
|
|
8270
|
$self->tag($tag); |
843
|
|
|
|
|
|
|
} |
844
|
4507
|
|
|
|
|
9370
|
while (length $childrentext) { |
845
|
|
|
|
|
|
|
# handle perverse cases where the brackets are the text, like |
846
|
|
|
|
|
|
|
# (NP (-LRB- () (NNP Joe) (-RRB- ))) |
847
|
6521
|
50
|
|
|
|
18061
|
if ($childrentext =~ s/^\s*__LPRN__\s*//) { |
|
|
50
|
|
|
|
|
|
848
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $child = $class->new(); |
849
|
0
|
|
|
|
|
0
|
$child->tag('-LRB-'); |
850
|
0
|
|
|
|
|
0
|
$child->word('('); |
851
|
0
|
|
|
|
|
0
|
$self->append($child); |
852
|
0
|
|
|
|
|
0
|
next; |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
elsif ($childrentext =~ s/^\s*__RPRN__\s*//) { |
855
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $child = $class->new(); |
856
|
0
|
|
|
|
|
0
|
$child->tag('-RRB-'); |
857
|
0
|
|
|
|
|
0
|
$child->word(')'); |
858
|
0
|
|
|
|
|
0
|
$self->append($child); |
859
|
0
|
|
|
|
|
0
|
next; |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
|
862
|
6521
|
|
|
|
|
14474
|
my $childtext = $class->find_brackets($childrentext); |
863
|
6521
|
100
|
|
|
|
13526
|
if (defined $childtext) { |
864
|
|
|
|
|
|
|
# child is itself a constituent |
865
|
4158
|
|
|
|
|
8694
|
my __PACKAGE__ $child = $class->new(); |
866
|
4158
|
|
|
|
|
8797
|
$child->from_penn_string($childtext); |
867
|
|
|
|
|
|
|
|
868
|
4158
|
|
|
|
|
7564
|
$self->append($child); |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
# $child->parent($self); |
871
|
|
|
|
|
|
|
# push @{$self->children}, $child; |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
# chop out the childrentext |
874
|
4158
|
|
|
|
|
7037
|
substr ($childrentext, 0, length $childtext) = ""; |
875
|
4158
|
|
|
|
|
7139
|
$childrentext =~ s/^\s+//; |
876
|
|
|
|
|
|
|
|
877
|
4158
|
50
|
|
|
|
6776
|
warn "trouble -- child constituent found " . |
878
|
|
|
|
|
|
|
"in token that already had word\n" |
879
|
|
|
|
|
|
|
if defined $self->word; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
else { |
882
|
2363
|
50
|
|
|
|
4956
|
if ($childrentext =~ tr {()} {()} ) { |
883
|
0
|
|
|
|
|
0
|
carp "found a parenthesis in word '$childrentext'; ", |
884
|
|
|
|
|
|
|
" this suggests that the data had unbalanced parens"; |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
# this is a word; we're done |
888
|
2363
|
|
|
|
|
4487
|
$self->word($childrentext); |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
# eliminate text so that we can exit the while loop |
891
|
2363
|
|
|
|
|
2831
|
$childrentext = ''; |
892
|
|
|
|
|
|
|
|
893
|
2363
|
|
|
|
|
3972
|
warn "trouble -- word found in token that " |
894
|
|
|
|
|
|
|
. "already had child constituents\n" |
895
|
2363
|
50
|
|
|
|
2400
|
if @{$self->children}; |
896
|
|
|
|
|
|
|
} |
897
|
|
|
|
|
|
|
} |
898
|
|
|
|
|
|
|
|
899
|
4507
|
|
|
|
|
7533
|
return $self; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
my $bracket_error; |
902
|
|
|
|
|
|
|
sub find_brackets { |
903
|
6870
|
|
|
6870
|
0
|
8008
|
my $class = shift; |
904
|
6870
|
|
|
|
|
12833
|
my $text = shift; |
905
|
6870
|
|
|
|
|
7605
|
my $count_l = 1; |
906
|
|
|
|
|
|
|
|
907
|
6870
|
|
|
|
|
6486
|
my $posn = -1; |
908
|
|
|
|
|
|
|
|
909
|
6870
|
|
|
|
|
9743
|
my $nextL = index $text, '(', $posn+1; |
910
|
6870
|
|
|
|
|
8548
|
my $nextR = index $text, ')', $posn+1; |
911
|
|
|
|
|
|
|
|
912
|
6870
|
50
|
|
|
|
11930
|
croak ("I found a right bracket before a left-bracket. ", |
913
|
|
|
|
|
|
|
"Brackets mis-nested. Are you using .psd files instead of .mrg?") |
914
|
|
|
|
|
|
|
if $nextR < $nextL; |
915
|
|
|
|
|
|
|
|
916
|
6870
|
100
|
66
|
|
|
21718
|
return if ($nextL==-1 and $nextR==-1); |
917
|
|
|
|
|
|
|
|
918
|
4507
|
|
|
|
|
4465
|
$posn=$nextL; |
919
|
|
|
|
|
|
|
|
920
|
4507
|
50
|
|
|
|
7851
|
if ($posn == -1) { |
921
|
|
|
|
|
|
|
# undefined |
922
|
0
|
|
|
|
|
0
|
return; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
4507
|
|
|
|
|
8907
|
while ($count_l > 0) { |
926
|
40523
|
|
|
|
|
51605
|
$nextL=index $text, '(', $posn+1; |
927
|
40523
|
|
|
|
|
43445
|
$nextR=index $text, ')', $posn+1; |
928
|
40523
|
50
|
|
|
|
62834
|
if ($nextR == -1) { |
929
|
0
|
|
|
|
|
0
|
croak "missing close parens in $text"; |
930
|
|
|
|
|
|
|
} |
931
|
40523
|
100
|
100
|
|
|
141531
|
if ($nextL == -1 or $nextR < $nextL) { |
932
|
22515
|
|
|
|
|
26088
|
$count_l--; |
933
|
22515
|
|
|
|
|
46470
|
$posn=$nextR; |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
else { # ($nextL < $nextR) |
936
|
18008
|
|
|
|
|
15723
|
$count_l++; |
937
|
18008
|
|
|
|
|
34308
|
$posn = $nextL; |
938
|
|
|
|
|
|
|
} |
939
|
|
|
|
|
|
|
} |
940
|
4507
|
|
|
|
|
14310
|
return substr $text, 0, $posn+1; |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
################################################################## |
943
|
|
|
|
|
|
|
# Tree modification methods |
944
|
|
|
|
|
|
|
################################################################## |
945
|
|
|
|
|
|
|
sub flatten { |
946
|
|
|
|
|
|
|
# pull up all terminals to be children of the instance here, |
947
|
|
|
|
|
|
|
# regardless of how deep they are |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
# see POD below for better details |
950
|
|
|
|
|
|
|
|
951
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
952
|
|
|
|
|
|
|
|
953
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
954
|
0
|
|
|
|
|
0
|
carp "flatten called on terminal node"; |
955
|
0
|
|
|
|
|
0
|
return; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
0
|
|
|
|
|
0
|
foreach my __PACKAGE__ $daughter (@{$self->children}) { |
|
0
|
|
|
|
|
0
|
|
959
|
|
|
|
|
|
|
|
960
|
0
|
0
|
|
|
|
0
|
next if $daughter->is_terminal; # this child's done |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# pull up all descendants of non-terminal daughter to depend |
963
|
|
|
|
|
|
|
# directly on the daughter |
964
|
0
|
|
|
|
|
0
|
$daughter->flatten(); |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
# now reparent all the grandchildren to self, by retracting |
967
|
|
|
|
|
|
|
# the daughter |
968
|
0
|
|
|
|
|
0
|
$self->retract($daughter); |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
|
971
|
0
|
|
|
|
|
0
|
return $self; |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
# could probably be reimplemented by "get_all_terminals" and |
974
|
|
|
|
|
|
|
# judicious use of insert, but this recursive strategy is more |
975
|
|
|
|
|
|
|
# elegant and takes advantage of brains of retract() method |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
################################################################## |
978
|
|
|
|
|
|
|
sub retract { |
979
|
|
|
|
|
|
|
# pulls in and removes one layer of non-terminal nodes, attaching |
980
|
|
|
|
|
|
|
# their children directly to the current node, retaining what |
981
|
|
|
|
|
|
|
# surface order they originally had. |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
# see POD for more details |
984
|
|
|
|
|
|
|
|
985
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
986
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $daughter = shift; |
987
|
|
|
|
|
|
|
|
988
|
0
|
0
|
|
|
|
0
|
if ( $daughter->parent() != $self ) { |
989
|
0
|
|
|
|
|
0
|
carp "argument daughter does not claim instance as mother,", |
990
|
|
|
|
|
|
|
" can't retract!"; |
991
|
0
|
|
|
|
|
0
|
return; |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
|
994
|
0
|
0
|
|
|
|
0
|
if ( $daughter->is_terminal() ) { |
995
|
0
|
|
|
|
|
0
|
carp "daughter is a terminal node, can't retract!"; |
996
|
0
|
|
|
|
|
0
|
return; |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
|
999
|
0
|
|
|
|
|
0
|
$self->replace( $daughter, @{$daughter->children} ); |
|
0
|
|
|
|
|
0
|
|
1000
|
|
|
|
|
|
|
|
1001
|
0
|
|
|
|
|
0
|
return $self; |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
################################################################## |
1005
|
|
|
|
|
|
|
sub replace { |
1006
|
|
|
|
|
|
|
# replace target arg with replacement list |
1007
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1008
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $target = shift; |
1009
|
0
|
|
|
|
|
0
|
my @replacements = @_; |
1010
|
|
|
|
|
|
|
|
1011
|
0
|
0
|
|
|
|
0
|
carp "argument not a child of instance, can't replace!" |
1012
|
|
|
|
|
|
|
unless ($target->parent == $self); |
1013
|
|
|
|
|
|
|
|
1014
|
0
|
|
|
|
|
0
|
my $index = $self->get_index($target); |
1015
|
|
|
|
|
|
|
|
1016
|
0
|
|
|
|
|
0
|
$self->detach_at($index); |
1017
|
|
|
|
|
|
|
|
1018
|
0
|
|
|
|
|
0
|
$self->insert_at($index, @replacements); |
1019
|
|
|
|
|
|
|
} |
1020
|
|
|
|
|
|
|
################################################################## |
1021
|
|
|
|
|
|
|
sub wither { |
1022
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1023
|
0
|
0
|
|
|
|
0
|
return if $self->is_root(); |
1024
|
|
|
|
|
|
|
|
1025
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $parent = $self->parent(); |
1026
|
|
|
|
|
|
|
|
1027
|
0
|
|
|
|
|
0
|
my $num_sibs = $parent->num_children(); |
1028
|
|
|
|
|
|
|
|
1029
|
0
|
|
|
|
|
0
|
$parent->detach($self); |
1030
|
0
|
0
|
|
|
|
0
|
if ($num_sibs == 1) { |
1031
|
|
|
|
|
|
|
# unary parent, wither it too |
1032
|
0
|
|
|
|
|
0
|
$parent->wither(); |
1033
|
|
|
|
|
|
|
} |
1034
|
|
|
|
|
|
|
} |
1035
|
|
|
|
|
|
|
################################################################## |
1036
|
|
|
|
|
|
|
sub detach { |
1037
|
|
|
|
|
|
|
# removes an entire subtree. |
1038
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1039
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $daughter = shift; |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
# actually do the detachment |
1042
|
0
|
|
|
|
|
0
|
my $index = $self->get_index($daughter); |
1043
|
|
|
|
|
|
|
|
1044
|
0
|
|
|
|
|
0
|
$self->detach_at($index); |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
} |
1047
|
|
|
|
|
|
|
################################################################## |
1048
|
|
|
|
|
|
|
sub detach_at { |
1049
|
|
|
|
|
|
|
# remove one daughter node at index |
1050
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1051
|
0
|
|
|
|
|
0
|
my $index = shift; |
1052
|
|
|
|
|
|
|
|
1053
|
0
|
0
|
|
|
|
0
|
if (not defined $index) { |
1054
|
0
|
|
|
|
|
0
|
croak "no index provided to detach_at method"; |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
|
1057
|
0
|
|
|
|
|
0
|
my __PACKAGE__ $d = $self->children($index); |
1058
|
|
|
|
|
|
|
|
1059
|
0
|
0
|
|
|
|
0
|
if (not defined $d) { |
1060
|
0
|
|
|
|
|
0
|
carp "no daughter at index $index"; |
1061
|
0
|
|
|
|
|
0
|
return; |
1062
|
|
|
|
|
|
|
} |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
# remove links |
1065
|
0
|
|
|
|
|
0
|
$d->clear_parent(); |
1066
|
|
|
|
|
|
|
|
1067
|
0
|
0
|
0
|
|
|
0
|
if (defined $self->headchild() and $self->headchild() == $d) { |
1068
|
0
|
|
|
|
|
0
|
$self->clear_headchild(); |
1069
|
|
|
|
|
|
|
} |
1070
|
|
|
|
|
|
|
|
1071
|
0
|
|
|
|
|
0
|
splice @{$self->children}, $index, 1, (); # replace with empty list |
|
0
|
|
|
|
|
0
|
|
1072
|
|
|
|
|
|
|
} |
1073
|
|
|
|
|
|
|
################################################################## |
1074
|
|
|
|
|
|
|
sub prepend { |
1075
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1076
|
0
|
|
|
|
|
0
|
my @daughters = @_; |
1077
|
0
|
|
|
|
|
0
|
$self->insert_at(0, @daughters); |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
################################################################## |
1080
|
|
|
|
|
|
|
sub append { |
1081
|
4158
|
|
|
4158
|
1
|
4398
|
my __PACKAGE__ $self = shift; |
1082
|
4158
|
|
|
|
|
6725
|
my @daughters = @_; |
1083
|
4158
|
|
|
|
|
4097
|
$self->insert_at(scalar @{$self->children}, @daughters); |
|
4158
|
|
|
|
|
6977
|
|
1084
|
|
|
|
|
|
|
} |
1085
|
|
|
|
|
|
|
################################################################## |
1086
|
|
|
|
|
|
|
sub insert_before { |
1087
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1088
|
|
|
|
|
|
|
|
1089
|
0
|
|
|
|
|
0
|
my $parent = $self->parent(); |
1090
|
0
|
|
|
|
|
0
|
my $position = $parent->get_index($self); |
1091
|
|
|
|
|
|
|
|
1092
|
0
|
|
|
|
|
0
|
my @sibs = @_; |
1093
|
0
|
|
|
|
|
0
|
$parent->insert_at($position, @sibs); |
1094
|
|
|
|
|
|
|
|
1095
|
0
|
|
|
|
|
0
|
return $self; |
1096
|
|
|
|
|
|
|
} |
1097
|
|
|
|
|
|
|
################################################################## |
1098
|
|
|
|
|
|
|
sub insert_after { |
1099
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1100
|
|
|
|
|
|
|
|
1101
|
0
|
|
|
|
|
0
|
my $parent = $self->parent(); |
1102
|
0
|
|
|
|
|
0
|
my $position = $parent->get_index($self); |
1103
|
|
|
|
|
|
|
|
1104
|
0
|
|
|
|
|
0
|
my @sibs = @_; |
1105
|
0
|
|
|
|
|
0
|
$parent->insert_at($position + 1, @sibs); |
1106
|
|
|
|
|
|
|
|
1107
|
0
|
|
|
|
|
0
|
return $self; |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
################################################################## |
1110
|
|
|
|
|
|
|
sub insert_at { |
1111
|
4158
|
|
|
4158
|
1
|
5343
|
my __PACKAGE__ $self = shift; |
1112
|
4158
|
|
|
|
|
4386
|
my $position = shift; |
1113
|
4158
|
|
|
|
|
5639
|
my @daughters = @_; |
1114
|
|
|
|
|
|
|
|
1115
|
4158
|
|
|
|
|
6267
|
foreach my __PACKAGE__ $d (@daughters) { |
1116
|
4158
|
|
|
|
|
7013
|
$d->parent($self); |
1117
|
|
|
|
|
|
|
} |
1118
|
|
|
|
|
|
|
|
1119
|
4158
|
|
|
|
|
5293
|
splice @{$self->children}, $position, 0, @daughters; |
|
4158
|
|
|
|
|
6884
|
|
1120
|
4158
|
|
|
|
|
7653
|
return $self; |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
################################################################## |
1123
|
|
|
|
|
|
|
# FEATURES OF THE CURRENT NODE |
1124
|
|
|
|
|
|
|
################################################################## |
1125
|
|
|
|
|
|
|
sub is_root { |
1126
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1127
|
0
|
|
|
|
|
0
|
return ( not defined $self->[PARENT] ); |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
################################################################## |
1130
|
|
|
|
|
|
|
# Is this an empty root node? |
1131
|
|
|
|
|
|
|
# |
1132
|
|
|
|
|
|
|
# my $text = <
|
1133
|
|
|
|
|
|
|
# ( |
1134
|
|
|
|
|
|
|
# (INTJ |
1135
|
|
|
|
|
|
|
# (UH Okay) |
1136
|
|
|
|
|
|
|
# (. .) |
1137
|
|
|
|
|
|
|
# (-DFL- E_S))) |
1138
|
|
|
|
|
|
|
# EOTREE |
1139
|
|
|
|
|
|
|
# |
1140
|
|
|
|
|
|
|
# my $node = Lingua::Treebank::TB3Const->new->from_penn_string($text) |
1141
|
|
|
|
|
|
|
# print "This is true." if ($node->is_empty_root()); |
1142
|
|
|
|
|
|
|
# |
1143
|
|
|
|
|
|
|
sub is_empty_root { |
1144
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
return ($self->is_root() and |
1147
|
|
|
|
|
|
|
not $self->tag() and |
1148
|
0
|
|
0
|
|
|
0
|
scalar(@{$self->children()}) == 1 ) |
1149
|
|
|
|
|
|
|
} |
1150
|
|
|
|
|
|
|
################################################################# |
1151
|
|
|
|
|
|
|
sub is_terminal { |
1152
|
4507
|
|
|
4507
|
1
|
4612
|
my __PACKAGE__ $self = shift; |
1153
|
4507
|
100
|
|
|
|
8812
|
if (defined $self->[WORD]) { |
1154
|
2363
|
50
|
|
|
|
2399
|
if ( @{$self->children()} ) { |
|
2363
|
|
|
|
|
3693
|
|
1155
|
0
|
|
|
|
|
0
|
carp "how did I get children AND a word?"; |
1156
|
|
|
|
|
|
|
} |
1157
|
2363
|
|
|
|
|
4957
|
return 1; |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
else { |
1160
|
2144
|
50
|
|
|
|
2005
|
if ( not @{ $self->children() } ) { |
|
2144
|
|
|
|
|
8788
|
|
1161
|
0
|
|
|
|
|
0
|
croak "how did I get neither a word NOR children?"; |
1162
|
0
|
|
|
|
|
0
|
return 1; # might as well terminate |
1163
|
|
|
|
|
|
|
} |
1164
|
2144
|
|
|
|
|
4560
|
return 0; |
1165
|
|
|
|
|
|
|
} |
1166
|
|
|
|
|
|
|
} |
1167
|
|
|
|
|
|
|
################################################################## |
1168
|
|
|
|
|
|
|
sub children { |
1169
|
17330
|
|
|
17330
|
1
|
18747
|
my $self = shift; |
1170
|
17330
|
50
|
|
|
|
32160
|
if (@_ > 2) { |
1171
|
0
|
|
|
|
|
0
|
croak "children() called with >2 args"; |
1172
|
|
|
|
|
|
|
} |
1173
|
17330
|
50
|
|
|
|
28972
|
if (@_ == 2) { |
1174
|
|
|
|
|
|
|
# e.g. $d->children(1, $foo_child); |
1175
|
0
|
0
|
|
|
|
0
|
croak "wrong package type: ", ref($_[1]), |
1176
|
|
|
|
|
|
|
" . Expecting ", __PACKAGE__ |
1177
|
|
|
|
|
|
|
unless UNIVERSAL::isa($_[1], __PACKAGE__); |
1178
|
|
|
|
|
|
|
|
1179
|
0
|
|
|
|
|
0
|
return $self->[ CHILDREN ][ $_[0] ] = $_[1]; |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
} |
1182
|
17330
|
50
|
|
|
|
33973
|
if (@_ == 1) { |
1183
|
0
|
0
|
|
|
|
0
|
if (ref $_[0] eq 'ARRAY') { |
1184
|
|
|
|
|
|
|
# reset entire array, |
1185
|
|
|
|
|
|
|
# e.g. $d->children([ $foo, $bar ]) |
1186
|
0
|
|
|
|
|
0
|
foreach (@{$_[1]}) { |
|
0
|
|
|
|
|
0
|
|
1187
|
0
|
0
|
|
|
|
0
|
if (not UNIVERSAL::isa($_, __PACKAGE__)) { |
1188
|
0
|
|
|
|
|
0
|
croak "ref ", ref $_, " in arrayref not a ", |
1189
|
|
|
|
|
|
|
__PACKAGE__; |
1190
|
|
|
|
|
|
|
} |
1191
|
|
|
|
|
|
|
} |
1192
|
0
|
|
|
|
|
0
|
$self->[ CHILDREN ] = $_[1]; |
1193
|
|
|
|
|
|
|
} |
1194
|
|
|
|
|
|
|
else { |
1195
|
|
|
|
|
|
|
# getting single element |
1196
|
|
|
|
|
|
|
# e.g. $d->children(2); |
1197
|
0
|
|
|
|
|
0
|
return $self->[ CHILDREN ][ $_[0] ]; |
1198
|
|
|
|
|
|
|
} |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
# else no args |
1201
|
17330
|
|
|
|
|
49533
|
return $self->[ CHILDREN ]; |
1202
|
|
|
|
|
|
|
} |
1203
|
|
|
|
|
|
|
################################################################## |
1204
|
|
|
|
|
|
|
sub num_children { |
1205
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
1206
|
0
|
|
|
|
|
0
|
return scalar @{$self->[ CHILDREN ]}; |
|
0
|
|
|
|
|
0
|
|
1207
|
|
|
|
|
|
|
} |
1208
|
|
|
|
|
|
|
################################################################## |
1209
|
|
|
|
|
|
|
# Functions for headed trees |
1210
|
|
|
|
|
|
|
################################################################## |
1211
|
|
|
|
|
|
|
sub capitalize_headed { |
1212
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1213
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal) { |
1214
|
0
|
|
|
|
|
0
|
return; |
1215
|
|
|
|
|
|
|
} |
1216
|
0
|
|
|
|
|
0
|
my $head = $self->headchild(); |
1217
|
0
|
|
|
|
|
0
|
for my $kid (@{$self->children}) { |
|
0
|
|
|
|
|
0
|
|
1218
|
0
|
0
|
|
|
|
0
|
if ($kid == $head) { |
1219
|
0
|
|
|
|
|
0
|
$kid->tag(uc $kid->tag()); |
1220
|
|
|
|
|
|
|
} |
1221
|
|
|
|
|
|
|
else { |
1222
|
0
|
|
|
|
|
0
|
$kid->tag(lc $kid->tag()); |
1223
|
|
|
|
|
|
|
} |
1224
|
0
|
|
|
|
|
0
|
$kid->capitalize_headed(); |
1225
|
|
|
|
|
|
|
} |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
sub maximal_projection { |
1229
|
|
|
|
|
|
|
# given a node (usually a leaf!) climb the tree until I'm not the |
1230
|
|
|
|
|
|
|
# headword any more |
1231
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1232
|
0
|
|
|
|
|
0
|
my $maximal = $self; |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
CLIMB: |
1235
|
0
|
|
|
|
|
0
|
while (1) { |
1236
|
0
|
|
|
|
|
0
|
my $parent = $maximal->parent(); |
1237
|
0
|
0
|
0
|
|
|
0
|
if (not defined $parent or $parent->headterminal() != $self) { |
1238
|
0
|
|
|
|
|
0
|
last CLIMB; # done! $maximal is it |
1239
|
|
|
|
|
|
|
} |
1240
|
0
|
|
|
|
|
0
|
$maximal = $parent; |
1241
|
|
|
|
|
|
|
} |
1242
|
0
|
|
|
|
|
0
|
return $maximal; |
1243
|
|
|
|
|
|
|
} |
1244
|
|
|
|
|
|
|
################################################################## |
1245
|
|
|
|
|
|
|
sub clear_headchild { |
1246
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1247
|
0
|
|
|
|
|
0
|
$self->[HEADCHILD] = undef; |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
################################################################## |
1250
|
|
|
|
|
|
|
sub headterminal { |
1251
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1252
|
0
|
0
|
|
|
|
0
|
if ($self->is_terminal()) { |
1253
|
0
|
|
|
|
|
0
|
return $self; |
1254
|
|
|
|
|
|
|
} |
1255
|
0
|
|
|
|
|
0
|
my $headchild = $self->headchild(); |
1256
|
|
|
|
|
|
|
|
1257
|
0
|
0
|
|
|
|
0
|
return undef if not defined $headchild; |
1258
|
|
|
|
|
|
|
|
1259
|
0
|
|
|
|
|
0
|
return $headchild->headterminal(); |
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
################################################################## |
1262
|
|
|
|
|
|
|
sub headchild { |
1263
|
0
|
|
|
0
|
0
|
0
|
my __PACKAGE__ $self = shift; |
1264
|
0
|
0
|
|
|
|
0
|
if (@_) { |
1265
|
|
|
|
|
|
|
# setting |
1266
|
0
|
0
|
|
|
|
0
|
if (@_ > 1) { |
1267
|
0
|
|
|
|
|
0
|
croak "->headchild() called with >1 argument"; |
1268
|
|
|
|
|
|
|
} |
1269
|
0
|
|
|
|
|
0
|
my $val = $_[0]; |
1270
|
0
|
0
|
|
|
|
0
|
croak "->headchild() argument wrong class" |
1271
|
|
|
|
|
|
|
if ( not UNIVERSAL::isa($val, __PACKAGE__) ); |
1272
|
|
|
|
|
|
|
|
1273
|
0
|
0
|
|
|
|
0
|
if (not grep { $val == $_ } @{$self->[ CHILDREN ]}) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1274
|
0
|
|
|
|
|
0
|
croak "->headchild() setting used value that wasn't ", |
1275
|
|
|
|
|
|
|
"one of its kids"; |
1276
|
|
|
|
|
|
|
} |
1277
|
0
|
|
|
|
|
0
|
$self->[HEADCHILD] = $val; |
1278
|
|
|
|
|
|
|
} |
1279
|
|
|
|
|
|
|
else { |
1280
|
|
|
|
|
|
|
# getting |
1281
|
0
|
|
|
|
|
0
|
return $self->[HEADCHILD]; |
1282
|
|
|
|
|
|
|
} |
1283
|
|
|
|
|
|
|
} |
1284
|
|
|
|
|
|
|
################################################################## |
1285
|
|
|
|
|
|
|
sub parent { |
1286
|
4158
|
|
|
4158
|
1
|
4308
|
my __PACKAGE__ $self = shift; |
1287
|
4158
|
50
|
|
|
|
6967
|
if (@_) { |
1288
|
|
|
|
|
|
|
# setting |
1289
|
4158
|
50
|
|
|
|
7671
|
if (@_ > 1) { |
1290
|
0
|
|
|
|
|
0
|
croak "parent called with >1 argument"; |
1291
|
|
|
|
|
|
|
} |
1292
|
4158
|
|
|
|
|
4153
|
my $val = $_[0]; |
1293
|
4158
|
50
|
|
|
|
12356
|
croak "parent argument wrong class" |
1294
|
|
|
|
|
|
|
if ( not UNIVERSAL::isa($val, __PACKAGE__) ); |
1295
|
4158
|
|
|
|
|
13491
|
$self->[PARENT] = $val; |
1296
|
|
|
|
|
|
|
} |
1297
|
|
|
|
|
|
|
else { |
1298
|
|
|
|
|
|
|
# getting |
1299
|
0
|
|
|
|
|
0
|
return $self->[PARENT]; |
1300
|
|
|
|
|
|
|
} |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
################################################################## |
1303
|
|
|
|
|
|
|
sub clear_parent { |
1304
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
1305
|
0
|
|
|
|
|
0
|
$self->[PARENT] = undef; |
1306
|
|
|
|
|
|
|
} |
1307
|
|
|
|
|
|
|
################################################################## |
1308
|
|
|
|
|
|
|
sub tag { |
1309
|
4507
|
|
|
4507
|
1
|
7762
|
my __PACKAGE__ $self = shift; |
1310
|
4507
|
50
|
|
|
|
7863
|
if (@_) { |
1311
|
|
|
|
|
|
|
# setting |
1312
|
4507
|
50
|
|
|
|
8137
|
if (@_ > 1) { |
1313
|
0
|
|
|
|
|
0
|
croak "tag() called with >1 argument"; |
1314
|
|
|
|
|
|
|
} |
1315
|
4507
|
50
|
|
|
|
7388
|
carp "tag() passed a reference!" if ref($_[0]); |
1316
|
4507
|
|
|
|
|
11035
|
$self->[TAG] = $_[0]; |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
else { |
1319
|
|
|
|
|
|
|
# getting |
1320
|
0
|
|
|
|
|
0
|
return $self->[TAG]; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
} |
1323
|
|
|
|
|
|
|
################################################################## |
1324
|
|
|
|
|
|
|
sub annot { |
1325
|
469
|
|
|
469
|
1
|
545
|
my __PACKAGE__ $self = shift; |
1326
|
469
|
50
|
|
|
|
849
|
if (@_) { |
1327
|
|
|
|
|
|
|
# setting |
1328
|
469
|
50
|
|
|
|
937
|
if (@_ > 1) { |
1329
|
0
|
|
|
|
|
0
|
croak "annot() called with >1 argument"; |
1330
|
|
|
|
|
|
|
} |
1331
|
469
|
50
|
|
|
|
997
|
carp "annot() passed a reference!" if ref($_[0]); |
1332
|
469
|
|
|
|
|
1426
|
$self->[ANNOT] = $_[0]; |
1333
|
|
|
|
|
|
|
} |
1334
|
|
|
|
|
|
|
else { |
1335
|
|
|
|
|
|
|
# getting |
1336
|
0
|
|
|
|
|
0
|
return $self->[ANNOT]; |
1337
|
|
|
|
|
|
|
} |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
################################################################## |
1340
|
|
|
|
|
|
|
sub word { |
1341
|
6521
|
|
|
6521
|
1
|
12249
|
my __PACKAGE__ $self = shift; |
1342
|
6521
|
100
|
|
|
|
10906
|
if (@_) { |
1343
|
|
|
|
|
|
|
# setting |
1344
|
2363
|
50
|
|
|
|
4475
|
if (@_ > 1) { |
1345
|
0
|
|
|
|
|
0
|
croak "word() called with >1 argument"; |
1346
|
|
|
|
|
|
|
} |
1347
|
|
|
|
|
|
|
|
1348
|
2363
|
50
|
|
|
|
2201
|
if (@{$self->[CHILDREN]}) { |
|
2363
|
|
|
|
|
5373
|
|
1349
|
0
|
|
|
|
|
0
|
croak "can't assign a word when children exist, failing!"; |
1350
|
0
|
|
|
|
|
0
|
return; |
1351
|
|
|
|
|
|
|
} |
1352
|
|
|
|
|
|
|
|
1353
|
2363
|
50
|
|
|
|
4084
|
carp "word() passed a reference!" if ref($_[0]); |
1354
|
2363
|
|
|
|
|
5501
|
$self->[WORD] = $_[0]; |
1355
|
|
|
|
|
|
|
} |
1356
|
|
|
|
|
|
|
else { |
1357
|
|
|
|
|
|
|
# getting |
1358
|
4158
|
|
|
|
|
21218
|
return $self->[WORD]; |
1359
|
|
|
|
|
|
|
} |
1360
|
|
|
|
|
|
|
} |
1361
|
|
|
|
|
|
|
################################################################## |
1362
|
|
|
|
|
|
|
sub text { |
1363
|
0
|
|
|
0
|
1
|
0
|
my __PACKAGE__ $self = shift; |
1364
|
0
|
|
|
|
|
0
|
return join(" ", |
1365
|
0
|
|
|
|
|
0
|
map {$_->word()} |
1366
|
0
|
|
|
|
|
0
|
grep {$_->tag ne '-NONE-'} |
1367
|
|
|
|
|
|
|
$self->get_all_terminals()); |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
################################################################## |
1370
|
|
|
|
|
|
|
sub new { |
1371
|
4507
|
|
|
4507
|
1
|
5534
|
my $class = shift; |
1372
|
4507
|
|
|
|
|
6521
|
my %args = @_; |
1373
|
4507
|
|
|
|
|
11059
|
my $self = bless [], $class; |
1374
|
4507
|
|
|
|
|
10377
|
$self->[CHILDREN] = []; |
1375
|
4507
|
|
|
|
|
10615
|
$self->[NUM] = $class->_next_numid(); |
1376
|
4507
|
|
|
|
|
11718
|
foreach (keys %args) { |
1377
|
0
|
0
|
|
|
|
0
|
if ($self->can($_)) { |
1378
|
0
|
|
|
|
|
0
|
$self->$_($args{$_}); |
1379
|
|
|
|
|
|
|
} |
1380
|
|
|
|
|
|
|
else { |
1381
|
0
|
|
|
|
|
0
|
carp "unknown argument $_"; |
1382
|
|
|
|
|
|
|
} |
1383
|
|
|
|
|
|
|
} |
1384
|
4507
|
|
|
|
|
10152
|
return $self; |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
################################################################## |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
1; |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
__END__ |