line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::YaTeA::MultiWordTestifiedTerm; |
2
|
5
|
|
|
5
|
|
39
|
use strict; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
196
|
|
3
|
5
|
|
|
5
|
|
32
|
use warnings; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
124
|
|
4
|
5
|
|
|
5
|
|
2139
|
use Lingua::YaTeA::TestifiedTerm; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
61
|
|
5
|
5
|
|
|
5
|
|
154
|
use Lingua::YaTeA::MultiWordUnit; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
32
|
|
6
|
|
|
|
|
|
|
# use UNIVERSAL; |
7
|
|
|
|
|
|
|
# use Scalar::Util qw(blessed); |
8
|
5
|
|
|
5
|
|
109
|
use NEXT; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
17
|
|
9
|
5
|
|
|
5
|
|
123
|
use base qw(Lingua::YaTeA::TestifiedTerm Lingua::YaTeA::MultiWordPhrase); |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
8916
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION=$Lingua::YaTeA::VERSION; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub new |
16
|
|
|
|
|
|
|
{ |
17
|
1
|
|
|
1
|
1
|
5
|
my ($class_or_object,$words_a,$source,$match_type,$num_content_words,$tag_set) = @_; |
18
|
1
|
|
|
|
|
3
|
my $this = shift; |
19
|
1
|
50
|
|
|
|
5
|
$this = bless {}, $this unless ref $this; |
20
|
1
|
|
|
|
|
19
|
$this->NEXT::new(@_); |
21
|
1
|
|
|
|
|
4
|
return $this; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub getIslandType |
27
|
|
|
|
|
|
|
{ |
28
|
5
|
|
|
5
|
1
|
10
|
my ($this) = @_; |
29
|
5
|
|
|
|
|
11
|
return join(",",@{$this->getSource}); |
|
5
|
|
|
|
|
18
|
|
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub getIfParsable |
33
|
|
|
|
|
|
|
{ |
34
|
3
|
|
|
3
|
1
|
9
|
my ($this,$parsing_pattern_set,$tag_set,$parsing_direction) = @_; |
35
|
3
|
100
|
|
|
|
19
|
if (!defined $this->getForest) |
36
|
|
|
|
|
|
|
{ |
37
|
1
|
50
|
|
|
|
9
|
if($this->searchParsingPattern($parsing_pattern_set,$parsing_direction)) |
38
|
|
|
|
|
|
|
{ |
39
|
1
|
|
|
|
|
17
|
$this->setParsingMethod('PATTERN_MATCHING'); |
40
|
1
|
|
|
|
|
5
|
return 1; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
else |
43
|
|
|
|
|
|
|
{ |
44
|
0
|
0
|
|
|
|
0
|
if($this->parseProgressively($tag_set,$parsing_direction,$parsing_pattern_set)) |
45
|
|
|
|
|
|
|
{ |
46
|
0
|
|
|
|
|
0
|
$this->setParsingMethod('PROGRESSIVE'); |
47
|
0
|
|
|
|
|
0
|
return 1; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
else |
53
|
|
|
|
|
|
|
{ |
54
|
2
|
|
|
|
|
9
|
return 1; |
55
|
|
|
|
|
|
|
} |
56
|
0
|
|
|
|
|
|
return 0; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
################################### |
61
|
|
|
|
|
|
|
# Computation of BioLG-type links # |
62
|
|
|
|
|
|
|
################################### |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub getHeadAndLinks |
65
|
|
|
|
|
|
|
{ |
66
|
0
|
|
|
0
|
1
|
|
my ($this,$LGPmapping_h,$chained_links) = @_; |
67
|
0
|
|
|
|
|
|
my @links; |
68
|
0
|
0
|
|
|
|
|
if(!defined $this->{FOREST}) |
69
|
|
|
|
|
|
|
{ |
70
|
0
|
|
|
|
|
|
return ($this->getWord($#{$this->getWords}),$#{$this->getWords},\@links); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
else |
73
|
|
|
|
|
|
|
{ |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
} |
76
|
0
|
|
|
|
|
|
my $head = $this->getWord($this->getTree(0)->getHead->getIndex); |
77
|
0
|
|
|
|
|
|
my $left; |
78
|
|
|
|
|
|
|
my $right; |
79
|
0
|
|
|
|
|
|
my $prep; |
80
|
0
|
|
|
|
|
|
my $det; |
81
|
0
|
|
|
|
|
|
my $node; |
82
|
0
|
|
|
|
|
|
my $link_key; |
83
|
|
|
|
|
|
|
|
84
|
0
|
|
|
|
|
|
my %first; |
85
|
0
|
|
|
|
|
|
my %second; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
|
foreach $node (@{$this->getTree(0)->getNodeSet->getNodes}) |
|
0
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
{ |
90
|
0
|
|
|
|
|
|
$left = $node->getLeftEdge->searchHead (0); |
91
|
0
|
|
|
|
|
|
$right = $node->getRightEdge->searchHead (0); |
92
|
0
|
|
|
|
|
|
$prep = $node->getPreposition; |
93
|
0
|
|
|
|
|
|
$det = $node->getDeterminer; |
94
|
|
|
|
|
|
|
|
95
|
0
|
0
|
|
|
|
|
if (defined $prep) |
96
|
|
|
|
|
|
|
{ |
97
|
0
|
|
|
|
|
|
$link_key = $left->getPOS($this->getWords) . "-" . $prep->getPOS($this->getWords); |
98
|
0
|
|
|
|
|
|
$this->recordLink($link_key,$left,$prep,\@links,$LGPmapping_h); |
99
|
0
|
|
|
|
|
|
push @{$first{$left->getIndex}}, $prep->getIndex; |
|
0
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
push @{$second{$prep->getIndex}}, $left->getIndex; |
|
0
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
$link_key = $prep->getPOS($this->getWords) . "-" . $right->getPOS($this->getWords); |
103
|
0
|
|
|
|
|
|
$this->recordLink($link_key,$prep,$right,\@links,$LGPmapping_h); |
104
|
0
|
|
|
|
|
|
push @{$first{$prep->getIndex}}, $right->getIndex; |
|
0
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
|
push @{$second{$right->getIndex}}, $prep->getIndex; |
|
0
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
else |
108
|
|
|
|
|
|
|
{ |
109
|
0
|
|
|
|
|
|
$link_key = $left->getPOS($this->getWords) . "-" . $right->getPOS($this->getWords); |
110
|
0
|
|
|
|
|
|
$this->recordLink($link_key,$left,$right,\@links,$LGPmapping_h); |
111
|
0
|
|
|
|
|
|
push @{$first{$left->getIndex}}, $right->getIndex; |
|
0
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
|
push @{$second{$right->getIndex}}, $left->getIndex; |
|
0
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
0
|
0
|
|
|
|
|
if (defined $det) |
116
|
|
|
|
|
|
|
{ |
117
|
0
|
|
|
|
|
|
$link_key = $det->getPOS($this->getWords) . "-" . $right->getPOS($this->getWords); |
118
|
0
|
|
|
|
|
|
$this->recordLink($link_key,$det,$right,\@links,$LGPmapping_h); |
119
|
0
|
|
|
|
|
|
push @{$first{$det->getIndex}}, $right->getIndex; |
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
push @{$second{$right->getIndex}}, $det->getIndex; |
|
0
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
} |
123
|
0
|
|
|
|
|
|
$this->adjustLinksHeight(\@links,\%first,\%second); |
124
|
0
|
|
|
|
|
|
@links = sort{$this->sortLinks($a,$b)} @links; |
|
0
|
|
|
|
|
|
|
125
|
0
|
0
|
|
|
|
|
if($chained_links == 1) |
126
|
|
|
|
|
|
|
{ |
127
|
0
|
|
|
|
|
|
$this->chainLinks(\@links); |
128
|
|
|
|
|
|
|
} |
129
|
0
|
|
|
|
|
|
return ($this->getWord($this->getTree(0)->getHead->getIndex),$this->getTree(0)->getHead->getIndex,\@links); |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub chainLinks |
133
|
|
|
|
|
|
|
{ |
134
|
0
|
|
|
0
|
1
|
|
my ($this,$links_a) = @_; |
135
|
0
|
|
|
|
|
|
my $link; |
136
|
0
|
|
|
|
|
|
my $links_sets_h = $this->getLinksSets($links_a); |
137
|
0
|
|
|
|
|
|
my @chained_links; |
138
|
|
|
|
|
|
|
my $set_a; |
139
|
0
|
|
|
|
|
|
my $left; |
140
|
0
|
|
|
|
|
|
my $right; |
141
|
0
|
|
|
|
|
|
my $height; |
142
|
0
|
|
|
|
|
|
my $type; |
143
|
0
|
|
|
|
|
|
my $i; |
144
|
0
|
|
|
|
|
|
my $search; |
145
|
0
|
|
|
|
|
|
my %recorded; |
146
|
0
|
|
|
|
|
|
my $updated_height; |
147
|
0
|
|
|
|
|
|
my $previous_right; |
148
|
0
|
|
|
|
|
|
foreach $set_a (values (%$links_sets_h)) |
149
|
|
|
|
|
|
|
{ |
150
|
0
|
0
|
|
|
|
|
if(scalar @$set_a > 1) |
151
|
|
|
|
|
|
|
{ |
152
|
0
|
|
|
|
|
|
while ( $link = pop @$set_a) |
153
|
|
|
|
|
|
|
{ |
154
|
0
|
|
|
|
|
|
$link =~ /\[([0-9]+) ([0-9]+) ([0-9]+) \(([^\)]+)\)\]/; |
155
|
0
|
|
|
|
|
|
$left = $1; |
156
|
0
|
|
|
|
|
|
$right = $2; |
157
|
0
|
|
|
|
|
|
$height = $3; |
158
|
0
|
|
|
|
|
|
$type = $4; |
159
|
0
|
0
|
|
|
|
|
if($type eq "CH") |
160
|
|
|
|
|
|
|
{ |
161
|
0
|
0
|
|
|
|
|
if($left < $right -1) |
162
|
|
|
|
|
|
|
{ |
163
|
0
|
|
|
|
|
|
$updated_height = 0; |
164
|
0
|
|
|
|
|
|
for ($i= $left+1; $i < $right; $i++) |
165
|
|
|
|
|
|
|
{ |
166
|
0
|
0
|
|
|
|
|
if(!defined $previous_right) |
167
|
|
|
|
|
|
|
{ |
168
|
0
|
|
|
|
|
|
$previous_right = $right; |
169
|
|
|
|
|
|
|
} |
170
|
0
|
|
|
|
|
|
$search = $i . " " . $previous_right ; |
171
|
0
|
0
|
|
|
|
|
if(exists $recorded{$search}) |
172
|
|
|
|
|
|
|
{ |
173
|
0
|
|
|
|
|
|
$right = $i; |
174
|
0
|
|
|
|
|
|
$height = $updated_height; |
175
|
0
|
|
|
|
|
|
last; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
else |
178
|
|
|
|
|
|
|
{ |
179
|
0
|
|
|
|
|
|
$updated_height++; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
} |
183
|
0
|
|
|
|
|
|
$recorded{$left . " " . $right}++; |
184
|
0
|
|
|
|
|
|
$previous_right = $right; |
185
|
0
|
|
|
|
|
|
$link = "[". $left . " " . $right . " " . $height . " (" . $type . ")]"; |
186
|
|
|
|
|
|
|
} |
187
|
0
|
|
|
|
|
|
push @chained_links, $link; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else |
191
|
|
|
|
|
|
|
{ |
192
|
0
|
|
|
|
|
|
push @chained_links, @$set_a; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
0
|
|
|
|
|
|
@$links_a = sort{$this->sortLinks($a,$b)} @chained_links; |
|
0
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub getLinksSets |
199
|
|
|
|
|
|
|
{ |
200
|
0
|
|
|
0
|
1
|
|
my ($this,$links_a) = @_; |
201
|
0
|
|
|
|
|
|
my %sets; |
202
|
|
|
|
|
|
|
my $link; |
203
|
0
|
|
|
|
|
|
foreach $link (@$links_a){ |
204
|
0
|
|
|
|
|
|
$link =~ /\[([0-9]+) ([0-9]+) ([0-9]+) (\([^\)]+\)\])/; |
205
|
0
|
|
|
|
|
|
push @{$sets{$2}}, $link; |
|
0
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
} |
207
|
0
|
|
|
|
|
|
return \%sets; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub sortLinks |
212
|
|
|
|
|
|
|
{ |
213
|
0
|
|
|
0
|
1
|
|
my ($this,$link1,$link2) = @_; |
214
|
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
my $first_element_of_link1; |
216
|
|
|
|
|
|
|
my $second_element_of_link1; |
217
|
0
|
|
|
|
|
|
my $first_element_of_link2; |
218
|
0
|
|
|
|
|
|
my $second_element_of_link2; |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
$link1 =~ /\[([0-9]+) ([0-9]+) ([0-9]+) (\([^\)]+\)\])/; |
221
|
0
|
|
|
|
|
|
$first_element_of_link1 = $1; |
222
|
0
|
|
|
|
|
|
$second_element_of_link1 = $2; |
223
|
0
|
|
|
|
|
|
$link2 =~ /\[([0-9]+) ([0-9]+) ([0-9]+) (\([^\)]+\)\])/; |
224
|
0
|
|
|
|
|
|
$first_element_of_link2 = $1; |
225
|
0
|
|
|
|
|
|
$second_element_of_link2 = $2; |
226
|
|
|
|
|
|
|
|
227
|
0
|
0
|
|
|
|
|
if ($first_element_of_link1 != $first_element_of_link2){ |
228
|
0
|
|
|
|
|
|
return ($first_element_of_link1 <=> $first_element_of_link2); |
229
|
|
|
|
|
|
|
} |
230
|
0
|
|
|
|
|
|
return ($second_element_of_link1 <=> $second_element_of_link2); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub adjustLinksHeight |
234
|
|
|
|
|
|
|
{ |
235
|
0
|
|
|
0
|
1
|
|
my ($this,$links_a,$first_h,$second_h) = @_; |
236
|
0
|
|
|
|
|
|
my $link; |
237
|
|
|
|
|
|
|
my $first_word; |
238
|
0
|
|
|
|
|
|
my $second_word; |
239
|
0
|
|
|
|
|
|
my $link_tag; |
240
|
0
|
|
|
|
|
|
my $height; |
241
|
0
|
|
|
|
|
|
my $first_word_of_other_link; |
242
|
0
|
|
|
|
|
|
my $second_word_of_other_link; |
243
|
|
|
|
|
|
|
|
244
|
0
|
0
|
|
|
|
|
if(scalar @$links_a > 1) |
245
|
|
|
|
|
|
|
{ |
246
|
0
|
|
|
|
|
|
foreach $link (@$links_a){ |
247
|
0
|
|
|
|
|
|
$link =~ /\[([0-9]+) ([0-9]+) ([0-9]+) (\([^\)]+\)\])/; |
248
|
0
|
|
|
|
|
|
$first_word = $1; |
249
|
0
|
|
|
|
|
|
$second_word = $2; |
250
|
0
|
|
|
|
|
|
$height = $3; |
251
|
0
|
|
|
|
|
|
$link_tag = $4; |
252
|
0
|
0
|
|
|
|
|
if(exists $first_h->{$first_word}){ |
253
|
0
|
|
|
|
|
|
foreach $second_word_of_other_link (@{$first_h->{$first_word}}){ |
|
0
|
|
|
|
|
|
|
254
|
0
|
0
|
|
|
|
|
if($second_word_of_other_link < $second_word){ |
255
|
0
|
|
|
|
|
|
$height++; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
0
|
0
|
|
|
|
|
if(exists $second_h->{$second_word}){ |
260
|
0
|
|
|
|
|
|
foreach $first_word_of_other_link (@{$second_h->{$second_word}}){ |
|
0
|
|
|
|
|
|
|
261
|
0
|
0
|
|
|
|
|
if($first_word_of_other_link > $first_word){ |
262
|
0
|
|
|
|
|
|
$height++; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
} |
266
|
0
|
|
|
|
|
|
$link = "[".$first_word . " " . $second_word . " " .$height . " " . $link_tag; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub recordLink |
272
|
|
|
|
|
|
|
{ |
273
|
0
|
|
|
0
|
1
|
|
my ($this,$link_key,$first_element,$second_element,$links_a,$LGPmapping_h) = @_; |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
my $LGP_link; |
276
|
|
|
|
|
|
|
my %first_items; |
277
|
0
|
|
|
|
|
|
my %second_items; |
278
|
|
|
|
|
|
|
|
279
|
0
|
0
|
|
|
|
|
if(exists $LGPmapping_h->{$link_key}){ |
280
|
0
|
|
|
|
|
|
$LGP_link = "[" .$first_element->getIndex . " " . $second_element->getIndex . " 0 (" .$LGPmapping_h->{$link_key} . ")]"; |
281
|
0
|
|
|
|
|
|
push @$links_a, $LGP_link; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
else{ |
284
|
0
|
|
|
|
|
|
die "Pas de mapping pour " . $link_key . " (" .$this->getIF . ")\n"; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
sub setForest |
289
|
|
|
|
|
|
|
{ |
290
|
0
|
|
|
0
|
0
|
|
my ($this,$bracketed_parse) = @_; |
291
|
0
|
|
|
|
|
|
print STDERR $bracketed_parse . "\n"; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
1; |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
__END__ |