line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# WordNet::Similarity::PathFinder version 2.04 |
2
|
|
|
|
|
|
|
# (Last updated $Id: PathFinder.pm,v 1.39 2008/03/27 06:21:17 sidz1979 Exp $) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Module containing path-finding code for the various measures of semantic |
5
|
|
|
|
|
|
|
# relatedness. |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright (c) 2005, |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Ted Pedersen, University of Minnesota Duluth |
10
|
|
|
|
|
|
|
# tpederse at d.umn.edu |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# Jason Michelizzi, Univeristy of Minnesota Duluth |
13
|
|
|
|
|
|
|
# mich0212 at d.umn.edu |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# Siddharth Patwardhan, University of Utah, Salt Lake City |
16
|
|
|
|
|
|
|
# sidd at cs.utah.edu |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
19
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
20
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
21
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
24
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
25
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26
|
|
|
|
|
|
|
# GNU General Public License for more details. |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
29
|
|
|
|
|
|
|
# along with this program; if not, write to |
30
|
|
|
|
|
|
|
# |
31
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
32
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
33
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
34
|
|
|
|
|
|
|
# |
35
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
package WordNet::Similarity::PathFinder; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 NAME |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
WordNet::Similarity::PathFinder - module to implement path finding methods |
42
|
|
|
|
|
|
|
(by node counting) for WordNet::Similarity measures of semantic relatedness |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SYNOPSIS |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
use WordNet::QueryData; |
47
|
|
|
|
|
|
|
my $wn = WordNet::QueryData->new; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
use WordNet::Similarity::PathFinder; |
50
|
|
|
|
|
|
|
my $obj = WordNet::Similarity::PathFinder->new ($wn); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my $wps1 = 'winston_churchill#n#1'; |
53
|
|
|
|
|
|
|
my $wps2 = 'england#n#1'; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# parseWps returns reference to an array that contains |
56
|
|
|
|
|
|
|
# word1 pos1 sense1 offset1 word2 pos2 sense2 offset2 |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $result = $obj->parseWps($wps1, $wps2); |
59
|
|
|
|
|
|
|
print "@$result\n"; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# path is a reference to an array that contains the path between |
62
|
|
|
|
|
|
|
# wps1 and wps2 expressed as a series of wps values |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my @paths = $obj->getShortestPath($wps1, $wps2, 'n', 'wps'); |
65
|
|
|
|
|
|
|
my ($length, $path) = @{shift @paths}; |
66
|
|
|
|
|
|
|
defined $path or die "No path between synsets"; |
67
|
|
|
|
|
|
|
print "shortest path between $wps1 and $wps2 is $length edges long\n"; |
68
|
|
|
|
|
|
|
print "@$path\n"; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $offset1 = $wn -> offset($wps1); |
71
|
|
|
|
|
|
|
my $offset2 = $wn -> offset($wps2); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# path is a reference to an array that contains the path between |
74
|
|
|
|
|
|
|
# offset1 and offset2 expressed as a series of offset values |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my @paths = $obj->getShortestPath($offset1, $offset2, 'n', 'offset'); |
77
|
|
|
|
|
|
|
my ($length, $path) = @{shift @paths}; |
78
|
|
|
|
|
|
|
defined $path or die "No path between synsets"; |
79
|
|
|
|
|
|
|
print "shortest path between $offset1 and $offset2 is $length edges long\n"; |
80
|
|
|
|
|
|
|
print "@$path\n"; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 DESCRIPTION |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 Introduction |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
This class is derived from (i.e., is a sub-class of) WordNet::Similarity. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
The methods in this module are useful for finding paths between concepts |
89
|
|
|
|
|
|
|
in WordNet's 'is-a' taxonomies. Concept A is-a concept B if, and only if, |
90
|
|
|
|
|
|
|
B is a hypernym of A or A is in the hypernym tree of B. N.B., only nouns |
91
|
|
|
|
|
|
|
and verbs have hypernyms. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
The methods that find path lengths (such as C and |
94
|
|
|
|
|
|
|
C compute the lengths using node-counting not edge-counting. |
95
|
|
|
|
|
|
|
In general, the length of a path using node-counting will always be one |
96
|
|
|
|
|
|
|
more than the length using edge-counting. For example, if concept A |
97
|
|
|
|
|
|
|
is a hyponym of concept B, then the path length between A and B using |
98
|
|
|
|
|
|
|
node-counting is 2, but the length using edge-counting is 1. Likewise, the |
99
|
|
|
|
|
|
|
path between A and A is 1 using node-counting and 0 using edge-counting. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 Methods |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This module inherits all the methods of WordNet::Similarity. Additionally, |
104
|
|
|
|
|
|
|
the following methods are also defined. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head3 Public methods |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=over |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
111
|
|
|
|
|
|
|
|
112
|
7
|
|
|
7
|
|
46679
|
use strict; |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
150
|
|
113
|
7
|
|
|
7
|
|
31
|
use warnings; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
160
|
|
114
|
7
|
|
|
7
|
|
3513
|
use WordNet::Similarity; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
use File::Spec; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
our @ISA = qw/WordNet::Similarity/; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
our $VERSION = '2.04'; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
WordNet::Similarity::addConfigOption ('rootNode', 0, 'i', 1); |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item $measure->setPosList(Z<>) |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Specifies the parts of speech that measures derived from this module |
126
|
|
|
|
|
|
|
support (namely, nouns and verbs). |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
parameters: none |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
returns: true |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub setPosList |
135
|
|
|
|
|
|
|
{ |
136
|
|
|
|
|
|
|
my $self = shift; |
137
|
|
|
|
|
|
|
$self->{n} = 1; |
138
|
|
|
|
|
|
|
$self->{v} = 1; |
139
|
|
|
|
|
|
|
return 1; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item $self->traceOptions(Z<>) |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Overrides method of same name in WordNet::Similarity. Prints module-specific |
146
|
|
|
|
|
|
|
configuration options to the trace string (if tracing is on). PathFinder |
147
|
|
|
|
|
|
|
supports one module specific option: rootNode. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Parameters: none |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
returns: nothing |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub traceOptions |
156
|
|
|
|
|
|
|
{ |
157
|
|
|
|
|
|
|
my $self = shift; |
158
|
|
|
|
|
|
|
$self->{traceString} .= "root node :: $self->{rootNode}\n"; |
159
|
|
|
|
|
|
|
$self->SUPER::traceOptions(); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=item $measure->parseWps($synset1, $synset2) |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
parameters: synset1, synset2 -- two synsets in wps format |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
returns: a reference to an array, WordNet::Similarity::UNRELATED, or undef |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Overrides the parseWps() method in WordNet::Similarity in order to run |
170
|
|
|
|
|
|
|
additional checks, but calls WordNet::Similarity::parseWps() to get |
171
|
|
|
|
|
|
|
those checks accomplished as well. Thus, this method does everything |
172
|
|
|
|
|
|
|
that WordNet::Similarity::parseWps does. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=over |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item quote from WordNet::Similarity::parseWps: |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
This method checks the format of the two input synsets by calling |
179
|
|
|
|
|
|
|
validateSynset() for each synset. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
If the synsets are in wps format, a reference to an array will be returned. |
182
|
|
|
|
|
|
|
This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, |
183
|
|
|
|
|
|
|
$sense2, $offset2] where $word1 is the word part of $wps1, $pos1, is the |
184
|
|
|
|
|
|
|
part of speech of $wps1, $sense1 is the sense from $wps. $offset1 is the |
185
|
|
|
|
|
|
|
offset for $wps1. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
If an error occurs (such as a synset being poorly-formed), then undef |
188
|
|
|
|
|
|
|
is returned, the error level is set to non-zero, and an error message is |
189
|
|
|
|
|
|
|
appended to the error string. |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=back |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
In addition, if the two synsets are from different parts of speech, then |
194
|
|
|
|
|
|
|
WordNet::Similarity::UNRELATED is returned, the error level is set to 1, and |
195
|
|
|
|
|
|
|
a message is appended to the error string. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
If either synset is not a noun or a verb, then the error level |
198
|
|
|
|
|
|
|
is set to 1, a message is appended to the error string, and undef |
199
|
|
|
|
|
|
|
is returned. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
If the synsets are in wps format, a reference to an array will be returned. |
202
|
|
|
|
|
|
|
This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, |
203
|
|
|
|
|
|
|
$sense2, $offset2]. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub parseWps |
208
|
|
|
|
|
|
|
{ |
209
|
|
|
|
|
|
|
my $self = shift; |
210
|
|
|
|
|
|
|
my $ret = $self->SUPER::parseWps (@_); |
211
|
|
|
|
|
|
|
my $class = ref $self || $self; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
ref $ret or return $ret; |
214
|
|
|
|
|
|
|
my ($w1, $pos1, $s1, $off1, $w2, $pos2, $s2, $off2) = @{$ret}; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# check to make sure both input words are of the same part of speech |
217
|
|
|
|
|
|
|
if ($pos1 ne $pos2) { |
218
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
219
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::parseWps()) - "; |
220
|
|
|
|
|
|
|
$self->{errorString} .= |
221
|
|
|
|
|
|
|
"$w1#$pos1 and $w2#$pos2 belong to different parts of speech."; |
222
|
|
|
|
|
|
|
if ($self->{trace}) { |
223
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
224
|
|
|
|
|
|
|
$self->printSet ($pos1, 'wps', "$w1#$pos1#$s1"); |
225
|
|
|
|
|
|
|
$self->{traceString} .= " and "; |
226
|
|
|
|
|
|
|
$self->printSet ($pos2, 'wps', "$w2#$pos2#$s2"); |
227
|
|
|
|
|
|
|
$self->{traceString} .= " belong to different parts of speech."; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
return $self->UNRELATED; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# check to make sure that the pos is a noun or verb |
233
|
|
|
|
|
|
|
if (index ("nv", $pos1) < $[) { |
234
|
|
|
|
|
|
|
if ($self->{trace}) { |
235
|
|
|
|
|
|
|
$self->{traceString} .= |
236
|
|
|
|
|
|
|
"Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2).\n"; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
239
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::parseWps()) - "; |
240
|
|
|
|
|
|
|
$self->{errorString} .= |
241
|
|
|
|
|
|
|
"Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2)."; |
242
|
|
|
|
|
|
|
return undef; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
return $ret; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item $measure->getShortestPath($synset1, $synset2, $pos, $mode) |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Given two input synsets, returns the shortest path between the two synsets. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Parameters: two synsets, a part-of-speech, and a mode indicator |
254
|
|
|
|
|
|
|
(i.e., the string 'offset' or 'wps'). If the mode is 'offset', then the |
255
|
|
|
|
|
|
|
synsets should be WordNet offsets. If the mode is 'wps', then the synsets |
256
|
|
|
|
|
|
|
should be in word#pos#sense format. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Returns: a list of references to arrays. Each array has the form |
259
|
|
|
|
|
|
|
C<($path_length, $path_ref)> where $path_ref is |
260
|
|
|
|
|
|
|
a reference to an array whose elements are the synsets along the shortest |
261
|
|
|
|
|
|
|
path between the two input synsets. There will be as many array references |
262
|
|
|
|
|
|
|
returned as there are shortest paths between the synsets. That is, there |
263
|
|
|
|
|
|
|
will be no arrays returned if there is no path between the synsets, and there |
264
|
|
|
|
|
|
|
will be at least one array returned if there is a path between the synsets. |
265
|
|
|
|
|
|
|
If there are multiple paths tied for being shortest in length, then all |
266
|
|
|
|
|
|
|
those paths are returned (hence, this is why multiple array references |
267
|
|
|
|
|
|
|
can be returned). |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Upon error, returns undef, sets the error level to non-zero, and appends |
270
|
|
|
|
|
|
|
a message to the error string. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub getShortestPath |
275
|
|
|
|
|
|
|
{ |
276
|
|
|
|
|
|
|
my $self = shift; |
277
|
|
|
|
|
|
|
my $synset1 = shift; |
278
|
|
|
|
|
|
|
my $synset2 = shift; |
279
|
|
|
|
|
|
|
my $pos = shift; |
280
|
|
|
|
|
|
|
my $mode = shift; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
my $class = ref $self || $self; |
283
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# JM 2/9/04 - we do this in validateSynset() now |
286
|
|
|
|
|
|
|
#if ($mode eq 'wps') { |
287
|
|
|
|
|
|
|
# # this prevents problems when the two input words are different word |
288
|
|
|
|
|
|
|
# # senses from the same synset (e.g., car#n#1 and auto#n#1) |
289
|
|
|
|
|
|
|
# ($synset1) = $wn->querySense ($synset1, "syns"); |
290
|
|
|
|
|
|
|
# ($synset2) = $wn->querySense ($synset2, "syns"); |
291
|
|
|
|
|
|
|
#} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# check to see if any paths were found; if none were found, then |
296
|
|
|
|
|
|
|
# $paths[0] will be undefined |
297
|
|
|
|
|
|
|
unless (defined $paths[0]) { |
298
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
299
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getShortestPath()) - "; |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
my ($wps1, $wps2) = ($synset1, $synset2); |
302
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
303
|
|
|
|
|
|
|
$wps1 = $wn->getSense ($synset1, $pos); |
304
|
|
|
|
|
|
|
$wps2 = $wn->getSense ($synset2, $pos); |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
$self->{errorString} .= "No path between synsets $wps1 and $wps2 found."; |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
if ($self->{trace}) { |
309
|
|
|
|
|
|
|
$self->{traceString} .= "\nNo path between synsets "; |
310
|
|
|
|
|
|
|
$self->printSet ($pos, 'wps', $wps1); |
311
|
|
|
|
|
|
|
$self->{traceString} .= " and "; |
312
|
|
|
|
|
|
|
$self->printSet ($pos, 'wps', $wps2); |
313
|
|
|
|
|
|
|
$self->{traceString} .= " found."; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
return undef; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
my $best_length = $paths[0]->[1]; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my @return = ([$paths[0]->[1], $paths[0]->[2]]); |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
foreach (1..$#paths) { |
323
|
|
|
|
|
|
|
last if $paths[$_]->[1] > $best_length; |
324
|
|
|
|
|
|
|
push @return, [$paths[$_]->[1], $paths[$_]->[2]]; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
#my $length = $paths[0]->[1]; |
328
|
|
|
|
|
|
|
#my $path = $paths[0]->[2]; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
if ($self->{trace}) { |
331
|
|
|
|
|
|
|
for (@return) { |
332
|
|
|
|
|
|
|
$self->{traceString} .= "\nShortest path: "; |
333
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @{$_->[1]}); |
334
|
|
|
|
|
|
|
$self->{traceString} .= "\nPath length = " . $_->[0]; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
return @return; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item $measure->getAllPaths($synset1, $synset2, $pos, $mode) |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Given two input synsets, returns all the paths between the two synsets. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Parameters: a reference to the object, two synsets, a part-of-speech, and |
346
|
|
|
|
|
|
|
a mode indicator (the string 'offset' or 'wps'). |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
If the mode is 'offset', then the synsets should be WordNet offsets. If the |
349
|
|
|
|
|
|
|
mode is 'wps', then they should be strings in word#pos#sense format. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Returns: A list of all paths, sorted by path length in ascending order. The |
352
|
|
|
|
|
|
|
format for each item in the list is a reference to an array that has the |
353
|
|
|
|
|
|
|
format: [$top, $length, [@synsets_list]] where @synset_list is a list |
354
|
|
|
|
|
|
|
of synsets along the path (including the two input synsets) |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
Returns undef on error. |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub getAllPaths |
361
|
|
|
|
|
|
|
{ |
362
|
|
|
|
|
|
|
my $self = shift; |
363
|
|
|
|
|
|
|
my $class = ref $self || $self; |
364
|
|
|
|
|
|
|
my $synset1 = shift; |
365
|
|
|
|
|
|
|
my $synset2 = shift; |
366
|
|
|
|
|
|
|
my $pos = shift; |
367
|
|
|
|
|
|
|
my $mode = shift; |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
if (($mode ne 'offset') && ($mode ne 'wps')) { |
370
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
371
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; |
372
|
|
|
|
|
|
|
$self->{errorString} .= "Mode must be either 'offset' or 'wps'"; |
373
|
|
|
|
|
|
|
return undef; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
my @lTrees = $self->_getHypernymTrees ($synset1, $pos, $mode); |
377
|
|
|
|
|
|
|
my @rTrees = $self->_getHypernymTrees ($synset2, $pos, $mode); |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# [trace] |
380
|
|
|
|
|
|
|
if($self->{trace}) { |
381
|
|
|
|
|
|
|
foreach my $lTree (@lTrees) { |
382
|
|
|
|
|
|
|
$self->{traceString} .= "HyperTree: "; |
383
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @$lTree); |
384
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
foreach my $rTree (@rTrees) { |
387
|
|
|
|
|
|
|
$self->{traceString} .= "HyperTree: "; |
388
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @$rTree); |
389
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
# [/trace] |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# Find the length of each path in these trees. |
395
|
|
|
|
|
|
|
my @return; |
396
|
|
|
|
|
|
|
# my $root = $mode eq 'offset' |
397
|
|
|
|
|
|
|
# ? 0 |
398
|
|
|
|
|
|
|
# : ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
LTREE: |
401
|
|
|
|
|
|
|
foreach my $lTree (@lTrees) { |
402
|
|
|
|
|
|
|
RTREE: |
403
|
|
|
|
|
|
|
foreach my $rTree (@rTrees) { |
404
|
|
|
|
|
|
|
my $subsumer; |
405
|
|
|
|
|
|
|
$subsumer = $self->_getSubsumerFromTrees ($lTree, $rTree, $mode); |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
next RTREE unless defined $subsumer; |
408
|
|
|
|
|
|
|
#next RTREE if ($subsumer eq $root) and !$self->{rootNode}; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
my $lCount = 0; |
411
|
|
|
|
|
|
|
my @lpath; |
412
|
|
|
|
|
|
|
foreach my $offset (reverse @{$lTree}) { |
413
|
|
|
|
|
|
|
$lCount++; |
414
|
|
|
|
|
|
|
last if($offset eq $subsumer); |
415
|
|
|
|
|
|
|
push @lpath, $offset; |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
my $rCount = 0; |
418
|
|
|
|
|
|
|
my @rpath; |
419
|
|
|
|
|
|
|
foreach my $offset (reverse @{$rTree}) { |
420
|
|
|
|
|
|
|
$rCount++; |
421
|
|
|
|
|
|
|
last if($offset eq $subsumer); |
422
|
|
|
|
|
|
|
unshift @rpath, $offset; |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
my $path = [@lpath, $subsumer, @rpath]; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
push @return, [$subsumer, $rCount + $lCount - 1, $path]; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
return sort {$a->[1] <=> $b->[1]} @return; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item $measure->validateSynset($synset) |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
parameters: synset -- a string in word#pos#sense format |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
returns: a list or undef on error |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
This method overrides the method of the same name in WordNet::Similarity |
442
|
|
|
|
|
|
|
to provide additional behavior but calls WordNet::Similarity::validateSynset |
443
|
|
|
|
|
|
|
to accomplish that method's behavior. Thus, this method does everything |
444
|
|
|
|
|
|
|
that WordNet::Similarity::validateSynset does. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=over |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=item quote from WordNet::Similarity::validateSynset: |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
This method does the following: |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=over |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=item 1. |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Verifies that the synset is well-formed (i.e., that it consists of three |
457
|
|
|
|
|
|
|
parts separated by #s, the pos is one of {n, v, a, r} and that sense |
458
|
|
|
|
|
|
|
is a natural number). A synset that matches the pattern '[^\#]+\#[nvar]\#\d+' |
459
|
|
|
|
|
|
|
is considered well-formed. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item 2. |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Checks if the synset exists by trying to find the offset for the synset |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=back |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=back |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This method, however, has a slightly different return value. Instead of |
470
|
|
|
|
|
|
|
merely breaking the synset into three parts, it returns the "safe" form |
471
|
|
|
|
|
|
|
of the synset. That is, if a synset has multiple word senses, this |
472
|
|
|
|
|
|
|
method returns the first word sense in that synset (this is so that |
473
|
|
|
|
|
|
|
other path-finding methods work properly). For example, if the input |
474
|
|
|
|
|
|
|
to this method is auto#n#1, the return value is ('car', 'n', 1, 2853224) |
475
|
|
|
|
|
|
|
since the sense 'car#n#1' is the first member of the synset to which |
476
|
|
|
|
|
|
|
'auto#n#1' belongs. |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
If any of these tests fails, then the error level is set to non-zero, a |
479
|
|
|
|
|
|
|
message is appended to the error string, and undef is returned. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=cut |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
sub validateSynset |
484
|
|
|
|
|
|
|
{ |
485
|
|
|
|
|
|
|
my $self = shift; |
486
|
|
|
|
|
|
|
my $synset = shift; |
487
|
|
|
|
|
|
|
my ($word, $pos, $sense, $offset) = $self->SUPER::validateSynset ($synset); |
488
|
|
|
|
|
|
|
my $class = ref $self || $self; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# check to see if previous call encountered an error: |
491
|
|
|
|
|
|
|
return undef if $self->{error}; |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
my @synset = $self->{wn}->querySense ($synset, "syns"); |
494
|
|
|
|
|
|
|
my $safewps = shift @synset; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
unless (defined $safewps) { |
497
|
|
|
|
|
|
|
# safety check--we shouldn't ever get here. querySense shouldn't |
498
|
|
|
|
|
|
|
# return undef unless the input synset is bad, but we've already |
499
|
|
|
|
|
|
|
# checked that synset |
500
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
501
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
502
|
|
|
|
|
|
|
$self->{errorString} .= "No synset appears to exist for $synset."; |
503
|
|
|
|
|
|
|
return undef; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
unless ($safewps =~ /^([^\s\#]+)\#([nvar])\#(\d+)$/) { |
507
|
|
|
|
|
|
|
# we should never get here -- if QueryData doesn't return word senses |
508
|
|
|
|
|
|
|
# in the right format, then we're in a lot of trouble... nevertheless, |
509
|
|
|
|
|
|
|
# we check just to be sure |
510
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
511
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
512
|
|
|
|
|
|
|
$self->{errorString} .= "Internal error: $safewps is not well-formed. Has WordNet or WordNet::QueryData changed format?"; |
513
|
|
|
|
|
|
|
return undef; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
return ($1, $2, $3, $offset); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=back |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head3 Private methods |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=over |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item $measure->_getHypernymTrees($synset, $pos, $mode) |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
This method takes as input a synset and returns a list of references |
529
|
|
|
|
|
|
|
to arrays where these arrays are paths from the input synset to the |
530
|
|
|
|
|
|
|
top of the taxonomy (*Root*#[nv]#1 if the root node is on). |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Parameters: a synset, a part-of-speech, and a mode. |
533
|
|
|
|
|
|
|
The mode must be either the string 'wps' or 'offset'. If |
534
|
|
|
|
|
|
|
the mode is 'wps', then the synset must be in wps format; otherwise, it |
535
|
|
|
|
|
|
|
must be an offset. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Returns: a list of references to arrays. These arrays are paths (hypernym |
538
|
|
|
|
|
|
|
trees). |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=cut |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# Suroutine that returns an array of hypernym trees, given the offset of |
543
|
|
|
|
|
|
|
# the synset. Each hypernym tree is an array of offsets. |
544
|
|
|
|
|
|
|
# INPUT PARAMS : $offset .. Offset of the synset. |
545
|
|
|
|
|
|
|
# : $pos .. Part of speech. |
546
|
|
|
|
|
|
|
# RETURN VALUES : (@tree1, @tree2, ...) .. an array of Hypernym trees (offsets) |
547
|
|
|
|
|
|
|
sub _getHypernymTrees |
548
|
|
|
|
|
|
|
{ |
549
|
|
|
|
|
|
|
my $self = shift; |
550
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
551
|
|
|
|
|
|
|
my $synset = shift; |
552
|
|
|
|
|
|
|
my $pos = shift; |
553
|
|
|
|
|
|
|
my $mode = shift; |
554
|
|
|
|
|
|
|
my $curPath = shift; |
555
|
|
|
|
|
|
|
$curPath = {} if(!defined($curPath)); |
556
|
|
|
|
|
|
|
$curPath->{$synset} = 1; |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
my $wordForm = $synset; |
560
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
561
|
|
|
|
|
|
|
# check if the input synset is one of the imaginary root nodes |
562
|
|
|
|
|
|
|
if ($synset == 0) { |
563
|
|
|
|
|
|
|
return ([0]); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
$wordForm = $wn->getSense($synset, $pos); |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
else { |
568
|
|
|
|
|
|
|
# check for root node |
569
|
|
|
|
|
|
|
if ($synset =~ /\*ROOT\*/i) { |
570
|
|
|
|
|
|
|
return ([$synset]); |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
my @hypernyms = $wn->querySense($wordForm, "hypes"); |
575
|
|
|
|
|
|
|
my @returnArray = (); |
576
|
|
|
|
|
|
|
if($#hypernyms < 0) { |
577
|
|
|
|
|
|
|
my @tmpArray = $synset; |
578
|
|
|
|
|
|
|
if ($self->{rootNode}) { |
579
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
580
|
|
|
|
|
|
|
unshift @tmpArray, 0; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
else { |
583
|
|
|
|
|
|
|
unshift @tmpArray, ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
push @returnArray, [@tmpArray]; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
else { |
589
|
|
|
|
|
|
|
foreach my $hypernym (@hypernyms) { |
590
|
|
|
|
|
|
|
my $hypesynset = $mode eq 'offset' ? $wn->offset ($hypernym) : $hypernym; |
591
|
|
|
|
|
|
|
if(!defined($curPath->{$hypesynset})) |
592
|
|
|
|
|
|
|
{ |
593
|
|
|
|
|
|
|
my %localCopy = %{$curPath}; |
594
|
|
|
|
|
|
|
my @tmpArray = $self->_getHypernymTrees ($hypesynset, $pos, $mode, \%localCopy); |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
foreach my $element (@tmpArray) { |
597
|
|
|
|
|
|
|
push @$element, $synset; |
598
|
|
|
|
|
|
|
push @returnArray, [@$element]; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
if(scalar(@returnArray) <= 0) { |
602
|
|
|
|
|
|
|
my @tmpArray = $synset; |
603
|
|
|
|
|
|
|
if ($self->{rootNode}) { |
604
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
605
|
|
|
|
|
|
|
unshift @tmpArray, 0; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
else { |
608
|
|
|
|
|
|
|
unshift @tmpArray, ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
push @returnArray, [@tmpArray]; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
return @returnArray; |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=item getLCSbyPath($synset1, $synset2, $pos, $mode) |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Given two input synsets, finds the least common subsumer (LCS) of them. |
621
|
|
|
|
|
|
|
If there are multiple candidates for the LCS (due to multiple inheritance), |
622
|
|
|
|
|
|
|
the LCS that results in the shortest path between in input concepts is |
623
|
|
|
|
|
|
|
chosen. |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Parameters: two synsets, a part of speech, and a mode. |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
Returns: a list of references to arrays where each array has the from |
628
|
|
|
|
|
|
|
C<($lcs, $pathlength)>. $pathlength is the length |
629
|
|
|
|
|
|
|
of the path between the two input concepts. There can be multiple LCSs |
630
|
|
|
|
|
|
|
returned if there are ties for the shortest path between the two synsets. |
631
|
|
|
|
|
|
|
Returns undef on error. |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=cut |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub getLCSbyPath |
636
|
|
|
|
|
|
|
{ |
637
|
|
|
|
|
|
|
my $self = shift; |
638
|
|
|
|
|
|
|
my $synset1 = shift; |
639
|
|
|
|
|
|
|
my $synset2 = shift; |
640
|
|
|
|
|
|
|
my $pos = shift; |
641
|
|
|
|
|
|
|
my $mode = shift; |
642
|
|
|
|
|
|
|
my $class = ref $self || $self; |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
# if no paths were found, $paths[0] should be undefined |
647
|
|
|
|
|
|
|
unless (defined $paths[0]) { |
648
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
649
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getLCSbyPath()) - "; |
650
|
|
|
|
|
|
|
$self->{errorString} .= "No LCS found."; |
651
|
|
|
|
|
|
|
return undef; |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
if ($self->{trace}) { |
655
|
|
|
|
|
|
|
$self->{traceString} .= "Lowest Common Subsumer(s): "; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
my @return; |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# put the best LCS(s) into @return; do some tracing at the same time. |
661
|
|
|
|
|
|
|
foreach my $pathref (@paths) { |
662
|
|
|
|
|
|
|
if ($self->{trace}) { |
663
|
|
|
|
|
|
|
# print path to trace string |
664
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, $pathref->[0]); |
665
|
|
|
|
|
|
|
$self->{traceString} .= " (Length=".$pathref->[1].")\n"; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# push onto return array if this path length is tied for best |
669
|
|
|
|
|
|
|
if ($pathref->[1] <= $paths[0]->[1]) { |
670
|
|
|
|
|
|
|
push @return, [$pathref->[0], $pathref->[1]]; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
if ($self->{trace}) { |
675
|
|
|
|
|
|
|
$self->{traceString} .= "\n\n"; |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
return @return; |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $measure->_getSubsumerFromTrees($treeref1, $treeref2, $mode) |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
This subroutine returns takes two trees as produced by getHypernymTrees |
685
|
|
|
|
|
|
|
and returns the most specific subsumer from them. |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Parameters: two references to arrays, and |
688
|
|
|
|
|
|
|
a string indicating mode ('wps' or 'offset'). |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
Returns: the subsumer or undef |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=cut |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub _getSubsumerFromTrees |
695
|
|
|
|
|
|
|
{ |
696
|
|
|
|
|
|
|
my $self = shift; |
697
|
|
|
|
|
|
|
my $array1 = shift; |
698
|
|
|
|
|
|
|
my $array2 = shift; |
699
|
|
|
|
|
|
|
my $mode = shift; |
700
|
|
|
|
|
|
|
my @tree1 = reverse @{$array1}; |
701
|
|
|
|
|
|
|
my @tree2 = reverse @{$array2}; |
702
|
|
|
|
|
|
|
my $class = ref $self || $self; |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
my $tmpString = " " . join (" ", @tree1) . " "; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
foreach my $element (@tree2) { |
707
|
|
|
|
|
|
|
my $pattern = ($mode eq 'offset') ? qr/ 0*$element / : qr/ \Q$element\E /; |
708
|
|
|
|
|
|
|
if ($tmpString =~ /$pattern/) { |
709
|
|
|
|
|
|
|
return $element; |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# no common subsumer found, check to see if we are using a root node |
714
|
|
|
|
|
|
|
return undef unless $self->{rootNode}; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
717
|
|
|
|
|
|
|
$self->{error} .= "\nWarning (${class}::getSubsumerFromTrees()) - "; |
718
|
|
|
|
|
|
|
$self->{errorString} .= "root node 'on' but no subsumer found."; |
719
|
|
|
|
|
|
|
return undef; |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=item getDepth() |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
This method is non-functional and likely to be moved to a different module |
725
|
|
|
|
|
|
|
soon. |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=cut |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
sub getDepth |
730
|
|
|
|
|
|
|
{ |
731
|
|
|
|
|
|
|
use Carp; |
732
|
|
|
|
|
|
|
croak "This method is non-functional"; |
733
|
|
|
|
|
|
|
my $self = shift; |
734
|
|
|
|
|
|
|
my $synset = shift; |
735
|
|
|
|
|
|
|
my $pos = shift; |
736
|
|
|
|
|
|
|
my $mode = shift; |
737
|
|
|
|
|
|
|
my $class = ref $self || $self; |
738
|
|
|
|
|
|
|
my $offset; |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
741
|
|
|
|
|
|
|
$offset = $synset; |
742
|
|
|
|
|
|
|
return 1 if $offset == 0; |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
elsif ($mode eq 'wps') { |
745
|
|
|
|
|
|
|
$offset = $self->{wn}->offset ($synset); |
746
|
|
|
|
|
|
|
return 1 if $synset =~ /^\*Root\*/i; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
else { |
749
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
750
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; |
751
|
|
|
|
|
|
|
$self->{errorString} .= "Mode must be either 'offset' or 'wps'"; |
752
|
|
|
|
|
|
|
return undef; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
my $depth = $self->{depths}->{$pos}->{$offset}; |
756
|
|
|
|
|
|
|
defined $depth and return $depth; |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
759
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getDepth) - "; |
760
|
|
|
|
|
|
|
$self->{errorString} .= "$synset appears to have undefined depth."; |
761
|
|
|
|
|
|
|
return undef; |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
1; |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
__END__ |