line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# WordNet::Similarity.pm version 2.05 |
2
|
|
|
|
|
|
|
# (Last updated $Id: Similarity.pm,v 1.50 2008/05/30 10:16:38 sidz1979 Exp $) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Module containing the version information and pod |
5
|
|
|
|
|
|
|
# for the WordNet::Similarity package, and all measures are |
6
|
|
|
|
|
|
|
# derived from this class. |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright (c) 2005, |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# Ted Pedersen, University of Minnesota Duluth |
11
|
|
|
|
|
|
|
# tpederse at d.umn.edu |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
# Siddharth Patwardhan, University of Utah, Salt Lake City |
14
|
|
|
|
|
|
|
# sidd at cs.utah.edu |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# Jason Michelizzi, Univeristy of Minnesota Duluth |
17
|
|
|
|
|
|
|
# mich0212 at d.umn.edu |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
# Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh |
20
|
|
|
|
|
|
|
# banerjee+ at cs.cmu.edu |
21
|
|
|
|
|
|
|
# |
22
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
23
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
24
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
25
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
26
|
|
|
|
|
|
|
# |
27
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
28
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
29
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30
|
|
|
|
|
|
|
# GNU General Public License for more details. |
31
|
|
|
|
|
|
|
# |
32
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
33
|
|
|
|
|
|
|
# along with this program; if not, write to |
34
|
|
|
|
|
|
|
# |
35
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
36
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
37
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
38
|
|
|
|
|
|
|
# |
39
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
package WordNet::Similarity; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 NAME |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
WordNet::Similarity - Perl modules for computing measures of semantic |
46
|
|
|
|
|
|
|
relatedness. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 SYNOPSIS |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Basic Usage Example |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
use WordNet::QueryData; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
use WordNet::Similarity::path; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
my $wn = WordNet::QueryData->new; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $measure = WordNet::Similarity::path->new ($wn); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
my $value = $measure->getRelatedness("car#n#1", "bus#n#2"); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my ($error, $errorString) = $measure->getError(); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
die $errorString if $error; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
print "car (sense 1) <-> bus (sense 2) = $value\n"; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 Using a configuration file to initialize the measure |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
use WordNet::Similarity::path; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
my $sim = WordNet::Similarity::path->new($wn, "mypath.cfg"); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
my $value = $sim->getRelatedness("dog#n#1", "cat#n#1"); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
($error, $errorString) = $sim->getError(); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
die $errorString if $error; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
print "dog (sense 1) <-> cat (sense 1) = $value\n"; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 Printing traces |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
print "Trace String -> ".($sim->getTraceString())."\n"; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 DESCRIPTION |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 Introduction |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
We observe that humans find it extremely easy to say if two words are |
91
|
|
|
|
|
|
|
related and if one word is more related to a given word than another. For |
92
|
|
|
|
|
|
|
example, if we come across two words, 'car' and 'bicycle', we know they |
93
|
|
|
|
|
|
|
are related as both are means of transport. Also, we easily observe that |
94
|
|
|
|
|
|
|
'bicycle' is more related to 'car' than 'fork' is. But is there some way to |
95
|
|
|
|
|
|
|
assign a quantitative value to this relatedness? Some ideas have been put |
96
|
|
|
|
|
|
|
forth by researchers to quantify the concept of relatedness of words, with |
97
|
|
|
|
|
|
|
encouraging results. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Eight of these different measures of relatedness have been implemented in |
100
|
|
|
|
|
|
|
this software package. A simple edge counting measure and a random measure |
101
|
|
|
|
|
|
|
have also been provided. These measures rely heavily on the vast store of |
102
|
|
|
|
|
|
|
knowledge available in the online electronic dictionary -- WordNet. So, we |
103
|
|
|
|
|
|
|
use a Perl interface for WordNet called WordNet::QueryData to make it |
104
|
|
|
|
|
|
|
easier for us to access WordNet. The modules in this package REQUIRE that |
105
|
|
|
|
|
|
|
the WordNet::QueryData module be installed on the system before these |
106
|
|
|
|
|
|
|
modules are installed. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 Function |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
The following function is defined: |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=over |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
19
|
|
|
19
|
|
24232
|
use strict; |
|
19
|
|
|
|
|
37
|
|
|
19
|
|
|
|
|
793
|
|
117
|
19
|
|
|
19
|
|
106
|
use Carp; |
|
19
|
|
|
|
|
149
|
|
|
19
|
|
|
|
|
2445
|
|
118
|
19
|
|
|
19
|
|
113
|
use Exporter; |
|
19
|
|
|
|
|
43
|
|
|
19
|
|
|
|
|
917
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# please use these, but remember that constants are not interpolated: |
121
|
|
|
|
|
|
|
# print "Root: ROOT\n"; # wrong! |
122
|
|
|
|
|
|
|
# print "Root: ".ROOT."\n"; # right |
123
|
|
|
|
|
|
|
# m/ROOT/; # wrong! |
124
|
|
|
|
|
|
|
# $pattern = ROOT; m/$pattern/; # okay |
125
|
19
|
|
|
19
|
|
118
|
use constant ROOT => "*Root*"; |
|
19
|
|
|
|
|
63
|
|
|
19
|
|
|
|
|
1777
|
|
126
|
19
|
|
|
19
|
|
170
|
use constant ROOT_N => "*Root*#n#1"; |
|
19
|
|
|
|
|
50
|
|
|
19
|
|
|
|
|
1247
|
|
127
|
19
|
|
|
19
|
|
159
|
use constant ROOT_V => "*Root*#v#1"; |
|
19
|
|
|
|
|
50
|
|
|
19
|
|
|
|
|
1036
|
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# JM 12/9/03 |
130
|
|
|
|
|
|
|
# we would like this to be numeric |
131
|
19
|
|
|
19
|
|
98
|
use constant UNRELATED => -1_000_000; |
|
19
|
|
|
|
|
48
|
|
|
19
|
|
|
|
|
955
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# if we are using an unlimited cache size, it's easier to fake an |
134
|
|
|
|
|
|
|
# unlimited cache with a really big value. |
135
|
19
|
|
|
19
|
|
91
|
use constant UNLIMITED_CACHE => 2_147_483_647; |
|
19
|
|
|
|
|
36
|
|
|
19
|
|
|
|
|
2361
|
|
136
|
|
|
|
|
|
|
|
137
|
19
|
|
|
19
|
|
109
|
use constant DEFAULT_CACHE => 5_000; |
|
19
|
|
|
|
|
38
|
|
|
19
|
|
|
|
|
927
|
|
138
|
|
|
|
|
|
|
|
139
|
19
|
|
|
19
|
|
12427
|
use WordNet::Tools; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
%EXPORT_TAGS = (); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
@EXPORT_OK = (); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
@EXPORT = (); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
$VERSION = '2.05'; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# a hash to contain the module-specific configuration options. |
154
|
|
|
|
|
|
|
our %config_options; |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=item addConfigOption ($name, $required, $type, $default_val) |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Adds the configuration option, $name, to the list of known config |
159
|
|
|
|
|
|
|
options (cf. configure()). If $required is true, then the option |
160
|
|
|
|
|
|
|
requires a value; otherwise, the value is optional, and the default |
161
|
|
|
|
|
|
|
value $default_val is used if a value is not specified in the config |
162
|
|
|
|
|
|
|
file. $type is the type of value the option takes. It can be |
163
|
|
|
|
|
|
|
'i' for integer, 'f' for floating-point, 's' for string, or 'p' for |
164
|
|
|
|
|
|
|
a file name. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
returns: nothing, but will C on error. You can put the call to |
167
|
|
|
|
|
|
|
this function in an C block to trap the exception (N.B., the |
168
|
|
|
|
|
|
|
C form of C does not significantly degrade performance, |
169
|
|
|
|
|
|
|
unlike the C form of C. See C). |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub addConfigOption |
174
|
|
|
|
|
|
|
{ |
175
|
|
|
|
|
|
|
my $name = shift; |
176
|
|
|
|
|
|
|
my $required = shift; |
177
|
|
|
|
|
|
|
my $type = shift; |
178
|
|
|
|
|
|
|
my $default = shift; |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
my ($package, $filename, $line) = caller; |
181
|
|
|
|
|
|
|
if ($package eq 'vector') { |
182
|
|
|
|
|
|
|
print "vector\n" |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
$config_options{$name}->{$package} = [($required ? 1 : 0), $type, $default]; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=back |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 Methods |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
The following methods are defined in this package: |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head3 Public methods |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=over |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item $obj->new ($wn, $config_file) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
The constructor for WordNet::Similarity::* objects. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Parameters: $wn is a WordNet::QueryData object, $config_file is a |
203
|
|
|
|
|
|
|
configuration file (optional). |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Return value: the new blessed object |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub new |
210
|
|
|
|
|
|
|
{ |
211
|
|
|
|
|
|
|
my $class = shift; |
212
|
|
|
|
|
|
|
my $this = {}; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
$class = ref $class || $class; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
$this->{errorString} = ''; |
217
|
|
|
|
|
|
|
$this->{error} = 0; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
if ($class eq 'WordNet::Similarity') { |
220
|
|
|
|
|
|
|
$this->{errorString} .= "\nWarning (${class}::new()) - "; |
221
|
|
|
|
|
|
|
$this->{errorString} .= "This class is intended to be an abstract base class for a measure. Your class should override it."; |
222
|
|
|
|
|
|
|
$this->{error} = 1; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
$this->{wn} = shift; |
226
|
|
|
|
|
|
|
unless (defined $this->{wn}) { |
227
|
|
|
|
|
|
|
$this->{errorString} .= "\nError (${class}::new()) - "; |
228
|
|
|
|
|
|
|
$this->{errorString} .= "A WordNet::QueryData object is required."; |
229
|
|
|
|
|
|
|
$this->{error} = 2; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
else { |
232
|
|
|
|
|
|
|
# queryWord() in older versions of WordNet::QueryData was broken |
233
|
|
|
|
|
|
|
$this->{wn}->VERSION (1.30); |
234
|
|
|
|
|
|
|
my $wntools = WordNet::Tools->new($this->{wn}); |
235
|
|
|
|
|
|
|
unless (defined $wntools) { |
236
|
|
|
|
|
|
|
$this->{errorString} .= "\nError (${class}::new()) - "; |
237
|
|
|
|
|
|
|
$this->{errorString} .= "Error creating WordNet::Tools object."; |
238
|
|
|
|
|
|
|
$this->{error} = 2; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
$this->{wntools} = $wntools; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
bless $this, $class; |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
$this->initialize (shift) if $this->{error} < 2; |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
$this->setPosList(); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# [trace] |
250
|
|
|
|
|
|
|
if ($this->{trace}) { |
251
|
|
|
|
|
|
|
$this->{traceString} = "${class} object created:\n"; |
252
|
|
|
|
|
|
|
$this->{traceString} .= "trace :: $this->{trace}\n"; |
253
|
|
|
|
|
|
|
$this->{traceString} .= "cache :: $this->{doCache}\n"; |
254
|
|
|
|
|
|
|
$this->{traceString} .= "cache size :: $this->{maxCacheSize}\n"; |
255
|
|
|
|
|
|
|
$this->traceOptions (); |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
# [/trace] |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
return $this; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=item $obj->initialize ($config_file) |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Performs some initialization on the module. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Parameter: the location of a configuration file |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Returns: nothing |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=cut |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
sub initialize |
274
|
|
|
|
|
|
|
{ |
275
|
|
|
|
|
|
|
my $self = shift; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# initialize cache--caching is ON by default |
278
|
|
|
|
|
|
|
$self->{doCache} = 1; |
279
|
|
|
|
|
|
|
$self->{simCache} = (); |
280
|
|
|
|
|
|
|
$self->{traceCache} = (); |
281
|
|
|
|
|
|
|
$self->{cacheQ} = (); |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
# (JM - 11/26/03) |
284
|
|
|
|
|
|
|
# Using unlimited cache can cause simCache and esp. traceCache |
285
|
|
|
|
|
|
|
# to use huge amounts of memory if a lot of queries are performed. |
286
|
|
|
|
|
|
|
# $self->{maxCacheSize} = UNLIMITED_CACHE; |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
$self->{maxCacheSize} = DEFAULT_CACHE; |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# initialize tracing--tracing is OFF by default |
291
|
|
|
|
|
|
|
$self->{trace} = 0; |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# JM 1/26/04 |
294
|
|
|
|
|
|
|
# moved option for root node to PathFinder.pm |
295
|
|
|
|
|
|
|
# |
296
|
|
|
|
|
|
|
# use a virtual root node (if applicable) |
297
|
|
|
|
|
|
|
# six of the measures (res, lin, jcn, path, wup, lch) use a virtual |
298
|
|
|
|
|
|
|
# root node in some way, and it is present by default in these cases. |
299
|
|
|
|
|
|
|
# Three of the measures--path, wup, and lch--allow this root node to be |
300
|
|
|
|
|
|
|
# turned off (i.e., the measure would be run without a root node). |
301
|
|
|
|
|
|
|
# $self->{rootNode} = 1; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
return $self->configure (@_); |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=item $obj->configure($config_file) |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
Parses a configuration file. |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
If you write a module and want to add a new configuration option, you |
312
|
|
|
|
|
|
|
can use the addConfigOption function to specify the name and nature |
313
|
|
|
|
|
|
|
of the option. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
The value of the option is place in "self": $self->{optionname}. |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
parameter: a file name |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
returns: true if parsing of config file was successful, false on error |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=cut |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub configure |
324
|
|
|
|
|
|
|
{ |
325
|
|
|
|
|
|
|
my $self = shift; |
326
|
|
|
|
|
|
|
my $file = shift; |
327
|
|
|
|
|
|
|
my $class = ref $self || $self; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
while (my ($opt, $classHash) = each %config_options) { |
330
|
|
|
|
|
|
|
while (my ($class, $arrayRef) = each %$classHash) { |
331
|
|
|
|
|
|
|
if ($self->isa ($class)) { |
332
|
|
|
|
|
|
|
$self->{$opt} = $arrayRef->[2]; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
return unless defined $file; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# my %options = %config_options; |
339
|
|
|
|
|
|
|
# foreach my $optionstr (@options) { |
340
|
|
|
|
|
|
|
# next unless $optionstr; |
341
|
|
|
|
|
|
|
# my $patternstr = '^(\w+)([=:])([ifps])$'; |
342
|
|
|
|
|
|
|
# unless ($optionstr =~ m/$patternstr/o) { |
343
|
|
|
|
|
|
|
# $self->{errorString} .= "\nWarning (${class}::configure) - "; |
344
|
|
|
|
|
|
|
# $self->{errorString} .= "Bad option string $optionstr: option strings"; |
345
|
|
|
|
|
|
|
# $self->{errorString} .= " must match the pattern ${patternstr}."; |
346
|
|
|
|
|
|
|
# $self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
347
|
|
|
|
|
|
|
# } |
348
|
|
|
|
|
|
|
# $options{$1} = [$2, $3]; |
349
|
|
|
|
|
|
|
# } |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
unless (open CF, $file) { |
352
|
|
|
|
|
|
|
my $class = ref $self || $self; |
353
|
|
|
|
|
|
|
$self->{errorString} .= "\nError (${class}::configure) - "; |
354
|
|
|
|
|
|
|
$self->{errorString} .= "Unable to open config file $file."; |
355
|
|
|
|
|
|
|
$self->{error} = 2; |
356
|
|
|
|
|
|
|
return undef; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
$_ = ; |
360
|
|
|
|
|
|
|
unless (m/^$class/) { |
361
|
|
|
|
|
|
|
close CF; |
362
|
|
|
|
|
|
|
my $class = ref $self || $self; |
363
|
|
|
|
|
|
|
$self->{errorString} .= "\nError (${class}::configure()) - "; |
364
|
|
|
|
|
|
|
$self->{errorString} .= "$file does not appear to be a config file for $class."; |
365
|
|
|
|
|
|
|
$self->{error} = 2; |
366
|
|
|
|
|
|
|
return undef; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# keep track of which options we've already seen |
370
|
|
|
|
|
|
|
my %optionCache; |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
my %rtn; |
373
|
|
|
|
|
|
|
OPTION_READ: |
374
|
|
|
|
|
|
|
while () { |
375
|
|
|
|
|
|
|
s/\s+|\#.*//g; # ignore comments |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
378
|
|
|
|
|
|
|
# edited the below to |
379
|
|
|
|
|
|
|
# (1) ensure the values for options are valid |
380
|
|
|
|
|
|
|
# (2) handle options without values in a consistent manner |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
if ($_ eq "") { |
383
|
|
|
|
|
|
|
next; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# JM 1/9/04 (#1) |
387
|
|
|
|
|
|
|
# added the following block to check for repeated options |
388
|
|
|
|
|
|
|
my ($option, $value) = m/^(\w+)::(.*)$/; |
389
|
|
|
|
|
|
|
if ($option) { |
390
|
|
|
|
|
|
|
unless (defined $optionCache{$option}) { |
391
|
|
|
|
|
|
|
$optionCache{$option} = defined $value ? $value : 1; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
else { |
394
|
|
|
|
|
|
|
# we've already seen this option |
395
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
396
|
|
|
|
|
|
|
$self->{errorString} .= "configuration option '$option' encountered twice in config file"; |
397
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
if (m/^trace::(.*)/i) { |
402
|
|
|
|
|
|
|
my $trace = $1; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
405
|
|
|
|
|
|
|
# $self->{trace} = 1; |
406
|
|
|
|
|
|
|
# $self->{trace} = $trace if $trace =~ m/^[012]$/; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
next OPTION_READ if $trace eq ""; |
409
|
|
|
|
|
|
|
if ($trace =~ m/^[012]$/) { |
410
|
|
|
|
|
|
|
$self->{trace} = $trace; |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
else { |
413
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
414
|
|
|
|
|
|
|
$self->{errorString} .= "$trace is an invalid value for option trace."; |
415
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
elsif (m/^cache::(.*)/i) { |
419
|
|
|
|
|
|
|
my $cache = $1; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
422
|
|
|
|
|
|
|
# $self->{doCache} = 1; |
423
|
|
|
|
|
|
|
# $self->{doCache} = $cache if $cache =~ m/^[01]$/; |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
next OPTION_READ if $cache eq ""; |
426
|
|
|
|
|
|
|
if ($cache =~ m/^[01]$/) { |
427
|
|
|
|
|
|
|
$self->{doCache} = $cache; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
else { |
430
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
431
|
|
|
|
|
|
|
$self->{errorString} .= "$cache is an invalid value for option cache."; |
432
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
elsif (m/^maxCacheSize::(.*)/i) { |
436
|
|
|
|
|
|
|
my $mcs = $1; |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
439
|
|
|
|
|
|
|
# $self->{maxCacheSize} = DEFAULT_CACHE; |
440
|
|
|
|
|
|
|
# if ($mcs =~ /unlimited/i) { |
441
|
|
|
|
|
|
|
# $self->{maxCacheSize} = UNLIMITED_CACHE; |
442
|
|
|
|
|
|
|
# next; |
443
|
|
|
|
|
|
|
# } |
444
|
|
|
|
|
|
|
# $self->{maxCacheSize} = $mcs if defined ($mcs) and $mcs =~ m/^\d+$/; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
next OPTION_READ if $mcs eq ""; |
447
|
|
|
|
|
|
|
if ($mcs =~ m/^unlimited/i) { |
448
|
|
|
|
|
|
|
$self->{maxCacheSize} = UNLIMITED_CACHE; |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
elsif ($mcs =~ m/^\d+$/) { |
451
|
|
|
|
|
|
|
$self->{maxCacheSize} = $mcs + 0; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
else { |
454
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
455
|
|
|
|
|
|
|
$self->{errorString} .= "$mcs is an invalid value for option maxCacheSize."; |
456
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
# JM 1/26/04 |
460
|
|
|
|
|
|
|
# moved code for the rootNode option to PathFinder.pm |
461
|
|
|
|
|
|
|
else { |
462
|
|
|
|
|
|
|
OPTION_LOOP: |
463
|
|
|
|
|
|
|
foreach my $option (keys %config_options) { |
464
|
|
|
|
|
|
|
my $found = 0; |
465
|
|
|
|
|
|
|
CLASS_LOOP: |
466
|
|
|
|
|
|
|
foreach my $class (keys %{$config_options{$option}}) { |
467
|
|
|
|
|
|
|
if ($self->isa ($class) |
468
|
|
|
|
|
|
|
and defined $config_options{$option}->{$class}) { |
469
|
|
|
|
|
|
|
$found = $class; |
470
|
|
|
|
|
|
|
last CLASS_LOOP; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
next OPTION_LOOP unless $found; |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
if (not defined $config_options{$option}->{$found}) { |
476
|
|
|
|
|
|
|
print STDERR "$option $class\n"; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
my ($required, $type, $dflt)= @{$config_options{$option}->{$found}}; |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
if (m/^${option}::(.*)$/i) { |
482
|
|
|
|
|
|
|
my $t = $1; |
483
|
|
|
|
|
|
|
if ($t =~ m/^\s*$/) { |
484
|
|
|
|
|
|
|
if ($required) { |
485
|
|
|
|
|
|
|
#error |
486
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
487
|
|
|
|
|
|
|
$self->{errorString} .= "Option $option has no value."; |
488
|
|
|
|
|
|
|
$self->{error} .= $self->{error} > 1 ? $self->{error} : 1; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
else { |
491
|
|
|
|
|
|
|
# do nothing |
492
|
|
|
|
|
|
|
$self->{$option} = $dflt |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
else { |
496
|
|
|
|
|
|
|
if ($type eq 'i') { |
497
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
498
|
|
|
|
|
|
|
# $self->{$option} = int ($t); |
499
|
|
|
|
|
|
|
if ($t =~ m/^\d+$/) { |
500
|
|
|
|
|
|
|
$self->{$option} = $t + 0; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
else { |
503
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
504
|
|
|
|
|
|
|
$self->{errorString} .= |
505
|
|
|
|
|
|
|
"$t is an invalid value for option $option."; |
506
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
elsif ($type eq 'f') { |
510
|
|
|
|
|
|
|
# JM 12/4/03 (#3) |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# check if this is a float |
513
|
|
|
|
|
|
|
if ($t =~ /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/) { |
514
|
|
|
|
|
|
|
$self->{$option} = $t + 0.0; |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
else { |
517
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
518
|
|
|
|
|
|
|
$self->{errorString} .= |
519
|
|
|
|
|
|
|
"$t is an invalid value for option $option."; |
520
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
elsif ($type eq 'p') { |
524
|
|
|
|
|
|
|
if (-e $t) { |
525
|
|
|
|
|
|
|
$self->{$option} = $t; |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
else { |
528
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
529
|
|
|
|
|
|
|
$self->{errorString} .= |
530
|
|
|
|
|
|
|
"$t is not a valid filename for option $option."; |
531
|
|
|
|
|
|
|
$self->{error} = ($self->{error} > 1) ? $self->{error} : 1; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
elsif ($type eq 's') { |
535
|
|
|
|
|
|
|
$self->{$option} = $t; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
else { |
538
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::configure()) - "; |
539
|
|
|
|
|
|
|
$self->{errorString} .= |
540
|
|
|
|
|
|
|
"Unknown/invalid option type $type.\n"; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
next OPTION_READ; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
# error |
547
|
|
|
|
|
|
|
s/::.*//; |
548
|
|
|
|
|
|
|
my $class = ref $self || $self; |
549
|
|
|
|
|
|
|
$self->{errorString} .="\nWarning (${class}::configure()) - "; |
550
|
|
|
|
|
|
|
$self->{errorString} .= "Ignoring unrecognized option '$_'."; |
551
|
|
|
|
|
|
|
$self->{error} = $self->{error} > 1 ? $self->{error} : 1; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
close CF; |
555
|
|
|
|
|
|
|
return 1; |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=item $obj->getTraceString(Z<>) |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
Returns the current trace string and resets the trace string to empty. If |
561
|
|
|
|
|
|
|
tracing is turned off, then an empty string will always be returned. |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=cut |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
sub getTraceString { |
566
|
|
|
|
|
|
|
my $self = shift; |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
return '' unless $self->{trace} and defined $self->{traceString}; |
569
|
|
|
|
|
|
|
my $str = $self->{traceString}; |
570
|
|
|
|
|
|
|
$self->{traceString} = ''; |
571
|
|
|
|
|
|
|
$str =~ s/\n{2,}$//; |
572
|
|
|
|
|
|
|
return $str; |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=item $obj->getError(Z<>) |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Checks to see if any errors have occurred. |
578
|
|
|
|
|
|
|
Returns a list of the form S<($level, $string)>. If $level is 0, then |
579
|
|
|
|
|
|
|
no errors have occurred; if $level is non-zero, then an error has occurred. |
580
|
|
|
|
|
|
|
A value of 1 is considered a warning, and a value of 2 is considered an |
581
|
|
|
|
|
|
|
error. If $level is non-zero, then $string will have a (hopefully) |
582
|
|
|
|
|
|
|
meaningful error message. |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=cut |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
sub getError { |
587
|
|
|
|
|
|
|
my $self = shift; |
588
|
|
|
|
|
|
|
my $error = $self->{error}; |
589
|
|
|
|
|
|
|
my $errorString = $self->{errorString}; |
590
|
|
|
|
|
|
|
$self->{error} = 0; |
591
|
|
|
|
|
|
|
$self->{errorString} = ""; |
592
|
|
|
|
|
|
|
$errorString =~ s/^[\r\n\t ]+//; |
593
|
|
|
|
|
|
|
return ($error, $errorString); |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
=item $obj->traceOptions(Z<>) |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
Prints module-specific options to the trace string. Any module that |
600
|
|
|
|
|
|
|
adds configuration options via addConfigOption should override this |
601
|
|
|
|
|
|
|
method. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
Options should be printed out using the following format: |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
$self->{traceString} .= "option_name :: $option_value\n" |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Note that the option name is separated from its current value by a |
608
|
|
|
|
|
|
|
space, two colons, and another space. The string should be terminated |
609
|
|
|
|
|
|
|
by a newline. |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
Since multiple modules may be overriding this method, any module |
612
|
|
|
|
|
|
|
that overrides this method should insure that the superclass' |
613
|
|
|
|
|
|
|
method gets called as well. You do this by putting this line at |
614
|
|
|
|
|
|
|
the end of your method: |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
$self->SUPER::traceOptions(); |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
returns: nothing |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=cut |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# JM 12/5/03 (#1) |
623
|
|
|
|
|
|
|
sub traceOptions { |
624
|
|
|
|
|
|
|
# nothing to do here, this is a just a placeholder |
625
|
|
|
|
|
|
|
# subclasses should override this to print all config options to |
626
|
|
|
|
|
|
|
# the traceString |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=item $obj->parseWps($synset1, $synset2) |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
parameters: synset1, synset2 |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
returns: a reference to an array [$word1, $pos1, $sense1, $offset1, $word2, |
635
|
|
|
|
|
|
|
$pos2, $sense2, $offset2] or undef |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
This method checks the format of the two input synsets by calling |
638
|
|
|
|
|
|
|
validateSynset() for each synset. |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
If the synsets are in wps format, a reference to an array will be returned. |
641
|
|
|
|
|
|
|
This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, |
642
|
|
|
|
|
|
|
$sense2, $offset2] where $word1 is the word part of $wps1, $pos1, is the |
643
|
|
|
|
|
|
|
part of speech of $wps1, $sense1 is the sense from $wps. $offset1 is the |
644
|
|
|
|
|
|
|
offset for $wps1. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
If an error occurs (such as a synset being poorly-formed), then undef |
647
|
|
|
|
|
|
|
is returned, the error level is set to non-zero, and an error message is |
648
|
|
|
|
|
|
|
appended to the error string. |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub parseWps |
653
|
|
|
|
|
|
|
{ |
654
|
|
|
|
|
|
|
my $self = shift; |
655
|
|
|
|
|
|
|
my $wps1 = shift; |
656
|
|
|
|
|
|
|
my $wps2 = shift; |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
my $class = ref $self || $self; |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# Undefined input cannot go unpunished. |
661
|
|
|
|
|
|
|
unless (defined $wps1 and defined $wps2 and length $wps1 and length $wps2) { |
662
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::parseWps()) - "; |
663
|
|
|
|
|
|
|
$self->{errorString} .= "Variable for input synset ".(length($wps1) ? 2 : 1)." undefined."; |
664
|
|
|
|
|
|
|
$self->{error} = ($self->{error} < 1) ? 1 : $self->{error}; |
665
|
|
|
|
|
|
|
return undef; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
my ($word1, $pos1, $sense1, $offset1) = $self->validateSynset ($wps1); |
669
|
|
|
|
|
|
|
my ($word2, $pos2, $sense2, $offset2) = $self->validateSynset ($wps2); |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# Check to see if validation of synsets succeeded, if not, then |
672
|
|
|
|
|
|
|
# bail out (error message already set by validateSynset). |
673
|
|
|
|
|
|
|
unless (defined $word1 and defined $pos1 and defined $sense1 |
674
|
|
|
|
|
|
|
and defined $word2 and defined $pos2 and defined $sense2) { |
675
|
|
|
|
|
|
|
return undef; |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
return [$word1, $pos1, $sense1, $offset1, $word2, $pos2, $sense2, $offset2]; |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $obj->validateSynset($synset) |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
parameter: synset |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
returns: a list or undef on error |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
synset is a string in word#pos#sense format |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
This method does the following: |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=over |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=item 1. |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
Verifies that the synset is well-formed (i.e., that it consists of three |
697
|
|
|
|
|
|
|
parts separated by #s, the pos is one of {n, v, a, r} and that sense |
698
|
|
|
|
|
|
|
is a natural number). A synset that matches the pattern '[^\#]+\#[nvar]\#\d+' |
699
|
|
|
|
|
|
|
is considered well-formed. |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=item 2. |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Checks if the synset exists by trying to find the offset for the synset |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=back |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
If any of these tests fails, then the error level is set to non-zero, a |
708
|
|
|
|
|
|
|
message is appended to the error string, and undef is returned. |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
If the synset is well-formed and exists, then a list is returned that |
711
|
|
|
|
|
|
|
has the format ($word, $pos, $sense, $offset). |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=cut |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
sub validateSynset |
716
|
|
|
|
|
|
|
{ |
717
|
|
|
|
|
|
|
my $self = shift; |
718
|
|
|
|
|
|
|
my $synset = shift; |
719
|
|
|
|
|
|
|
my $class = ref $self || $self; |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
# check to see that synset is in w#p#s format |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
unless (defined ($synset) and length ($synset)) { |
724
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
725
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
726
|
|
|
|
|
|
|
$self->{errorString} .= "Variable representing synset is undefined (or an empty string)."; |
727
|
|
|
|
|
|
|
return undef; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
my ($word, $pos, $sense) = split (/\#/, $synset); |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
unless (defined $word) { |
733
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
734
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
735
|
|
|
|
|
|
|
$self->{errorString} .= "Invalid synset ($synset): word undefined."; |
736
|
|
|
|
|
|
|
return undef; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
unless (defined $pos) { |
740
|
|
|
|
|
|
|
no strict 'vars'; |
741
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
742
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
743
|
|
|
|
|
|
|
$self->{errorString} .= "Invalid synset ($synset): part of speech undefined."; |
744
|
|
|
|
|
|
|
return undef; |
745
|
|
|
|
|
|
|
} |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
unless (defined $sense) { |
748
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
749
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
750
|
|
|
|
|
|
|
$self->{errorString} .= "Invalid synset ($synset): sense number undefined."; |
751
|
|
|
|
|
|
|
return undef; |
752
|
|
|
|
|
|
|
} |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# check to make sure the word, pos, and sense are well-formed |
755
|
|
|
|
|
|
|
if ($word !~ /^[^\#]+$/) { |
756
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
757
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
758
|
|
|
|
|
|
|
$self->{errorString} .= "$synset has a poorly-formed word ($word)."; |
759
|
|
|
|
|
|
|
return undef; |
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
elsif ($pos !~ /^[nvar]$/) { |
762
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
763
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
764
|
|
|
|
|
|
|
$self->{errorString} .= "$synset has a bad part of speech ($pos). Part of speech must be one of n, v, a, r."; |
765
|
|
|
|
|
|
|
return undef; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
elsif ($sense !~ /^\d+$/) { |
768
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
769
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
770
|
|
|
|
|
|
|
$self->{errorString} .= "$synset has a bad sense number ($pos). Sense number must be a natural number."; |
771
|
|
|
|
|
|
|
return undef; |
772
|
|
|
|
|
|
|
} |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# check to see if synset exists |
775
|
|
|
|
|
|
|
my $offset = $self->{wn}->offset ($synset); |
776
|
|
|
|
|
|
|
unless ($offset) { |
777
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
778
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
779
|
|
|
|
|
|
|
$self->{errorString} .= "$synset not found in WordNet."; |
780
|
|
|
|
|
|
|
return undef; |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
return ($word, $pos, $sense, $offset); |
784
|
|
|
|
|
|
|
} |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=item $obj->getRelatedness($synset1, $synset2) |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
parameters: synset1, synset2 |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
returns: a relatedness score |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
This is a virtual method. It must be overridden by a module that |
793
|
|
|
|
|
|
|
is derived from this class. This method takes two synsets and |
794
|
|
|
|
|
|
|
returns a numeric value as their score of relatedness. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=cut |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
sub getRelatedness { |
799
|
|
|
|
|
|
|
my $self = shift; |
800
|
|
|
|
|
|
|
my $class = ref $self || $self; |
801
|
|
|
|
|
|
|
$self->{errorString} .= "\nError (${class}::getRelatedness()) - "; |
802
|
|
|
|
|
|
|
$self->{errorString} .= "This is a virtual method provided by "; |
803
|
|
|
|
|
|
|
$self->{errorString} .= __PACKAGE__ . " that must be overridden."; |
804
|
|
|
|
|
|
|
$self->{error} = 2; |
805
|
|
|
|
|
|
|
return undef; |
806
|
|
|
|
|
|
|
} |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
# Subroutine that takes as input an array of offsets |
809
|
|
|
|
|
|
|
# or offsets(POS) and for each prints to traceString the |
810
|
|
|
|
|
|
|
# WORD#POS#(/) |
811
|
|
|
|
|
|
|
# INPUT PARAMS : $pos .. Part of speech |
812
|
|
|
|
|
|
|
# : ($offestpos1, $offsetpos2, ...) .. Array of offsetPOS's |
813
|
|
|
|
|
|
|
# or offests |
814
|
|
|
|
|
|
|
# RETURN VALUES : none. |
815
|
|
|
|
|
|
|
sub _printSet |
816
|
|
|
|
|
|
|
{ |
817
|
|
|
|
|
|
|
use Carp; |
818
|
|
|
|
|
|
|
carp "This method is deprecated; use printSet instead"; |
819
|
|
|
|
|
|
|
my $self = shift; |
820
|
|
|
|
|
|
|
my $pos = shift; |
821
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
822
|
|
|
|
|
|
|
my @offsets = @_; |
823
|
|
|
|
|
|
|
my $wps; |
824
|
|
|
|
|
|
|
my $opstr = ""; |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
foreach my $offset (@offsets) { |
827
|
|
|
|
|
|
|
$offset =~ m/(\d+)([a-z])?/; |
828
|
|
|
|
|
|
|
$offset = $1; |
829
|
|
|
|
|
|
|
if($offset) { |
830
|
|
|
|
|
|
|
$wps = $wn->getSense($offset, ($2 ? $2 : $pos)); |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
else { |
833
|
|
|
|
|
|
|
$wps = "*Root*\#". ($2 ? $2 : $pos) ."\#1"; |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
$wps =~ s/ +/_/g; |
836
|
|
|
|
|
|
|
if($self->{trace} == 2 && defined $offset && $offset != 0) { |
837
|
|
|
|
|
|
|
$wps =~ s/\#[0-9]*$/\#$offset/; |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
$opstr .= "$wps "; |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
$opstr =~ s/\s+$//; |
842
|
|
|
|
|
|
|
$self->{traceString} .= $opstr if($self->{trace}); |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
=item $obj->printSet ($pos, $mode, @synsets) |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
If tracing is turned on, prints the contents of @synsets to the trace string. |
849
|
|
|
|
|
|
|
The contents of @synsets can be either wps strings or offsets. If they |
850
|
|
|
|
|
|
|
are wps strings, then $mode must be the string 'wps'; if they are offsets, |
851
|
|
|
|
|
|
|
then the mode must be 'offset'. Please don't try to mix wps and offsets. |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
Returns the string that was appended to the trace string. |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=cut |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
sub printSet |
858
|
|
|
|
|
|
|
{ |
859
|
|
|
|
|
|
|
my $self = shift; |
860
|
|
|
|
|
|
|
my $pos = shift; |
861
|
|
|
|
|
|
|
my $mode = shift; |
862
|
|
|
|
|
|
|
my @synsets = @_; |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
my $opstr = ''; |
865
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
866
|
|
|
|
|
|
|
my $wps; |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
869
|
|
|
|
|
|
|
foreach my $offset (@synsets) { |
870
|
|
|
|
|
|
|
$offset =~ m/^(\d+)([a-z])?/; |
871
|
|
|
|
|
|
|
$offset = $1; |
872
|
|
|
|
|
|
|
if ($offset) { |
873
|
|
|
|
|
|
|
$wps = $wn->getSense ($offset, (defined $2 ? $2 : $pos)); |
874
|
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
|
else { |
876
|
|
|
|
|
|
|
$wps = "*Root*\#" . (defined $2 ? $2 : $pos) . "\#1"; |
877
|
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
|
$wps =~ tr/ /_/; |
879
|
|
|
|
|
|
|
if ($self->{trace} == 2 && defined $offset) { |
880
|
|
|
|
|
|
|
$wps =~ s/\#[0-9]+$/\#$offset/; |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
$opstr .= "$wps "; |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
elsif ($mode eq 'wps') { |
886
|
|
|
|
|
|
|
WPS: |
887
|
|
|
|
|
|
|
foreach my $wps (@synsets) { |
888
|
|
|
|
|
|
|
unless ($self->{trace} == 2) { |
889
|
|
|
|
|
|
|
$opstr .= "$wps "; |
890
|
|
|
|
|
|
|
next WPS; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
my $offset = scalar ($wps =~ /\*Root\*/i) ? 0 : $wn->offset ($wps); |
893
|
|
|
|
|
|
|
my ($word, $p) = $wps =~ /^(\S+)\#([nvar])\#\d+$/; |
894
|
|
|
|
|
|
|
$opstr .= "$word#$p#$offset "; |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
} |
897
|
|
|
|
|
|
|
$opstr =~ s/\s+$//; |
898
|
|
|
|
|
|
|
$self->{traceString} .= $opstr if $self->{trace}; |
899
|
|
|
|
|
|
|
return $opstr; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
# you should only call this if $self->{doCache} is true |
904
|
|
|
|
|
|
|
# nothing bad will happen if you call anyways, but it will slow things down |
905
|
|
|
|
|
|
|
# |
906
|
|
|
|
|
|
|
# NEW! You can specify whether or not relatedness is symmetric: if |
907
|
|
|
|
|
|
|
# relatedness (c1, c2) = relatedness (c2, c1), then relatedness is symmetric. |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=item $obj->fetchFromCache($wps1, $wps2, $non_symmetric) |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
Looks for the relatedness value of ($wps1, $wps2) in the cache. If |
912
|
|
|
|
|
|
|
$non_symmetric is false (or isn't specified), then the cache is searched |
913
|
|
|
|
|
|
|
for ($wps2, $wps1) if ($wps1, $wps2) isn't found. |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
Returns: a relatedness value or undef if none found in the cache. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=cut |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
sub fetchFromCache |
920
|
|
|
|
|
|
|
{ |
921
|
|
|
|
|
|
|
my $self = shift; |
922
|
|
|
|
|
|
|
my ($wps1, $wps2, $non_symmetric) = @_; |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
$self->{doCache} or return undef; |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
$non_symmetric = 0 unless defined $non_symmetric; |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
if (defined $self->{simCache}->{"${wps1}::$wps2"}) { |
929
|
|
|
|
|
|
|
if ($self->{traceCache}->{"${wps1}::$wps2"}) { |
930
|
|
|
|
|
|
|
$self->{traceString} .= $self->{traceCache}->{"${wps1}::$wps2"}; |
931
|
|
|
|
|
|
|
} |
932
|
|
|
|
|
|
|
return $self->{simCache}->{"${wps1}::$wps2"}; |
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
elsif (!$non_symmetric and defined $self->{simCache}->{"${wps2}::$wps1"}) { |
935
|
|
|
|
|
|
|
if ($self->{traceCache}->{"${wps2}::$wps1"}) { |
936
|
|
|
|
|
|
|
$self->{traceString} .= $self->{traceCache}->{"${wps2}::$wps1"}; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
return $self->{simCache}->{"${wps2}::$wps1"}; |
939
|
|
|
|
|
|
|
} |
940
|
|
|
|
|
|
|
return undef; |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
=item $obj->storeToCache ($wps1, $wps2, $score) |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
Stores the relatedness value, $score, of ($wps1, $wps2) to the cache. |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
Returns: nothing |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
=cut |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sub storeToCache |
952
|
|
|
|
|
|
|
{ |
953
|
|
|
|
|
|
|
my $self = shift; |
954
|
|
|
|
|
|
|
my ($wps1, $wps2, $score) = @_; |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
$self->{doCache} or return; |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
$self->{simCache}->{"${wps1}::$wps2"} = $score; |
959
|
|
|
|
|
|
|
if ($self->{trace}) { |
960
|
|
|
|
|
|
|
$self->{traceCache}->{"${wps1}::$wps2"} = $self->{traceString} |
961
|
|
|
|
|
|
|
} |
962
|
|
|
|
|
|
|
push (@{$self->{cacheQ}}, "${wps1}::$wps2"); |
963
|
|
|
|
|
|
|
if (($self->{maxCacheSize} >= 0) |
964
|
|
|
|
|
|
|
and ($self->{maxCacheSize} != UNLIMITED_CACHE)) { |
965
|
|
|
|
|
|
|
while (scalar (@{$self->{cacheQ}}) > $self->{maxCacheSize}) { |
966
|
|
|
|
|
|
|
my $delItem = shift(@{$self->{cacheQ}}); |
967
|
|
|
|
|
|
|
delete $self->{simCache}->{$delItem}; |
968
|
|
|
|
|
|
|
delete $self->{traceCache}->{$delItem}; |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
} |
971
|
|
|
|
|
|
|
} |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
1; |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
__END__ |