line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# WordNet::Extend::Insert.pm version 0.040 |
2
|
|
|
|
|
|
|
# Updated: 03/19/17 |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Jon Rusert, University of Minnesota Duluth |
5
|
|
|
|
|
|
|
# ruse0008 at d.umn.edu |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Ted Pedersen, University of Minnesota Duluth |
8
|
|
|
|
|
|
|
# tpederse at d.umn.edu |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# This program is free software: you can redistribute it and/or modify |
11
|
|
|
|
|
|
|
# it under the terms of the GNU General Public License as published by |
12
|
|
|
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
13
|
|
|
|
|
|
|
# (at your option) any later version. |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
16
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
|
|
|
|
# GNU General Public License for more details. |
19
|
|
|
|
|
|
|
# |
20
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
21
|
|
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
package WordNet::Extend::Insert; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 NAME |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
WordNet::Extend::Insert - Perl module for inserting a lemma into |
29
|
|
|
|
|
|
|
WordNet. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 SYNOPSIS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 Basic Usage Example |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
use WordNet::Extend::Insert; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
my $insert = WordNet::Extend::Insert->new(); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
@in1 = ("crackberry","noun","withdef.1", "A BlackBerry, a handheld device considered addictive for its networking capability."); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
#@in2 = ("slackberry","noun","withdef.2", "A mocking name for crackberry."); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
@loc1 = ("withdef.5","cellphone#n#1"); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
#@loc2 = ("withdef.6","crackberry#n#1"); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$insert->attach(\@in1, \@loc1); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
#$insert->merge(\@in2, \@loc2); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head2 Introduction |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
WordNet is a widely used tool in NLP and other research areas. A drawback of WordNet is the amount of time between updates. WordNet was last updated and released in December, 2006, and no further updates are planned. WordNet::Extend::Insert aims to allow developers to insert their own lemmas into WordNet, which can help keep WordNet updated with new language in the world. It can also revert back to the original untouched WordNet (by calling restoreWordNet) if the user makes a mistake or simply wants access to the untouched WordNet. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over |
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
1
|
|
816
|
use WordNet::QueryData; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
use Getopt::Long; |
62
|
|
|
|
|
|
|
use File::Spec; |
63
|
|
|
|
|
|
|
use File::Copy; |
64
|
|
|
|
|
|
|
use File::Find; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Package globals used with Exporter; this module exports nothing.
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

$VERSION     = '0.040';
@ISA         = qw(Exporter);
@EXPORT      = ();
@EXPORT_OK   = ();
%EXPORT_TAGS = ();

#**************Variables**********************
# QueryData handle, to be used to access data from wordnet.
$wn = WordNet::QueryData->new;

# WordNet location and the dict directory beneath it.
$WNHOME       = "/usr/local/WordNet-3.0";
$WNSEARCHDICT = "$WNHOME/dict";

# Number of lines the copyright takes up in data.pos and index.pos.
$wnCRLength = 29;
#*********************************************

# Parse the command line; when --help is given, print the help text and stop.
GetOptions('help' => \$help);
if ($help == 1) {
    printHelp();
    exit(0);
}
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head2 Methods |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The following methods are defined in this package: |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head3 Public methods |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=over |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item $obj->new() |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The constructor for WordNet::Extend::Insert objects. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Parameters: none. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Return value: the new blessed object |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=cut |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Constructor: builds a WordNet::Extend::Insert object with a clean error state.
#   Parameters: none beyond the class name supplied by the method call.
#   Returns:    the new blessed object.
sub new
{
    my ($class) = @_;

    # A fresh object starts with no error recorded.
    my $self = bless {
        errorString => '',
        error       => 0,
    }, $class;

    return $self;
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item $obj->getError() |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Allows the object to check if any errors have occurred. |
126
|
|
|
|
|
|
|
Returns an array ($error, $errString), where $error |
127
|
|
|
|
|
|
|
value equal to 1 represents a warning and 2 represents |
128
|
|
|
|
|
|
|
an error with the requested commands. (If a user would |
129
|
|
|
|
|
|
|
run attach() without enough arguments, the error code |
130
|
|
|
|
|
|
|
would return 2). $errorString contains what error occurred. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Parameter: None |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Returns: array of the form ($error, $errorString). |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
# Reports the error state stored on the object and then clears it.
#   Parameters: none (invoked as $obj->getError()).
#   Returns:    the list ($error, $errString), holding the object's 'error'
#               code and its 'errorString' with leading whitespace removed.
# Declared without the former empty prototype "()": the sub takes the
# invocant as its argument, and the empty prototype made any function-style
# call with an argument a compile-time error (method calls ignore prototypes).
sub getError
{
    my $self = shift;

    # Take a copy of the current state before it is reset below.
    my $error     = $self->{error};
    my $errString = $self->{errorString};

    # Reading the error also clears it on the object.
    $self->{error}       = 0;
    $self->{errorString} = "";

    # Drop any leading newlines, tabs or spaces from the message.
    $errString =~ s/^[\r\n\t ]+//;

    return ($error, $errString);
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=item $obj->attach($newSynset, $location) |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Takes in a new synset and inserts it into WordNet at the specified location |
151
|
|
|
|
|
|
|
by attaching it to the specified location lemma as a hyponym. The location should |
152
|
|
|
|
|
|
|
be represented by "lemma#pos#senseNum". For example, to attach to the 2nd sense |
153
|
|
|
|
|
|
|
of the noun window, the location would be "window#n#2". |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Parameters: Synset array in form (lemma, part-of-speech, item-id, definition) |
156
|
|
|
|
|
|
|
or "word\tpos\titem-id\tdef", and location to be inserted in form |
157
|
|
|
|
|
|
|
(item-id, WordNet sense). |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Returns: nothing |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
sub attach() |
163
|
|
|
|
|
|
|
{ |
164
|
|
|
|
|
|
|
#need to load in new QueryData |
165
|
|
|
|
|
|
|
$wn = WordNet::QueryData->new; |
166
|
|
|
|
|
|
|
my $base = 0; |
167
|
|
|
|
|
|
|
if(scalar @_ == 3)#checks if method entered by object. |
168
|
|
|
|
|
|
|
{ |
169
|
|
|
|
|
|
|
$base = 1; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
my @newSyn = (); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
if(ref($_[$base]) eq 'ARRAY') |
175
|
|
|
|
|
|
|
{ |
176
|
|
|
|
|
|
|
@newSyn =@{$_[$base]}; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
else |
179
|
|
|
|
|
|
|
{ |
180
|
|
|
|
|
|
|
@newSyn = split("\t", $_[$base]); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
$base = $base +1; |
184
|
|
|
|
|
|
|
my $pos = substr($newSyn[1], 0, 1); |
185
|
|
|
|
|
|
|
my @location = @{$_[$base]}; |
186
|
|
|
|
|
|
|
my $write = 1; #write flag changes to 0 if error occurs so no write() will occur. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
if(scalar @newSyn < 4) |
189
|
|
|
|
|
|
|
{ |
190
|
|
|
|
|
|
|
my $self = shift; |
191
|
|
|
|
|
|
|
$self->{error} = 2; |
192
|
|
|
|
|
|
|
$self->{errorString} = "New synset does not contain enough elements."; |
193
|
|
|
|
|
|
|
$write = 0; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
if(scalar @location < 2) |
197
|
|
|
|
|
|
|
{ |
198
|
|
|
|
|
|
|
my $self = shift; |
199
|
|
|
|
|
|
|
$self->{error} = 2; |
200
|
|
|
|
|
|
|
$self->{errorString} = "Location does not contain enough elements."; |
201
|
|
|
|
|
|
|
$write = 0; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
unless (defined $wn->offset("$location[1]")) |
205
|
|
|
|
|
|
|
{ |
206
|
|
|
|
|
|
|
my $self = shift; |
207
|
|
|
|
|
|
|
$self->{error} = 2; |
208
|
|
|
|
|
|
|
$self->{errorString} = "Location does not exist in WordNet."; |
209
|
|
|
|
|
|
|
$write = 0; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
if($write == 1) |
213
|
|
|
|
|
|
|
{ |
214
|
|
|
|
|
|
|
my $newOffset = findNewOffset($newSyn[1]); |
215
|
|
|
|
|
|
|
my %CRNotice; |
216
|
|
|
|
|
|
|
my %DataSpace; |
217
|
|
|
|
|
|
|
my %offsetMap; #used to stored changes in offsets. |
218
|
|
|
|
|
|
|
my $indexPos = ""; |
219
|
|
|
|
|
|
|
my $dataPos = ""; |
220
|
|
|
|
|
|
|
my $indexSense = ""; |
221
|
|
|
|
|
|
|
my $posNum = 0; |
222
|
|
|
|
|
|
|
my $locationLemma = $location[1]; |
223
|
|
|
|
|
|
|
$locationLemma =~ s/#.*//; #extract lemma |
224
|
|
|
|
|
|
|
my $locationPos = $newSyn[1]; #must be same pos as new. |
225
|
|
|
|
|
|
|
my $locationOffset = $wn->offset("$location[1]"); |
226
|
|
|
|
|
|
|
while(length($locationOffset) < 8) #QueryData->offset() does not keep the 8 digits, need to add back lost 0's |
227
|
|
|
|
|
|
|
{ |
228
|
|
|
|
|
|
|
$locationOffset = "0".$locationOffset; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
my $indexFile = "$WNSEARCHDICT/index.$locationPos"; |
231
|
|
|
|
|
|
|
my $dataFile = "$WNSEARCHDICT/data.$locationPos"; |
232
|
|
|
|
|
|
|
my $senseFile = "$WNSEARCHDICT/index.sense"; |
233
|
|
|
|
|
|
|
open (WNINDEXNEW, '>', "$indexFile.new") or die $!; |
234
|
|
|
|
|
|
|
open (WNDATANEW, '>', "$dataFile.new") or die $!; |
235
|
|
|
|
|
|
|
open (WNSENSENEW, '>', "$senseFile.new") or die $!; |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
#make filehandles hot |
238
|
|
|
|
|
|
|
my $fhIndex = select(WNINDEXNEW); |
239
|
|
|
|
|
|
|
$|=1; |
240
|
|
|
|
|
|
|
select($fhIndex); |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
my $fhData = select(WNDATANEW); |
243
|
|
|
|
|
|
|
$|=1; |
244
|
|
|
|
|
|
|
select($fhData); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
my $fhSense = select(WNSENSENEW); |
247
|
|
|
|
|
|
|
$|=1; |
248
|
|
|
|
|
|
|
select($fhSense); |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
if($pos eq "n") |
251
|
|
|
|
|
|
|
{ |
252
|
|
|
|
|
|
|
$posNum = 1; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
else |
255
|
|
|
|
|
|
|
{ |
256
|
|
|
|
|
|
|
if($pos eq "v") |
257
|
|
|
|
|
|
|
{ |
258
|
|
|
|
|
|
|
$posNum = 2; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
else |
261
|
|
|
|
|
|
|
{ |
262
|
|
|
|
|
|
|
my $self = shift; |
263
|
|
|
|
|
|
|
$self->{error} = 2; |
264
|
|
|
|
|
|
|
$self->{errorString} = "Part of speech must be verb or noun"; |
265
|
|
|
|
|
|
|
$write = 0; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
if(isNewWord($newSyn[0], $newSyn[1]) == 0) |
271
|
|
|
|
|
|
|
{ |
272
|
|
|
|
|
|
|
my %hypData = %{getDataInfo($locationOffset, $locationPos)}; |
273
|
|
|
|
|
|
|
my %hypInfo = %{getIndexInfo($locationLemma, $locationPos)}; |
274
|
|
|
|
|
|
|
#print from three required files. |
275
|
|
|
|
|
|
|
open WNINDEX, "$indexFile" or die $!; |
276
|
|
|
|
|
|
|
open WNDATA, "$dataFile" or die $!; |
277
|
|
|
|
|
|
|
open WNSENSE, "$senseFile" or die $!; |
278
|
|
|
|
|
|
|
open (WNDATATEMP, '>', "$dataFile.temp") or die $!; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
my $changed = 0; |
281
|
|
|
|
|
|
|
my $curLine = 1; |
282
|
|
|
|
|
|
|
while() |
283
|
|
|
|
|
|
|
{ |
284
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
285
|
|
|
|
|
|
|
{ |
286
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
287
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
288
|
|
|
|
|
|
|
$DataSpace{$curLine} = length($1); |
289
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
290
|
|
|
|
|
|
|
{ |
291
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
292
|
|
|
|
|
|
|
if($changed == 1) |
293
|
|
|
|
|
|
|
{ |
294
|
|
|
|
|
|
|
my $newNewOffset = $tempLine[0] +18; |
295
|
|
|
|
|
|
|
while(length($newNewOffset) < 8) |
296
|
|
|
|
|
|
|
{ |
297
|
|
|
|
|
|
|
$newNewOffset = "0".$newNewOffset; |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $newNewOffset; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
else |
302
|
|
|
|
|
|
|
{ |
303
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $tempLine[0]; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
if($tempLine[0] == $locationOffset) |
307
|
|
|
|
|
|
|
{ |
308
|
|
|
|
|
|
|
my $newPcnt = $hypData{'p_cnt'} + 1; |
309
|
|
|
|
|
|
|
while(length $newPcnt < 3)#needs to be represented by 3 digits. |
310
|
|
|
|
|
|
|
{ |
311
|
|
|
|
|
|
|
$newPcnt = "0".$newPcnt; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
$newOffset = $newOffset + 18; #18 is the length of new data being added. |
314
|
|
|
|
|
|
|
$tempIn = "$hypData{'synset_offset'} $hypData{'lex_filenum'} $hypData{'ss_type'} $hypData{'w_cnt'} $hypData{'word_lex_id'} $newPcnt $hypData{'ptr'} ~ $newOffset $pos 0000 | $hypData{'gloss'}"; |
315
|
|
|
|
|
|
|
$changed = 1; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
else |
319
|
|
|
|
|
|
|
{ |
320
|
|
|
|
|
|
|
$CRNotice{$curLine} = $tempIn; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
$curLine+=1; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
print WNDATATEMP "$tempIn\n"; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
my $lowerSyn = lc($newSyn[0]); |
330
|
|
|
|
|
|
|
$indexPos ="$lowerSyn $pos 1 1 \@ 1 0 $newOffset"; |
331
|
|
|
|
|
|
|
if($pos eq 'v') |
332
|
|
|
|
|
|
|
{ |
333
|
|
|
|
|
|
|
$dataPos = "$newOffset $hypData{'lex_filenum'} $pos 01 $newSyn[0] 0 001 \@ $hypData{'synset_offset'} $pos 0000 01 + 01 00 | $newSyn[3]"; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
else |
336
|
|
|
|
|
|
|
{ |
337
|
|
|
|
|
|
|
$dataPos = "$newOffset $hypData{'lex_filenum'} $pos 01 $newSyn[0] 0 001 \@ $hypData{'synset_offset'} $pos 0000 | $newSyn[3]"; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
$indexSense = "$lowerSyn%$posNum:$hypData{'lex_filenum'}:00:: $newOffset 1 0"; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
close WNDATATEMP; |
343
|
|
|
|
|
|
|
open WNDATATEMP, "$dataFile.temp" or die $!; |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
$curLine = 1; |
346
|
|
|
|
|
|
|
while() |
347
|
|
|
|
|
|
|
{ |
348
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
349
|
|
|
|
|
|
|
{ |
350
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
351
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
352
|
|
|
|
|
|
|
{ |
353
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
354
|
|
|
|
|
|
|
{ |
355
|
|
|
|
|
|
|
#only change offsets with respect to current pos or first offset |
356
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos || $i == 0) |
357
|
|
|
|
|
|
|
{ |
358
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
366
|
|
|
|
|
|
|
{ |
367
|
|
|
|
|
|
|
for($i=1; $i <= $DataSpace{$curLine}; $i++) |
368
|
|
|
|
|
|
|
{ |
369
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
print WNDATANEW "$tempIn\n"; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
else |
374
|
|
|
|
|
|
|
{ |
375
|
|
|
|
|
|
|
print WNDATANEW "$CRNotice{$curLine}\n"; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
$curLine+=1; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
print WNDATANEW "$dataPos \n"; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
$curLine = 1; |
383
|
|
|
|
|
|
|
$alpha = 1; |
384
|
|
|
|
|
|
|
while() |
385
|
|
|
|
|
|
|
{ |
386
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
387
|
|
|
|
|
|
|
{ |
388
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
389
|
|
|
|
|
|
|
{ |
390
|
|
|
|
|
|
|
#need to add hyponym pointer if it did not exist before on hypernym |
391
|
|
|
|
|
|
|
if($tempIn =~ /^$locationLemma\b[^-]/) |
392
|
|
|
|
|
|
|
{ |
393
|
|
|
|
|
|
|
unless($tempIn =~ /\~/) |
394
|
|
|
|
|
|
|
{ |
395
|
|
|
|
|
|
|
my $newPcnt = $hypInfo{'p_cnt'}; |
396
|
|
|
|
|
|
|
$newPcnt+=1; |
397
|
|
|
|
|
|
|
$tempIn ="$hypInfo{'lemma'} $hypInfo{'pos'} $hypInfo{'synset_cnt'} $newPcnt $hypInfo{'ptr_symbol'} ~ $hypInfo{'sense_cnt'} $hypInfo{'tagsense_cnt'} $hypInfo{'synset_offset'}"; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
#add in $indexPos alphabetically |
404
|
|
|
|
|
|
|
if($alpha == 1) |
405
|
|
|
|
|
|
|
{ |
406
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
407
|
|
|
|
|
|
|
{ |
408
|
|
|
|
|
|
|
print WNINDEXNEW "$indexPos \n"; |
409
|
|
|
|
|
|
|
$alpha = 0; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
my $tmpPcnt = $tempLine[2]; |
414
|
|
|
|
|
|
|
my $offsetPtr = scalar(@tempLine) - 1; |
415
|
|
|
|
|
|
|
while($tmpPcnt > 0) |
416
|
|
|
|
|
|
|
{ |
417
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$offsetPtr]}) |
418
|
|
|
|
|
|
|
{ |
419
|
|
|
|
|
|
|
$tempLine[$offsetPtr] = "$offsetMap{$tempLine[$offsetPtr]}"; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
$tmpPcnt-=1; |
422
|
|
|
|
|
|
|
$offsetPtr-=1; |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
else |
428
|
|
|
|
|
|
|
{ |
429
|
|
|
|
|
|
|
$curLine+=1; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
print WNINDEXNEW "$tempIn \n"; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
$alpha = 1; |
436
|
|
|
|
|
|
|
while() |
437
|
|
|
|
|
|
|
{ |
438
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
439
|
|
|
|
|
|
|
{ |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
442
|
|
|
|
|
|
|
#add in $indexSense alphabetically |
443
|
|
|
|
|
|
|
if($alpha == 1) |
444
|
|
|
|
|
|
|
{ |
445
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
446
|
|
|
|
|
|
|
{ |
447
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
448
|
|
|
|
|
|
|
$alpha = 0; |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[1]}) |
453
|
|
|
|
|
|
|
{ |
454
|
|
|
|
|
|
|
my $percLoc = index($tempLine[0], '%'); |
455
|
|
|
|
|
|
|
my $tPNumLoc = $perlLoc + 1; |
456
|
|
|
|
|
|
|
my $targetPosNum = substr $tempLine[0], $tPNumLoc, 1; |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
#only change corresponding pos words |
459
|
|
|
|
|
|
|
if($targetPosNum == $posNum) |
460
|
|
|
|
|
|
|
{ |
461
|
|
|
|
|
|
|
$tempLine[1] = "$offsetMap{$tempLine[1]}"; |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
print WNSENSENEW "$tempIn\n"; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
close WNINDEX; |
471
|
|
|
|
|
|
|
close WNDATA; |
472
|
|
|
|
|
|
|
close WNSENSE; |
473
|
|
|
|
|
|
|
close WNDATATEMP; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
else #lemma already exists |
476
|
|
|
|
|
|
|
{ |
477
|
|
|
|
|
|
|
my %hypData = %{getDataInfo($locationOffset, $locationPos)}; |
478
|
|
|
|
|
|
|
my %hypInfo = %{getIndexInfo($locationLemma, $locationPos)}; |
479
|
|
|
|
|
|
|
my %lemmaIndex = %{getIndexInfo($newSyn[0], $newSyn[1])}; |
480
|
|
|
|
|
|
|
my $newSynNum = $lemmaIndex{'synset_cnt'} + 1; |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
#print to three required files. |
483
|
|
|
|
|
|
|
open WNINDEX, "$indexFile" or die $!; |
484
|
|
|
|
|
|
|
open WNDATA, "$dataFile" or die $!; |
485
|
|
|
|
|
|
|
open WNSENSE, "$senseFile" or die $!; |
486
|
|
|
|
|
|
|
open (WNDATATEMP, '>', "$dataFile.temp") or die $!; |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
my $changed = 0; |
489
|
|
|
|
|
|
|
my $curLine = 1; |
490
|
|
|
|
|
|
|
while() |
491
|
|
|
|
|
|
|
{ |
492
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
493
|
|
|
|
|
|
|
{ |
494
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
495
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
496
|
|
|
|
|
|
|
$DataSpace{$curLine} = length($1); |
497
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
498
|
|
|
|
|
|
|
{ |
499
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
500
|
|
|
|
|
|
|
if($changed == 1) |
501
|
|
|
|
|
|
|
{ |
502
|
|
|
|
|
|
|
my $newNewOffset = $tempLine[0] +18; |
503
|
|
|
|
|
|
|
while(length($newNewOffset) < 8) |
504
|
|
|
|
|
|
|
{ |
505
|
|
|
|
|
|
|
$newNewOffset = "0".$newNewOffset; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $newNewOffset; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
else |
510
|
|
|
|
|
|
|
{ |
511
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $tempLine[0]; |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
if($tempLine[0] == $locationOffset) |
515
|
|
|
|
|
|
|
{ |
516
|
|
|
|
|
|
|
my $newPcnt = $hypData{'p_cnt'} + 1; |
517
|
|
|
|
|
|
|
while(length $newPcnt < 3)#needs to be represented by 3 digits. |
518
|
|
|
|
|
|
|
{ |
519
|
|
|
|
|
|
|
$newPcnt = "0".$newPcnt; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
$newOffset = $newOffset + 18; #14 is the length of new data being added. |
522
|
|
|
|
|
|
|
$tempIn = "$hypData{'synset_offset'} $hypData{'lex_filenum'} $hypData{'ss_type'} $hypData{'w_cnt'} $hypData{'word_lex_id'} $newPcnt $hypData{'ptr'} ~ $newOffset $pos 0000 | $hypData{'gloss'}"; |
523
|
|
|
|
|
|
|
$changed = 1; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
else |
527
|
|
|
|
|
|
|
{ |
528
|
|
|
|
|
|
|
$CRNotice{$curLine} = $tempIn; |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
$curLine+=1; |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
print WNDATATEMP "$tempIn\n"; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
my $lowerSyn = lc($newSyn[0]); |
538
|
|
|
|
|
|
|
$indexPos ="$lowerSyn $pos $newSynNum $lemmaIndex{'p_cnt'} $lemmaIndex{'ptr_symbol'} $newSynNum $lemmaIndex{'tagsense_cnt'} $lemmaIndex{'synset_offset'} $newOffset"; |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
if($pos eq 'v') |
541
|
|
|
|
|
|
|
{ |
542
|
|
|
|
|
|
|
$dataPos = "$newOffset $hypData{'lex_filenum'} $pos 01 $newSyn[0] 0 001 \@ $hypData{'synset_offset'} $pos 0000 01 + 01 00 | $newSyn[3]"; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
else |
545
|
|
|
|
|
|
|
{ |
546
|
|
|
|
|
|
|
$dataPos = "$newOffset $hypData{'lex_filenum'} $pos 01 $newSyn[0] 0 001 \@ $hypData{'synset_offset'} $pos 0000 | $newSyn[3]"; |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
$indexSense = "$lowerSyn%$posNum:$hypData{'lex_filenum'}:00:: $newOffset $newSynNum 0"; |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
close WNDATATEMP; |
552
|
|
|
|
|
|
|
open WNDATATEMP, "$dataFile.temp" or die $!; |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
$curLine = 1; |
555
|
|
|
|
|
|
|
while() |
556
|
|
|
|
|
|
|
{ |
557
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
558
|
|
|
|
|
|
|
{ |
559
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
560
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
561
|
|
|
|
|
|
|
{ |
562
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
563
|
|
|
|
|
|
|
{ |
564
|
|
|
|
|
|
|
#only change offsets with respect to current pos or first offset on line |
565
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos || $i == 0) |
566
|
|
|
|
|
|
|
{ |
567
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
575
|
|
|
|
|
|
|
{ |
576
|
|
|
|
|
|
|
for($i=1; $i <= $DataSpace{$curLine}; $i++) |
577
|
|
|
|
|
|
|
{ |
578
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
print WNDATANEW "$tempIn\n"; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
else |
583
|
|
|
|
|
|
|
{ |
584
|
|
|
|
|
|
|
print WNDATANEW "$CRNotice{$curLine}\n"; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
$curLine+=1; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
print WNDATANEW "$dataPos \n"; |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
$curLine = 1; |
592
|
|
|
|
|
|
|
while() |
593
|
|
|
|
|
|
|
{ |
594
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
595
|
|
|
|
|
|
|
{ |
596
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
597
|
|
|
|
|
|
|
{ |
598
|
|
|
|
|
|
|
#need to add hyponym pointer if it did not exist before on hypernym |
599
|
|
|
|
|
|
|
if($tempIn =~ /^$locationLemma\b[^-]/) |
600
|
|
|
|
|
|
|
{ |
601
|
|
|
|
|
|
|
unless($tempIn =~ /\~/) |
602
|
|
|
|
|
|
|
{ |
603
|
|
|
|
|
|
|
my $newPcnt = $hypInfo{'p_cnt'}; |
604
|
|
|
|
|
|
|
$newPcnt+=1; |
605
|
|
|
|
|
|
|
$tempIn ="$hypInfo{'lemma'} $hypInfo{'pos'} $hypInfo{'synset_cnt'} $newPcnt $hypInfo{'ptr_symbol'} ~ $hypInfo{'sense_cnt'} $hypInfo{'tagsense_cnt'} $hypInfo{'synset_offset'}"; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
if($tempIn =~ /^$newSyn[0]\b[^-]/) |
610
|
|
|
|
|
|
|
{ |
611
|
|
|
|
|
|
|
$tempIn = "$indexPos"; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
614
|
|
|
|
|
|
|
my $tmpPcnt = $tempLine[2]; |
615
|
|
|
|
|
|
|
my $offsetPtr = scalar(@tempLine) - 1; |
616
|
|
|
|
|
|
|
while($tmpPcnt > 0) |
617
|
|
|
|
|
|
|
{ |
618
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$offsetPtr]}) |
619
|
|
|
|
|
|
|
{ |
620
|
|
|
|
|
|
|
$tempLine[$offsetPtr] = "$offsetMap{$tempLine[$offsetPtr]}"; |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
$tmpPcnt-=1; |
623
|
|
|
|
|
|
|
$offsetPtr-=1; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
else |
629
|
|
|
|
|
|
|
{ |
630
|
|
|
|
|
|
|
$curLine+=1; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
print WNINDEXNEW "$tempIn \n"; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
$alpha = 1; |
638
|
|
|
|
|
|
|
while() |
639
|
|
|
|
|
|
|
{ |
640
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
641
|
|
|
|
|
|
|
{ |
642
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
643
|
|
|
|
|
|
|
#add in $indexSense alphabetically |
644
|
|
|
|
|
|
|
if($alpha == 1) |
645
|
|
|
|
|
|
|
{ |
646
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
647
|
|
|
|
|
|
|
{ |
648
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
649
|
|
|
|
|
|
|
$alpha = 0; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[1]}) |
654
|
|
|
|
|
|
|
{ |
655
|
|
|
|
|
|
|
my $percLoc = index($tempLine[0], '%'); |
656
|
|
|
|
|
|
|
my $tPNumLoc = $perlLoc + 1; |
657
|
|
|
|
|
|
|
my $targetPosNum = substr $tempLine[0], $tPNumLoc, 1; |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
#only change corresponding pos words |
660
|
|
|
|
|
|
|
if($targetPosNum == $posNum) |
661
|
|
|
|
|
|
|
{ |
662
|
|
|
|
|
|
|
$tempLine[1] = "$offsetMap{$tempLine[1]}"; |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
print WNSENSENEW "$tempIn\n"; |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
close WNINDEX; |
672
|
|
|
|
|
|
|
close WNDATA; |
673
|
|
|
|
|
|
|
close WNSENSE; |
674
|
|
|
|
|
|
|
close WNDATATEMP; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
close WNSENSENEW; |
679
|
|
|
|
|
|
|
close WNDATANEW; |
680
|
|
|
|
|
|
|
close WNSENSENEW; |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
my $changeDataFile = ""; |
683
|
|
|
|
|
|
|
if($write == 1)#if write was successful, data files needs to change offsets |
684
|
|
|
|
|
|
|
{ |
685
|
|
|
|
|
|
|
my $changePos = ''; |
686
|
|
|
|
|
|
|
my $changeLocationPos = ''; |
687
|
|
|
|
|
|
|
if($pos eq 'n') |
688
|
|
|
|
|
|
|
{ |
689
|
|
|
|
|
|
|
$changePos1 = 'v'; |
690
|
|
|
|
|
|
|
$changeLocationPos1 = 'verb'; |
691
|
|
|
|
|
|
|
$changePos2 = 'r'; |
692
|
|
|
|
|
|
|
$changeLocationPos2 = 'adv'; |
693
|
|
|
|
|
|
|
$changePos3 = 'a'; |
694
|
|
|
|
|
|
|
$changeLocationPos3 = 'adj'; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
else |
697
|
|
|
|
|
|
|
{ |
698
|
|
|
|
|
|
|
$changePos1 = 'n'; |
699
|
|
|
|
|
|
|
$changeLocationPos1 = 'noun'; |
700
|
|
|
|
|
|
|
$changePos2 = 'r'; |
701
|
|
|
|
|
|
|
$changeLocationPos2 = 'adv'; |
702
|
|
|
|
|
|
|
$changePos3 = 'a'; |
703
|
|
|
|
|
|
|
$changeLocationPos3 = 'adj'; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
$changeDataFile1 = "$WNSEARCHDICT/data.$changeLocationPos1"; |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile1.new") or die $!; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
711
|
|
|
|
|
|
|
$|=1; |
712
|
|
|
|
|
|
|
select($fhData); |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile1" or die $!; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
my $curLine = 1; |
717
|
|
|
|
|
|
|
while() |
718
|
|
|
|
|
|
|
{ |
719
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
720
|
|
|
|
|
|
|
{ |
721
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
722
|
|
|
|
|
|
|
{ |
723
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
724
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
725
|
|
|
|
|
|
|
my $spaces = length($1); |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
728
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
729
|
|
|
|
|
|
|
{ |
730
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
731
|
|
|
|
|
|
|
{ |
732
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
733
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
734
|
|
|
|
|
|
|
{ |
735
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
743
|
|
|
|
|
|
|
{ |
744
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
745
|
|
|
|
|
|
|
} |
746
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
else |
749
|
|
|
|
|
|
|
{ |
750
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
$curLine+=1; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
close CHANGEDATA; |
757
|
|
|
|
|
|
|
close CHANGEDATANEW; |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
$changeDataFile2 = "$WNSEARCHDICT/data.$changeLocationPos2"; |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile2.new") or die $!; |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
764
|
|
|
|
|
|
|
$|=1; |
765
|
|
|
|
|
|
|
select($fhData); |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile2" or die $!; |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
my $curLine = 1; |
770
|
|
|
|
|
|
|
while() |
771
|
|
|
|
|
|
|
{ |
772
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
773
|
|
|
|
|
|
|
{ |
774
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
775
|
|
|
|
|
|
|
{ |
776
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
777
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
778
|
|
|
|
|
|
|
my $spaces = length($1); |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
781
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
782
|
|
|
|
|
|
|
{ |
783
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
784
|
|
|
|
|
|
|
{ |
785
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
786
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
787
|
|
|
|
|
|
|
{ |
788
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
} |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
796
|
|
|
|
|
|
|
{ |
797
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
else |
802
|
|
|
|
|
|
|
{ |
803
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
$curLine+=1; |
806
|
|
|
|
|
|
|
} |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
close CHANGEDATA; |
810
|
|
|
|
|
|
|
close CHANGEDATANEW; |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
$changeDataFile3 = "$WNSEARCHDICT/data.$changeLocationPos3"; |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile3.new") or die $!; |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
817
|
|
|
|
|
|
|
$|=1; |
818
|
|
|
|
|
|
|
select($fhData); |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile3" or die $!; |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
my $curLine = 1; |
823
|
|
|
|
|
|
|
while() |
824
|
|
|
|
|
|
|
{ |
825
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
826
|
|
|
|
|
|
|
{ |
827
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
828
|
|
|
|
|
|
|
{ |
829
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
830
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
831
|
|
|
|
|
|
|
my $spaces = length($1); |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
834
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
835
|
|
|
|
|
|
|
{ |
836
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
837
|
|
|
|
|
|
|
{ |
838
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
839
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
840
|
|
|
|
|
|
|
{ |
841
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
849
|
|
|
|
|
|
|
{ |
850
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
else |
855
|
|
|
|
|
|
|
{ |
856
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
$curLine+=1; |
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
close CHANGEDATA; |
863
|
|
|
|
|
|
|
close CHANGEDATANEW; |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
} |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
if($write == 1)#if write was successful, overwrite old files with new. |
871
|
|
|
|
|
|
|
{ |
872
|
|
|
|
|
|
|
#make backup files for last change |
873
|
|
|
|
|
|
|
#first remove old last files |
874
|
|
|
|
|
|
|
unlink glob "$WNSEARCHDICT/*.last"; |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
#next make new last files |
877
|
|
|
|
|
|
|
copy($indexFile, "$indexFile.last"); |
878
|
|
|
|
|
|
|
copy($dataFile, "$dataFile.last"); |
879
|
|
|
|
|
|
|
copy($senseFile, "$senseFile.last"); |
880
|
|
|
|
|
|
|
copy($changeDataFile1, "$changeDataFile1.last"); |
881
|
|
|
|
|
|
|
copy($changeDataFile2, "$changeDataFile2.last"); |
882
|
|
|
|
|
|
|
copy($changeDataFile3, "$changeDataFile3.last"); |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
#if no backup files exists for restoreWordnet() make for easy revert. |
885
|
|
|
|
|
|
|
my $backupcheck = "$indexFile.backup"; |
886
|
|
|
|
|
|
|
unless(-f $backupcheck) |
887
|
|
|
|
|
|
|
{ |
888
|
|
|
|
|
|
|
copy($indexFile, "$indexFile.backup"); |
889
|
|
|
|
|
|
|
copy($dataFile, "$dataFile.backup"); |
890
|
|
|
|
|
|
|
copy($changeDataFile1, "$changeDataFile1.backup"); |
891
|
|
|
|
|
|
|
copy($changeDataFile2, "$changeDataFile2.backup"); |
892
|
|
|
|
|
|
|
copy($changeDataFile3, "$changeDataFile3.backup"); |
893
|
|
|
|
|
|
|
} |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
unless(-f "$senseFile.backup") |
896
|
|
|
|
|
|
|
{ |
897
|
|
|
|
|
|
|
copy($senseFile, "$senseFile.backup"); |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
if(-f "$dataFile.temp") |
901
|
|
|
|
|
|
|
{ |
902
|
|
|
|
|
|
|
unlink "$dataFile.temp"; |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
#overwrite old files with new updated files |
906
|
|
|
|
|
|
|
unlink $indexFile; |
907
|
|
|
|
|
|
|
unlink $dataFile; |
908
|
|
|
|
|
|
|
unlink $senseFile; |
909
|
|
|
|
|
|
|
unlink $changeDataFile1; |
910
|
|
|
|
|
|
|
unlink $changeDataFile2; |
911
|
|
|
|
|
|
|
unlink $changeDataFile3; |
912
|
|
|
|
|
|
|
move("$indexFile.new", $indexFile); |
913
|
|
|
|
|
|
|
move("$dataFile.new", $dataFile); |
914
|
|
|
|
|
|
|
move("$senseFile.new", $senseFile); |
915
|
|
|
|
|
|
|
move("$changeDataFile1.new", $changeDataFile1); |
916
|
|
|
|
|
|
|
move("$changeDataFile2.new", $changeDataFile2); |
917
|
|
|
|
|
|
|
move("$changeDataFile3.new", $changeDataFile3); |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
} |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=item $obj->merge($newSynset, $location) |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
Takes in a new synset and inserts it into WordNet at the specified location |
926
|
|
|
|
|
|
|
by merging its lemma into the existing synset found there. The location should |
927
|
|
|
|
|
|
|
be represented by "lemma#pos#senseNum". For example, to merge to the 2nd sense |
928
|
|
|
|
|
|
|
of the noun window, the location would be "window#n#2". |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Parameters: New synset as an array reference in the form (lemma, part-of-speech, item-id, definition) |
931
|
|
|
|
|
|
|
or "word\tpos\titem-id\tdef", and location to be inserted in form |
932
|
|
|
|
|
|
|
(item-id, WordNet sense), passed as an array reference. |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
Returns: nothing |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=cut |
937
|
|
|
|
|
|
|
sub merge() |
938
|
|
|
|
|
|
|
{ |
939
|
|
|
|
|
|
|
#need to load in new QueryData |
940
|
|
|
|
|
|
|
$wn = WordNet::QueryData->new; |
941
|
|
|
|
|
|
|
my $base = 0; |
942
|
|
|
|
|
|
|
if(scalar @_ == 3)#checks if method entered by object. |
943
|
|
|
|
|
|
|
{ |
944
|
|
|
|
|
|
|
$base = 1; |
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
my @newSyn = (); |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
if(ref($_[$base]) eq 'ARRAY') |
950
|
|
|
|
|
|
|
{ |
951
|
|
|
|
|
|
|
@newSyn =@{$_[$base]}; |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
else |
954
|
|
|
|
|
|
|
{ |
955
|
|
|
|
|
|
|
@newSyn = split("\t", $_[$base]); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
$base = $base +1; |
959
|
|
|
|
|
|
|
my $pos = substr($newSyn[1], 0, 1); |
960
|
|
|
|
|
|
|
my @location = @{$_[$base]}; |
961
|
|
|
|
|
|
|
my $write = 1; #write flag changes to 0 if error occurs so no write() will occur. |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
if(scalar @newSyn < 4) |
964
|
|
|
|
|
|
|
{ |
965
|
|
|
|
|
|
|
my $self = shift; |
966
|
|
|
|
|
|
|
$self->{error} = 2; |
967
|
|
|
|
|
|
|
$self->{errorString} = "New synset does not contain enough elements."; |
968
|
|
|
|
|
|
|
$write = 0; |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
if(scalar @location < 2) |
972
|
|
|
|
|
|
|
{ |
973
|
|
|
|
|
|
|
my $self = shift; |
974
|
|
|
|
|
|
|
$self->{error} = 2; |
975
|
|
|
|
|
|
|
$self->{errorString} = "Location does not contain enough elements."; |
976
|
|
|
|
|
|
|
$write = 0; |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
unless (defined $wn->offset("$location[1]")) |
980
|
|
|
|
|
|
|
{ |
981
|
|
|
|
|
|
|
my $self = shift; |
982
|
|
|
|
|
|
|
$self->{error} = 2; |
983
|
|
|
|
|
|
|
$self->{errorString} = "Location does not exist in WordNet."; |
984
|
|
|
|
|
|
|
$write = 0; |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
if($write == 1) |
988
|
|
|
|
|
|
|
{ |
989
|
|
|
|
|
|
|
my $newOffset = findNewOffset($newSyn[1]); |
990
|
|
|
|
|
|
|
my %offsetMap; #used to stored changes in offsets. |
991
|
|
|
|
|
|
|
my %CRNotice; |
992
|
|
|
|
|
|
|
my %DataSpace; |
993
|
|
|
|
|
|
|
my $indexPos = ""; |
994
|
|
|
|
|
|
|
my $dataPos = ""; |
995
|
|
|
|
|
|
|
my $indexSense = ""; |
996
|
|
|
|
|
|
|
my $posNum = 0; |
997
|
|
|
|
|
|
|
my $locationLemma = $location[1]; |
998
|
|
|
|
|
|
|
$locationLemma =~ s/#.*//; #extract lemma |
999
|
|
|
|
|
|
|
my $locationPos = $newSyn[1]; #must be same pos as new. |
1000
|
|
|
|
|
|
|
my $locationOffset = $wn->offset("$location[1]"); |
1001
|
|
|
|
|
|
|
while(length($locationOffset) < 8) #QueryData->offset() does not keep the 8 digits, need to add back lost 0's |
1002
|
|
|
|
|
|
|
{ |
1003
|
|
|
|
|
|
|
$locationOffset = "0".$locationOffset; |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
my $indexFile = "$WNSEARCHDICT/index.$locationPos"; |
1006
|
|
|
|
|
|
|
my $dataFile = "$WNSEARCHDICT/data.$locationPos"; |
1007
|
|
|
|
|
|
|
my $senseFile = "$WNSEARCHDICT/index.sense"; |
1008
|
|
|
|
|
|
|
open (WNINDEXNEW, '>', "$indexFile.new") or die $!; |
1009
|
|
|
|
|
|
|
open (WNDATANEW, '>', "$dataFile.new") or die $!; |
1010
|
|
|
|
|
|
|
open (WNSENSENEW, '>', "$senseFile.new") or die $!; |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
#make filehandles hot |
1013
|
|
|
|
|
|
|
my $fhIndex = select(WNINDEXNEW); |
1014
|
|
|
|
|
|
|
$|=1; |
1015
|
|
|
|
|
|
|
select($fhIndex); |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
my $fhData = select(WNDATANEW); |
1018
|
|
|
|
|
|
|
$|=1; |
1019
|
|
|
|
|
|
|
select($fhData); |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
my $fhSense = select(WNSENSENEW); |
1022
|
|
|
|
|
|
|
$|=1; |
1023
|
|
|
|
|
|
|
select($fhSense); |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
if($pos eq "n") |
1026
|
|
|
|
|
|
|
{ |
1027
|
|
|
|
|
|
|
$posNum = 1; |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
else |
1030
|
|
|
|
|
|
|
{ |
1031
|
|
|
|
|
|
|
if($pos eq "v") |
1032
|
|
|
|
|
|
|
{ |
1033
|
|
|
|
|
|
|
$posNum = 2; |
1034
|
|
|
|
|
|
|
} |
1035
|
|
|
|
|
|
|
else |
1036
|
|
|
|
|
|
|
{ |
1037
|
|
|
|
|
|
|
my $self = shift; |
1038
|
|
|
|
|
|
|
$self->{error} = 2; |
1039
|
|
|
|
|
|
|
$self->{errorString} = "Part of speech must be verb or noun"; |
1040
|
|
|
|
|
|
|
$write = 0; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
if(isNewWord($newSyn[0], $newSyn[1]) == 0) |
1046
|
|
|
|
|
|
|
{ |
1047
|
|
|
|
|
|
|
my %synIndex = %{getIndexInfo($locationLemma, $locationPos)}; |
1048
|
|
|
|
|
|
|
my %synData = %{getDataInfo($locationOffset, $locationPos)}; |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
#print to three required files. |
1051
|
|
|
|
|
|
|
open WNINDEX, "$indexFile" or die $!; |
1052
|
|
|
|
|
|
|
open WNDATA, "$dataFile" or die $!; |
1053
|
|
|
|
|
|
|
open WNSENSE, "$senseFile" or die $!; |
1054
|
|
|
|
|
|
|
open (WNDATATEMP, '>', "$dataFile.temp") or die $!; |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
my $changed = 0; |
1057
|
|
|
|
|
|
|
my $curLine = 1; |
1058
|
|
|
|
|
|
|
my $newWordLength = length($newSyn[0]) + 2; |
1059
|
|
|
|
|
|
|
while() |
1060
|
|
|
|
|
|
|
{ |
1061
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1062
|
|
|
|
|
|
|
{ |
1063
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
1064
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
1065
|
|
|
|
|
|
|
$DataSpace{$curLine} = length($1); |
1066
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1067
|
|
|
|
|
|
|
{ |
1068
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1069
|
|
|
|
|
|
|
if($changed == 1) |
1070
|
|
|
|
|
|
|
{ |
1071
|
|
|
|
|
|
|
my $newNewOffset = $tempLine[0] + $newWordLength; |
1072
|
|
|
|
|
|
|
while(length($newNewOffset) < 8) |
1073
|
|
|
|
|
|
|
{ |
1074
|
|
|
|
|
|
|
$newNewOffset = "0".$newNewOffset; |
1075
|
|
|
|
|
|
|
} |
1076
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $newNewOffset; |
1077
|
|
|
|
|
|
|
} |
1078
|
|
|
|
|
|
|
else |
1079
|
|
|
|
|
|
|
{ |
1080
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $tempLine[0]; |
1081
|
|
|
|
|
|
|
} |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
if($tempLine[0] == $locationOffset) |
1084
|
|
|
|
|
|
|
{ |
1085
|
|
|
|
|
|
|
$changed = 1; |
1086
|
|
|
|
|
|
|
} |
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
else |
1089
|
|
|
|
|
|
|
{ |
1090
|
|
|
|
|
|
|
$CRNotice{$curLine} = $tempIn; |
1091
|
|
|
|
|
|
|
} |
1092
|
|
|
|
|
|
|
$curLine+=1; |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
print WNDATATEMP "$tempIn\n"; |
1095
|
|
|
|
|
|
|
} |
1096
|
|
|
|
|
|
|
} |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
my $lowerSyn = lc($newSyn[0]); |
1099
|
|
|
|
|
|
|
$indexPos = "$lowerSyn $pos 1 $synIndex{'p_cnt'} $synIndex{'ptr_symbol'} 1 0 $locationOffset"; |
1100
|
|
|
|
|
|
|
my $wcnt = $synData{'w_cnt'} + 1; |
1101
|
|
|
|
|
|
|
if(length $wcnt < 2) |
1102
|
|
|
|
|
|
|
{ |
1103
|
|
|
|
|
|
|
$wcnt = "0".$wcnt; #needs to be represented by 2 digit number. |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
if($pos eq 'v') |
1107
|
|
|
|
|
|
|
{ |
1108
|
|
|
|
|
|
|
$dataPos = "$locationOffset $synData{'lex_filenum'} $synData{'ss_type'} $wcnt $synData{'word_lex_id'} $newSyn[0] 0 $synData{'p_cnt'} $synData{'ptr'} $synData{'f_cnt'} $synData{'frames'} | $synData{'gloss'}"; |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
else |
1112
|
|
|
|
|
|
|
{ |
1113
|
|
|
|
|
|
|
$dataPos = "$locationOffset $synData{'lex_filenum'} $synData{'ss_type'} $wcnt $synData{'word_lex_id'} $newSyn[0] 0 $synData{'p_cnt'} $synData{'ptr'} | $synData{'gloss'}"; |
1114
|
|
|
|
|
|
|
} |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
$indexSense = "$lowerSyn%$posNum:$synData{'lex_filenum'}:00:: $locationOffset 1 0"; |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
close WNDATATEMP; |
1119
|
|
|
|
|
|
|
open WNDATATEMP, "$dataFile.temp" or die $!; |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
$curLine = 1; |
1122
|
|
|
|
|
|
|
while() |
1123
|
|
|
|
|
|
|
{ |
1124
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1125
|
|
|
|
|
|
|
{ |
1126
|
|
|
|
|
|
|
if($tempIn =~ /^$locationOffset\b/) |
1127
|
|
|
|
|
|
|
{ |
1128
|
|
|
|
|
|
|
$tempIn = $dataPos; |
1129
|
|
|
|
|
|
|
} |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1132
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
1133
|
|
|
|
|
|
|
{ |
1134
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
1135
|
|
|
|
|
|
|
{ |
1136
|
|
|
|
|
|
|
#only change offsets with respect to current pos or first offset |
1137
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos || $i == 0) |
1138
|
|
|
|
|
|
|
{ |
1139
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
1140
|
|
|
|
|
|
|
} |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
} |
1143
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1146
|
|
|
|
|
|
|
{ |
1147
|
|
|
|
|
|
|
for($i=1; $i <= $DataSpace{$curLine}; $i++) |
1148
|
|
|
|
|
|
|
{ |
1149
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
1150
|
|
|
|
|
|
|
} |
1151
|
|
|
|
|
|
|
print WNDATANEW "$tempIn\n"; |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
else |
1154
|
|
|
|
|
|
|
{ |
1155
|
|
|
|
|
|
|
print WNDATANEW "$CRNotice{$curLine}\n"; |
1156
|
|
|
|
|
|
|
} |
1157
|
|
|
|
|
|
|
$curLine+=1; |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
my $alpha = 1; |
1162
|
|
|
|
|
|
|
$curLine = 1; |
1163
|
|
|
|
|
|
|
while() |
1164
|
|
|
|
|
|
|
{ |
1165
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1166
|
|
|
|
|
|
|
{ |
1167
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1168
|
|
|
|
|
|
|
{ |
1169
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
#add in $indexPos alphabetically |
1172
|
|
|
|
|
|
|
if($alpha == 1) |
1173
|
|
|
|
|
|
|
{ |
1174
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
1175
|
|
|
|
|
|
|
{ |
1176
|
|
|
|
|
|
|
print WNINDEXNEW "$indexPos \n"; |
1177
|
|
|
|
|
|
|
$alpha = 0; |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
my $tmpPcnt = $tempLine[2]; |
1182
|
|
|
|
|
|
|
my $offsetPtr = scalar(@tempLine) - 1; |
1183
|
|
|
|
|
|
|
while($tmpPcnt > 0) |
1184
|
|
|
|
|
|
|
{ |
1185
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$offsetPtr]}) |
1186
|
|
|
|
|
|
|
{ |
1187
|
|
|
|
|
|
|
$tempLine[$offsetPtr] = "$offsetMap{$tempLine[$offsetPtr]}"; |
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
$tmpPcnt-=1; |
1190
|
|
|
|
|
|
|
$offsetPtr-=1; |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1193
|
|
|
|
|
|
|
} |
1194
|
|
|
|
|
|
|
else |
1195
|
|
|
|
|
|
|
{ |
1196
|
|
|
|
|
|
|
$curLine+=1; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
print WNINDEXNEW "$tempIn \n"; |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
} |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
$alpha = 1; |
1203
|
|
|
|
|
|
|
while() |
1204
|
|
|
|
|
|
|
{ |
1205
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1206
|
|
|
|
|
|
|
{ |
1207
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1208
|
|
|
|
|
|
|
#add in $indexSense alphabetically |
1209
|
|
|
|
|
|
|
if($alpha == 1) |
1210
|
|
|
|
|
|
|
{ |
1211
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
1212
|
|
|
|
|
|
|
{ |
1213
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
1214
|
|
|
|
|
|
|
$alpha = 0; |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
} |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[1]}) |
1219
|
|
|
|
|
|
|
{ |
1220
|
|
|
|
|
|
|
my $percLoc = index($tempLine[0], '%'); |
1221
|
|
|
|
|
|
|
my $tPNumLoc = $perlLoc + 1; |
1222
|
|
|
|
|
|
|
my $targetPosNum = substr $tempLine[0], $tPNumLoc, 1; |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
#only change corresponding pos words |
1225
|
|
|
|
|
|
|
if($targetPosNum == $posNum) |
1226
|
|
|
|
|
|
|
{ |
1227
|
|
|
|
|
|
|
$tempLine[1] = "$offsetMap{$tempLine[1]}"; |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
} |
1230
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
print WNSENSENEW "$tempIn\n"; |
1233
|
|
|
|
|
|
|
} |
1234
|
|
|
|
|
|
|
} |
1235
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
1236
|
|
|
|
|
|
|
close WNINDEX; |
1237
|
|
|
|
|
|
|
close WNDATA; |
1238
|
|
|
|
|
|
|
close WNSENSE; |
1239
|
|
|
|
|
|
|
close WNDATATEMP; |
1240
|
|
|
|
|
|
|
} |
1241
|
|
|
|
|
|
|
else #lemma already exists |
1242
|
|
|
|
|
|
|
{ |
1243
|
|
|
|
|
|
|
my %synIndex = %{getIndexInfo($locationLemma, $locationPos)}; |
1244
|
|
|
|
|
|
|
my %synData = %{getDataInfo($locationOffset, $locationPos)}; |
1245
|
|
|
|
|
|
|
my %lemmaIndex = %{getIndexInfo($newSyn[0], $newSyn[1])}; |
1246
|
|
|
|
|
|
|
my $newSynNum =$lemmaIndex{'synset_cnt'} + 1; |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
#print to three required files. |
1249
|
|
|
|
|
|
|
open WNINDEX, "$indexFile" or die $!; |
1250
|
|
|
|
|
|
|
open WNDATA, "$dataFile" or die $!; |
1251
|
|
|
|
|
|
|
open WNSENSE, "$senseFile" or die $!; |
1252
|
|
|
|
|
|
|
open (WNDATATEMP, '>', "$dataFile.temp") or die $!; |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
my $changed = 0; |
1255
|
|
|
|
|
|
|
my $curLine = 1; |
1256
|
|
|
|
|
|
|
my $newWordLength = length($newSyn[0]) + 3; |
1257
|
|
|
|
|
|
|
while() |
1258
|
|
|
|
|
|
|
{ |
1259
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1260
|
|
|
|
|
|
|
{ |
1261
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
1262
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
1263
|
|
|
|
|
|
|
$DataSpace{$curLine} = length($1); |
1264
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1265
|
|
|
|
|
|
|
{ |
1266
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1267
|
|
|
|
|
|
|
if($changed == 1) |
1268
|
|
|
|
|
|
|
{ |
1269
|
|
|
|
|
|
|
my $newNewOffset = $tempLine[0] + $newWordLength; |
1270
|
|
|
|
|
|
|
while(length($newNewOffset) < 8) |
1271
|
|
|
|
|
|
|
{ |
1272
|
|
|
|
|
|
|
$newNewOffset = "0".$newNewOffset; |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $newNewOffset; |
1275
|
|
|
|
|
|
|
} |
1276
|
|
|
|
|
|
|
else |
1277
|
|
|
|
|
|
|
{ |
1278
|
|
|
|
|
|
|
$offsetMap{$tempLine[0]} = $tempLine[0]; |
1279
|
|
|
|
|
|
|
} |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
if($tempLine[0] == $locationOffset) |
1282
|
|
|
|
|
|
|
{ |
1283
|
|
|
|
|
|
|
$changed = 1; |
1284
|
|
|
|
|
|
|
} |
1285
|
|
|
|
|
|
|
} |
1286
|
|
|
|
|
|
|
else |
1287
|
|
|
|
|
|
|
{ |
1288
|
|
|
|
|
|
|
$CRNotice{$curLine} = $tempIn; |
1289
|
|
|
|
|
|
|
} |
1290
|
|
|
|
|
|
|
$curLine+=1; |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
print WNDATATEMP "$tempIn\n"; |
1293
|
|
|
|
|
|
|
} |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
my $lowerSyn = lc($newSyn[0]); |
1298
|
|
|
|
|
|
|
$indexPos = "$lowerSyn $pos $newSynNum $lemmaIndex{'p_cnt'} $lemmaIndex{'ptr_symbol'} $newSynNum $lemmaIndex{'tagsense_cnt'} $lemmaIndex{'synset_offset'} $locationOffset"; |
1299
|
|
|
|
|
|
|
my $wcnt = $synData{'w_cnt'} + 1; |
1300
|
|
|
|
|
|
|
if(length $wcnt < 2) |
1301
|
|
|
|
|
|
|
{ |
1302
|
|
|
|
|
|
|
$wcnt = "0".$wcnt; #needs to be represented by 2 digit number. |
1303
|
|
|
|
|
|
|
} |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
if($pos eq 'v') |
1306
|
|
|
|
|
|
|
{ |
1307
|
|
|
|
|
|
|
$dataPos = "$locationOffset $synData{'lex_filenum'} $synData{'ss_type'} $wcnt $synData{'word_lex_id'} $newSyn[0] 0 $synData{'p_cnt'} $synData{'ptr'} $synData{'f_cnt'} $synData{'frames'} | $synData{'gloss'}"; |
1308
|
|
|
|
|
|
|
} |
1309
|
|
|
|
|
|
|
else |
1310
|
|
|
|
|
|
|
{ |
1311
|
|
|
|
|
|
|
$dataPos = "$locationOffset $synData{'lex_filenum'} $synData{'ss_type'} $wcnt $synData{'word_lex_id'} $newSyn[0] 0 $synData{'p_cnt'} $synData{'ptr'} | $synData{'gloss'}"; |
1312
|
|
|
|
|
|
|
} |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
$indexSense = "$lowerSyn%$posNum:$synData{'lex_filenum'}:00:: $locationOffset $newSynNum 0"; |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
close WNDATATEMP; |
1317
|
|
|
|
|
|
|
open WNDATATEMP, "$dataFile.temp" or die $!; |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
$curLine = 1; |
1320
|
|
|
|
|
|
|
while() |
1321
|
|
|
|
|
|
|
{ |
1322
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1323
|
|
|
|
|
|
|
{ |
1324
|
|
|
|
|
|
|
if($tempIn =~ /^$locationOffset\b/) |
1325
|
|
|
|
|
|
|
{ |
1326
|
|
|
|
|
|
|
$tempIn = $dataPos; |
1327
|
|
|
|
|
|
|
} |
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1330
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
1331
|
|
|
|
|
|
|
{ |
1332
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
1333
|
|
|
|
|
|
|
{ |
1334
|
|
|
|
|
|
|
#only change offsets with respect to current pos or first offset |
1335
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos || $i == 0) |
1336
|
|
|
|
|
|
|
{ |
1337
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
} |
1340
|
|
|
|
|
|
|
} |
1341
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1344
|
|
|
|
|
|
|
{ |
1345
|
|
|
|
|
|
|
for($i=1; $i <= $DataSpace{$curLine}; $i++) |
1346
|
|
|
|
|
|
|
{ |
1347
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
1348
|
|
|
|
|
|
|
} |
1349
|
|
|
|
|
|
|
print WNDATANEW "$tempIn\n"; |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
else |
1352
|
|
|
|
|
|
|
{ |
1353
|
|
|
|
|
|
|
print WNDATANEW "$CRNotice{$curLine}\n"; |
1354
|
|
|
|
|
|
|
} |
1355
|
|
|
|
|
|
|
$curLine+=1; |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
} |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
$curLine = 1; |
1360
|
|
|
|
|
|
|
while() |
1361
|
|
|
|
|
|
|
{ |
1362
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1363
|
|
|
|
|
|
|
{ |
1364
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1365
|
|
|
|
|
|
|
{ |
1366
|
|
|
|
|
|
|
if($tempIn =~ /^$newSyn[0]\b[^-]/) |
1367
|
|
|
|
|
|
|
{ |
1368
|
|
|
|
|
|
|
$tempIn = $indexPos; |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1371
|
|
|
|
|
|
|
my $tmpPcnt = $tempLine[2]; |
1372
|
|
|
|
|
|
|
my $offsetPtr = scalar(@tempLine) - 1; |
1373
|
|
|
|
|
|
|
while($tmpPcnt > 0) |
1374
|
|
|
|
|
|
|
{ |
1375
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$offsetPtr]}) |
1376
|
|
|
|
|
|
|
{ |
1377
|
|
|
|
|
|
|
$tempLine[$offsetPtr] = "$offsetMap{$tempLine[$offsetPtr]}"; |
1378
|
|
|
|
|
|
|
} |
1379
|
|
|
|
|
|
|
$tmpPcnt-=1; |
1380
|
|
|
|
|
|
|
$offsetPtr-=1; |
1381
|
|
|
|
|
|
|
} |
1382
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
else |
1385
|
|
|
|
|
|
|
{ |
1386
|
|
|
|
|
|
|
$curLine+=1; |
1387
|
|
|
|
|
|
|
} |
1388
|
|
|
|
|
|
|
print WNINDEXNEW "$tempIn \n"; |
1389
|
|
|
|
|
|
|
} |
1390
|
|
|
|
|
|
|
} |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
$alpha = 1; |
1393
|
|
|
|
|
|
|
while() |
1394
|
|
|
|
|
|
|
{ |
1395
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1396
|
|
|
|
|
|
|
{ |
1397
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1398
|
|
|
|
|
|
|
#add in $indexSense alphabetically |
1399
|
|
|
|
|
|
|
if($alpha == 1) |
1400
|
|
|
|
|
|
|
{ |
1401
|
|
|
|
|
|
|
if(($tempLine[0] cmp $lowerSyn) == 1 ) |
1402
|
|
|
|
|
|
|
{ |
1403
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
1404
|
|
|
|
|
|
|
$alpha = 0; |
1405
|
|
|
|
|
|
|
} |
1406
|
|
|
|
|
|
|
} |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[1]}) |
1409
|
|
|
|
|
|
|
{ |
1410
|
|
|
|
|
|
|
my $percLoc = index($tempLine[0], '%'); |
1411
|
|
|
|
|
|
|
my $tPNumLoc = $perlLoc + 1; |
1412
|
|
|
|
|
|
|
my $targetPosNum = substr $tempLine[0], $tPNumLoc, 1; |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
#only change corresponding pos words |
1415
|
|
|
|
|
|
|
if($targetPosNum == $posNum) |
1416
|
|
|
|
|
|
|
{ |
1417
|
|
|
|
|
|
|
$tempLine[1] = "$offsetMap{$tempLine[1]}"; |
1418
|
|
|
|
|
|
|
} |
1419
|
|
|
|
|
|
|
} |
1420
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
print WNSENSENEW "$tempIn\n"; |
1423
|
|
|
|
|
|
|
} |
1424
|
|
|
|
|
|
|
} |
1425
|
|
|
|
|
|
|
print WNSENSENEW "$indexSense\n"; |
1426
|
|
|
|
|
|
|
close WNINDEX; |
1427
|
|
|
|
|
|
|
close WNDATA; |
1428
|
|
|
|
|
|
|
close WNSENSE; |
1429
|
|
|
|
|
|
|
close WNDATATEMP; |
1430
|
|
|
|
|
|
|
} |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
close WNSENSENEW; |
1433
|
|
|
|
|
|
|
close WNDATANEW; |
1434
|
|
|
|
|
|
|
close WNSENSENEW; |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
my $changeDataFile = ""; |
1437
|
|
|
|
|
|
|
if($write == 1)#if write was successful, data files needs to change offsets |
1438
|
|
|
|
|
|
|
{ |
1439
|
|
|
|
|
|
|
my $changePos = ''; |
1440
|
|
|
|
|
|
|
my $changeLocationPos = ''; |
1441
|
|
|
|
|
|
|
if($pos eq 'n') |
1442
|
|
|
|
|
|
|
{ |
1443
|
|
|
|
|
|
|
$changePos1 = 'v'; |
1444
|
|
|
|
|
|
|
$changeLocationPos1 = 'verb'; |
1445
|
|
|
|
|
|
|
$changePos2 = 'r'; |
1446
|
|
|
|
|
|
|
$changeLocationPos2 = 'adv'; |
1447
|
|
|
|
|
|
|
$changePos3 = 'a'; |
1448
|
|
|
|
|
|
|
$changeLocationPos3 = 'adj'; |
1449
|
|
|
|
|
|
|
} |
1450
|
|
|
|
|
|
|
else |
1451
|
|
|
|
|
|
|
{ |
1452
|
|
|
|
|
|
|
$changePos1 = 'n'; |
1453
|
|
|
|
|
|
|
$changeLocationPos1 = 'noun'; |
1454
|
|
|
|
|
|
|
$changePos2 = 'r'; |
1455
|
|
|
|
|
|
|
$changeLocationPos2 = 'adv'; |
1456
|
|
|
|
|
|
|
$changePos3 = 'a'; |
1457
|
|
|
|
|
|
|
$changeLocationPos3 = 'adj'; |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
$changeDataFile1 = "$WNSEARCHDICT/data.$changeLocationPos1"; |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile1.new") or die $!; |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
1465
|
|
|
|
|
|
|
$|=1; |
1466
|
|
|
|
|
|
|
select($fhData); |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile1" or die $!; |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
my $curLine = 1; |
1471
|
|
|
|
|
|
|
while() |
1472
|
|
|
|
|
|
|
{ |
1473
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1474
|
|
|
|
|
|
|
{ |
1475
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1476
|
|
|
|
|
|
|
{ |
1477
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
1478
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
1479
|
|
|
|
|
|
|
my $spaces = length($1); |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1482
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
1483
|
|
|
|
|
|
|
{ |
1484
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
1485
|
|
|
|
|
|
|
{ |
1486
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
1487
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
1488
|
|
|
|
|
|
|
{ |
1489
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
} |
1492
|
|
|
|
|
|
|
} |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
1497
|
|
|
|
|
|
|
{ |
1498
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
1499
|
|
|
|
|
|
|
} |
1500
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1501
|
|
|
|
|
|
|
} |
1502
|
|
|
|
|
|
|
else |
1503
|
|
|
|
|
|
|
{ |
1504
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1505
|
|
|
|
|
|
|
} |
1506
|
|
|
|
|
|
|
$curLine+=1; |
1507
|
|
|
|
|
|
|
} |
1508
|
|
|
|
|
|
|
} |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
close CHANGEDATA; |
1511
|
|
|
|
|
|
|
close CHANGEDATANEW; |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
$changeDataFile2 = "$WNSEARCHDICT/data.$changeLocationPos2"; |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile2.new") or die $!; |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
1518
|
|
|
|
|
|
|
$|=1; |
1519
|
|
|
|
|
|
|
select($fhData); |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile2" or die $!; |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
my $curLine = 1; |
1524
|
|
|
|
|
|
|
while() |
1525
|
|
|
|
|
|
|
{ |
1526
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1527
|
|
|
|
|
|
|
{ |
1528
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1529
|
|
|
|
|
|
|
{ |
1530
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
1531
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
1532
|
|
|
|
|
|
|
my $spaces = length($1); |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1535
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
1536
|
|
|
|
|
|
|
{ |
1537
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
1538
|
|
|
|
|
|
|
{ |
1539
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
1540
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
1541
|
|
|
|
|
|
|
{ |
1542
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
1543
|
|
|
|
|
|
|
} |
1544
|
|
|
|
|
|
|
} |
1545
|
|
|
|
|
|
|
} |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
1550
|
|
|
|
|
|
|
{ |
1551
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
1552
|
|
|
|
|
|
|
} |
1553
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1554
|
|
|
|
|
|
|
} |
1555
|
|
|
|
|
|
|
else |
1556
|
|
|
|
|
|
|
{ |
1557
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1558
|
|
|
|
|
|
|
} |
1559
|
|
|
|
|
|
|
$curLine+=1; |
1560
|
|
|
|
|
|
|
} |
1561
|
|
|
|
|
|
|
} |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
close CHANGEDATA; |
1564
|
|
|
|
|
|
|
close CHANGEDATANEW; |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
$changeDataFile3 = "$WNSEARCHDICT/data.$changeLocationPos3"; |
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
open (CHANGEDATANEW, '>', "$changeDataFile3.new") or die $!; |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
my $fhData = select(CHANGEDATANEW); |
1571
|
|
|
|
|
|
|
$|=1; |
1572
|
|
|
|
|
|
|
select($fhData); |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
open CHANGEDATA, "$changeDataFile3" or die $!; |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
my $curLine = 1; |
1577
|
|
|
|
|
|
|
while() |
1578
|
|
|
|
|
|
|
{ |
1579
|
|
|
|
|
|
|
for $tempIn (split("\n")) |
1580
|
|
|
|
|
|
|
{ |
1581
|
|
|
|
|
|
|
if($curLine > $wnCRLength) |
1582
|
|
|
|
|
|
|
{ |
1583
|
|
|
|
|
|
|
my $spaceTmp = $tempIn; |
1584
|
|
|
|
|
|
|
$spaceTmp =~ /( *)$/; |
1585
|
|
|
|
|
|
|
my $spaces = length($1); |
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
my @tempLine = split /\s/, $tempIn; |
1588
|
|
|
|
|
|
|
for my $i (0 .. $#tempLine) |
1589
|
|
|
|
|
|
|
{ |
1590
|
|
|
|
|
|
|
if(exists $offsetMap{$tempLine[$i]}) |
1591
|
|
|
|
|
|
|
{ |
1592
|
|
|
|
|
|
|
#only change offsets with respect to current pos |
1593
|
|
|
|
|
|
|
if($tempLine[$i+1] eq $pos) |
1594
|
|
|
|
|
|
|
{ |
1595
|
|
|
|
|
|
|
$tempLine[$i] = "$offsetMap{$tempLine[$i]}"; |
1596
|
|
|
|
|
|
|
} |
1597
|
|
|
|
|
|
|
} |
1598
|
|
|
|
|
|
|
} |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
$tempIn = join(' ', @tempLine); |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
for($i=1; $i <= $spaces; $i++) |
1603
|
|
|
|
|
|
|
{ |
1604
|
|
|
|
|
|
|
$tempIn = $tempIn . " "; |
1605
|
|
|
|
|
|
|
} |
1606
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1607
|
|
|
|
|
|
|
} |
1608
|
|
|
|
|
|
|
else |
1609
|
|
|
|
|
|
|
{ |
1610
|
|
|
|
|
|
|
print CHANGEDATANEW "$tempIn\n"; |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
$curLine+=1; |
1613
|
|
|
|
|
|
|
} |
1614
|
|
|
|
|
|
|
} |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
close CHANGEDATA; |
1617
|
|
|
|
|
|
|
close CHANGEDATANEW; |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
} |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
if($write == 1)#if write was successful, overwrite old files with new. |
1622
|
|
|
|
|
|
|
{ |
1623
|
|
|
|
|
|
|
#make backup files for last change |
1624
|
|
|
|
|
|
|
#first remove old last files |
1625
|
|
|
|
|
|
|
unlink glob "$WNSEARCHDICT/*.last"; |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
#next make new last files |
1628
|
|
|
|
|
|
|
copy($indexFile, "$indexFile.last"); |
1629
|
|
|
|
|
|
|
copy($dataFile, "$dataFile.last"); |
1630
|
|
|
|
|
|
|
copy($senseFile, "$senseFile.last"); |
1631
|
|
|
|
|
|
|
copy($changeDataFile1, "$changeDataFile1.last"); |
1632
|
|
|
|
|
|
|
copy($changeDataFile2, "$changeDataFile2.last"); |
1633
|
|
|
|
|
|
|
copy($changeDataFile3, "$changeDataFile3.last"); |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
#if no backup files exists for restoreWordnet() make for easy revert. |
1636
|
|
|
|
|
|
|
my $backupcheck = "$indexFile.backup"; |
1637
|
|
|
|
|
|
|
unless(-f $backupcheck) |
1638
|
|
|
|
|
|
|
{ |
1639
|
|
|
|
|
|
|
copy($indexFile, "$indexFile.backup"); |
1640
|
|
|
|
|
|
|
copy($dataFile, "$dataFile.backup"); |
1641
|
|
|
|
|
|
|
copy($changeDataFile1, "$changeDataFile1.backup"); |
1642
|
|
|
|
|
|
|
copy($changeDataFile2, "$changeDataFile2.backup"); |
1643
|
|
|
|
|
|
|
copy($changeDataFile3, "$changeDataFile3.backup"); |
1644
|
|
|
|
|
|
|
} |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
unless(-f "$senseFile.backup") |
1647
|
|
|
|
|
|
|
{ |
1648
|
|
|
|
|
|
|
copy($senseFile, "$senseFile.backup"); |
1649
|
|
|
|
|
|
|
} |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
if(-f "$dataFile.temp") |
1652
|
|
|
|
|
|
|
{ |
1653
|
|
|
|
|
|
|
unlink "$dataFile.temp"; |
1654
|
|
|
|
|
|
|
} |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
#overwrite old files with new updated files |
1657
|
|
|
|
|
|
|
unlink $indexFile; |
1658
|
|
|
|
|
|
|
unlink $dataFile; |
1659
|
|
|
|
|
|
|
unlink $senseFile; |
1660
|
|
|
|
|
|
|
unlink $changeDataFile1; |
1661
|
|
|
|
|
|
|
unlink $changeDataFile2; |
1662
|
|
|
|
|
|
|
unlink $changeDataFile3; |
1663
|
|
|
|
|
|
|
move("$indexFile.new", $indexFile); |
1664
|
|
|
|
|
|
|
move("$dataFile.new", $dataFile); |
1665
|
|
|
|
|
|
|
move("$senseFile.new", $senseFile); |
1666
|
|
|
|
|
|
|
move("$changeDataFile1.new", $changeDataFile1); |
1667
|
|
|
|
|
|
|
move("$changeDataFile2.new", $changeDataFile2); |
1668
|
|
|
|
|
|
|
move("$changeDataFile3.new", $changeDataFile3); |
1669
|
|
|
|
|
|
|
} |
1670
|
|
|
|
|
|
|
} |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
} |
1673
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
=item $obj->restoreWordNet() |
1675
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
Causes all WordNet\dict files to be restored to their original |
1677
|
|
|
|
|
|
|
state before any inserts were performed. This is equivalent to |
1678
|
|
|
|
|
|
|
installing WordNet\dict fresh on your machine. |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
Parameter: none |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
Returns: nothing |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
=cut |
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
# Restores every index.<pos> and data.<pos> file in $WNSEARCHDICT from its
# ".backup" copy (when one exists), then restores index.sense and removes
# the ".last" undo files, since they no longer describe the restored state.
#
# Parameter: none
# Returns:   nothing
sub restoreWordNet()
{
    my $restoredAny = 0;

    # Same order as the files are written: index then data, for each pos.
    for my $pos (qw(noun verb adj adv))
    {
        for my $kind (qw(index data))
        {
            my $liveFile   = "$WNSEARCHDICT/$kind.$pos";
            my $backupFile = "$liveFile.backup";

            next unless -f $backupFile;

            unlink $liveFile;
            $restoredAny = 1;

            move($backupFile, $liveFile)
                or warn "Could not restore $liveFile from $backupFile: $!";
        }
    }

    if($restoredAny)
    {
        my $senseFile   = "$WNSEARCHDICT/index.sense";
        my $senseBackup = "$senseFile.backup";

        # Only discard the live sense index when there is a backup to put
        # in its place; otherwise index.sense would simply be lost.
        if(-f $senseBackup)
        {
            unlink $senseFile;
            move($senseBackup, $senseFile)
                or warn "Could not restore $senseFile from $senseBackup: $!";
        }

        unlink glob "$WNSEARCHDICT/*.last";
    }

    return;
}
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
=item $obj->revertLastChange() |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
Allows the user to undo the last insert made to WordNet. |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
Parameter: none |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
Returns: nothing |
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
=cut |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
# Undoes the most recent insert by moving every index.<pos>.last and
# data.<pos>.last file in $WNSEARCHDICT back over its live counterpart,
# followed by index.sense.last.
#
# Parameter: none
# Returns:   nothing
sub revertLastChange()
{
    my $revertedAny = 0;

    # Same order as the files are written: index then data, for each pos.
    for my $pos (qw(noun verb adj adv))
    {
        for my $kind (qw(index data))
        {
            my $liveFile = "$WNSEARCHDICT/$kind.$pos";
            my $lastFile = "$liveFile.last";

            next unless -f $lastFile;

            unlink $liveFile;
            $revertedAny = 1;

            move($lastFile, $liveFile)
                or warn "Could not revert $liveFile from $lastFile: $!";
        }
    }

    if($revertedAny)
    {
        my $senseFile = "$WNSEARCHDICT/index.sense";
        my $senseLast = "$senseFile.last";

        # Only discard the live sense index when there is a ".last" copy to
        # put in its place; otherwise index.sense would simply be lost.
        if(-f $senseLast)
        {
            unlink $senseFile;
            move($senseLast, $senseFile)
                or warn "Could not revert $senseFile from $senseLast: $!";
        }
    }

    return;
}
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
=item $obj->isNewWord($lemma, $pos) |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
Takes in a lemma and searches wordnet to see if it exists. |
1854
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
Parameter: the lemma to search against along with the part of speech. |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
Returns: 1 if lemma is found or 0 if not. |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=cut |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
# Looks for $lemma in $WNSEARCHDICT/index.$pos.
#
# Parameters: the lemma and its part of speech (noun, verb, adj, adv).
#             May be called as a method or as a plain function.
# Returns:    1 if the lemma has an entry in the index file, 0 otherwise.
# Dies if the index file cannot be opened.
sub isNewWord()
{
    # Three arguments means the first one is the invoking object.
    my $base  = (scalar @_ == 3) ? 1 : 0;
    my $lemma = $_[$base];
    my $pos   = $_[$base + 1];

    my $indexFile = "$WNSEARCHDICT/index.$pos"; #wn file to be searched

    open(my $indexFh, '<', $indexFile) or die $!;

    my $found = 0;
    while(my $line = <$indexFh>)
    {
        # The lemma is the first whitespace-delimited field of an index
        # line. \Q..\E keeps characters such as '.' in the lemma literal,
        # and requiring whitespace right after it prevents a lemma from
        # matching a longer entry that merely starts with it.
        if($line =~ /^\Q$lemma\E\s/)
        {
            $found = 1;
            last;
        }
    }

    close $indexFh;
    return $found;
}
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
=item $obj->getIndexInfo($lemma, $pos) |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
Takes in lemma and returns the information from the index.pos file. |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
Parameter: the lemma info required and part of speech |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
Returns: a reference to a hash of lemma info from index.pos with the following keys: |
1899
|
|
|
|
|
|
|
lemma pos synset_cnt p_cnt ptr_symbol sense_cnt tagsense_cnt synset_offset |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
=cut |
1902
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
# Reads the entry for $lemma from $WNSEARCHDICT/index.$pos and splits it
# into its fields.
#
# Parameters: the lemma and its part of speech.
#             May be called as a method or as a plain function.
# Returns:    a reference to a hash with the keys
#             lemma pos synset_cnt p_cnt ptr_symbol sense_cnt tagsense_cnt
#             synset_offset
#             ptr_symbol and synset_offset hold space-separated lists.
#             When the lemma is not found the scalar fields are undef and
#             the two list fields are empty strings.
# Dies if the index file cannot be opened.
sub getIndexInfo()
{
    # Three arguments means the first one is the invoking object.
    my $base  = (scalar @_ == 3) ? 1 : 0;
    my $lemma = $_[$base];
    my $pos   = $_[$base + 1];

    my $indexFile     = "$WNSEARCHDICT/index.$pos";
    my $indexInfoLine = "";

    open(my $indexFh, '<', $indexFile) or die $!;
    while(my $line = <$indexFh>)
    {
        chomp $line;

        # Match the lemma as the complete first field of the line.
        if($line =~ /^\Q$lemma\E\s/)
        {
            $indexInfoLine = $line;
            last;
        }
    }
    close $indexFh;

    my @index = split /\s/, $indexInfoLine;

    my %indexInfo;
    @indexInfo{qw(lemma pos synset_cnt p_cnt)} = @index[0 .. 3];

    my $pcnt = $index[3] || 0;
    my $scnt = $index[2] || 0;

    # Joins up to $count fields starting at $first, never reading past the
    # end of the line.
    my $joinFields = sub
    {
        my ($first, $count) = @_;
        my $last = $first + $count - 1;
        $last = $#index if $last > $#index;
        return join(' ', @index[$first .. $last]);
    };

    #We gather all pointer symbols into one string for storing in the hash.
    $indexInfo{'ptr_symbol'} = $joinFields->(4, $pcnt);

    # The fields after the pointer symbols start at 4 + p_cnt. This also
    # holds when p_cnt is 0, where sense_cnt sits directly at index 4.
    my $indexPtr = 4 + $pcnt;
    $indexInfo{'sense_cnt'}    = $index[$indexPtr];
    $indexInfo{'tagsense_cnt'} = $index[$indexPtr + 1];

    #Finally we gather all offsets into one string to store in the hash.
    $indexInfo{'synset_offset'} = $joinFields->($indexPtr + 2, $scnt);

    return \%indexInfo;
}
1974
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
=item $obj->getDataInfo($synsetOffset, $pos) |
1976
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
Takes in synset offset and pos to find data associated with it in data.pos. |
1978
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
Parameters: the synset offset and part of speech |
1980
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
Returns: a reference to a hash of offset info from data.pos with the following keys: |
1982
|
|
|
|
|
|
|
synset_offset lex_filenum ss_type w_cnt 'word_lex_id' p_cnt ptr | gloss |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
=cut |
1985
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
# Reads the synset line starting with $synOffset from
# $WNSEARCHDICT/data.$pos and splits it into its fields.
#
# Parameters: the synset offset and the part of speech.
#             May be called as a method or as a plain function.
# Returns:    a reference to a hash with the keys
#             synset_offset lex_filenum ss_type w_cnt word_lex_id p_cnt
#             ptr gloss (plus f_cnt and frames when $pos is "verb").
#             word_lex_id, ptr and frames hold space-separated lists; each
#             word in word_lex_id is followed by its lex_id.
# Dies if the data file cannot be opened.
sub getDataInfo()
{
    # Three arguments means the first one is the invoking object.
    my $base      = (scalar @_ == 3) ? 1 : 0;
    my $synOffset = $_[$base];
    my $pos       = $_[$base + 1];

    my $dataFile     = "$WNSEARCHDICT/data.$pos";
    my $dataInfoLine = "";

    open(my $dataFh, '<', $dataFile) or die $!;
    while(my $line = <$dataFh>)
    {
        chomp $line;
        if($line =~ /^\Q$synOffset\E\b/)
        {
            $dataInfoLine = $line;
            last;
        }
    }
    close $dataFh;

    my @data = split /\s/, $dataInfoLine;

    my %dataInfo;
    @dataInfo{qw(synset_offset lex_filenum ss_type w_cnt)} = @data[0 .. 3];

    my $dataPtr = 4; #index of the next unread field

    # Consumes up to $count fields from the current position and returns
    # them joined by single spaces.
    my $takeFields = sub
    {
        my ($count) = @_;
        my @fields;
        while($count-- > 0 && $dataPtr <= $#data)
        {
            push @fields, $data[$dataPtr++];
        }
        return join(' ', @fields);
    };

    #we must consolidate the words and their lex ids into one string. it should be noted that
    # the lex ids for each word are stored within the string in the hash not separately.
    # w_cnt is written in hexadecimal in the data files, so it has to be
    # decoded with hex(); the raw text is still what is stored in the hash.
    my $wcnt = defined $data[3] ? hex($data[3]) : 0;
    $dataInfo{'word_lex_id'} = $takeFields->(2 * $wcnt);

    $dataInfo{'p_cnt'} = $data[$dataPtr];
    $dataPtr += 1;

    #likewise, we consolidate all ptrs together into a single string.
    # Each pointer is four fields: symbol, synset offset, pos, source/target.
    my $pcnt = defined $dataInfo{'p_cnt'} ? $dataInfo{'p_cnt'} : 0;
    $dataInfo{'ptr'} = $takeFields->(4 * $pcnt);

    #if verb obtain frames as well
    if($pos eq "verb")
    {
        $dataInfo{'f_cnt'} = $data[$dataPtr];
        $dataPtr += 1;

        # Each frame is three fields: '+', frame number, word number.
        my $fcnt = defined $dataInfo{'f_cnt'} ? $dataInfo{'f_cnt'} : 0;
        $dataInfo{'frames'} = $takeFields->(3 * $fcnt);
    }

    $dataPtr += 1; #skip over '|' in file.

    #all the info that is left is the gloss, extract until no more info remains.
    my $gloss = join(' ', @data[$dataPtr .. $#data]);
    $gloss =~ s/^\s+//; #remove extra front whitespace
    $dataInfo{'gloss'} = $gloss;

    return \%dataInfo;
}
2112
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
=item $obj->getSenseInfo($synsetOffset) |
2114
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
Takes in a synset offset and returns the sense associated with the offset. |
2116
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
Parameter: the synset offset of the desired lemma |
2118
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
Returns: a reference to a hash of offset info from index.sense with the keys: |
2120
|
|
|
|
|
|
|
sense_key synset_offset sense_number tag_cnt |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
=cut |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
# Finds the first line of $WNSEARCHDICT/index.sense that contains
# $synOffset as a whole word and splits it into its fields.
#
# Parameter: the synset offset to look up.
#            May be called as a method or as a plain function.
# Returns:   a reference to a hash with the keys
#            sense_key synset_offset sense_number tag_cnt
#            (all undef when no line matches).
# Dies if index.sense cannot be opened.
sub getSenseInfo()
{
    # Two arguments means the first one is the invoking object.
    my $base      = (scalar @_ == 2) ? 1 : 0;
    my $synOffset = $_[$base];

    my $senseFile     = "$WNSEARCHDICT/index.sense";
    my $senseInfoLine = "";

    open(my $senseFh, '<', $senseFile) or die $!;
    while(my $line = <$senseFh>)
    {
        chomp $line;
        if($line =~ /\b\Q$synOffset\E\b/)
        {
            $senseInfoLine = $line;
            last;
        }
    }
    close $senseFh;

    my @sense = split /\s/, $senseInfoLine;

    my %senseInfo;
    @senseInfo{qw(sense_key synset_offset sense_number tag_cnt)} = @sense[0 .. 3];

    return \%senseInfo;
}
2161
|
|
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
=item $obj->findNewOffset($pos) |
2163
|
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
Searches through and calculates the offset for inserting. |
2165
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
Parameters: pos of new lemma |
2167
|
|
|
|
|
|
|
|
2168
|
|
|
|
|
|
|
Returns: new unused offset |
2169
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
=cut |
2171
|
|
|
|
|
|
|
|
2172
|
|
|
|
|
|
|
# Computes the offset at which a new line appended to
# $WNSEARCHDICT/data.$pos would start: the offset recorded in the first
# field of the last non-empty line, plus that line's length, plus one for
# its newline.
#
# Parameter: pos of the new lemma.
#            May be called as a method or as a plain function.
# Returns:   the new, unused offset (a plain number, not zero-padded).
# Dies if the data file cannot be opened.
sub findNewOffset()
{
    # This sub takes a single parameter, so any additional leading
    # argument is the invoking object. (The previous test for exactly
    # three arguments never matched $obj->findNewOffset($pos).)
    my $base = (scalar @_ >= 2) ? 1 : 0;
    my $pos  = $_[$base];

    my $dataFile     = "$WNSEARCHDICT/data.$pos";
    my $dataLastLine = "";

    open(my $dataFh, '<', $dataFile) or die $!;
    while(my $line = <$dataFh>)
    {
        chomp $line;
        next if $line eq ''; #blank lines never count as the last entry
        $dataLastLine = $line;
    }
    close $dataFh;

    my @data = split /\s/, $dataLastLine;

    my $lastOffset = defined $data[0] ? $data[0] : 0;
    return $lastOffset + length($dataLastLine) + 1;
}
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
=item $obj->changeWNLocation() |
2205
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
NOTE: Method not yet implemented, planned for next update. |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
Allows the user to temporarily choose the location for WordNet |
2209
|
|
|
|
|
|
|
which can be used to change between different WordNet |
2210
|
|
|
|
|
|
|
dictionaries. |
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
Parameters: New location ex. "/usr/local/WordNet-3.0" |
2213
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
Returns: nothing |
2215
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
=cut |
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
#sub changeWNLocation() |
2219
|
|
|
|
|
|
|
#{ |
2220
|
|
|
|
|
|
|
# my $base = 0; |
2221
|
|
|
|
|
|
|
# if(scalar @_ == 2) |
2222
|
|
|
|
|
|
|
# { |
2223
|
|
|
|
|
|
|
# $base = 1;#checks if method entered by object. |
2224
|
|
|
|
|
|
|
# } |
2225
|
|
|
|
|
|
|
# my $newLocation = $_[$base]; |
2226
|
|
|
|
|
|
|
# |
2227
|
|
|
|
|
|
|
# #check to see if /dict exists in the new WN location |
2228
|
|
|
|
|
|
|
# $newLocation = "/home/csgrads/ruse0008/WordNet-Insert/WordNet-3.0"; |
2229
|
|
|
|
|
|
|
# |
2230
|
|
|
|
|
|
|
# if(-d "$newLocation/dict") |
2231
|
|
|
|
|
|
|
# { |
2232
|
|
|
|
|
|
|
# $WNHOME = $newLocation; |
2233
|
|
|
|
|
|
|
# } |
2234
|
|
|
|
|
|
|
# else |
2235
|
|
|
|
|
|
|
# { |
2236
|
|
|
|
|
|
|
# my $self = shift; |
2237
|
|
|
|
|
|
|
# $self->{error} = 2; |
2238
|
|
|
|
|
|
|
# $self->{errorString} = "Desired WordNet location does not contain dict/."; |
2239
|
|
|
|
|
|
|
# } |
2240
|
|
|
|
|
|
|
#} |
2241
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
#**************printHelp()********************** |
2243
|
|
|
|
|
|
|
# Prints indepth help guide to screen. |
2244
|
|
|
|
|
|
|
#*********************************************** |
2245
|
|
|
|
|
|
|
sub printHelp()
{
    # The usage summary comes first, then the longer description.
    printUsage();

    my @helpLines = (
        "Takes in lemmas from file and attempts to\n",
        "insert them into WordNet by first finding\n",
        "a hypernym, then either a) merging the \n",
        "lemma with the hypernym or b) attaching \n",
        "the lemma to the hypernym.\n",
    );

    # One print call per line, with the status of the final print handed
    # back to the caller.
    my $printStatus;
    foreach my $helpLine (@helpLines)
    {
        $printStatus = print $helpLine;
    }
    return $printStatus;
}
2254
|
|
|
|
|
|
|
|
2255
|
|
|
|
|
|
|
1; |