File Coverage

blib/lib/WordNet/Insert.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             # WordNet::Insert.pm version 0.01
2             # Updated: 05/25/16
3             #
4             # Ted Pedersen, University of Minnesota Duluth
5             # tpederse at d.umn.edu
6             #
7             # Jon Rusert, University of Minnesota Duluth
8             # ruse0008 at d.umn.edu
9             #
10             # This program is free software: you can redistribute it and/or modify
11             # it under the terms of the GNU General Public License as published by
12             # the Free Software Foundation, either version 3 of the License, or
13             # (at your option) any later version.
14             #
15             # This program is distributed in the hope that it will be useful,
16             # but WITHOUT ANY WARRANTY; without even the implied warranty of
17             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18             # GNU General Public License for more details.
19             #
20             # You should have received a copy of the GNU General Public License
21             # along with this program. If not, see <http://www.gnu.org/licenses/>.
22             #
23              
24             package WordNet::Insert;
25              
26             =head1 NAME
27              
28             WordNet::Insert - Perl modules for calculating where in WordNet a
29             lemma should be inserted.
30              
31             =head1 SYNOPSIS
32              
33             =head2 Basic Usage Example
34              
35             use WordNet::Insert;
36              
37             my $insert = WordNet::Insert->new();
38              
39             $insert->stopList('s/\b(the|is|at)\b//');
40             $insert->setCleanUp(1);
41             $insert->preProcessing();
42             $insert->toggleCompareGlosses(1,1,0);
43             $insert->setBonus(25);
44             $insert->toggleRefineSense(0);
45              
46             $insert->insertFile('data/temptrial','outdata/outtemptrial');
47              
48             =head1 DESCRIPTION
49              
50             =head2 Introduction
51              
52             =head2 Function
53              
54             The following function is defined:
55              
56             =over
57              
58             =cut
59              
60 1     1   20275 use WordNet::QueryData;
  0            
  0            
61             use Wiktionary::Parser;
62             use Getopt::Long;
63             use File::Spec;
64              
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);

%EXPORT_TAGS = ();

@EXPORT_OK = ();

@EXPORT = ();

$VERSION = '0.01';

#**************Variables**********************
# All settings and caches below are package variables, so they are shared by
# every WordNet::Insert object in the process.
#
# Lemma layouts used throughout the module:
#   in lemma:  (lemma, part-of-speech, item-id, definition, def source)
#   out lemma: (item-id, WordNet sense, operation)
our $wn = WordNet::QueryData->new; #to be used to access data from wordnet
our @wordNetNouns; #stores all words for noun sense from wordnet
our @wordNetVerbs; #stores all words for verb sense from wordnet
our %wnGlosses = (); #sense => gloss text, filled by preProcessing()
our @wnNounSenses; #every noun sense, filled by preProcessing()
our @wnVerbSenses; #every verb sense, filled by preProcessing()
our %wnHypes = (); #sense => array ref of its "hype" query results
our %wnHypos = (); #sense => array ref of its "hypo" query results
our %wnSyns = (); #sense => array ref of its "syns" query results
our %wnFreq = (); #sense => value of $wn->frequency() for that sense
#our $wikParser = Wiktionary::Parser->new(); #Parses data from wiktionary pages.

# Default stop list. Single quotes are required here: inside double quotes
# "\b" is a backspace character, not the regex word-boundary assertion.
our $stopList = 's/\b(the|is|at|which|on|a|an|and|or|up)\b//g';
our $preProcessed = 0; #Flag to determine if preProcessing() has been called.
our $cleanUp = 1; #If cleanUp is on, glosses will be cleanedUp, can be toggled with setCleanUp();
our $userCleanUp = ""; #Cleanup step specified by user in addCleanUp();
our $useHypeGlosses = 1; #Toggle for use of hypernym glosses in comparisons.
our $useHypoGlosses = 1; #Toggle for use of hyponym glosses in comparisons.
our $useSynsGlosses = 1; #Toggle for use of synset glosses in comparisons.
our $bonus = 10; #Bonus to be used for lemmas that contain the new lemma. Can be set with setBonus();
our $refineSense = 1; #Toggle for use of refineSense() method, default on.
our $help = 0;
#*********************************************

# NOTE(review): this parses @ARGV when the module is loaded, so a --help
# option given to any program that uses the module ends that program here.
GetOptions('help' => \$help);
if ($help == 1)
{
    printHelp();
    exit(0);
}
113              
114             =head2 Methods
115              
116             The following methods are defined in this package:
117              
118             =head3 Public methods
119              
120             =over
121              
122             =item $obj->new()
123              
124             The constructor for WordNet::Insert objects.
125              
126             Parameters: none.
127              
128             Return value: the new blessed object
129              
130             =cut
131              
# Constructor: builds a WordNet::Insert object that starts with no pending
# error (error code 0 and an empty error string).
sub new
{
    my ($class) = @_;

    my %state = (
        errorString => '',
        error       => 0,
    );

    return bless \%state, $class;
}
144              
145             =item $obj->getError()
146              
147             Allows the object to check if any errors have occurred.
148             Returns an array ($error, $errString), where $error
149             value equal to 1 represents a warning and greater than that
150             represents an error and $errString contains the possible error.
151              
152             Parameter: None
153              
154             Returns: array of the form ($error, $errorString).
155              
156             =cut
# Reports and clears the pending error state of the object.
#
# Parameter: none besides the object itself.
#
# Returns: the list ($error, $errString). Leading whitespace is stripped from
# $errString. The object's error code and error string are reset, so each
# error is reported only once.
#
# The sub is declared without the former empty prototype: it does receive an
# argument (the object), and "()" made direct calls fail to compile.
sub getError
{
    my ($self) = @_;

    my $error     = $self->{error};
    my $errString = $self->{errorString};

    # Reset so the next call reports a clean state.
    $self->{error}       = 0;
    $self->{errorString} = "";

    $errString =~ s/^[\r\n\t ]+//;
    return ($error, $errString);
}
167              
168             =item $obj->insertFile($input_file, $output_file)
169              
170             Attempts to insert each word from input file
171             into WordNet, outputs results to output file.
172              
173             Parameter: location of input file and output file respectively
174              
175             Returns: nothing
176              
177             =cut
178              
# Reads lemmas from the input file, one per line, runs insert() on each and
# writes "item-id<TAB>sense<TAB>operation" lines to the output file.
#
# Parameters: (called as a method) the input file path and the output file path.
#
# Returns: nothing. Dies if either file cannot be opened or closed.
#
# preProcessing() is run first when it has not been run yet.
sub insertFile
{
    my ($self, $inputFile, $outputFile) = @_;

    my $input  = File::Spec->canonpath($inputFile);
    my $output = File::Spec->canonpath($outputFile);

    # Three-argument open with lexical handles: the file name can never be
    # read as an open mode, and Perl's special DATA handle is left alone.
    open(my $inFh, '<', $input) or die "Cannot open input file '$input': $!";
    open(my $outFh, '>', $output) or die "Cannot open output file '$output': $!";

    #if preProcessing() hasn't been called, call it.
    if ($preProcessed == 0)
    {
        preProcessing();
    }

    while (my $line = <$inFh>) #While lemmas are left in the input data
    {
        chomp $line;
        next if $line eq ''; #blank lines carry no lemma

        my $result = insert($line);
        next unless ref $result eq 'ARRAY';

        my @outLemma = @{$result};
        print {$outFh} "$outLemma[0]\t$outLemma[1]\t$outLemma[2]\n";
    }

    close($inFh) or die "Cannot close input file '$input': $!";
    # Buffered write errors only surface at close, so check it.
    close($outFh) or die "Cannot close output file '$output': $!";

    return;
}
208              
209             =item $obj->insert($wordPosGloss)
210              
211             Takes in single lemma with gloss and returns best insertion
212             point in WordNet.
213              
214             Parameter: Lemma string in format of 'word\tpos\tdef'
215             NOTE: String must only be separated by \t no space.
216              
217             Returns: Array reference in format of (item-id, WordNet sense, operation)
218              
219             =cut
# Finds the insertion point for a single lemma.
#
# Parameter: lemma string whose fields are separated by tabs. May be called
# as a method or as a plain function.
#
# Returns: array reference produced by processLemma(), in the form
# (item-id, WordNet sense, operation). When preProcessing() has not been run,
# nothing is returned and, if called on an object, error code 2 is set on it.
sub insert
{
    my @args = @_;

    # Two arguments mean the call came through an object or class, so the
    # first one is the invocant rather than the lemma line.
    my $invocant = (@args == 2) ? shift @args : undef;
    my $lemmaLine = $args[0];

    if ($preProcessed != 1)
    {
        # Only a real object can carry the error; a plain function call has
        # nothing to record it on.
        if (ref $invocant)
        {
            $invocant->{error} = 2;
            $invocant->{errorString} = "preProcesssing() must be run once before calling insert()";
        }
        return;
    }

    my @inLemma = split("\t", $lemmaLine); #stores lemma as formatted above
    my @outLemma = @{processLemma(\@inLemma)};
    return \@outLemma;
}
240              
241             =item $obj->stopList($newStopList)
242              
243             Takes in new stop list, in regex form
244              
245             Parameter:the new stop list in regex substitution form s/.../g?
246              
247             Returns: nothing
248              
249             =cut
250              
# Replaces the stop list used when glosses are cleaned up.
#
# Parameter: the new stop list as the text of a substitution, "s/.../.../"
# with an optional trailing "g". May be called as a method or as a plain
# function.
#
# Returns: nothing. When the text is not in that form the current stop list
# is kept and, if called on an object, error code 1 is set on it.
sub stopList
{
    my @args = @_;

    # Two arguments mean the first one is the invocant.
    my $invocant = (@args == 2) ? shift @args : undef;
    my $tempStopList = $args[0];

    # Anchored at both ends: the whole string has to be a substitution, not
    # merely contain something that looks like one.
    if (defined $tempStopList && $tempStopList =~ m{\A s/ .+ / .* / g? \z}xms)
    {
        $stopList = $tempStopList;
    }
    elsif (ref $invocant)
    {
        $invocant->{error} = 1;
        $invocant->{errorString} = "Proposed stop list not in regex substition form s/.../g?, default remains";
    }

    return;
}
270              
271             =item $obj->setCleanUp($switch)
272              
273             Allows the user to toggle whether or not
274             glosses should be cleaned up.
275              
276             Parameter: 0 or 1 to turn clean up off or on respectively
277              
278             Returns: nothing
279              
280             =cut
281              
# Turns the built-in gloss clean-up on or off.
#
# Parameter: 0 turns clean-up off, any other number turns it on. May be
# called as a method or as a plain function.
#
# Returns: nothing.
#
# Declared without the former empty prototype, which contradicted the fact
# that the sub takes an argument.
sub setCleanUp
{
    my @args = @_;
    shift @args if @args == 2; #called as a method: drop the invocant

    my ($switch) = @args;
    $cleanUp = ($switch == 0) ? 0 : 1;

    return;
}
299              
300             =item $obj->addCleanUp($cleanUp)
301              
302             Allows the user to add their own
303             regex for cleaning up the glosses.
304              
305             Parameter: Regex representing the cleanup
306             the user wants performed.
307              
308             Returns: Nothing
309              
310             =cut
311              
# Stores a user-supplied clean-up rule for glosses.
#
# Parameter: the rule as text starting with "s/" or "t/" and ending with "/"
# plus an optional "g". May be called as a method or as a plain function.
#
# Returns: nothing. When the text is not in that form the stored rule is left
# unchanged and, if called on an object, error code 1 is set on it.
sub addCleanUp
{
    my @args = @_;

    # Two arguments mean the first one is the invocant.
    my $invocant = (@args == 2) ? shift @args : undef;
    my $tempCleanUp = $args[0];

    # Anchored at both ends so the whole string has to be the rule.
    if (defined $tempCleanUp && $tempCleanUp =~ m{\A (?:s|t) / .* / g? \z}xms)
    {
        $userCleanUp = $tempCleanUp;
    }
    elsif (ref $invocant)
    {
        $invocant->{error} = 1;
        $invocant->{errorString} = "Clean Up not in regex format '/.../', default remains on";
    }

    return;
}
332              
333             =item $obj->preProcessing()
334              
335             Highly increases speed of program by making
336             as many outside calls as possible and storing
337             outside info to be used later.
338              
339             Parameter: none
340              
341             Returns: nothing
342            
343             =cut
344              
# Queries WordNet once for every noun and verb sense and caches the results
# in the package variables (@wnNounSenses, @wnVerbSenses, %wnGlosses,
# %wnHypes, %wnHypos, %wnSyns, %wnFreq) for use by the scoring methods.
#
# Parameter: none.
#
# Returns: nothing. $preProcessed is set to 1 only after all data is cached.
sub preProcessing
{
    @wordNetNouns = $wn->listAllWords('noun'); #Stores all nouns from wordNet for multiple uses.
    @wordNetVerbs = $wn->listAllWords('verb'); #Stores all verbs from wordNet for multiple uses.

    # Start from empty lists so calling this twice does not list each sense twice.
    @wnNounSenses = ();
    @wnVerbSenses = ();

    _cacheSensesFor('n', \@wordNetNouns, \@wnNounSenses);
    _cacheSensesFor('v', \@wordNetVerbs, \@wnVerbSenses);

    $preProcessed = 1; #Flag that preProcessing has been called.
    return;
}

# Caches gloss, hypernyms, hyponyms, synset and frequency for every sense of
# every word in @$words, and appends each sense to @$senseStore.
# $posTag is the part-of-speech letter used in the WordNet query ("n" or "v").
sub _cacheSensesFor
{
    my ($posTag, $words, $senseStore) = @_;

    foreach my $word (@{$words})
    {
        foreach my $sense ($wn->querySense($word . '#' . $posTag)) #all senses for that word
        {
            push(@{$senseStore}, $sense);

            #obtain each gloss and clean up before inserting into hash.
            my @glosses = $wn->querySense($sense, "glos");
            $wnGlosses{$sense} = _cleanSenseGloss($glosses[0]);

            #obtains and stores, hypes, hypos, and syns
            $wnHypes{$sense} = [ $wn->querySense($sense, "hype") ];
            $wnHypos{$sense} = [ $wn->querySense($sense, "hypo") ];
            $wnSyns{$sense}  = [ $wn->querySense($sense, "syns") ];
            $wnFreq{$sense}  = $wn->frequency($sense);
        }
    }

    return;
}

# Returns the gloss after the built-in clean-up (when $cleanUp is 1) and the
# user clean-up rule (when one is stored) have been applied.
sub _cleanSenseGloss
{
    my ($gloss) = @_;
    $gloss = '' unless defined $gloss;

    if ($cleanUp == 1)
    {
        $gloss =~ s/(\(|\)|\.)//g; #drop parentheses and periods
        # NOTE(review): this removes the literal text "a-zA-Z" at the start of
        # the gloss; a character class such as [^a-zA-Z ] may have been
        # intended - confirm before changing.
        $gloss =~ s/^a-zA-Z//g;
        $gloss = lc $gloss; #converts all words to lowercase.
        $gloss = _applyGlossRule($gloss, $stopList, 1); #remove stop words
    }

    return _applyGlossRule($gloss, $userCleanUp, 0);
}

# Applies a rule stored as the text of a substitution ("s/PATTERN/REPLACEMENT/"
# with optional "g") to $text and returns the result. Text that is not in that
# form leaves $text unchanged. The replacement is inserted literally.
sub _applyGlossRule
{
    my ($text, $rule, $forceGlobal) = @_;

    return $text
        unless defined $rule && $rule =~ m{\A s/ (.+) / ([^/]*) / (g?) \z}xms;
    my ($pattern, $replacement, $global) = ($1, $2, $3);

    # A rule written in a double-quoted string carries backspace characters
    # where the regex "\b" was meant; turn them back into word boundaries.
    $pattern =~ s/\x08/\\b/g;

    if ($forceGlobal || $global)
    {
        $text =~ s/$pattern/$replacement/g;
    }
    else
    {
        $text =~ s/$pattern/$replacement/;
    }

    return $text;
}
433              
434             =item $obj->processLemma(@inLemma)
435              
436             Determines where the OOV Lemma should be
437             inserted into WordNet, returns the output.
438              
439             Parameter: the lemma to be inserted in array form
440             (lemma, part-of-speech, item-id, definition, def source)
441              
442             Returns: chosen lemma in array form
443             (item-id, WordNet sense, operation)
444              
445             =cut
446              
# Chooses the WordNet sense a lemma should be attached to or merged with.
#
# Parameter: array reference holding the lemma as
# (lemma, part-of-speech, item-id, definition, def source). May be called as
# a method or as a plain function.
#
# Returns: array reference (item-id, WordNet sense, operation), where
# operation is "attach" when the chosen sense has frequency 0 and "merge"
# otherwise. When preProcessing() has not been run, nothing is returned and,
# if called on an object, error code 2 is set on it.
sub processLemma
{
    my @args = @_;

    # Two arguments mean the first one is the invocant.
    my $invocant = (@args == 2) ? shift @args : undef;

    if ($preProcessed != 1)
    {
        if (ref $invocant)
        {
            $invocant->{error} = 2;
            $invocant->{errorString} = "PreProcessing must be run before processLemma() is called.";
        }
        return;
    }

    my @inLemma = @{$args[0]};

    # Use a reference instead of copying the whole sense list for every lemma.
    my $senses = ($inLemma[1] =~ /noun/) ? \@wnNounSenses : \@wnVerbSenses;

    my $highSenseScore = 0;
    my $highSense = "";

    foreach my $curSense (@{$senses})
    {
        my $score = scoreSense(\@inLemma, $curSense);

        # ">=" means that among equally scored senses the last one wins.
        if ($score >= $highSenseScore)
        {
            $highSenseScore = $score;
            $highSense = $curSense;
        }
    }

    # With no candidate sense there is nothing to refine.
    if ($refineSense == 1 && $highSense ne "")
    {
        $highSense = refineSense(\@inLemma, $highSense);
    }

    my $frequency = defined $highSense ? $wnFreq{$highSense} : undef;
    my $attachMerge = $frequency ? "merge" : "attach";

    return [ $inLemma[2], $highSense, $attachMerge ];
}
513              
514             =item $obj->toggleCompareGlosses($hype,$hypo,$syns)
515              
516             Toggles which glosses are used in score sense.
517             by default, the sense, the sense's hypernyms'
518             glosses,hyponyms' glosses, and synsets' glosses
519             are turned on. This method allows for toggling
520             of hypes,hypos,synsets, by passing in three
521             parameters, 1 for on and 0 for off.
522             Example: toggleCompareGlosses(0,0,0) toggles
523             all three off.
524              
525             Parameters: 0 or 1 for toggling hypernyms, hyponyms,
526             and synset comparisons.
527              
528             Returns: nothing
529              
530             =cut
531              
# Chooses which related glosses take part in scoring.
#
# Parameters: three switches, in order hypernyms, hyponyms, synsets; 0 turns
# the comparison off, any other number turns it on. May be called as a method
# or as a plain function.
#
# Returns: nothing.
sub toggleCompareGlosses
{
    my @switches = @_;
    shift @switches if @switches == 4; #called as a method: drop the invocant

    my ($hype, $hypo, $syns) = @switches;

    $useHypeGlosses = ($hype == 0) ? 0 : 1;
    # The original assigned to a misspelled variable here, so hyponym glosses
    # could be switched off but never back on.
    $useHypoGlosses = ($hypo == 0) ? 0 : 1;
    $useSynsGlosses = ($syns == 0) ? 0 : 1;

    return;
}
571              
572             =item $obj->setBonus($bonus)
573              
574             Allows the user to set the bonus that will be
575             used when scoring lemmas that contain the
576             new lemma.
577              
578             Parameter: the multiplier that should be used in
579             calculating the bonus.
580              
581             Returns: nothing
582              
583             =cut
584              
# Sets the multiplier used when a gloss word contains the sense's own word.
#
# Parameter: the new bonus multiplier. May be called as a method or as a
# plain function.
#
# Returns: nothing.
#
# Declared without the former empty prototype, which contradicted the fact
# that the sub takes an argument.
sub setBonus
{
    my @args = @_;
    shift @args if @args == 2; #called as a method: drop the invocant

    ($bonus) = @args;

    return;
}
595              
596             =item $obj->scoreSense(@inLemma, $compareSense)
597              
598             Calculates a score for the passed sense then
599             returns that score.
600              
601             Parameters: the in lemma in array form
602             (lemma, part-of-speech, item-id, definition, def source)
603             and the sense that the lemma is being compared to.
604              
605             Returns: a score of how related the in lemma is to the
606             compareSense.
607              
608             =cut
609              
# Scores how well a WordNet sense matches the lemma's definition.
#
# Parameters: array reference holding the lemma as
# (lemma, part-of-speech, item-id, definition, def source), and the sense to
# compare against. May be called as a method or as a plain function.
#
# Returns: the overlap total divided by the summed length of the words in the
# cleaned definition, or 0 when the definition has no words.
#
# Overlap total: for each definition word, add $bonus times the word length
# when it contains the sense's own word or is half of a two-word sense name,
# plus the length of every sense-gloss or extended-gloss word found in it.
sub scoreSense
{
    my @args = @_;
    shift @args if @args == 3; #called as a method: drop the invocant

    my ($lemmaRef, $curSense) = @args;
    my @inLemma = @{$lemmaRef};
    my $word = substr($curSense, 0, index($curSense, '#')); #extracts base word.

    # Applies a rule stored as the text of a substitution
    # ("s/PATTERN/REPLACEMENT/" with optional "g"); other text is ignored.
    my $applyRule = sub {
        my ($text, $rule, $forceGlobal) = @_;
        return $text
            unless defined $rule && $rule =~ m{\A s/ (.+) / ([^/]*) / (g?) \z}xms;
        my ($pattern, $replacement, $global) = ($1, $2, $3);
        # Backspaces come from "\b" written in a double-quoted string.
        $pattern =~ s/\x08/\\b/g;
        if ($forceGlobal || $global)
        {
            $text =~ s/$pattern/$replacement/g;
        }
        else
        {
            $text =~ s/$pattern/$replacement/;
        }
        return $text;
    };

    #_________________Sense Gloss_________________________________
    my $senseGloss = defined $wnGlosses{$curSense} ? $wnGlosses{$curSense} : '';
    my @curSenseGloss = split(' ', $senseGloss);

    # getExtendedGloss() returns an array reference; it has to be
    # dereferenced, otherwise its words are never compared.
    my $extended = getExtendedGloss($curSense);
    my @extendedGloss = (ref $extended eq 'ARRAY') ? @{$extended} : ();

    #________________Lemma Gloss_________________________________
    my $tempLemmaGloss = defined $inLemma[3] ? $inLemma[3] : '';

    if ($cleanUp == 1)
    {
        #Clean up the words in the temp lemma gloss.
        $tempLemmaGloss =~ s/(\(|\)|\.)//g;
        # NOTE(review): removes the literal text "a-zA-Z" at the start; a
        # character class may have been intended - confirm before changing.
        $tempLemmaGloss =~ s/^a-zA-Z//g;
        $tempLemmaGloss = lc $tempLemmaGloss;
        $tempLemmaGloss = $applyRule->($tempLemmaGloss, $stopList, 1); #remove stop words
    }
    $tempLemmaGloss = $applyRule->($tempLemmaGloss, $userCleanUp, 0);

    my @curLemmaGloss = split(' ', $tempLemmaGloss);

    #__________________Overlaps__________________________________
    my @compareWords = (@curSenseGloss, @extendedGloss);
    (my $spaceWord = $word) =~ s/_/ /g; #underscores to spaces for the two-word check

    my $glossLength = 0;
    my $overlaps = 0.0; #weighted number of overlapped characters.

    foreach my $lemmaWord (@curLemmaGloss)
    {
        $glossLength += length $lemmaWord;

        # \Q...\E keeps punctuation in the words from being read as regex syntax.
        if ($lemmaWord =~ /\b\Q$word\E\b/) #lemma word contains the sense's own word
        {
            $overlaps += $bonus * (length $word);
        }

        #sense name is two words and the lemma word is one of them
        if ($spaceWord =~ /(^\w+\s\b\Q$lemmaWord\E\b$)|(^\b\Q$lemmaWord\E\b\s\w+$)/)
        {
            $overlaps += $bonus * (length $lemmaWord);
        }

        foreach my $glossWord (@compareWords)
        {
            if ($lemmaWord =~ /\b\Q$glossWord\E/)
            {
                $overlaps += length $glossWord;
            }
        }
    }

    # An empty definition would otherwise divide by zero.
    return 0 if $glossLength == 0;

    return $overlaps / $glossLength;
}
689              
690             =item $obj->getExtendedGloss($compareSense)
691              
692             Calculates the extended gloss based on which
693             glosses are toggled and returns an array
694             which contains the full glosses.
695              
696             Parameter: the sense which the extended gloss is
697             based on
698              
699             Returns: an array which contains the extended gloss
700              
701             =cut
702              
# Collects the gloss words of the senses related to $compareSense.
#
# Parameter: the sense the extended gloss is based on. May be called as a
# method or as a plain function.
#
# Returns: array reference with the words of the cached glosses of the
# sense's hypernyms, hyponyms and synset members, in that order, each group
# included only when its toggle is 1. Synset members whose name contains the
# sense's own word are skipped so the sense itself is not repeated.
sub getExtendedGloss
{
    my @args = @_;
    shift @args if @args == 2; #called as a method: drop the invocant

    my $curSense = $args[0];
    my $word = substr($curSense, 0, index($curSense, '#')); #extracts base word.
    my @extendedGloss = ();

    # Returns the words of the cached glosses of the given senses. The gloss
    # cache is keyed by sense, so the senses themselves are used as keys.
    my $glossWordsOf = sub {
        my @related = @_;
        return map  { split(' ', $wnGlosses{$_}) }
               grep { defined $wnGlosses{$_} } @related;
    };

    #__________________Hype Gloss_________________________________
    if ($useHypeGlosses == 1)
    {
        push(@extendedGloss, $glossWordsOf->(@{ $wnHypes{$curSense} || [] }));
    }

    #________________Hypo Gloss__________________________________
    if ($useHypoGlosses == 1)
    {
        push(@extendedGloss, $glossWordsOf->(@{ $wnHypos{$curSense} || [] }));
    }

    #_________________Syns Gloss_________________________________
    if ($useSynsGlosses == 1)
    {
        #do not repeat sense
        my @otherSyns = grep { !(length $word && $_ =~ /\b\Q$word\E\b/) }
                        @{ $wnSyns{$curSense} || [] };
        push(@extendedGloss, $glossWordsOf->(@otherSyns));
    }

    return \@extendedGloss;
}
777              
778             =item $obj->toggleRefineSense($toggle)
779            
780             Allows user to toggle refineSense() on/off.
781            
782             Parameter: 0 or 1 to toggle the refine sense method
783             off or on respectively in the processLemma method.
784              
785             Returns: nothing
786              
787             =cut
788              
# Turns the refineSense() step of processLemma() on or off.
#
# Parameter: 0 turns the step off, any other number turns it on. May be
# called as a method or as a plain function.
#
# Returns: nothing.
sub toggleRefineSense
{
    my @args = @_;

    # On a method call the first argument is the object, not the switch;
    # testing it directly always turned refinement on.
    shift @args if @args == 2;

    my ($toggle) = @args;
    $refineSense = ($toggle == 0) ? 0 : 1;

    return;
}
800              
801             =item $obj->refineSense(@inLemma, $highSense)
802            
803             Refines the chosen sense by determining which
804             numbered sense should be chosen.
805              
806             Parameters: the in lemma in form of
807             (lemma, part-of-speech, item-id, definition, def source)
808             and the sense which currently best matches the in lemma.
809              
810             Returns:the new highest scoring sense
811              
812             =cut
813              
# Picks which numbered sense of the best-matching word fits the lemma best.
#
# Parameters: array reference holding the lemma as
# (lemma, part-of-speech, item-id, definition, def source), and the sense
# that currently matches best. May be called as a method or as a plain
# function.
#
# Returns: the sense of the same word whose cached gloss contains the largest
# share of the definition's words; sense number 1 of the word when no gloss
# shares anything with the definition.
sub refineSense
{
    my @args = @_;
    shift @args if @args == 3; #called as a method: drop the invocant

    my ($lemmaRef, $highSense) = @args;
    my @inLemma = @{$lemmaRef};

    my $word = substr($highSense, 0, index($highSense, '#')); #extracts base word.
    my $shortSense = substr($inLemma[1], 0, 1); #first letter of the part of speech
    my $sense = $word . "#" . $shortSense;
    my $refineHigh = "$sense#1"; #assume first sense.

    # Applies a rule stored as the text of a substitution
    # ("s/PATTERN/REPLACEMENT/" with optional "g"); other text is ignored.
    my $applyRule = sub {
        my ($text, $rule, $forceGlobal) = @_;
        return $text
            unless defined $rule && $rule =~ m{\A s/ (.+) / ([^/]*) / (g?) \z}xms;
        my ($pattern, $replacement, $global) = ($1, $2, $3);
        # Backspaces come from "\b" written in a double-quoted string.
        $pattern =~ s/\x08/\\b/g;
        if ($forceGlobal || $global)
        {
            $text =~ s/$pattern/$replacement/g;
        }
        else
        {
            $text =~ s/$pattern/$replacement/;
        }
        return $text;
    };

    my $tempLemmaGloss = defined $inLemma[3] ? $inLemma[3] : '';

    if ($cleanUp == 1)
    {
        #Clean up the words in the temp lemma gloss.
        $tempLemmaGloss =~ s/(\(|\)|\.)//g;
        # NOTE(review): removes the literal text "a-zA-Z" at the start; a
        # character class may have been intended - confirm before changing.
        $tempLemmaGloss =~ s/^a-zA-Z//g;
        $tempLemmaGloss = lc $tempLemmaGloss;
        $tempLemmaGloss = $applyRule->($tempLemmaGloss, $stopList, 1); #remove stop words
    }
    $tempLemmaGloss = $applyRule->($tempLemmaGloss, $userCleanUp, 0);

    my @refineLemmaGloss = split(' ', $tempLemmaGloss);

    # The definition is the same for every sense, so measure it once.
    my $rGlossLength = 0;
    $rGlossLength += length $_ for @refineLemmaGloss;

    # Without any definition words there is nothing to compare (and the score
    # would divide by zero), so keep the first-sense assumption.
    return $refineHigh if $rGlossLength == 0;

    my $highSenseScore = 0;

    foreach my $rSense ($wn->querySense($sense)) #the other senses for the same word.
    {
        my $senseGloss = $wnGlosses{$rSense}; #cache is keyed by sense
        next unless defined $senseGloss;

        my $rOverlaps = 0; #restarted for every sense so scores are comparable
        foreach my $lemmaWord (@refineLemmaGloss)
        {
            next if $lemmaWord eq $word;
            if ($senseGloss =~ /\Q$lemmaWord\E/)
            {
                $rOverlaps += length $lemmaWord;
            }
        }

        my $rSenseScore = $rOverlaps / $rGlossLength;
        if ($rSenseScore > $highSenseScore)
        {
            $highSenseScore = $rSenseScore;
            $refineHigh = $rSense;
        }
    }

    return $refineHigh;
}
882              
883              
884             #**************printHelp()**********************
885             # Prints indepth help guide to screen.
886             #***********************************************
# Prints the in-depth help text to standard output.
#
# Parameter: none.
#
# Returns: nothing.
sub printHelp
{
    # No printUsage() is defined in this module; call it only when the
    # surrounding program supplies one, so that --help does not die with
    # "Undefined subroutine".
    printUsage() if defined &printUsage;

    print "Takes in lemmas from file and attempts to\n";
    print "insert them into WordNet by first finding\n";
    print "a hypernym, then either a) merging the \n";
    print "lemma with the hypernym or b) attaching \n";
    print "the lemma to the hypernym.\n";

    return;
}
896              
897             1;