File Coverage

blib/lib/WordNet/Insert.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				# WordNet::Insert.pm version 0.02
2				# Updated: 05/26/16
3				#
4				# Ted Pedersen, University of Minnesota Duluth
5				# tpederse at d.umn.edu
6				#
7				# Jon Rusert, University of Minnesota Duluth
8				# ruse0008 at d.umn.edu
9				#
10				# This program is free software: you can redistribute it and/or modify
11				# it under the terms of the GNU General Public License as published by
12				# the Free Software Foundation, either version 3 of the License, or
13				# (at your option) any later version.
14				#
15				# This program is distributed in the hope that it will be useful,
16				# but WITHOUT ANY WARRANTY; without even the implied warranty of
17				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18				# GNU General Public License for more details.
19				#
20				# You should have received a copy of the GNU General Public License
21				# along with this program. If not, see <http://www.gnu.org/licenses/>.
22				#
23
24				package WordNet::Insert;
25
26				=head1 NAME
27
28				WordNet::Insert - Perl modules for calculating where in WordNet a
29				lemma should be inserted.
30
31				=head1 SYNOPSIS
32
33				=head2 Basic Usage Example
34
35				use WordNet::Insert;
36
37				my $insert = WordNet::Insert->new();
38
39				$insert->stopList('s/\b(the|is|at)\b//');
40				$insert->setCleanUp(1);
41				$insert->preProcessing();
42				$insert->toggleCompareGlosses(1,1,0);
43				$insert->setBonus(25);
44				$insert->toggleRefineSense(0);
45
46				$insert->insertFile('data/temptrial','outdata/outtemptrial');
47
48				=head1 DESCRIPTION
49
50				=head2 Introduction
51
52				=head2 Function
53
54				The following function is defined:
55
56				=over
57
58				=cut
59
60	1	1	13851	use WordNet::QueryData;
	0
	0
61				use Wiktionary::Parser;
62				use Getopt::Long;
63				use File::Spec;
64
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA         = qw(Exporter);
%EXPORT_TAGS = ();
@EXPORT_OK   = ();
@EXPORT      = ();
$VERSION     = '0.02';

#************Variables********************
# An input lemma is handled throughout the module as the list
#   (lemma, part-of-speech, item-id, definition, def source)
# and a processed lemma as the list
#   (item-id, WordNet sense, operation)
#
# All of the state below is package-global and therefore shared by every
# WordNet::Insert object.

our $wn = WordNet::QueryData->new; #to be used to access data from wordnet
our @wordNetNouns = (); #stores all words for noun sense from wordnet
our @wordNetVerbs = (); #stores all words for verb sense from wordnet
our %wnGlosses    = (); #sense => gloss
our @wnNounSenses = (); #every noun sense
our @wnVerbSenses = (); #every verb sense
our %wnHypes      = (); #sense => reference to list of hypernym senses
our %wnHypos      = (); #sense => reference to list of hyponym senses
our %wnSyns       = (); #sense => reference to list of synset senses
our %wnFreq       = (); #sense => frequency reported by WordNet::QueryData
#our $wikParser = Wiktionary::Parser->new(); #Parses data from wiktionary pages.

# Default stop list. It is single-quoted on purpose: inside double quotes
# "\b" is a backspace character, not the regex word-boundary assertion.
our $stopList = 's/\b(the|is|at|which|on|a|an|and|or|up)\b//g';

our $preProcessed   = 0;  #Flag to determine if preProcessing() has been called.
our $cleanUp        = 1;  #If cleanUp is on, glosses will be cleanedUp, can be toggled with setCleanUp();
our $userCleanUp    = ""; #Cleanup step specified by user in addCleanUp();
our $useHypeGlosses = 1;  #Toggle for use of hypernym glosses in comparisons.
our $useHypoGlosses = 1;  #Toggle for use of hyponym glosses in comparisons.
our $useSynsGlosses = 1;  #Toggle for use of synset glosses in comparisons.
our $bonus          = 10; #Bonus to be used for lemmas that contain the new lemma. Can be set with setBonus();
our $refineSense    = 1;  #Toggle for use of refineSense() method, default on.
our $help           = 0;  #Set by the --help command line switch.
#*********************************************

# NOTE(review): this parses @ARGV while the module is being loaded, so a
# --help switch given to any program that uses this module is consumed
# here and ends the program. Kept as-is for backward compatibility.
GetOptions('help' => \$help);
if ($help == 1)
{
    printHelp();
    exit(0);
}
113
114				=head2 Methods
115
116				The following methods are defined in this package:
117
118				=head3 Public methods
119
120				=over
121
122				=item $obj->new()
123
124				The constructor for WordNet::Insert objects.
125
126				Parameters: none.
127
128				Return value: the new blessed object
129
130				=cut
131
# Constructs a WordNet::Insert object.
#
# May be invoked on the class (WordNet::Insert->new()) or on an existing
# object ($obj->new(), the form shown in the POD). In the latter case the
# new object is blessed into the class of the invocant.
#
# Returns: a blessed hash reference with a cleared error state.
sub new
{
    my $invocant = shift;

    # ref() gives the class name for an object and '' for a plain class
    # name, so a stringified reference is never used as the package name.
    my $class = ref($invocant) || $invocant;

    my $self = {
        errorString => '',
        error       => 0,
    };

    return bless $self, $class;
}
144
145				=item $obj->getError()
146
147				Allows the object to check if any errors have occurred.
148				Returns an array ($error, $errString), where $error
149				value equal to 1 represents a warning and greater than that
150				represents an error and $errString contains the possible error.
151
152				Parameter: None
153
154				Returns: array of the form ($error, $errorString).
155
156				=cut
# Reports and clears the error state stored on the object.
#
# Parameter: none (method call on the object).
#
# Returns: the list ($error, $errorString). Leading carriage returns,
# newlines, tabs and spaces are removed from the string. Missing fields
# are reported as 0 and '' respectively.
sub getError
{
    my $self = shift;

    my $error     = defined $self->{error}       ? $self->{error}       : 0;
    my $errString = defined $self->{errorString} ? $self->{errorString} : '';

    # Reading the error state also resets it, so each problem is reported once.
    $self->{error}       = 0;
    $self->{errorString} = "";

    $errString =~ s/^[\r\n\t ]+//;
    return ($error, $errString);
}
167
168				=item $obj->insertFile($input_file, $output_file)
169
170				Attempts to insert each word from input file
171				into WordNet, outputs results to output file.
172
173				Parameter: location of input file and output file respectively
174
175				Returns: nothing
176
177				=cut
178
# Reads lemmas from an input file, one per line, and writes the chosen
# insertion point for each of them to an output file.
#
# Must be called as a method: the paths are taken from the second and third
# elements of the argument list.
#
# Parameters: path of the input file, path of the output file.
#
# Each output line has the form "item-id<TAB>WordNet sense<TAB>operation".
# Blank input lines, and lines for which insert() reports no result, are
# skipped.
#
# Dies if either file cannot be opened or the output cannot be closed.
#
# Returns: nothing
sub insertFile
{
    our $preProcessed;

    my $input  = File::Spec->canonpath($_[1]);
    my $output = File::Spec->canonpath($_[2]);

    open(my $inFh, '<', $input)
        or die "Cannot open input file '$input': $!";
    open(my $outFh, '>', $output)
        or die "Cannot open output file '$output': $!";

    #if preProcessing() hasn't been called, call it.
    if (!$preProcessed)
    {
        preProcessing();
    }

    # Read the input line by line; the loop ends at end of file.
    while (defined(my $line = <$inFh>))
    {
        $line =~ s/[\r\n]+\z//;
        next if $line eq '';

        my $result = insert($line);
        next unless ref($result) eq 'ARRAY';

        my @outLemma = @{$result};
        print {$outFh} "$outLemma[0]\t$outLemma[1]\t$outLemma[2]\n";
    }

    close($inFh);

    # Buffered write errors only surface when the handle is closed.
    close($outFh) or die "Cannot close output file '$output': $!";

    return;
}
208
209				=item $obj->insert($wordPosGloss)
210
211				Takes in single lemma with gloss and returns best insertion
212				point in WordNet.
213
214				Parameter: Lemma string in format of 'lemma\tpos\titem-id\tdefinition\tsource'
215				NOTE: Fields must be separated only by \t, with no surrounding spaces.
216
217				Returns: Array reference in the format (item-id, WordNet sense, operation)
218
219				=cut
# Finds the best insertion point in WordNet for a single lemma.
#
# May be called as a method ($obj->insert($line)) or as a plain function
# (insert($line)).
#
# Parameter: the lemma as one tab-separated string. The fields are handed
# on to processLemma() positionally, which reads them as
# (lemma, part-of-speech, item-id, definition, def source).
#
# Returns: the array reference produced by processLemma(), or nothing when
# preProcessing() has not been run. In that case, if there is an invocant
# object, its error level is set to 2.
sub insert
{
    our $preProcessed;

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;

    if (!$preProcessed)
    {
        # Only record the error on a real object; for a function-style call
        # the first argument is the lemma string itself.
        if ($base == 1 && ref $_[0])
        {
            my $self = $_[0];
            $self->{error} = 2;
            $self->{errorString} = "preProcessing() must be run once before calling insert()";
        }
        return;
    }

    my @inLemma = split(/\t/, $_[$base]); #stores lemma as formatted above
    return processLemma(\@inLemma);
}
240
241				=item $obj->stopList($newStopList)
242
243				Takes in new stop list, in regex form
244
245				Parameter:the new stop list in regex substitution form s/.../g?
246
247				Returns: nothing
248
249				=cut
250
# Replaces the module-wide stop list.
#
# May be called as a method or as a plain function.
#
# Parameter: the new stop list as the text of a substitution,
# 's/PATTERN/REPLACEMENT/FLAGS' (for example 's/\b(the|is|at)\b//g').
#
# The whole string has to have that shape. If it does not, the current
# stop list is kept and, when there is an invocant object, its error level
# is set to 1 (a warning).
#
# Returns: nothing
sub stopList
{
    our $stopList;

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;
    my $tempStopList = $_[$base];

    if (defined $tempStopList && $tempStopList =~ m{\As/.+/.*/[a-z]*\z}s)
    {
        $stopList = $tempStopList;
        return;
    }

    if ($base == 1 && ref $_[0])
    {
        my $self = $_[0];
        $self->{error} = 1;
        $self->{errorString} = "Proposed stop list not in regex substitution form s/.../g?, default remains";
    }
    return;
}
270
271				=item $obj->setCleanUp($switch)
272
273				Allows the user to toggle whether or not
274				glosses should be cleaned up.
275
276				Parameter: 0 or 1 to turn clean up off or on respectively
277
278				Returns: nothing
279
280				=cut
281
# Switches the built-in gloss clean-up on or off for the whole module.
#
# May be called as a method or as a plain function.
#
# Parameter: a value numerically equal to 0 turns clean-up off, anything
# else turns it on.
#
# Returns: nothing
sub setCleanUp
{
    our $cleanUp;

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;

    $cleanUp = ($_[$base] == 0) ? 0 : 1;
    return;
}
299
300				=item $obj->addCleanUp($cleanUp)
301
302				Allows the user to add their own
303				regex for cleaning up the glosses.
304
305				Parameter: Regex representing the cleanup
306				the user wants performed.
307
308				Returns: Nothing
309
310				=cut
311
# Registers an additional, user-supplied clean-up step for glosses.
#
# May be called as a method or as a plain function.
#
# Parameter: the clean-up as the text of a substitution or transliteration,
# i.e. 's/.../.../FLAGS', 'tr/.../.../FLAGS' or 'y/.../.../FLAGS'.
#
# The whole string has to have that shape. If it does not, the current
# user clean-up is kept and, when there is an invocant object, its error
# level is set to 1 (a warning).
#
# Returns: nothing
sub addCleanUp
{
    our $userCleanUp;

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;
    my $tempCleanUp = $_[$base];

    if (defined $tempCleanUp && $tempCleanUp =~ m{\A(?:s|tr|y)/.*/.*/[a-z]*\z}s)
    {
        $userCleanUp = $tempCleanUp;
        return;
    }

    if ($base == 1 && ref $_[0])
    {
        my $self = $_[0];
        $self->{error} = 1;
        $self->{errorString} = "Clean Up not in regex format '/.../', default remains on";
    }
    return;
}
332
333				=item $obj->preProcessing()
334
335				Highly increases speed of program by making
336				as many outside calls as possible and storing
337				outside info to be used later.
338
339				Parameter: none
340
341				Returns: nothing
342
343				=cut
344
# Queries WordNet once for everything the scoring code needs and caches it
# in the package-level tables:
#   @wordNetNouns / @wordNetVerbs   every noun / verb
#   @wnNounSenses / @wnVerbSenses   every sense of those words
#   %wnGlosses                      sense => (cleaned) gloss
#   %wnHypes, %wnHypos, %wnSyns     sense => reference to related senses
#   %wnFreq                         sense => frequency
#
# When $cleanUp is 1 each gloss has every character other than letters and
# whitespace removed, is lower-cased, and has the stop list applied. A
# non-empty user clean-up in 's/.../.../' form is applied afterwards.
#
# The sense lists are rebuilt from scratch, so calling this more than once
# does not duplicate entries. $preProcessed is set only after all data has
# been collected.
#
# Parameter: none
#
# Returns: nothing
sub preProcessing
{
    our ($wn, $preProcessed, $cleanUp, $userCleanUp, $stopList);
    our (@wordNetNouns, @wordNetVerbs, @wnNounSenses, @wnVerbSenses);
    our (%wnGlosses, %wnHypes, %wnHypos, %wnSyns, %wnFreq);

    # Applies a substitution written as the text 's/PATTERN/REPLACEMENT/FLAGS'
    # to a copy of $text and returns the result. Only the 'g' flag is
    # honoured; text that is not in that form, or whose pattern does not
    # compile, leaves $text unchanged.
    my $applySubstitution = sub
    {
        my ($text, $spec) = @_;
        return $text
            unless defined $spec && $spec =~ m{\As/(.+)/(.*)/([a-z]*)\z}s;
        my ($pattern, $replacement, $flags) = ($1, $2, $3);

        my $regex = eval { qr/$pattern/ };
        return $text unless defined $regex;

        if ($flags =~ /g/)
        {
            $text =~ s/$regex/$replacement/g;
        }
        else
        {
            $text =~ s/$regex/$replacement/;
        }
        return $text;
    };

    @wordNetNouns = $wn->listAllWords('noun'); #Stores all nouns from wordNet for multiple uses.
    @wordNetVerbs = $wn->listAllWords('verb'); #Stores all verbs from wordNet for multiple uses.
    @wnNounSenses = ();
    @wnVerbSenses = ();

    # Nouns and verbs are handled identically apart from the part-of-speech
    # tag used in the query and the list their senses are stored in.
    my @partsOfSpeech = (
        [ 'n', \@wordNetNouns, \@wnNounSenses ],
        [ 'v', \@wordNetVerbs, \@wnVerbSenses ],
    );

    foreach my $posEntry (@partsOfSpeech)
    {
        my ($posTag, $words, $senseList) = @{$posEntry};

        foreach my $curWord (@{$words})
        {
            foreach my $curSense ($wn->querySense("$curWord#$posTag"))
            {
                push(@{$senseList}, $curSense);

                #obtain each gloss and clean up before inserting into hash.
                my @glosses = $wn->querySense($curSense, "glos");
                my $gloss = defined $glosses[0] ? $glosses[0] : '';

                if ($cleanUp == 1)
                {
                    $gloss =~ s/[^a-zA-Z\s]//g; #drops parentheses, periods and other non-letters
                    $gloss = lc $gloss;         #converts all words to lowercase.
                    $gloss = $applySubstitution->($gloss, $stopList); #remove stop words
                }
                if (defined $userCleanUp && $userCleanUp ne "")
                {
                    $gloss = $applySubstitution->($gloss, $userCleanUp);
                }

                #maps each sense to its gloss
                $wnGlosses{$curSense} = $gloss;

                #obtains and stores, hypes, hypos, and syns
                $wnHypes{$curSense} = [ $wn->querySense($curSense, "hype") ];
                $wnHypos{$curSense} = [ $wn->querySense($curSense, "hypo") ];
                $wnSyns{$curSense}  = [ $wn->querySense($curSense, "syns") ];
                $wnFreq{$curSense}  = $wn->frequency($curSense);
            }
        }
    }

    $preProcessed = 1; #Flag that preProcessing has been completed.
    return;
}
433
434				=item $obj->processLemma(@inLemma)
435
436				Determines where the OOV Lemma should be
437				inserted into WordNet, returns the output.
438
439				Parameter: the lemma to be inserted in array form
440				(lemma, part-of-speech, item-id, definition, def source)
441
442				Returns: chosen lemma in array form
443				(item-id, WordNet sense, operation)
444
445				=cut
446
# Chooses the WordNet sense a new lemma should be attached to or merged with.
#
# May be called as a method or as a plain function.
#
# Parameter: reference to the lemma array
# (lemma, part-of-speech, item-id, definition, def source).
# A part-of-speech containing "noun" selects the noun senses; anything else
# selects the verb senses.
#
# Every candidate sense is scored with scoreSense(); on equal scores the
# later sense wins. When $refineSense is 1 and a sense was found, the
# result is passed through refineSense(). The operation is "attach" when
# the chosen sense has a frequency of 0 (or none recorded), otherwise
# "merge".
#
# Returns: reference to the array (item-id, WordNet sense, operation), or
# nothing when preProcessing() has not been run. In that case, if there is
# an invocant object, its error level is set to 2.
sub processLemma
{
    our ($preProcessed, $refineSense, @wnNounSenses, @wnVerbSenses, %wnFreq);

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;

    if (!$preProcessed)
    {
        # For a function-style call the first argument is the lemma array,
        # not an object, so there is nowhere to record the error.
        if ($base == 1 && ref $_[0])
        {
            my $self = $_[0];
            $self->{error} = 2;
            $self->{errorString} = "PreProcessing must be run before processLemma() is called.";
        }
        return;
    }

    my @inLemma = @{$_[$base]};
    my $partOfSpeech = defined $inLemma[1] ? $inLemma[1] : '';
    my $senses = ($partOfSpeech =~ /noun/) ? \@wnNounSenses : \@wnVerbSenses;

    my $highSenseScore = 0;
    my $highSense = "";

    foreach my $curSense (@{$senses}) #runs through each candidate sense
    {
        my $score = scoreSense(\@inLemma, $curSense);

        if ($score >= $highSenseScore)
        {
            $highSenseScore = $score;
            $highSense = $curSense;
        }
    }

    if ($refineSense == 1 && $highSense ne "")
    {
        $highSense = refineSense(\@inLemma, $highSense);
    }

    my $frequency = 0;
    if (defined $highSense && defined $wnFreq{$highSense})
    {
        $frequency = $wnFreq{$highSense};
    }
    my $attachMerge = ($frequency == 0) ? "attach" : "merge";

    return [ $inLemma[2], $highSense, $attachMerge ];
}
513
514				=item $obj->toggleCompareGlosses($hype,$hypo,$syns)
515
516				Toggles which glosses are used in score sense.
517				by default, the sense, the sense's hypernyms'
518				glosses,hyponyms' glosses, and synsets' glosses
519				are turned on. This method allows for toggling
520				of hypes,hypos,synsets, by passing in three
521				parameters, 1 for on and 0 for off.
522				Example: toggleCompareGlosses(0,0,0) toggles
523				all three off.
524
525				Parameters: 0 or 1 for toggling hypernyms, hyponyms,
526				and synset comparisons.
527
528				Returns: nothing
529
530				=cut
531
# Chooses which related glosses take part in scoring.
#
# May be called as a method (four arguments including the object) or as a
# plain function (three arguments).
#
# Parameters: three switches, in the order hypernym glosses, hyponym
# glosses, synset glosses. A value numerically equal to 0 turns the
# corresponding glosses off, anything else turns them on.
#
# Returns: nothing
sub toggleCompareGlosses
{
    our ($useHypeGlosses, $useHypoGlosses, $useSynsGlosses);

    my @switches = @_;
    shift @switches if scalar @switches == 4; #drop the invocant object

    $useHypeGlosses = ($switches[0] == 0) ? 0 : 1;
    $useHypoGlosses = ($switches[1] == 0) ? 0 : 1;
    $useSynsGlosses = ($switches[2] == 0) ? 0 : 1;
    return;
}
571
572				=item $obj->setBonus($bonus)
573
574				Allows the user to set the bonus that will be
575				used when scoring lemmas that contain the
576				new lemma.
577
578				Parameter: the multiplier that should be used in
579				calculating the bonus.
580
581				Returns: nothing
582
583				=cut
584
# Sets the module-wide bonus multiplier. scoreSense() multiplies it by a
# word length when a lemma's gloss contains the word of the sense it is
# compared with.
#
# May be called as a method or as a plain function.
#
# Parameter: the multiplier to use.
#
# Returns: nothing
sub setBonus
{
    our $bonus;

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;

    $bonus = $_[$base];
    return;
}
595
596				=item $obj->scoreSense(@inLemma, $compareSense)
597
598				Calculates a score for the passed sense then
599				returns that score.
600
601				Parameters: the in lemma in array form
602				(lemma, part-of-speech, item-id, definition, def source)
603				and the sense that the lemma is being compared to.
604
605				Returns: a score of how related the in lemma is to the
606				compareSense.
607
608				=cut
609
# Scores how well a new lemma's definition matches one WordNet sense.
#
# May be called as a method or as a plain function.
#
# Parameters: reference to the lemma array
# (lemma, part-of-speech, item-id, definition, def source) and the sense to
# compare it with, e.g. "dog#n#1".
#
# The lemma's definition is cleaned like the cached glosses (non-letters
# removed, lower-cased, stop list applied when $cleanUp is 1; then a
# non-empty user clean-up in 's/.../.../' form). For every word of the
# definition the score grows by:
#   * $bonus times the length of the sense's word, if the definition word
#     contains that word;
#   * $bonus times the length of the definition word, if the sense's word
#     is a two-word compound and the definition word is one of the two;
#   * the length of every word of the sense's gloss, and of its extended
#     gloss, that occurs at the start of a word boundary in the
#     definition word.
# The total is divided by the summed length of the definition words.
#
# Returns: the score, or 0 when the definition contains no words.
sub scoreSense
{
    our ($cleanUp, $userCleanUp, $stopList, $bonus, %wnGlosses);

    # Three arguments means the first one is the invocant object.
    my $base = (scalar @_ == 3) ? 1 : 0;

    my @inLemma  = @{$_[$base]};
    my $curSense = $_[$base + 1];
    my $word = substr($curSense, 0, index($curSense, '#')); #extracts base word.

    # Applies a substitution written as the text 's/PATTERN/REPLACEMENT/FLAGS'
    # to a copy of $text and returns the result. Only the 'g' flag is
    # honoured; text that is not in that form, or whose pattern does not
    # compile, leaves $text unchanged.
    my $applySubstitution = sub
    {
        my ($text, $spec) = @_;
        return $text
            unless defined $spec && $spec =~ m{\As/(.+)/(.*)/([a-z]*)\z}s;
        my ($pattern, $replacement, $flags) = ($1, $2, $3);

        my $regex = eval { qr/$pattern/ };
        return $text unless defined $regex;

        if ($flags =~ /g/)
        {
            $text =~ s/$regex/$replacement/g;
        }
        else
        {
            $text =~ s/$regex/$replacement/;
        }
        return $text;
    };

    #_________________Sense Gloss_________________________________
    my $senseGloss = defined $wnGlosses{$curSense} ? $wnGlosses{$curSense} : '';
    my @curSenseGloss = split(' ', $senseGloss);

    # getExtendedGloss() hands back an array reference; flatten it so that
    # the individual words are compared, not the reference itself.
    my @extendedGloss =
        map { ref($_) eq 'ARRAY' ? @{$_} : $_ } getExtendedGloss($curSense);

    #________________Lemma Gloss_________________________________
    my $tempLemmaGloss = defined $inLemma[3] ? $inLemma[3] : '';

    if ($cleanUp == 1)
    {
        $tempLemmaGloss =~ s/[^a-zA-Z\s]//g; #drops parentheses, periods and other non-letters
        $tempLemmaGloss = lc $tempLemmaGloss;
        $tempLemmaGloss = $applySubstitution->($tempLemmaGloss, $stopList); #remove stop words
    }
    if (defined $userCleanUp && $userCleanUp ne "")
    {
        $tempLemmaGloss = $applySubstitution->($tempLemmaGloss, $userCleanUp);
    }

    my @curLemmaGloss = split(' ', $tempLemmaGloss);

    #__________________Overlaps__________________________________
    # The sense's word with underscores shown as spaces, for the compound
    # comparison below. It does not depend on the definition word.
    my $spaceWord = $word;
    $spaceWord =~ s/_/ /g;

    my $glossLength = 0;
    my $overlaps = 0.0; #weighted amount of overlap.

    foreach my $lemmaWord (@curLemmaGloss)
    {
        $glossLength = $glossLength + length $lemmaWord;

        if ($lemmaWord =~ /\b\Q$word\E\b/) #definition word contains the sense's own word
        {
            $overlaps = $overlaps + $bonus * (length $word);
        }

        if ($spaceWord =~ /^\w+\s\b\Q$lemmaWord\E\b$/
            || $spaceWord =~ /^\b\Q$lemmaWord\E\b\s\w+$/)
        {
            $overlaps = $overlaps + $bonus * (length $lemmaWord);
        }

        foreach my $senseWord (@curSenseGloss)
        {
            if ($lemmaWord =~ /\b\Q$senseWord\E/)
            {
                $overlaps = $overlaps + length $senseWord;
            }
        }
        foreach my $extWord (@extendedGloss)
        {
            if ($lemmaWord =~ /\b\Q$extWord\E/)
            {
                $overlaps = $overlaps + length $extWord;
            }
        }
    }

    # An empty definition has nothing to overlap with.
    return 0 if $glossLength == 0;

    return $overlaps / $glossLength;
}
689
690				=item $obj->getExtendedGloss($compareSense)
691
692				Calculates the extended gloss based on which
693				glosses are toggled and returns an array
694				which contains the full glosses.
695
696				Parameter: the sense which the extended gloss is
697				based on
698
699				Returns: an array which contains the extended gloss
700
701				=cut
702
# Collects the words of the glosses related to a sense.
#
# May be called as a method or as a plain function.
#
# Parameter: the sense the extended gloss is based on, e.g. "dog#n#1".
#
# Depending on $useHypeGlosses, $useHypoGlosses and $useSynsGlosses, the
# cached glosses of the sense's hypernyms, hyponyms and synset members are
# split into words and appended in that order. Synset members that contain
# the sense's own word are skipped so that the sense is not repeated.
# Related senses without a cached gloss contribute nothing.
#
# Returns: reference to an array of words (empty when nothing applies).
sub getExtendedGloss
{
    our ($useHypeGlosses, $useHypoGlosses, $useSynsGlosses);
    our (%wnGlosses, %wnHypes, %wnHypos, %wnSyns);

    # Two arguments means the first one is the invocant object.
    my $base = (scalar @_ == 2) ? 1 : 0;

    my $curSense = $_[$base];
    my $word = substr($curSense, 0, index($curSense, '#')); #extracts base word.

    # One entry per kind of related sense:
    # (switch, lookup table, whether to skip entries containing $word).
    my @relations = (
        [ $useHypeGlosses, \%wnHypes, 0 ],
        [ $useHypoGlosses, \%wnHypos, 0 ],
        [ $useSynsGlosses, \%wnSyns,  1 ],
    );

    my @extendedGloss = ();

    foreach my $relation (@relations)
    {
        my ($enabled, $relatedOf, $skipOwnWord) = @{$relation};
        next unless defined $enabled && $enabled == 1;

        my $relatedSenses = $relatedOf->{$curSense} || [];

        foreach my $relatedSense (@{$relatedSenses})
        {
            next if $skipOwnWord && $relatedSense =~ /\b\Q$word\E\b/; #do not repeat sense

            my $relatedGloss = $wnGlosses{$relatedSense};
            next unless defined $relatedGloss;

            push(@extendedGloss, split(' ', $relatedGloss));
        }
    }

    return \@extendedGloss;
}
777
778				=item $obj->toggleRefineSense($toggle)
779
780				Allows user to toggle refineSense() on/off.
781
782				Parameter: 0 or 1 to toggle the refine sense method
783				on or off respectively in the processLemma method.
784
785				Returns: nothing
786
787				=cut
788
# Switches the refineSense() step of processLemma() on or off.
#
# May be called as a method or as a plain function.
#
# Parameter: a value numerically equal to 0 turns the step off, anything
# else turns it on.
#
# Returns: nothing
sub toggleRefineSense
{
    our $refineSense;

    # Two arguments means the first one is the invocant object; the switch
    # is then the second argument, not the first.
    my $base = (scalar @_ == 2) ? 1 : 0;

    $refineSense = ($_[$base] == 0) ? 0 : 1;
    return;
}
800
801				=item $obj->refineSense(@inLemma, $highSense)
802
803				Refines the chosen sense by determining which
804				numbered sense of its word should be chosen.
805
806				Parameters: the in lemma in form of
807				(lemma, part-of-speech, item-id, definition, def source)
808				and the sense which currently best matches the in lemma.
809
810				Returns:the new highest scoring sense
811
812				=cut
813
# Picks, among the numbered senses of the chosen sense's word, the one
# whose gloss shares the most with the new lemma's definition.
#
# May be called as a method or as a plain function.
#
# Parameters: reference to the lemma array
# (lemma, part-of-speech, item-id, definition, def source) and the sense
# that currently matches best, e.g. "bank#n#2".
#
# The word of that sense and the first letter of the lemma's part of speech
# form the query "word#p". Each sense WordNet returns for it is scored as
# (summed length of definition words found in the sense's cached gloss)
# divided by (summed length of all definition words). Definition words
# equal to the sense's word are not counted as overlap. The definition is
# cleaned like the cached glosses before the comparison.
#
# Returns: the best scoring sense, or "word#p#1" when no sense scores
# above 0.
sub refineSense
{
    our ($wn, $cleanUp, $userCleanUp, $stopList, %wnGlosses);

    # Three arguments means the first one is the invocant object.
    my $base = (scalar @_ == 3) ? 1 : 0;

    my @inLemma   = @{$_[$base]};
    my $highSense = $_[$base + 1];

    my $word = substr($highSense, 0, index($highSense, '#')); #extracts base word.
    my $shortSense = substr($inLemma[1], 0, 1);
    my $sense = $word . "#" . $shortSense;
    my $refineHigh = "$sense#1"; #assume first sense.

    # Applies a substitution written as the text 's/PATTERN/REPLACEMENT/FLAGS'
    # to a copy of $text and returns the result. Only the 'g' flag is
    # honoured; text that is not in that form, or whose pattern does not
    # compile, leaves $text unchanged.
    my $applySubstitution = sub
    {
        my ($text, $spec) = @_;
        return $text
            unless defined $spec && $spec =~ m{\As/(.+)/(.*)/([a-z]*)\z}s;
        my ($pattern, $replacement, $flags) = ($1, $2, $3);

        my $regex = eval { qr/$pattern/ };
        return $text unless defined $regex;

        if ($flags =~ /g/)
        {
            $text =~ s/$regex/$replacement/g;
        }
        else
        {
            $text =~ s/$regex/$replacement/;
        }
        return $text;
    };

    my $tempLemmaGloss = defined $inLemma[3] ? $inLemma[3] : '';

    if ($cleanUp == 1)
    {
        $tempLemmaGloss =~ s/[^a-zA-Z\s]//g; #drops parentheses, periods and other non-letters
        $tempLemmaGloss = lc $tempLemmaGloss;
        $tempLemmaGloss = $applySubstitution->($tempLemmaGloss, $stopList); #remove stop words
    }
    if (defined $userCleanUp && $userCleanUp ne "")
    {
        $tempLemmaGloss = $applySubstitution->($tempLemmaGloss, $userCleanUp);
    }

    my @refineLemmaGloss = split(' ', $tempLemmaGloss);

    # The denominator is the same for every sense, so compute it once.
    my $rGlossLength = 0;
    foreach my $lemmaWord (@refineLemmaGloss)
    {
        $rGlossLength = $rGlossLength + length $lemmaWord;
    }
    return $refineHigh if $rGlossLength == 0; #nothing to compare with

    my $highSenseScore = 0;

    foreach my $rSense ($wn->querySense($sense)) #the other senses for the same word.
    {
        my $tempSenseGloss = $wnGlosses{$rSense};
        next unless defined $tempSenseGloss;

        my $rOverlaps = 0; #counted per sense
        foreach my $lemmaWord (@refineLemmaGloss)
        {
            next if $lemmaWord eq $word;
            if (index($tempSenseGloss, $lemmaWord) >= 0)
            {
                $rOverlaps = $rOverlaps + length $lemmaWord;
            }
        }

        my $rSenseScore = $rOverlaps / $rGlossLength;
        if ($rSenseScore > $highSenseScore)
        {
            $highSenseScore = $rSenseScore;
            $refineHigh = $rSense;
        }
    }

    return $refineHigh;
}
882
883
#************printHelp()********************
# Prints the in-depth help guide to the currently selected output handle.
# printUsage() is called first when such a subroutine exists; it is not
# defined in this file, so an unconditional call would abort the help
# output with an "Undefined subroutine" error.
#***********************************************
sub printHelp
{
    printUsage() if defined &printUsage;

    print "Takes in lemmas from file and attempts to\n"
        . "insert them into WordNet by first finding\n"
        . "a hypernym, then either a) merging the \n"
        . "lemma with the hypernym or b) attaching \n"
        . "the lemma to the hypernym.\n";
    return;
}
896
897				1;