File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/Driver.pm

Criterion	Covered	Total	%
statement	162	224	72.3
branch	41	76	53.9
condition			n/a
subroutine	11	16	68.7
pod	2	9	22.2
total	216	325	66.4

line	stmt	bran	sub	pod	time	code
1
2						# Defining the Package for the modules.
3						package Text::SenseClusters::LabelEvaluation::Driver;
4
5	4		4		7898	use strict;
	4				9
	4				155
6	4		4		24	use encoding "utf-8";
	4				7
	4				36
7
8						# Defining the version for the Program.
9						our $VERSION = '0.09';
10
11						# Including the FileHandle module.
12	4		4		13780	use FileHandle;
	4				53895
	4				25
13
14						# Including the other dependent Modules.
15	4		4		4217	use Text::SenseClusters::LabelEvaluation::ReadingFilesData;
	4				10
	4				1017
16	4		4		185	use Text::SenseClusters::LabelEvaluation::SimilarityScore;
	4				10
	4				1560
17	4		4		189	use Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData;
	4				5
	4				856
18	4		4		39	use Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo;
	4				9
	4				661
19
20
21
22						#######################################################################################################################
23
24						=head1 Name
25
26						Text::SenseClusters::LabelEvaluation::Driver - Module for evaluation of labels of the clusters.
27
28						=head1 SYNOPSIS
29
30
31						The following code snippet will evaluate the labels by comparing
32						them with text data for a gold-standard key from Wikipedia.
33
34						In order to test this module, please copy 'TestData' folder in current directory
35						or adjust directory location while mentioning the label and GoldKeys files.
36
37						# Including the LabelEvaluation Module.
38						use Text::SenseClusters::LabelEvaluation::Driver;
39
40						my $labelFileName = 'TestData/TVS/TVS.label';
41						my $topicFileName = 'TestData/TVS/TVSTopic.txt';
42
43						# Calling the LabelEvaluation modules by passing the following options
44						%inputOptions = (
45						senseClusterLabelFileName => $labelFileName,
46						labelComparisonMethod => 'automate',
47						goldKeyFileName => $topicFileName,
48						goldKeyDataSource => 'wikipedia',
49						weightRatio => 10,
50						isClean => 1,
51						);
52
53
54						# Calling the LabelEvaluation modules by passing the name of the
55						# label and topic files.
56						my $driverObject = Text::SenseClusters::LabelEvaluation::Driver->
57						new (\%inputOptions);
58
59						if($driverObject->{"errorCode"}){
60						print "Please correct the error before proceeding.\n\n";
61						exit();
62						}
63						my $accuracyScore = $driverObject->evaluateLabels();
64
65						# Printing the score.
66						print "\n\nScore of label evaluation is :: $accuracyScore \n";
67
68
69						Note: For more usage, please refer to test-cases in "t" folder of this package.
70
71						=head1 DESCRIPTION
72
73						This Program will compare the result obtained from the SenseClusters with that
74						of Gold Standards. Gold Standards can be obtained from:
75						1. Wikipedia
76						2. Wordnet
77						3. User Provided
78
79						For fetching the Wikipedia data it uses the WWW::Wikipedia module from the CPAN
80						and for comparison of Labels with Gold Standards it uses the Text::Similarity
81						Module. The comparison result is then further processed to obtain the result
82						and score of result.
83
84
85
86
87						=head1 FILE FORMATS:
88
89						=head2 senseClusterLabelFileName:
90
91						This names the file that contains the labels for the clusters generated by SenseClusters.
92						The file format for this file should be same as that of generated by SenseClusters.
93
94						For e.g:
95
96						Cluster 0 (Descriptive): George Bush, Russian President, British Prime, British Minister, India Pakistan, US George, Prime Minister,
97						Cluster 0 (Discriminating): Russian President, British Minister, India Pakistan, US George,
98						Cluster 1 (Descriptive): George Bush, British Prime, weapons mass, United Nations, September 11, mass destruction, United States,
99						Prime Minister, military action
100						Cluster 1 (Discriminating): United Nations, September 11, United States
101						Cluster 2 (Descriptive): George Bush, weapons destruction, prime minister, axis evil, Saddam Hussein, weapons mass, mass destruction,
102						Gulf War, military action, Iraqi leader
103						Cluster 2 (Discriminating): weapons destruction, prime minister, axis evil, Saddam Hussein, Gulf War, Iraqi leader
104
105
106						=head2 goldKeyFileName:
107
108						This parameter contains the name of the file that contains the gold standard keys for the labels of clusters generated by
109						SenseClusters.
110
111						The file format provided by user for Gold-Standard key's are dependent on the following
112						two parameters that user pass to call this module:
113
114						=head3 labelComparisonMethod
115
116						This parameter tells whether the user is passing the mapping information between
117						goldkeys and clusters or not.
118
119						Two options available are: 1. 'direct' - this says user will provide the mapping info.
120						2. 'automate' - this says module should find the best possible
121						mapping between cluster's label and goldkeys.
122
123						=head3 goldKeyDataSource
124
125						This parameter tells this module from where it can read more information about
126						the goldkeys
127
128						Options for this parameter are: 1. 'wikipedia' - this tells to fetch data from wikipedia.
129						2. 'wordnet' - this tells to fetch data from wordnet.
130						3. 'userData' - this tells user will give the data along
131						with mapping.
132
133
134
135						Combinations of the various values for the above two parameters will give the following six cases:
136
137						(Please note that separator between cluster name and Goldkeys are ":::".
138						Also, the separator between Goldkeys and their data are ":::")
139
140						=head4 Case 1. labelComparisonMethod => 'direct', goldKeyDataSource => 'userData'
141
142
143						a) In this case user should provide the mapping between the clusters and Goldkeys
144						b) User should also provide the data about these goldstandard keys.
145
146						for e.g:
147
148						Cluster0:::Tony Blair
149						Cluster1:::Vladimir Putin
150						Cluster2:::Saddam Hussein
151
152						Tony Blair::: Anthony Charles Lynton Blair (born 6 May 1953)[1] is a British Labour Party politician who served
153						as the Prime Minister of the United Kingdom from 1997 to 2007. He was the Member of Parliament (MP) for Sedgefield
154						from 1983 to 2007 and Leader of the Labour Party from 1994 to 2007. He resigned from all of these positions in
155						June 2007.
156
157						Vladimir Putin::: Vladimir Vladimirovich Putin (Russian: ( listen); born 7 October 1952) is a Russian politician
158						who has been the President of Russia since 7 May 2012. Putin previously served as President from 2000 to 2008, and
159						as Prime Minister of Russia from 1999 to 2000 and again from 2008 to 2012. Putin was also previously the Chairman
160						of United Russia.
161
162						Saddam Hussein::: Saddam Hussein Abd al-Majid al-Tikriti 28 April 1937[2] – 30 December 2006)[3] was the fifth
163						President of Iraq, serving in this capacity from 16 July 1979 until 9 April 2003.[4][5] A leading member of the
164						revolutionary Arab Socialist Ba'ath Party.
165
166						=head4 Case 2. labelComparisonMethod => 'direct', goldKeyDataSource => 'wikipedia'
167
168						a) In this case user just need to provide the mapping between the clusters and Goldkeys.
169						b) User do not need to provide the data about these goldstandard keys. Even though, if user provides the
170						data about these topics, it will be ignored.
171
172
173						for e.g:
174						Cluster0:::Tony Blair
175						Cluster1:::Vladimir Putin
176						Cluster2:::Saddam Hussein
177
178
179						=head4 Case 3. labelComparisonMethod => 'direct', goldKeyDataSource => 'wordnet'
180
181						a) In this case also user just need to provide the mapping between the clusters and Goldkeys.
182						b) User do not need to provide the data about these goldstandard keys.
183
184						for e.g:
185						Cluster0:::Tony Blair
186						Cluster1:::Vladimir Putin
187						Cluster2:::Saddam Hussein
188
189
190						=head4 Case 4. labelComparisonMethod => 'automate', goldKeyDataSource => 'userData'
191
192						a) No Mapping between the clusters and Goldkeys.
193						b) User will just need to provide the data about these goldstandard keys.
194
195
196						for e.g:
197						Tony Blair::: Anthony Charles Lynton Blair (born 6 May 1953)[1] is a British Labour Party politician who served
198						as the Prime Minister of the United Kingdom from 1997 to 2007. He was the Member of Parliament (MP) for Sedgefield
199						from 1983 to 2007 and Leader of the Labour Party from 1994 to 2007. He resigned from all of these positions in
200						June 2007.
201
202						Vladimir Putin::: Vladimir Vladimirovich Putin (Russian: ( listen); born 7 October 1952) is a Russian politician
203						who has been the President of Russia since 7 May 2012. Putin previously served as President from 2000 to 2008, and
204						as Prime Minister of Russia from 1999 to 2000 and again from 2008 to 2012. Putin was also previously the Chairman
205						of United Russia.
206
207						Saddam Hussein::: Saddam Hussein Abd al-Majid al-Tikriti 28 April 1937[2] – 30 December 2006)[3] was the fifth
208						President of Iraq, serving in this capacity from 16 July 1979 until 9 April 2003.[4][5] A leading member of the
209						revolutionary Arab Socialist Ba'ath Party.
210
211
212						=head4 Case 5. labelComparisonMethod => 'automate', goldKeyDataSource => 'wikipedia'
213
214						a) No Mapping between the clusters and Goldkeys.
215						b) User will just need to provide the comma separated goldstandard keys.
216
217						for e.g:
218						Tony Blair , Vladimir Putin, Saddam Hussein
219
220
221
222						=head4 Case 6. labelComparisonMethod => 'automate', goldKeyDataSource => 'wordnet'
223
224						a) No Mapping between the clusters and Goldkeys.
225						b) User will just need to provide the comma separated goldstandard keys.
226
227
228						for e.g:
229						Tony Blair , Vladimir Putin, Saddam Hussein
230
231
232						Sample files for all the cases are included in 'TestData' of the modules.
233
234						1. TestData/TVS/TVS.label- Files containing the Labels generated by SenseClusters.
235
236						2. TestData/TVS/TVSMappingUserData.txt - File containing GoldKeys, their mapping with clusters and detailed data about the GoldKeys.
237
238						3. TestData/TVS/TVSMapping.txt - File containing GoldKeys, their mapping with clusters.
239
240						4. TestData/TVS/TVSTopic.txt - File containing the GoldKeys and their mapping with clusters.
241
242						5. TestData/TVS/TVSUserData.txt - File containing the GoldKeys and user provided detailed data about these gold keys.
243
244						6. TestData/TVS/testTVS.pl - Perl test file which tells us, how to use these files in various scenarios.
245
246
247						=head1 RESULT
248
249
250						=head4 a) Contingency Matrix:
251						Based on the similarity comparison of Labels with the gold standards,
252						the Contingency Matrix is generated. Following shows an example of
253						contingency matrix for the example mentioned in synopsis:
254
255
256						Original Contingency Matrix:
257
258						Bill Clinton Tony Blair
259						-------------------------------------------------
260						Cluster0 54 48
261						-------------------------------------------------
262						Cluster1 31 16
263						-------------------------------------------------
264
265						=head4 b) Using Hungarian algorithm to display the new contingency matrix,
266						whose diagonal elements indicates the assigned similarity-score
267						between a cluster and a gold-standard key. This format of matrix
268						has the maximum possible diagonal's total.
269
270						Example:
271
272						Contigency Matrix after Hungarian Algorithm:
273
274						Tony Blair Bill Clinton
275						-------------------------------------------------
276						Cluster0 48 54
277						-------------------------------------------------
278						Cluster1 16 31
279						-------------------------------------------------
280
281
282						=head4 c) Conclusion: Displays the conclusion of the Hungarian algorithm:
283
284						Example:
285
286						Final Conclusion using Hungarian Algorithm::
287						Cluster0 <--> Tony Blair
288						Cluster1 <--> Bill Clinton
289
290
291						=head4 d) Displaying the overall accuracy for the label assignment:
292
293						Sum (Diagonal Scores)
294						Accuracy = -------------------------------------------
295						Sum (All the Scores of contingency table)
296
297						Example:
298						Accuracy of labels is 53.02%
299						=cut
300
301						################################################################################################################
302
303						=pod
304
305						=head1 Help
306
307						The LabelEvaluation module expects the 'OptionsHash' as the required argument.
308
309						The 'optionHash' has the following elements:
310
311						=head2 labelFile:
312
313						Name of the file containing the labels from SenseClusters. The syntax of file
314						must be similar to label file from SenseClusters. This is mandatory parameter.
315
316						=head2 labelComparisonMethod:
317
318						Name of the method for comparing the labels with GoldKey. This method tells
319						the program whether the keyFile provided by the User will have the mapping
320						between the assigned labels and expected topics of the clusters.
321
322						Possible options are :
323						A) 'DirectAssignment' and
324						B) 'AutomateAssignment'.
325
326						This is mandatory parameter.
327
328						=head2 goldKeyFile:
329
330						Name of the file containing the actual topics (keys) and their data for the
331						clusters. This is mandatory parameter.
332
333						=head2 goldKeyLength:
334
335						This parameter tells about the length of data to be fetched from the external
336						resource such as Wikipedia. The data will be used as reference data.
337						Default value for this parameter is the first section of the Wikipedia page.
338
339						=head2 goldKeyDataSource:
340
341						This parameter tells the name of external application or user supplied file
342						name from where we will get the key's data.
343
344						Options are:
345						1. 'Wikipedia'
346						2. 'User'
347						3. 'Wordnet' (Will be supported in future).
348
349						This is the mandatory parameter.
350
351
352						=head2 weightRatio:
353
354						This ratio tells us about the weightage we should provide to Discriminating
355						label over the descriptive label. Default value is set to 10.
356
357						=head2 stopList:
358
359						This is the name of the file which contains the list of all stop words. This is an
360						optional parameter and its formatting should match the requirement of
361						Text::Similarity, i.e. a single stop word on each line.
362
363						for e.g:
364						Content of stoplist.txt should look like:
365						the
366						of
367						in
368						:
369						:
370						to
371
372						=head2 isClean:
373
374						This variable will decide whether to keep or delete temporary files. Default
375						value is 'true'.
376
377						=head2 verbose:
378
379						Variable used for the deciding whether to show detailed results to user or
380						not. Default value = Off (0), to make it 'On' change value to 1.
381
382						=head2 help :
383
384						This variable will decide whether to display help to user or not. Default
385						value for this parameter is 0.
386
387						%inputOptions = (
388						senseClusterLabelFileName => '/',
389						labelComparisonMethod => 'DirectAssignmentOrAutomateAssignment',
390						goldKeyFileName => '/',
391						goldKeyLength => '',
392						goldKeyDataSource => '',
393						weightRatio => '',
394						stopListFileLocation => '/',
395						isClean => 1,
396						verbose => 0,
397						help => 0
398						);
399
400
401						=head3 Examples
402
403						=head4 With minimum parameters:
404
405						%inputOptions = (
406						senseClusterLabelFileName => 'labelFile.txt',
407						labelComparisonMethod => 'DirectAssignment',
408						goldKeyFileName => 'goldKeyFile.txt',
409						goldKeyDataSource => 'UserData'
410						);
411
412						The above example sets the four mandatory parameters.
413
414						=head4 For Help:
415
416						%inputOptions = (
417						help => 1
418						);
419
420						=head4 With all parameters:
421
422						%inputOptions = (
423						senseClusterLabelFileName => 'labelFile.txt',
424						labelComparisonMethod => 'AutomateAssignment',
425						goldKeyFileName => 'goldKeyFile.txt',
426						goldKeyLength => 2000,
427						goldKeyDataSource => 'Wikipedia',
428						weightRatio => 10,
429						stopListFileLocation => 'stoplist.txt',
430						isClean => 1,
431						verbose => 1,
432						help => 0
433						);
434
435						=cut
436
437						# Package globals naming the hash keys used for the option hash and for the object's fields.
						# new() compares option names case-insensitively (lc on both sides), so the capital "S"
						# in the first value still matches a caller-supplied 'senseClusterLabelFileName'.
438						our $senseClusterLabelFileName = "SenseClusterLabelFileName";
439						our $labelComparisonMethod = "labelComparisonMethod";
440						our $goldKeyFileName = "goldKeyFileName";
441						our $goldKeyLength = "goldKeyLength";
442						our $goldKeyDataSource = "goldKeyDataSource";
443						our $weightRatio = "weightRatio";
444						our $stopListFileLocation = "stopListFileLocation";
445						our $isClean = "isClean";
446						our $verbose = "verbose";
447						our $help = "help";
448
449						# These three names are the object keys under which error and exit state is stored.
450						our $errorCode = "errorCode";
451						our $errorMessage = "errorMessage";
452						our $exitCode = "exitCode";
453
454						# Defining all the possible values of the label-comparison method.
						# new() lower-cases the caller's value before storing it, so these are lower-case.
455						our $labelComparisonMethod_Direct = "direct";
456						our $labelComparisonMethod_Automate = "automate";
457
458						# Defining the names of all possible sources from where we can get the information about
459						# the topics. These are the possible values for the parameter "goldKeyDataSource":
						# NOTE(review): new() stores goldKeyDataSource exactly as given (no lc), while the POD
						# spells the third option 'userData' -- confirm the consumer compares case-insensitively.
460						our $standardReferenceName_Wikipedia = "wikipedia";
461						our $standardReferenceName_WordNet = "wordnet";
462						our $standardReferenceName_UserData = "userdata";
463
						# Names of the two label types; presumably these correspond to the "(Descriptive)" and
						# "(Discriminating)" tags of the label file -- their use is not visible in this part of the file.
464						our $labelType_Descriptive = "descriptive";
465						our $labelType_Discriminating = "discriminating";
466
467						# The following define the exit codes for this program in different situations.
468						our $helpExitCode = 400;
469						our $requiredErrorExitCode = 404;
470						our $unknownErrorExitCode = 502;
471						our $missingMappingErrorExitCode = 401;
472						our $missingKeyDataErrorExitCode = 402;
473
474
475						# Defining the file handle for the output file.
476						our $outFileHandle;
477
478						# Defining the exit code for the module with default value 1.
479						# "1" indicates that program exited with proper execution.
480						our $exitCodeValue = 1;
481
482
483						##########################################################################################
484
485						=head1 Constructor: new()
486
487						This is the constructor which will create object for this class.
488						Reference : http://perldoc.perl.org/perlobj.html
489
490						This constructor takes the hash argument and initializes it for the class.
491
492						%inputOptions = (
493						senseClusterLabelFileName => 'value1',
494						labelComparisonMethod => 'value2',
495						goldKeyFileName => 'value3',
496						goldKeyLength => value4,
497						goldKeyDataSource => 'value5',
498						weightRatio => value6,
499						stopListFileLocation => 'value7',
500						isClean => value8,
501						verbose => value9,
502						help => value10
503						);
504
505						Please refer to section "help" about the detailed discussion on this hash.
506						=cut
507
508						##########################################################################################
509
510						sub new {
						# Constructor. Blesses an empty hash into the invoking class and, when a hash
						# reference of options is passed, copies every recognised option into the object.
						#
						# Arguments : $class   - class name (supplied by the -> call).
						#             $argHash - optional reference to the option hash.
						# Returns   : the blessed object, always -- even when an error was recorded.
						#
						# Option names are matched case-insensitively. For the four mandatory options
						# (label file, comparison method, gold-key file, gold-key data source) a false
						# value records errorCode/errorMessage on the object and reports through error();
						# construction then continues, so callers must inspect {errorCode} themselves.
						# NOTE(review): these checks only run for keys that are present in the hash; a
						# mandatory key that is omitted altogether is not detected here.
511
512						# Creating the object.
513	4		4	1	125	my $class = shift;
514	4				14	my $driverObject = {};
515
516						# Explicit association is created by the built-in bless function.
517	4				15	bless $driverObject, $class;
518
519						# Getting the Hash as the argument.
520	4				12	my $argHash = shift;
521
522						# If the argument is defined then, read its contents and populate the class member
523						# values.
524	4	50			22	if ( defined $argHash ) {
525
526						# Reading the Key and Value from the argument-hash.
527	4				30	while (my ($key, $val ) = each %$argHash ) {
528
529						# Setting the class variables.
530	24	100			236	if ( lc($key) eq lc($senseClusterLabelFileName)) {
		100
		100
		50
		100
		100
		50
		50
		0
		0
531	4	50			16	if($val){
532	4				42	$driverObject->{$senseClusterLabelFileName} = $val;
533						}else{
534						# Raise Error: Missing mandatory parameter.
535	0				0	$driverObject->{$errorCode} = $requiredErrorExitCode;
536	0				0	$driverObject->{$errorMessage}= "Label file from the SenseClusters is missing!";
537	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
538						}
539
540						} elsif (lc($key) eq lc($labelComparisonMethod)) {
541	4	50			16	if($val){
						# The comparison method is the only option stored lower-cased.
542	4				24	$driverObject->{$labelComparisonMethod} = lc($val);
543						}else{
544						# Raise Error: Missing mandatory parameter.
545	0				0	$driverObject->{$errorCode} = $requiredErrorExitCode;
546	0				0	$driverObject->{$errorMessage}= "Comparison method for labels and keys is not mentioned!";
547	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
548						}
549
550						} elsif (lc($key) eq lc($goldKeyFileName)) {
551	4	50			19	if($val){
552	4				26	$driverObject->{$goldKeyFileName} = $val;
553						}else{
554						# Raise Error: Missing mandatory parameter.
555	0				0	$driverObject->{$errorCode} = $requiredErrorExitCode;
556	0				0	$driverObject->{$errorMessage}= "Please specify the file name for the GoldKey!";
557	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
558						}
559						} elsif ( lc($key) eq lc($goldKeyLength)) {
						# Optional: stored only when true; no default is set here.
560	0	0			0	if($val){
561	0				0	$driverObject->{$goldKeyLength} = $val;
562						}
563						} elsif ( lc($key) eq lc($goldKeyDataSource)) {
564	4	50			17	if($val){
						# NOTE(review): stored as given (not lower-cased) -- confirm the consumer
						# normalises case before comparing with the lower-case source names.
565	4				27	$driverObject->{$goldKeyDataSource} = $val;
566						}else{
567						# Raise Error: Missing mandatory parameter.
568	0				0	$driverObject->{$errorCode} = $requiredErrorExitCode;
569	0				0	$driverObject->{$errorMessage}= "Please specify the name of the source from which information about the topic will be feteched!";
570	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
571						}
572						} elsif ( lc($key) eq lc($weightRatio)) {
						# A false value (including 0) falls back to the default ratio of 10.
573	4	50			15	if($val){
574	4				153	$driverObject->{$weightRatio} = $val;
575						}else{
576	0				0	$driverObject->{$weightRatio} = 10;
577						}
578						} elsif ( lc($key) eq lc($stopListFileLocation)) {
						# A false value is normalised to the empty string.
579	0	0			0	if($val){
580	0				0	$driverObject->{$stopListFileLocation} = $val;
581						}else{
582	0				0	$driverObject->{$stopListFileLocation} = "";
583						}
584						} elsif ( lc($key) eq lc($isClean)) {
						# A false value is normalised to 0.
585	4	50			15	if($val){
586	4				22	$driverObject->{$isClean} = $val;
587						}else{
588	0				0	$driverObject->{$isClean} = 0;
589						}
590						} elsif ( lc($key) eq lc($verbose)) {
						# A false value is normalised to 0.
591	0	0			0	if($val){
592	0				0	$driverObject->{$verbose} = $val;
593						}else{
594	0				0	$driverObject->{$verbose} = 0;
595						}
596						} elsif ( lc($key) eq lc($help)) {
						# Only a value numerically equal to 1 prints the help text; the code returned by
						# help() is stored under exitCode. Any other value just records help => 0.
						# NOTE(review): "==" is a numeric comparison, so an undef or non-numeric value
						# would warn if warnings were enabled.
597	0	0			0	if($val == 1){
598	0				0	$driverObject->{$exitCode} = help();
599						}else{
600	0				0	$driverObject->{$help} = 0;
601						}
602						}
603						}
604						}
605						# Returning the blessed hash reference held in $driverObject.
606	4				14	return $driverObject;
607						}
608
609
610						# Function to print the input parameters of the program.
611						sub printInputParameter {
						# Takes the driver object as its only argument and prints, one per line, every
						# option stored on it followed by the exitCode, errorCode and errorMessage fields.
						# Output goes through plain print (no explicit filehandle).
						# Fields that were never set are undefined and interpolate as empty text.
612	0		0	0	0	my $driverObject = shift;
613	0				0	print "SenseClusterLabelFileName:: $driverObject->{$senseClusterLabelFileName} \n";
614	0				0	print "labelComparisonMethod:: $driverObject->{$labelComparisonMethod} \n";
615	0				0	print "goldKeyFileName:: $driverObject->{$goldKeyFileName} \n";
616	0				0	print "goldKeyLength:: $driverObject->{$goldKeyLength} \n";
617	0				0	print "goldKeyDataSource:: $driverObject->{$goldKeyDataSource} \n";
618	0				0	print "weightRatio:: $driverObject->{$weightRatio} \n";
619	0				0	print "stopListFileLocation:: $driverObject->{$stopListFileLocation} \n";
620	0				0	print "isClean:: $driverObject->{$isClean} \n";
621	0				0	print "verbose:: $driverObject->{$verbose} \n";
622	0				0	print "help:: $driverObject->{$help} \n";
623	0				0	print "ExitCode:: $driverObject->{$exitCode} \n";
624	0				0	print "ErrorCode:: $driverObject->{$errorCode} \n";
625	0				0	print "ErrorMessage:: $driverObject->{$errorMessage} \n";
626						}
627
628
629						# Method for printing the help to end user.
630						sub help{
						# Takes no arguments. Prints a sample option hash together with a note listing the
						# four mandatory parameters, using plain print (no explicit filehandle).
						# Returns $helpExitCode (declared as 400 at file level); new() stores that value
						# under the object's exitCode key.
631	0		0	1	0	print "\nPlease pass values of the parameters of the option-hash in the following format:
632						%inputOptions = (
633						senseClusterLabelFileName => 'labelFile.txt',
634						labelComparisonMethod => 'AutomateAssignment',
635						goldKeyFileName => 'goldKeyFile.txt',
636						goldKeyLength => 2000,
637						goldKeyDataSource => 'Wikipedia',
638						weightRatio => 10,
639						stopListFileLocation => 'stoplist.txt',
640						isClean => 0,
641						verbose => 1,
642						help => 0
643						);
644						\nNote that only 'senseClusterLabelFileName', 'labelComparisonMethod', 'goldKeyFileName'".
645						" and 'goldKeyDataSource' are mandatory parameters.\n".
646						"For detailed explanation and more examples, please refer the HELP and SYNOPSIS section of this module.\n\n" ;
647
648						# Returning the exit code for the "help".
649	0				0	return $helpExitCode;
650						}
651
652
653						# Method for printing an error code and its message to STDERR.
654						sub error{
						# Arguments (positional): the error code, then the error message.
						# This sub only reports: it neither exits nor dies, so the caller decides what
						# happens next.
						# The two lexicals below shadow the package variables $errorCode and
						# $errorMessage (which hold hash-key names) for the rest of this sub.
655	0		0	0	0	my $errorCode = shift;
656	0				0	my $errorMessage = shift;
657
658	0				0	print STDERR "Program exiting with the error. ";
659	0				0	print STDERR "\nError Code=$errorCode. \n\t$errorMessage \n\n";
660						}
661
662
663
664						# Method for generating the error for "missing mapping".
665						sub errorMissingMapping{
						# Records the "missing mapping" error on the driver object, reports it through
						# error() and then terminates the whole process with exit -- it never returns.
						# Argument: the driver object.
666						# Getting the object from the argument.
667	0		0	0	0	my $driverObject = shift;
668
669						# Raise Error: Missing Cluster's label and GoldStandard Key mapping.
670	0				0	$driverObject->{$errorCode} = $missingMappingErrorExitCode;
671	0				0	$driverObject->{$errorMessage}= "Missing the mapping between Clusters and GoldStandard Keys.".
672						"\n\tPlease specify the mapping in File containing information about GoldStandard Keys!".
673						"\n\tRefer this module's cpan documentation on \"FILE FORMATS\" - Case1 or Case2 or Case3".
674						"(labelComparisonMethod => 'direct') \nabout how to specify the mapping inside a GoldKey file.";
675
676						# Calling method for printing the error message.
677	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
678
						# NOTE(review): the code is 401; exit statuses above 255 are typically truncated
						# by the operating system -- confirm no caller relies on observing 401.
679	0				0	exit $driverObject->{$errorCode};
680						}
681
682
683						# Method for generating the error for "missing user data in the GoldKeyFile".
684						sub errorMissingUserData{
						# Records the "missing gold-key data" error on the driver object, reports it
						# through error() and then terminates the whole process with exit -- it never
						# returns. Argument: the driver object.
685
686						# Getting the object from the argument.
687	0		0	0	0	my $driverObject = shift;
688
689						# Raise Error: Missing user data for the GoldStandard Key.
690	0				0	$driverObject->{$errorCode} = $missingKeyDataErrorExitCode;
691	0				0	$driverObject->{$errorMessage}= "Missing the data for GoldStandard Keys.".
692						"\n\tPlease specify the data for the GoldStandard Keys!".
693						"\n\tRefer this module's cpan documentation on \"FILE FORMATS\" - Case1 or Case4".
694						"(goldKeyDataSource => 'userData') \nabout how to specify the data for the gold stadndard key.";
						# Printing the error message on STDERR.
695	0				0	error($driverObject->{$errorCode}, $driverObject->{$errorMessage});
696
						# NOTE(review): the code is 402; exit statuses above 255 are typically truncated
						# by the operating system -- confirm no caller relies on observing 402.
697	0				0	exit $driverObject->{$errorCode};
698						}
699
700
701
702						########################################################################################
703						=head1 Function: evaluateLabels
704
705						Function which is responsible for evaluating the labels of the clusters. This
706						function will call the other modules for completing the process.
707
708						@argument : $driverObject : Object of the current file.
709
710						@return : $accuracy : DataType(Float)
711						Indicates the overall accuracy of the assignments.
712
713						@description :
714
715						Overall algorithm for calculating the accuracy of the labels assignment with the help of gold
716						standard keys are:
717
718						Step 1: Read the clusters and their labels information from the ClusterLabel file.
719
720						=head3 Case A: User has provided the mapping information about the cluster and gold standard key.
721
722						Step 2:Read Clusters-Topics mapping information.
723
724						=head4 Subcase1: User provides data for gold standard keys.
725
726						Step 3:Read the gold standard keys and their data from the file provided by user.
727						Step 4: continue to next step :).
728
729						=head4 Subcase2: User provides the gold standard keys. We will fetch data from Wikipedia.
730
731						User will just provide the data about the topics, but no mapping.
732
733						Step 3:Read gold standard keys from the file provided by user.
734						Step 4:Read data about the gold standard keys from the Wikipedia.
735
736						=head4 Subcase3: User provides the gold standard keys. We will fetch data from Wordnet.
737
738						Step 3:Read gold standard keys from the file provided by user.
739						Step 4:Read data about the gold standard keys from the Wordnet.
740
741						Step 5: Create contingency matrix with similarity-scores of cluster's label against each
742						gold standard key's data (obtained from steps 3 and 4.)
743						Step 6: Using the mapping provided by user(step 2) to calculate the diagonal score for the
744						contingency matrix.
745						Step 7: Overall Accuracy for the current cluster's label assignment can be calculated as :
746
747						Sum (Diagonal Scores)
748						Accuracy =--------------------------------------------------
749						Sum (All the Scores of contingency table)
750
751						=head3 Case B: User has not provided the mapping information about the cluster and gold standard key.
752
753						We will use the Hungarian algorithm to compute the mapping.
754
755						=head4 Subcase1: User provides data for gold standard keys.
756
757						Step 2: Read the gold standard keys and their data from the file provided by user.
758
759						Step 3: Continue to next step :).
760
761						=head4 Subcase2: User provides the gold standard keys. We will fetch data from Wikipedia.
762						User will just provide the data about the topics, but no mapping.
763
764						Step 2: Read gold standard keys from the file provided by user.
765
766						Step 3: Read data about the gold standard keys from the Wikipedia.
767
768						=head4 Subcase3: User provides the gold standard keys. We will fetch data from Wordnet.
769
770						Step 2: Read gold standard keys from the file provided by user.
771
772						Step 3: Read data about the gold standard keys from the Wordnet.
773
774
775						=head3 Common Steps for the all three subcases.
776
777						Step 4: Create contingency matrix with similarity-scores of cluster's label against each
778						gold standard key's data (obtained from steps 2 and 3.)
779
780						Step 5: Use Hungarian algorithm to determine the mapping of Clusters with gold standard keys.
781
782						Step 6: Use the above mapping to calculate the total diagonal score for the new contingency matrix.
783
784						Step 7: Overall Accuracy for the current cluster's label assignment can be calculated as :
785
786
787						                     Sum (Diagonal Scores)
788						Accuracy = --------------------------------------------------
789						           Sum (All the Scores of contingency table)
790
791						=cut
792
793
794						#########################################################################################
795						# Method for evaluating the labels of the clusters.
796						# Reads the cluster labels and the gold standard keys, builds a contingency matrix of similarity-scores,
797						# and returns the accuracy score (it stays 0 when no comparison branch below applies).
798						sub evaluateLabels{
799						# Getting the current driver object (the invocant) as the first argument.
800	4		4	0	37	my $driverObject = shift;
801
802						# Getting the cluster-labels file name from the $driverObject.
803	4				14	my $clusterFileName = $driverObject->{$senseClusterLabelFileName};
804
805						# Getting the "isClean" parameter from the $driverObject.
806	4				12	my $isCleaned = $driverObject->{$isClean};
807
808						# Getting the "verbose" option from the $driverObject.
809	4				13	my $verboseOption = $driverObject->{$verbose};
810
811						# Creating the read-file object for reading the cluster's label.
812	4				53	my $readClusterFileObject =
813						Text::SenseClusters::LabelEvaluation::ReadingFilesData->new ($clusterFileName);
814
815						# Defining hash which will hold the cluster and its labels.
816	4				11	my %labelSenseClustersHash = ();
817						# Calling the function to read the cluster and its labels data into the hash.
818	4				38	my $labelSenseClustersHashRef =
819						$readClusterFileObject->readLinesFromClusterFile(\%labelSenseClustersHash);
820	4				24	%labelSenseClustersHash = %$labelSenseClustersHashRef;
821
822						# Getting the topics (gold standard keys) file name.
823	4				16	my $topicsFileName = $driverObject->{$goldKeyFileName};
824
825						# Defining the variable which will hold the accuracy score for the labels to be evaluated.
826	4				10	my $accuracyScore = 0;
827
828						# Creating the read-file object for standard-gold-keys.
829	4				21	my $readTopicFileObject =
830						Text::SenseClusters::LabelEvaluation::ReadingFilesData->new ($topicsFileName);
831
832
833						# CASE A: User has provided the mapping information about the cluster and gold standard key.
834	4	100			38	if(lc($driverObject->{$labelComparisonMethod}) eq $labelComparisonMethod_Direct){
		50
835
836						# Read Cluster-Topic mapping information (hash) and the list of topics (array).
837	2				11	my ($hashRef, $topicArrayRef) = $readTopicFileObject->readMappingFromTopicFile();
838
839						# Reading the hash and the array from their references.
840	2				11	my %mappingHash = %$hashRef;
841	2				6	my @topicArray = @$topicArrayRef;
842
843
844						# If the mapping is empty, report it through errorMissingMapping.
845	2	50			19	if(!%mappingHash){
846	0				0	errorMissingMapping($driverObject);
847						}
848
849
850						# Subcase1: User provides data for gold standard keys.
851	2	100			14	if(lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_UserData){
		50
		0
852
853						# Call user comparison method.
854
855						# Reading the topic-data from the user file.
856						# User will provide the name and data of the topics along with mapping.
857	1				6	my $topicDataHashRef = $readTopicFileObject->readTopicDataFromTopicFile(\@topicArray);
858
859						# Reading the hash from its reference.
860	1				6	my %topicDataHash = %$topicDataHashRef;
861
862						# If there is no user's data for the topics, report it through errorMissingUserData.
863	1	50			7	if(!%topicDataHash){
864	0				0	errorMissingUserData($driverObject);
865						}
866
867
868
869						# Calling the function 'makeContigencyMatrix' to get the contingency matrix of similarity-scores.
870	1				9	my ($matrixScoreRef, $colHeaderRef, $rowHeaderRef, $totalMatrixScore) =
871						makeContigencyMatrix(\%labelSenseClustersHash, \%topicDataHash, $driverObject->{$weightRatio},
872						$driverObject->{$stopListFileLocation}, $verboseOption);
873
874						# Calling the function 'printMatrix' to print the contingency matrix.
875	1				12	Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo::printMatrix
876						($matrixScoreRef, $colHeaderRef,$rowHeaderRef);
877
878						# Calling function to calculate the overall accuracy for the user-provided mapping.
879	1				7	$accuracyScore = calculateAccuracy
880						(\%mappingHash, $matrixScoreRef, $colHeaderRef, $rowHeaderRef, $totalMatrixScore);
881
882						}elsif (lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_Wikipedia){
883
884						#
885						# Subcase2: User provides the gold standard keys. We will fetch data from Wikipedia.
886						# User provides the topic names and the mapping, but no data about the topics.
887						#
888
889
890	1				3	my %topicDataHash = ();
891	1				2	foreach my $topic (@topicArray){
892						# Call wikipedia function to get the data for the current topic.
893	3				16	my $topicData =
894						Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData::getWikiDataForTopic(
895						$topic, $isCleaned);
896	3				16	$topicDataHash{$topic} = $topicData;
897						#print "$topic $topicData\n";
898						}
899
900						# Calling the function 'makeContigencyMatrix' to get the contingency matrix of similarity-scores.
901	1				11	my ($matrixScoreRef, $colHeaderRef, $rowHeaderRef, $totalMatrixScore) =
902						makeContigencyMatrix(\%labelSenseClustersHash, \%topicDataHash, $driverObject->{$weightRatio},
903						$driverObject->{$stopListFileLocation}, $verboseOption);
904	1				129	print STDERR "\nContigency Matrix based on user input::\n";
905
906						# Calling the function 'printMatrix' to print the contingency matrix.
907	1				10	Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo::printMatrix
908						($matrixScoreRef, $colHeaderRef,$rowHeaderRef);
909
910						# Calling function to calculate the overall accuracy for the user-provided mapping.
911	1				6	$accuracyScore = calculateAccuracy
912						(\%mappingHash, $matrixScoreRef, $colHeaderRef, $rowHeaderRef, $totalMatrixScore);
913
914						}elsif (lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_WordNet){
915
916						# Subcase3: User provides the gold standard keys. We will fetch data from Wordnet.
917
918						# Call wordnet comparison method. User will just provide the topic name.
919						# TODO: Left for future implementation; $accuracyScore stays 0 in this branch.
920						}
921
922						# CASE B: User has not provided the mapping information about the cluster and gold standard key.
923						# We will use the Hungarian algorithm to compute the mapping.
924						}elsif(lc($driverObject->{$labelComparisonMethod}) eq $labelComparisonMethod_Automate){
925
926						# Subcase1: User provides data for gold standard keys.
927						# User will just provide the data about the topics, but no mapping.
928	2	100			16	if(lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_UserData){
		50
		0
929
930						# Empty array passed as the topic list, since no mapping (and hence no topic list) was read.
931	1				2	my @tempTopicNameArray = ();
932
933						# Reading the topic-data from the user file.
934	1				10	my $topicDataHashRef = $readTopicFileObject->readTopicDataFromTopicFile(\@tempTopicNameArray);
935						# Reading the hash from its reference.
936	1				7	my %topicDataHash = %$topicDataHashRef;
937
938						# If there is no user's data for the topics, report it through errorMissingUserData.
939	1	50			8	if(!%topicDataHash){
940	0				0	errorMissingUserData($driverObject);
941						}
942
943						# Calling the function which will create the contingency matrix for given set of inputs.
944	1				11	my ($matrixScoreRef, $colHeaderRef, $rowHeaderRef,$totalMatrixScore) =
945						makeContigencyMatrix(\%labelSenseClustersHash, \%topicDataHash, $driverObject->{$weightRatio},
946						$driverObject->{$stopListFileLocation}, $verboseOption);
947
948						# Reading the arrays from their references.
949	1				7	my @matrixScore = @$matrixScoreRef;
950	1				4	my @colHeader = @$colHeaderRef;
951	1				3	my @rowHeader = @$rowHeaderRef;
952
953						# Creating the Hungarian object.
954	1				19	my $hungarainObject = Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo
955						->new(\@matrixScore, \@colHeader, \@rowHeader);
956
957						# Calling reAssigningWithHungarianAlgo; only the returned accuracy is used below.
958	1				7	my ($accuracy,$finalMatrixRef,$newColumnHeaderRef) = $hungarainObject->reAssigningWithHungarianAlgo();
959
960
961						# Converting the accuracy to a percentage and rounding it to two decimal places.
962	1				23	$accuracyScore = sprintf("%.2f", ($accuracy*100));
963	1				57	print STDERR "\n\nAccuracy of labels is $accuracyScore\% \n\n";
964
965	1	50			29	if($accuracy == 0){
966	0				0	print STDERR "\n\n Accuracy score \"zero\" indicates either of the following two facts::\n";
967	0				0	print STDERR " 1. Labels assigned to Cluster is completely wronged. OR\n";
968	0				0	print STDERR " 2. Gold-Keys provided by you are not correct.... \n";
969						}
970
971						# Subcase2: User provides the gold standard keys. We will fetch data from Wikipedia.
972						# User will just provide the names of the topics, but no data or mapping.
973						}elsif (lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_Wikipedia){
974
975						# Calling readLinesFromTopicFile function to get the list of all the topics.
976	1				6	our $standardTerms = $readTopicFileObject->readLinesFromTopicFile();
977
978						# Splitting the standard terms on "," to get the topic names.
979						# E.g: "Bill Clinton , Tony Blair". NOTE(review): whitespace around each name is not trimmed here.
980	1				7	my @topicArray = split(/[\,]/, $standardTerms);
981
982						# Building the topic-data hash. User will just provide the topic name.
983	1				4	my %topicDataHash = ();
984	1				4	foreach my $topic (@topicArray){
985						# Call wikipedia function to get the data for the current topic.
986	3				18	my $topicData =
987						Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData::getWikiDataForTopic($topic, $isCleaned);
988
989						# Setting the data about the topic into hash.
990	3				19	$topicDataHash{$topic} = $topicData;
991						}
992
993						# Calling the function which will create the contingency matrix for given set of inputs.
994	1				12	my ($matrixScoreRef, $colHeaderRef, $rowHeaderRef, $totalMatrixScore) =
995						makeContigencyMatrix(\%labelSenseClustersHash, \%topicDataHash, $driverObject->{$weightRatio},
996						$driverObject->{$stopListFileLocation}, $verboseOption);
997
998						# Reading the arrays from their references.
999	1				9	my @matrixScore = @$matrixScoreRef;
1000	1				4	my @colHeader = @$colHeaderRef;
1001	1				3	my @rowHeader = @$rowHeaderRef;
1002
1003						# Creating the object of the class AssigningLabelUsingHungarianAlgo.
1004	1				23	my $hungarainObject = Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo
1005						->new(\@matrixScore, \@colHeader, \@rowHeader);
1006
1007						# Calling reAssigningWithHungarianAlgo; only the returned accuracy is used below.
1008	1				5	my ($accuracy,$finalMatrixRef,$newColumnHeaderRef) = $hungarainObject->reAssigningWithHungarianAlgo();
1009
1010						# Converting the accuracy to a percentage and rounding it to two decimal places.
1011	1				28	$accuracyScore = sprintf("%.2f", ($accuracy*100));
1012	1				99	print STDERR "\n\nAccuracy of labels is $accuracyScore\% \n\n";
1013
1014						}elsif (lc($driverObject->{$goldKeyDataSource}) eq $standardReferenceName_WordNet){
1015
1016						# Subcase3: User provides the gold standard keys. We will fetch data from Wordnet.
1017
1018						# Call wordnet comparison method. User will just provide the topic name.
1019						# TODO: Left for future implementation; $accuracyScore stays 0 in this branch.
1020						}
1021						}
1022
1023						# Returning the accuracy of the labels of the clusters.
1024	4				132	return $accuracyScore;
1025						}
1026
1027
1028						##########################################################################################
1029						=head1 Function: makeContigencyMatrix
1030
1031						This method is responsible for making the Contingency Matrix containing the similarity-scores of the labels with the data of the gold standard keys.
1032
1033						@argument : $labelSenseClustersHashRef (Hash containing the labels generated by the SenseClusters)
1034
1035						@argument : $topicDataHashRef (Hash containing the data of the gold standard keys)
1036
1037						@argument : $weightageRatio (Parameter which tells the weightage to be given to discriminating labels over descriptive labels of the SenseClusters)
1038
1039						@return : 1. @matrixScore - Contingency matrix containing the similarity-scores.
1040
1041						@return : 2. @colHeader - Array containing the column header for the contingency matrix.
1042
1043						@return : 3. @rowHeader - Array containing the row header for the contingency matrix.
1044
1045						@return : 4. $totalMatrixScore - Total similarity scores of the contingency matrix.
1046
1047
1048						@description :
1049
1050						1). It will iterate through the hash (%labelSenseClustersHash) and extract the descriptive and discriminating labels for each cluster.
1051
1052						2). It will read the data about each gold standard key from the hash (%topicDataHash).
1053
1054						3). It then uses the module, Text::SenseClusters::LabelEvaluation::SimilarityScore to get various similarity score.
1055
1056						4). Finally, it uses the raw-lesk scores to prepare the contingency matrix.
1057
1058						=cut
1059						##########################################################################################
1060
1061						sub makeContigencyMatrix{
1062						# Getting the reference of the Hash containing the cluster's label.
1063	4		4	0	11	my $labelSenseClustersHashRef = shift;
1064						# Reading the hash from its reference.
1065	4				22	my %labelSenseClustersHash = %$labelSenseClustersHashRef;
1066
1067						# Getting the reference of the hash containing the topic and its information.
1068	4				10	my $topicDataHashRef = shift;
1069						# Reading the hash from its reference.
1070	4				20	my %topicDataHash = %$topicDataHashRef;
1071
1072						# Getting the weightage applied to the discriminating score relative to the descriptive score.
1073	4				67	my $weightageRatio = shift;
1074
1075						# Getting the stop list file location (passed through to SimilarityScore).
1076	4				13	my $stopListFileLoc = shift;
1077
1078						# Getting the verbose option (passed through to SimilarityScore).
1079	4				7	my $verboseOpt = shift;
1080
1081						# Defining the matrix which contains the score.
1082	4				12	my @matrixScore = ();
1083						# Defining the row index (one row per cluster) for the matrix score.
1084	4				9	my $firstDimIndex = 0;
1085						# Variable which will hold the sum of all the scores in the matrix.
1086	4				9	my $totalMatrixScore = 0;
1087
1088						# Array that will contain Row Header (sorted cluster names).
1089	4				28	my @rowHeader = sort keys %labelSenseClustersHash;
1090						# Array that will contain Column Header (sorted topic names).
1091	4				20	my @colHeader = sort keys %topicDataHash;
1092
1093						# Iterating through each cluster entry, in the same sorted order as the row header.
1094	4				20	foreach my $key (sort keys %labelSenseClustersHash){
1095						# Variables to store the two types of labels for the cluster.
1096	12				31	my $clusterDescriptiveLabel ="";
1097	12				28	my $clusterDiscriminatingLabel ="";
1098
1099						# Reading the labels for a cluster from the hash (label-type keys are matched case-insensitively).
1100	12				28	for my $innerkey (keys %{$labelSenseClustersHash{$key}}){
	12				68
1101	24	100			135	if(lc($innerkey) eq $labelType_Descriptive){
		50
1102	12				49	$clusterDescriptiveLabel = $labelSenseClustersHash{$key}{$innerkey};
1103						}elsif(lc($innerkey) eq $labelType_Discriminating){
1104	12				49	$clusterDiscriminatingLabel = $labelSenseClustersHash{$key}{$innerkey};
1105						}
1106						}
1107
1108						# Defining the column index (one column per topic) for the second dimension.
1109	12				28	my $secondDimIndex = 0;
1110
1111						# Iterating through the topics, in the same sorted order as the column header.
1112	12				57	for my $topicKey (sort keys %topicDataHash){
1113
1114						# Calling the SimilarityScore module to get the Similarity Score between
1115						# Descriptive labels and Gold Key Data.
1116	36				341	my $similarityObject = Text::SenseClusters::LabelEvaluation::SimilarityScore
1117						->new($clusterDescriptiveLabel,$topicDataHash{$topicKey},
1118						$stopListFileLoc,$verboseOpt );
1119
1120						# Getting the overlapping scores; only the 'raw_lesk' entry is used.
1121	36				161	my ($score, %allScores) = $similarityObject->computeOverlappingScores();
1122	36				980	my $descriptiveScore = $allScores{'raw_lesk'};
1123
1124						# Calling the SimilarityScore module to get the Similarity Score between
1125						# Discriminating labels and Gold Key Data.
1126	36				339	$similarityObject = Text::SenseClusters::LabelEvaluation::SimilarityScore
1127						->new($clusterDiscriminatingLabel,$topicDataHash{$topicKey},
1128						$stopListFileLoc, $verboseOpt);
1129
1130						# Getting the overlapping scores; only the 'raw_lesk' entry is used.
1131	36				163	($score, %allScores) = $similarityObject->computeOverlappingScores();
1132	36				1183	my $discriminatingScore = $allScores{'raw_lesk'};
1133
1134
1135						# Total score = descriptive score + weightage ratio * discriminating score.
1136	36				112	my $totalScore = $descriptiveScore + $weightageRatio * $discriminatingScore;
1137						# Storing the similarity score into the 2D-array matrix score and moving to the next column.
1138	36				136	$matrixScore[$firstDimIndex][$secondDimIndex++] = $totalScore;
1139
1140						# Adding the current similarity-score to overall total similarity score.
1141	36				268	$totalMatrixScore = $totalMatrixScore + $totalScore;
1142						}
1143	12				54	$firstDimIndex++;
1144						}
1145						# Returning references to the score matrix, column header and row header, plus the total score.
1146	4				42	return (\@matrixScore, \@colHeader, \@rowHeader, $totalMatrixScore);
1147						}
1148
1149
1150						########################################################################################
1151						=head1 Function: calculateAccuracy
1152
1153						Method used for calculating the Accuracy score for the labels generated by the
1154						SenseClusters or others.
1155
1156						@argument1 : $mappingHashRef (Reference to Hash which contains the mapping information about the cluster and gold standard)
1157
1158						@argument2 : $matrixScoreRef (2-D Array/Matrix which contains the similarity-scores of each labels)
1159
1160						@argument3 : $colHeaderRef (Reference of array which contains the column header)
1161
1162						@argument4 : $rowHeaderRef (Reference of array which contains the row header)
1163
1164						@argument5 : $totalMatrixScore (Total similarity score of the labels with gold standard)
1165
1166						@return : Return the overall accuracy of the labels assigned by the SenseClusters.
1167
1168						@description :
1169
1170						1). With the help of ($mappingHashRef, $matrixScoreRef, $colHeaderRef, $rowHeaderRef),
1171						this function tries to calculate the sum of all diagonal elements.
1172
1173						2). It will then calculate the accuracy for the assignment as
1174
1175						                 Sum (Diagonal Scores)
1176						Accuracy = -----------------------------------
1177						                 Sum (All the Scores)
1178
1179						=cut
1180
1181						#########################################################################################
1182						sub calculateAccuracy{
1183	2		2	0	5	my $mappingHashRef = shift;
1184	2				5	my $matrixScoreRef = shift;
1185	2				3	my $colHeaderRef = shift;
1186	2				3	my $rowHeaderRef = shift;
1187	2				5	my $totalMatrixScore = shift;
1188
1189	2				13	my %mappingHash = %$mappingHashRef;
1190	2				8	my @matrixScore = @$matrixScoreRef;
1191						# Array that will contain Row Header (Cluster name).
1192	2				5	my @rowHeader = @$rowHeaderRef;
1193						# Array that will contain Column Header (Topic name).
1194	2				5	my @colHeader = @$colHeaderRef;
1195
1196						# Internal index for the matrix score (not used further in this function).
1197	2				4	my $firstDimIndex = 0;
1198						# Variable which will hold the sum of the mapped (diagonal) scores.
1199	2				4	my $diagonalScore = 0;
1200
1201	2				94	print STDERR "\n\n Mapping provided by user\n";
1202	2				9	for my $key (keys %mappingHash){
1203	6				8	my $rowIndex = 0;
1204	6				7	my $colIndex = 0;
1205
1206						#print STDERR "\n$key $mappingHash{$key} \t @rowHeader \t @colHeader \n\n\n";
1207	6				13	for my $index(0..@rowHeader-1){
1208	18	100			48	if($key eq $rowHeader[$index]){
1209	6				12	$rowIndex = $index;
1210						}
1211						}
1212	6				11	for my $index(0..@colHeader-1){
1213	18	100			40	if($mappingHash{$key} eq $colHeader[$index]){
1214	6				11	$colIndex = $index;
1215						}
1216						}
1217						# Adding the mapped cell to the diagonal score. NOTE(review): an unmatched key or topic keeps index 0.
1218	6				11	$diagonalScore = $diagonalScore + $matrixScore[$rowIndex][$colIndex];
1219	6				277	print STDERR "\t$key\t<-->\t$mappingHash{$key} \n";
1220						}
1221
1222						# Defining the accuracy; it stays 0 when the total matrix score is 0 (no division is attempted).
1223	2				6	my $accuracy = 0;
1224
1225	2	50			8	if($totalMatrixScore == 0){
1226	0				0	$accuracy = 0;
1227						}else{
1228						# Expressing the accuracy as a percentage, rounded to 2 decimal places.
1229	2				42	$accuracy = sprintf("%.2f", ($diagonalScore *100 /$totalMatrixScore));
1230						}
1231
1232	2				108	print STDERR "\nAccuracy of assigned labels =". $accuracy ."\%\n\n";
1233
1234						# Returning the accuracy.
1235	2				41	return $accuracy;
1236						}
1237
1238
1239
1240						#######################################################################################################
1241						=pod
1242
1243						=head1 BUGS
1244
1245						=over
1246
1247						=item * Currently not supporting the WordNet gold standards comparison.
1248
1249						=back
1250
1251						=head1 SEE ALSO
1252
1253						http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
1254
1255						Last modified by :
1256						$Id: Driver.pm,v 1.6 2013/03/18 02:59:42 jhaxx030 Exp $
1257
1258						=head1 AUTHORS
1259
1260						Anand Jha, University of Minnesota, Duluth
1261						jhaxx030 at d.umn.edu
1262
1263						Ted Pedersen, University of Minnesota, Duluth
1264						tpederse at d.umn.edu
1265
1266
1267						=head1 COPYRIGHT AND LICENSE
1268
1269						Copyright (C) 2012-2013 Ted Pedersen, Anand Jha
1270
1271						See http://dev.perl.org/licenses/ for more information.
1272
1273						This program is free software; you can redistribute it and/or modify
1274						it under the terms of the GNU General Public License as published by
1275						the Free Software Foundation; either version 2 of the License, or
1276						(at your option) any later version.
1277
1278						This program is distributed in the hope that it will be useful,
1279						but WITHOUT ANY WARRANTY; without even the implied warranty of
1280						MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1281						GNU General Public License for more details.
1282
1283						You should have received a copy of the GNU General Public License
1284						along with this program; if not, write to:
1285
1286
1287						The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
1288						Boston, MA 02111-1307 USA
1289
1290
1291						=cut
1292						#######################################################################################################
1293
1294						1;