File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/SimilarityScore.pm
Criterion Covered Total %
statement 34 34 100.0
branch 5 8 62.5
condition n/a
subroutine 6 6 100.0
pod 0 2 0.0
total 45 50 90.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             # Declaring the Package for the module.
4             package Text::SenseClusters::LabelEvaluation::SimilarityScore;
5              
6 5     5   41395 use strict;
  5         11  
  5         245  
7 5     5   1263 use encoding "utf-8";
  5         19875  
  5         49  
8              
9             # The following two lines will make this module inherit from the Exporter Class.
10             require Exporter;
11             our @ISA = qw(Exporter);
12              
13              
14             # Using Text Similarity Module.
15             # Reference: http://search.cpan.org/~tpederse
16             # /Text-Similarity-0.08/lib/Text/Similarity.pm
17 5     5   8032 use Text::Similarity::Overlaps;
  5         34112  
  5         332  
18              
19              
20             #######################################################################################################################
21              
22             =head1 Name
23              
24             Text::SenseClusters::LabelEvaluation::SimilarityScore - Module for getting the similarity score between the contents of the two files.
25              
26             =head1 SYNOPSIS
27              
28             # The following code snippet will show how to use SimilarityScore.
29             package Text::SenseClusters::LabelEvaluation::Test_SimilarityScore;
30              
31             # Including the LabelEvaluation Module.
32             use Text::SenseClusters::LabelEvaluation::SimilarityScore;
33              
34              
35             my $firstString = "IBM::: vice president, million dollars, Wall Street, Deep Blue, ".
36             "International Business, Business Machines, International Machines, ".
37             "United States, Justice Department, personal computers";
38             my $secondString = "vice president, million dollars, Deep Blue, International Business, ".
39             "Business Machines, International Machines, United States, Justice Department";
40            
41             my $similarityObject = Text::SenseClusters::LabelEvaluation::SimilarityScore->
42             new($firstString,$secondString, "./stoplist.txt");
43            
44             #my $score = $similarityObject->computeOverlappingScores();
45             my ($score, %allScores) = $similarityObject->computeOverlappingScores();
46            
47             print "Score:: $score \n";
48             print "Lesk Score :: $allScores{'lesk'} \n";
49             print "Raw Lesk Score :: $allScores{'raw_lesk'} \n";
50             print "precision Score :: $allScores{'precision'} \n";
51             print "recall Score :: $allScores{'recall'} \n";
52             print "F Score :: $allScores{'F'} \n";
53             print "dice Score :: $allScores{'dice'} \n";
54             print "E Score :: $allScores{'E'} \n";
55             print "cosine Score :: $allScores{'cosine'} \n";
56             print "\n\n";
57              
58              
59             =head1 DESCRIPTION
60              
61             This module provides a function that compares two strings and returns
62             their overlapping scores. Please refer to the following for a detailed description
63             of how the similarity score is calculated:
64             http://search.cpan.org/~tpederse/Text-Similarity-0.09/
65            
66             =cut
67              
68              
69             # Member variable of the class.
70             my $clusterData = "ClusterData";
71             my $topicData = "TopicData";
72             my $stopListFileLoc = "StopListLoc";
73             my $verbose = "Verbose";
74            
75              
76             ##########################################################################################
77              
78             =head1 Constructor: new()
79              
80             This is the constructor which will create object for this class.
81             Reference : http://perldoc.perl.org/perlobj.html
82              
83             This constructor takes the following arguments and initializes the object with them:
84            
85             1. $clusterData : Datatype: String
86             This variable contains the labels generated by the SenseClusters.
87             2. $topicData : Datatype: String
88             This variable contains the Gold standard key's data.
89             3. $stopListFileLoc : Datatype: String
90             This variable contains the user defined location for the stop list file.
91             4. $verbose : Datatype: integer
92             This variable tells whether to display all types of similarity scores or not.
93            
94             =cut
95              
96             ##########################################################################################
97             sub new {
98              
99             # Creating the object.
100 73     73 0 263 my $class = shift;
101 73         188 my $scoreObject = {};
102              
103             # Explicit association is created by the built-in bless function.
104 73         615 bless $scoreObject, $class;
105              
106             # Getting the ClusterData from the argument.
107 73         433 $scoreObject->{$clusterData} = shift;
108              
109             # Getting the Topic data from the argument.
110 73         263 $scoreObject->{$topicData} = shift;
111              
112             # Getting the stop list file location.
113 73         241 $scoreObject->{$stopListFileLoc} = shift;
114            
115             # Getting the verbose option by user.
116 73         230 $scoreObject->{$verbose} = shift;
117            
118             # Returning the blessed hash referred to by $scoreObject.
119 73         255 return $scoreObject;
120             }
121              
122              
123             ########################################################################################
124             =head1 Function: computeOverlappingScores
125              
126             Method that compares the cluster labels with the topic (gold standard) data
127             held by the object and returns the overlapping score.
128              
129             @argument1 : The SimilarityScore object, which holds the cluster labels,
130             the topic data, the stop list file location and the
131             verbose option that were passed to the constructor.
132            
133             @return : Returns the overall score in scalar context; in list context, the score followed by a hash of all the individual scores.
134            
135             @description :
136             1). Reading the two strings and the options stored in the object.
137             2). Invoking the Text::Similarity::Overlaps module and passing
138             the two strings for similarity comparison.
139             3). Then the overlapping scores obtained from this module are returned
140             as the similarity value.
141              
142             =cut
143              
144             #########################################################################################
145              
146             sub computeOverlappingScores{
147            
148             # Reading the object as the argument.
149 73     73 0 430 my $readFileObject = shift;
150            
151             # Getting the Cluster's Label as the FirstString.
152 73         229 my $firstString = $readFileObject->{$clusterData};
153            
154             # Getting the Gold Data as the SecondString for comparison.
155 73         184 my $secondString = $readFileObject->{$topicData};
156              
157             # Getting the stop list file location.
158 73         166 my $stopListFileLocation = $readFileObject->{$stopListFileLoc};
159            
160             # Getting the verbose option by user.
161 73         156 my $verboseOption = $readFileObject->{$verbose};
162            
163 73 100       292 if(!defined $stopListFileLocation){
164             # Getting the module name.
165 72         152 my $module = "Text/SenseClusters/LabelEvaluation/SimilarityScore.pm";
166            
167             # Finding its installed location.
168 72         290 my $moduleInstalledLocation = $INC{$module};
169            
170             # Getting the prefix of installed location. This will be one of
171             # the values in array @INC.
172 72         902 $moduleInstalledLocation =~
173             m/(.*)Text\/SenseClusters\/LabelEvaluation\/SimilarityScore\.pm$/g;
174            
175             # Getting the installed stopList.txt location using above location.
176             # For e.g.:
177             # /usr/local/share/perl/5.10.1/Text/SenseClusters
178             # /LabelEvaluation/stoplist.txt
179 72         493 $stopListFileLocation
180             = $1."/Text/SenseClusters/LabelEvaluation/stoplist.txt";
181             }
182            
183             # Setting the Options for getting the results from the Text::Similarity
184             # Module.
185 73         419 my %options = ('verbose' => $verboseOption, 'stoplist' => $stopListFileLocation);
186              
187             # Creating the new Overlaps Object.
188 73         468 my $mod = Text::Similarity::Overlaps->new (\%options);
189            
190             # If the object is not created, then quit the program with error message.
191 73 50       227224 defined $mod or die "Construction of Text::Similarity::Overlaps failed";
192              
193             # Getting the overlapping score from the Similarity function.
194 73         420 my ($score, %allScores)= $mod->getSimilarityStrings ($firstString, $secondString);
195              
196              
197             # Printing the Similarity Score for the files.
198             #print "The similarity of $firstString and $secondString is : $score\n";
199             #print "The similarity of $firstString and $secondString is : $allScores{'lesk'}\n";
200              
201             # Reference : http://perldoc.perl.org/functions/wantarray.html
202 73 50       21859292 return wantarray ? ($score, %allScores) : $score;
203             }
204              
205              
206             sub DESTROY {
207 73     73   1650 my $self = shift;
208 73 50       794 $self->{handle}->close() if $self->{handle};
209             }
210              
211             #######################################################################################################
212             =pod
213              
214              
215             =head1 SEE ALSO
216              
217             http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
218            
219            
220             Last modified by :
221             $Id: SimilarityScore.pm,v 1.6 2013/03/18 00:47:24 jhaxx030 Exp $
222              
223            
224             =head1 AUTHORS
225              
226             Anand Jha, University of Minnesota, Duluth
227             jhaxx030 at d.umn.edu
228              
229             Ted Pedersen, University of Minnesota, Duluth
230             tpederse at d.umn.edu
231              
232             =head1 COPYRIGHT AND LICENSE
233              
234             Copyright (C) 2012 Ted Pedersen, Anand Jha
235              
236             See http://dev.perl.org/licenses/ for more information.
237              
238             This program is free software; you can redistribute it and/or modify
239             it under the terms of the GNU General Public License as published by
240             the Free Software Foundation; either version 2 of the License, or
241             (at your option) any later version.
242              
243             This program is distributed in the hope that it will be useful,
244             but WITHOUT ANY WARRANTY; without even the implied warranty of
245             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
246             GNU General Public License for more details.
247              
248             You should have received a copy of the GNU General Public License
249             along with this program; if not, write to:
250            
251            
252             The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
253             Boston, MA 02111-1307 USA
254            
255            
256             =cut
257             #######################################################################################################
258              
259              
260             # Making the default return statement as 1;
261             # Reference : http://lists.netisland.net/archives/phlpm/phlpm-2001/msg00426.html
262             1;