File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/SimilarityScore.pm
Criterion Covered Total %
statement 21 25 84.0
branch 2 4 50.0
condition n/a
subroutine 5 5 100.0
pod 0 1 0.0
total 28 35 80.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             # Declaring the Package for the module.
4             package Text::SenseClusters::LabelEvaluation::SimilarityScore;
5              
6 2     2   41111 use strict;
  2         6  
  2         84  
7 2     2   1463 use encoding "utf-8";
  2         52017  
  2         15  
8              
9             # The following two lines will make this module inherit from the Exporter Class.
10             require Exporter;
11             our @ISA = qw(Exporter);
12              
13             # Using WWW::Wikipedia Module.
14             # Reference: http://search.cpan.org/dist/WWW-Wikipedia/lib/WWW/Wikipedia.pm
15 2     2   2223 use WWW::Wikipedia;
  2         319786  
  2         90  
16              
17             # Defining the Variable for using the Wikipedia Module.
18             # Reference: http://search.cpan.org/~bricas/WWW-Wikipedia-2.00/
19             my $wiki = WWW::Wikipedia->new();
20              
21             # Using Text Similarity Module.
22             # Reference: http://search.cpan.org/~tpederse
23             # /Text-Similarity-0.08/lib/Text/Similarity.pm
24 2     2   2766 use Text::Similarity::Overlaps;
  2         21557  
  2         227  
25              
26              
27             #######################################################################################################################
28              
29             =head1 NAME
30              
31             Text::SenseClusters::LabelEvaluation::SimilarityScore - Module for getting the similarity score between the contents of the two files.
32              
33             =head1 SYNOPSIS
34              
35             # The following code snippet will show how to use SimilarityScore.
36             package Text::SenseClusters::LabelEvaluation::Test_SimilarityScore;
37              
38             # Including the LabelEvaluation Module.
39             use Text::SenseClusters::LabelEvaluation::SimilarityScore;
40              
41             # Including the FileHandle module.
42             use FileHandle;
43              
44             # File that will contain the label information.
45             my $labelFileName = "temp_ClusterLabel.txt";
46              
47             # Defining the file handle for the label file.
48             our $labelFileHandle = FileHandle->new(">$labelFileName");
49              
50             # Writing into the label file.
51             print $labelFileHandle "U S, Al Gore, White House, more than, President 1993, George W, ".
52             "York Times, New York, Prime Minister, New Democrat, National Governors";
53            
54             # File that will contain the topic information.
55             my $topicFileName = "temp_TopicData.txt";
56              
57             # Defining the file handle for the topic file.
58             our $topicFileHandle = FileHandle->new(">$topicFileName");
59              
60             # Writing into the Topic file.
61             # Bill Clinton , Tony Blair
62             print $topicFileHandle "Bill Clinton is an American politician who served as the 42nd President of ".
63             "the United States from 1993 to 2001. Inaugurated at age 46, he was the third-youngest president. ".
64             "He took office at the end of the Cold War, and was the first president of the baby boomer generation. ".
65             "Clinton has been described as a New Democrat. Many of his policies have been attributed to a centrist ".
66             "Third Way philosophy of governance. He is married to Hillary Rodham Clinton, who has served as the ".
67             "United States Secretary of State since 2009 and was a Senator from New York from 2001 to 2009. ".
68             "As Governor of Arkansas, Clinton overhauled the state's education system, and served as Chair ".
69             "of the National Governors Association. Clinton was elected president in 1992, defeating incumbent ".
70             "president George H. W. Bush. The Congressional Budget Office reported a budget surplus between ".
71             "the years 1998 and 2000, the last three years of Clinton's presidency. Since leaving office, ".
72             "Clinton has been rated highly in public opinion polls of U.S. presidents. \n";
73              
74             # Closing the handles.
75             close($labelFileHandle);
76             close($topicFileHandle);
77              
78             my $stopListFileLocation ="";
79              
80             my $similarityScore = Text::SenseClusters::LabelEvaluation::SimilarityScore::computeOverlappingScores(
81             $labelFileName,$topicFileName, $stopListFileLocation);
82              
83             print "\n Similarity Score for the Cluster-labels and Bill-Clinton-Wiki data is $similarityScore \n";
84              
85             # Deleting the temporary label and topic files.
86             unlink $labelFileName or warn "Could not unlink $labelFileName: $!";
87             unlink $topicFileName or warn "Could not unlink $topicFileName: $!";
88              
89             =head1 DESCRIPTION
90              
91             This module provides a function that compares the contents of two files
92             and returns their overlapping score.
93            
94             =cut
95              
96              
97              
98              
99              
100             ########################################################################################
101             =head1 Function: computeOverlappingScores
102             ------------------------------------------------
103              
104              
105             Function that will compare the labels file with the wiki files and
106             will return the overlapping score.
107              
108             @argument1 : Name of the cluster file.
109             @argument2 : Name of the file containing the data from Wikipedia.
110             @argument3 : Name of the file containing the stop word lists.
111            
112             @return : Return the overlapping score between these files.
113            
114             @description :
115             1). Reading the file names from the function arguments.
116             2). Invoking the Text::Similarity::Overlaps module and passing
117             the file names for similarity comparison.
118             3). Then overlapping score obtained from this module is returned
119             as the similarity value.
120              
121             =cut
122              
123             #########################################################################################
124              
125             sub computeOverlappingScores{
126            
127             # First argument: name of the cluster-label file.
128 5     5 0 380 my $clusterFileName = shift;
129            
130             # Second argument: name of the topic-data file.
131 5         16 my $topicFileName = shift;
132              
133             # Third argument: location of the stop-list file (may be undef).
134 5         9 my $stopListFileLocation = shift;
135            
136 5 50       31 if(!defined $stopListFileLocation){
137             # %INC key to look up. NOTE(review): the package declared in this file is ...::LabelEvaluation::SimilarityScore, not ...::Wikipedia::SimilarityScore - confirm this key.
138 0         0 my $module = "Text/SenseClusters/Wikipedia/SimilarityScore.pm";
139            
140             # Finding its installed location (undef when that key is not present in %INC).
141 0         0 my $moduleInstalledLocation = $INC{$module};
142            
143             # Capturing the prefix of the installed location into $1. This will be one of
144             # the values in array @INC. The match result is not checked before $1 is used below.
145 0         0 $moduleInstalledLocation =~
146             m/(.*)Text\/SenseClusters\/Wikipedia\/SimilarityScore\.pm$/g;
147            
148             # Building the installed stoplist.txt location from the captured prefix.
149             # For e.g.:
150             # /usr/local/share/perl/5.10.1/Text/SenseClusters
151             # /Wikipedia/stoplist.txt
152 0         0 $stopListFileLocation
153             = $1."/Text/SenseClusters/Wikipedia/stoplist.txt";
154            
155              
156             }
157             # Options passed to Text::Similarity::Overlaps: 'verbose' set to 0 and
158             # 'stoplist' set to the stop-list file location determined above.
159 5         32 my %options = ('verbose' => 0, 'stoplist' => $stopListFileLocation);
160              
161             # Creating the new Overlaps object with these options.
162 5         48 my $mod = Text::Similarity::Overlaps->new (\%options);
163            
164             # Die with an error message if the constructor returned undef.
165 5 50       424 defined $mod or die "Construction of Text::Similarity::Overlaps failed";
166              
167             # Getting the score that getSimilarity returns for the two files.
168 5         25 my $score = $mod->getSimilarity ($clusterFileName, $topicFileName);
169              
170             # Debug output (disabled):
171             # print "The similarity of $clusterFileName and $topicFileName is : $score\n";
172            
173             # Returning the score to the caller unchanged.
174 5         792891 return $score;
175             }
176              
177              
178             #######################################################################################################
179             =pod
180              
181              
182             =head1 SEE ALSO
183              
184             http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
185            
186            
187             @Last modified by : Anand Jha
188             @Last_Modified_Date : 24th Dec. 2012
189             @Modified Version : 1.4
190            
191             =head1 AUTHORS
192              
193             Ted Pedersen, University of Minnesota, Duluth
194             tpederse at d.umn.edu
195              
196             Anand Jha, University of Minnesota, Duluth
197             jhaxx030 at d.umn.edu
198              
199              
200              
201             =head1 COPYRIGHT AND LICENSE
202              
203             Copyright (C) 2012 Ted Pedersen, Anand Jha
204              
205             See http://dev.perl.org/licenses/ for more information.
206              
207             This program is free software; you can redistribute it and/or modify
208             it under the terms of the GNU General Public License as published by
209             the Free Software Foundation; either version 2 of the License, or
210             (at your option) any later version.
211              
212             This program is distributed in the hope that it will be useful,
213             but WITHOUT ANY WARRANTY; without even the implied warranty of
214             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
215             GNU General Public License for more details.
216              
217             You should have received a copy of the GNU General Public License
218             along with this program; if not, write to:
219            
220            
221             The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
222             Boston, MA 02111-1307 USA
223            
224            
225             =cut
226             #######################################################################################################
227              
228              
229             # A module file must end with a true value so that require/use succeeds.
230             # Reference : http://lists.netisland.net/archives/phlpm/phlpm-2001/msg00426.html
231             1;