File Coverage

blib/lib/WordNet/Similarity/wup.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # WordNet::Similarity::wup.pm version 2.04
2             # (Last updated $Id: wup.pm,v 1.27 2008/03/27 06:21:17 sidz1979 Exp $)
3             #
4             # Semantic Similarity Measure package implementing the semantic
5             # relatedness measure described by Wu & Palmer (1994) as revised
6             # by Resnik (1999).
7             #
8             # Copyright (c) 2005,
9             #
10             # Ted Pedersen, University of Minnesota Duluth
11             # tpederse at d.umn.edu
12             #
13             # Jason Michelizzi, University of Minnesota Duluth
14             # mich0212 at d.umn.edu
15             #
16             # Siddharth Patwardhan, University of Utah, Salt Lake City
17             # sidd at cs.utah.edu
18             #
19             # This program is free software; you can redistribute it and/or
20             # modify it under the terms of the GNU General Public License
21             # as published by the Free Software Foundation; either version 2
22             # of the License, or (at your option) any later version.
23             #
24             # This program is distributed in the hope that it will be useful,
25             # but WITHOUT ANY WARRANTY; without even the implied warranty of
26             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27             # GNU General Public License for more details.
28             #
29             # You should have received a copy of the GNU General Public License
30             # along with this program; if not, write to
31             #
32             # The Free Software Foundation, Inc.,
33             # 59 Temple Place - Suite 330,
34             # Boston, MA 02111-1307, USA.
35             #
36             # ------------------------------------------------------------------
37              
38             package WordNet::Similarity::wup;
39              
40             =head1 NAME
41              
42             WordNet::Similarity::wup - Perl module for computing semantic
43             relatedness of word senses using the edge counting method of
44             Wu & Palmer (1994)
45              
46             =head1 SYNOPSIS
47              
48             use WordNet::Similarity::wup;
49              
50             use WordNet::QueryData;
51              
52             my $wn = WordNet::QueryData->new();
53              
54             my $wup = WordNet::Similarity::wup->new($wn);
55              
56             my $value = $wup->getRelatedness('dog#n#1', 'cat#n#1');
57              
58             my ($error, $errorString) = $wup->getError();
59              
60             die $errorString if $error;
61              
62             print "dog (sense 1) <-> cat (sense 1) = $value\n";
63              
64             =head1 DESCRIPTION
65              
66             Resnik (1999) revises the Wu & Palmer (1994) method of measuring semantic
67             relatedness. Resnik uses an edge distance method by taking into
68             account the most specific node subsuming the two concepts. Here we have
69             implemented the original Wu & Palmer method, which uses node-counting.
70              
71             =head2 Methods
72              
73             This module defines the following methods:
74              
75             =over
76              
77             =cut
78              
79 4     4   6350 use strict;
  4         39  
  4         91  
80 4     4   46 use warnings;
  4         7  
  4         121  
81              
82 4     4   121 use WordNet::Similarity::DepthFinder;
  0            
  0            
83              
84             our @ISA = qw/WordNet::Similarity::DepthFinder/;
85              
86             our $VERSION = '2.04';
87              
88             =item $wup->getRelatedness ($synset1, $synset2)
89              
90             Computes the relatedness of two word senses using a node counting scheme.
91             For details on how relatedness is computed, see the discussion section
92             below.
93              
94             Parameters: two word senses in "word#pos#sense" format.
95              
96             Returns: Unless a problem occurs, the return value is the relatedness
97             score. If no path exists between the two word senses, then a large
98             negative number is returned. If an error occurs, then the error level is
99             set to non-zero and an error string is created (see the description
100             of getError()). Note: the error level will also be set to 1 and an error
101             string will be created if no path exists between the words.
102              
103             =cut
104              
105             sub getRelatedness
106             {
107             my $self = shift;
108             my $in1 = shift;
109             my $in2 = shift;
110              
111             my $class = ref $self || $self;
112              
113             # initialize trace string
114             $self->{traceString} = "";
115              
116             # JM 1-21-04
117             # moved input validation code to WordNet::Similarity::parseWps()
118             my $ret = $self->parseWps ($in1, $in2);
119             ref $ret or return $ret;
120             my ($word1, $pos1, $sense1, $offset1, $word2, $pos2, $sense2, $offset2)
121             = @{$ret};
122              
123             defined $word1 or return undef;
124              
125             my $wps1 = "$word1#$pos1#$sense1";
126             my $wps2 = "$word2#$pos2#$sense2";
127              
128             my $score = $self->fetchFromCache ($wps1, $wps2);
129             return $score if defined $score;
130              
131             my @LCSs = $self->getLCSbyDepth ($wps1, $wps2, $pos1, 'wps');
132             my $temp = shift @LCSs;
133             unless (ref $temp) {
134             return $temp;
135             }
136             my ($lcs, $depth, $root) = @{$temp};
137              
138             unless (defined $lcs) {
139             # no lcs found, return unrelated (errors already generated)
140             return $self->UNRELATED;
141             }
142              
143             # now find the depth of $wps1 and $wps2
144             my $trace = $self->{trace};
145             $self->{trace} = 0;
146             my @paths1 = $self->getShortestPath ($wps1, $lcs, $pos1, 'wps');
147             my @paths2 = $self->getShortestPath ($wps2, $lcs, $pos1, 'wps');
148             $self->{trace} = $trace;
149              
150             my ($length1, undef) = @{shift @paths1};
151             my ($length2, undef) = @{shift @paths2};
152              
153             if (ref $length1) {
154             die "Length 1 is a ref\n";
155             }
156             # If we've already found an lcs, then there must be a path, so this
157             # error should never occur, but there's little harm in checking anyways
158             unless (defined $length1) {
159             $self->{errorString} .= "Length 1 is undefined.";
160             $self->{error} = 1;
161             return undef;
162             }
163             unless (defined $length2) {
164             $self->{errorString} .= "Length 2 is undefined.";
165             $self->{error} = 1;
166             return undef;
167             }
168              
169             my $depth1 = $depth + $length1 - 1;
170             my $depth2 = $depth + $length2 - 1;
171              
172             $score = 2 * $depth / ($depth1 + $depth2);
173              
174             $self->storeToCache ($wps1, $wps2, $score) if $self->{doCache};
175              
176             if ($self->{trace}) {
177             $self->{traceString} .= "\nDepth(";
178             $self->printSet ($pos1, 'wps', $in1);
179             $self->{traceString} .= ") = $depth1\nDepth(";
180             $self->printSet ($pos1, 'wps', $in2);
181             $self->{traceString} .= ") = $depth2\n";
182             }
183             return $score;
184             }
185              
186             ###
187             # JM 1-16-04
188             # All of the code that used to follow here has been replaced by code in
189             # PathFinder.
190             ###
191              
192             1;
193              
194             __END__