File Coverage

blib/lib/WordNet/Similarity/DepthFinder.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # WordNet::Similarity::DepthFinder version 2.04
2             # (Last updated $Id: DepthFinder.pm,v 1.20 2008/03/27 06:21:17 sidz1979 Exp $)
3             #
4             # Module containing code to find the depths of (noun and verb) synsets in
5             # the WordNet 'is-a' taxonomies
6             #
7             # Copyright (c) 2005,
8             #
9             # Ted Pedersen, University of Minnesota Duluth
10             # tpederse at d.umn.edu
11             #
12             # Jason Michelizzi, Univeristy of Minnesota Duluth
13             # mich0212 at d.umn.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31             #
32             # ------------------------------------------------------------------
33              
34             package WordNet::Similarity::DepthFinder;
35              
36             =head1 NAME
37              
38             WordNet::Similarity::DepthFinder - methods to find the depth of synsets in
39             WordNet taxonomies
40              
41             =head1 SYNOPSIS
42              
43             use WordNet::QueryData;
44             my $wn = WordNet::QueryData->new;
45             defined $wn or die "Construction of WordNet::QueryData failed";
46              
47             use WordNet::Similarity::DepthFinder;
48              
49             my $obj = WordNet::Similarity::DepthFinder->new ($wn);
50             my ($err, $errString) = $obj->getError ();
51             $err and die $errString;
52              
53             my $wps1 = 'car#n#4';
54             my $wps2 = 'oil#n#1';
55              
56             my $offset1 = $wn -> offset ($wps1);
57             my $offset2= $wn -> offset ($wps2);
58              
59             my @roots = $obj->getTaxonomies ($offset1, 'n');
60             my $taxonomy_depth = $obj->getTaxonomyDepth ($roots[0], 'n');
61             print "The maximum depth of the taxonomy where $wps1 is found is $taxonomy_depth\n";
62              
63             my @depths = $obj->getSynsetDepth ($offset1, 'n');
64             print "The depth of $offset1 is $depths[0]->[0]\n";
65              
66             my @lcsbyic = $obj -> getLCSbyDepth($wps1,$wps2,'n','wps');
67             print "$wps1 and $wps2 have LCS $lcsbyic[0]->[0] with Depth $lcsbyic[0]->[1]\n";
68              
69             my @lcsbyic = $obj -> getLCSbyDepth($offset1,$offset2,'n','offset');
70             print "$offset1 and $offset2 have LCS $lcsbyic[0]->[0] with Depth $lcsbyic[0]->[1]\n";
71              
72             =head1 DESCRIPTION
73              
74             The following methods are provided by this module:
75              
76             =over
77              
78             =cut
79              
80 4     4   19 use strict;
  4         7  
  4         1205  
81 4     4   19 use warnings;
  4         7  
  4         94  
82              
83 4     4   174 use WordNet::Similarity::PathFinder;
  0            
  0            
84              
85             our @ISA = qw/WordNet::Similarity::PathFinder/;
86              
87             our $VERSION = '2.04';
88              
89             WordNet::Similarity::addConfigOption ("taxonomyDepthsFile", 1, "p", undef);
90             WordNet::Similarity::addConfigOption ("synsetDepthsFile", 1, "p", undef);
91              
92             =item $obj->initialize ($configfile)
93              
94             Overrides the initialize method in WordNet::Similarity to look for and
95             process depths files. The initialize method of the superclass is also called.
96              
97             =cut
98              
99             sub initialize
100             {
101             my $self = shift;
102             my $class = ref $self || $self;
103              
104             my $wn = $self->{wn};
105              
106             my $defaultdepths = "synsetdepths.dat";
107             my $defaultroots = "treedepths.dat";
108              
109             $self->SUPER::initialize (@_);
110              
111             my $depthsfile = $self->{synsetDepthsFile};
112              
113             unless (defined $depthsfile) {
114             DEPTHS_SEARCH:
115             foreach (@INC) {
116             my $file = File::Spec->catfile ($_, 'WordNet', $defaultdepths);
117             if (-e $file) {
118             if (-r $file) {
119             $depthsfile = $file;
120             last DEPTHS_SEARCH;
121             }
122             else {
123             # The file not readable--is this an error?
124             # I suppose we shouldn't punish people for having
125             # unreadable files lying around; let's do nothing.
126             }
127             }
128             }
129             }
130              
131             unless (defined $depthsfile) {
132             $self->{error} = 2;
133             $self->{errorString} .= "\nError (${class}::initialize()) - ";
134             $self->{errorString} .= "No depths file found.";
135             return undef;
136             }
137              
138             $self->_processSynsetsFile ($depthsfile) or return undef;
139              
140             my $rootsfile = $self->{treeDepthsFile};
141              
142             unless (defined $rootsfile) {
143             TAXONOMY_SEARCH:
144             foreach (@INC) {
145             my $file = File::Spec->catfile ($_, 'WordNet', $defaultroots);
146             if (-e $file) {
147             if (-r $file) {
148             $rootsfile = $file;
149             last TAXONOMY_SEARCH;
150             }
151             else {
152             # The file not readable--is this an error?
153             # I suppose we shouldn't punish people for having
154             # unreadable files lying around; let's do nothing.
155             }
156             }
157             }
158             }
159              
160             $self->_processTaxonomyFile ($rootsfile) or return undef;
161              
162             return 1;
163             }
164              
165              
166             =item $obj->getSynsetDepth ($offset, $pos)
167              
168             Returns the depth(s) of the synset denoted by $offset and $pos. The return
169             value is a list of references to arrays. Each array has the form
170             S<(depth, root)>.
171              
172             =cut
173              
174             sub getSynsetDepth
175             {
176             my $self = shift;
177             my $class = ref $self || $self;
178             my $offset = shift;
179             my $pos = shift;
180              
181             my $ref = $self->{depths}->{$pos}->{$offset};
182             my @depths = @$ref;
183              
184              
185             unless (defined $depths[0]) {
186             $self->{errorString} .= "\nWarning (${class}::getSynsetDepth()) - ";
187             $self->{errorString} .= "No depth found for '$offset#$pos'.";
188             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
189             return undef;
190             }
191              
192             return @depths;
193             }
194              
195              
196             =item $obj->getTaxonomyDepth ($offset, $pos)
197              
198             Returns the maximum depth of the taxonomy rooted at the synset identified
199             by $offset and $pos. If $offset and $pos does not identify a root of
200             a taxonomy, then undef is returned and an error is raised.
201              
202             =cut
203              
204             sub getTaxonomyDepth
205             {
206             my $self = shift;
207             my $class = ref $self || $self;
208             my $synset = shift;
209             my $pos = shift;
210              
211             my $depth = $self->{taxonomyDepths}->{$pos}->{$synset};
212              
213             unless (defined $depth) {
214             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
215             $self->{errorString} .= "\nWarning (${class}::getTaxonomyDepth()) - ";
216             $self->{errorString} .= "No taxonomy is rooted at $synset#$pos.";
217             return undef;
218             }
219              
220             return $depth;
221             }
222              
223             =item $obj->getTaxonomies ($offset, $pos)
224              
225             Returns a list of the roots of the taxonomies to which the synset identified
226             by $offset and $pos belongs.
227              
228             =cut
229              
230             sub getTaxonomies
231             {
232             my $self = shift;
233             my $offset = shift;
234             my $pos = shift;
235             my $class = ref $self || $self;
236              
237             my $ref = $self->{depths}->{$pos}->{$offset};
238             my @tmp = @$ref;
239             my %tmp;
240             foreach (@tmp) {
241             $tmp{$_->[1]} = 1;
242             }
243             my @rtn = keys %tmp;
244             unless (defined $rtn[0]) {
245             $self->{errorString} .= "\nWarning (${class}::getTaxonomies()) - ";
246             $self->{errorString} .= "No root information for $offset#$pos.";
247             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
248             return undef;
249             }
250             return @rtn;
251             }
252              
253             =item getLCSbyDepth($synset1, $synset2, $pos, $mode)
254              
255             Given two input synsets, finds the least common subsumer (LCS) of them.
256             If there are multiple candidates for the LCS (due to multiple inheritance
257             in WordNet), the LCS with the greatest depth is chosen (i.e., the candidate
258             whose shortest path to the root is the longest).
259              
260             Parameters: a blessed reference, two synsets, a part of speech, and a mode.
261             The mode must the either the string 'wps' or 'offset'. If the mode is wps,
262             then the two input synsets must be in word#pos#sense format. If the mode
263             is offset, then the input synsets must be WordNet offsets.
264              
265             Returns: a list of the form ($lcs, $depth) where $lcs is the LCS (in wps
266             format if mode is 'wps' or an offset if mode is 'offset'. $depth is the
267             depth of the LCS in its taxonomy. Returns undef on error.
268              
269             =cut
270              
271             sub getLCSbyDepth
272             {
273             my $self = shift;
274             my $synset1 = shift;
275             my $synset2 = shift;
276             my $pos = shift;
277             my $mode = shift;
278             my $class = ref $self || $self;
279              
280             my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode);
281             unless (defined $paths[0]) {
282             # no paths found
283             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
284             $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth()) - ";
285             $self->{errorString} .= "No path between synsets found.";
286             return $self->UNRELATED;
287             }
288              
289             my $wn = $self->{wn};
290             my %depth; # a hash to hold the depth of each LCS candidate
291              
292             # find the depth of each LCS candidate
293             foreach (@paths) {
294             my $offset = $_->[0];
295             if ($mode eq 'wps') {
296             if (index ($_->[0], "*Root*") >= $[) {
297             $offset = 0;
298             }
299             else {
300             $offset = $wn->offset ($_->[0]);
301             }
302             }
303              
304             my @depths = $self->getSynsetDepth ($offset, $pos);
305             my ($depth, $root) = @{$depths[0]};
306             unless (defined $depth) {
307             # serious internal error -- possible problem with depths file?
308             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
309             $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth()) - ";
310             $self->{errorString} .= "Undefined depth for $_->[0]. ";
311             $self->{errorString} .= "Possible problem with the depths file?";
312             return undef;
313             }
314             $depth{$_->[0]} = [$depth, $root];
315             }
316              
317             # sort according to depth (descending order)
318             my @tmp = sort {$b->[1] <=> $a->[1]} map [$_, @{$depth{$_}}], keys %depth;
319              
320             # remove from the array all the subsumers that are not tied for best
321             foreach (0..$#tmp) {
322             if ($tmp[$_]->[1] == $tmp[0]->[1]) {
323             # do nothing
324             }
325             else {
326             # kill the rest of the array and exit the loop
327             $#tmp = $_ - 1;
328             last;
329             }
330             }
331              
332             unless (defined $tmp[0]) {
333             my $wps1 = $synset1;
334             my $wps2 = $synset2;
335             if ($mode eq 'offset') {
336             $wps1 = $synset1 ? $wn->getSense ($synset1, $pos) : "*Root*#$pos#1";
337             $wps2 = $synset2 ? $wn->getSynse ($synset2, $pos) : "*Root*#$pos#1";
338             }
339              
340             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
341             $self->{errorString} .= "\nWarning (${class}::getLCSbyDepth() - ";
342             $self->{errorString} .= "No LCS found for $wps1 and $wps2.";
343              
344             if ($self->{trace}) {
345             $self->{traceString} .= "\nNo LCS found for ";
346             $self->printSet ($pos, 'wps', $wps1);
347             $self->{traceString} .= ", ";
348             $self->printSet ($pos, 'wps', $wps2);
349             $self->{traceString} .= ".";
350             }
351             return undef;
352             }
353              
354             if ($self->{trace}) {
355             $self->{traceString} .= "\nLowest Common Subsumers: ";
356             foreach (@tmp) {
357             $self->printSet ($pos, $mode, $_->[0]);
358             $self->{traceString} .= " (Depth=$_->[1]) ";
359             }
360             }
361              
362             return @tmp;
363             }
364              
365              
366              
367             =item $obj->_processSynsetsFile ($filename)
368              
369             Reads and processes a synsets file as output by wnDepths.pl
370              
371             =cut
372              
373             sub _processSynsetsFile
374             {
375             my $self = shift;
376             my $file = shift;
377             my $class = ref $self || $self;
378             my $wnver = $self->{wntools}->hashCode ();
379              
380             unless (open FH, '<', $file) {
381             $self->{error} = 2;
382             $self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
383             $self->{errorString} .= "Cannot open $file for reading: $!.";
384             return 0;
385             }
386              
387             my $line = ;
388             unless ($line =~ /^wnver::(\S+)$/) {
389             $self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
390             $self->{errorString} .= "File $file has bad format.";
391             $self->{error} = 2;
392             return 0;
393             }
394             unless ($1 eq $wnver) {
395             $self->{errorString} .= "\nError (${class}::_processSynsetsFile()) - ";
396             $self->{errorString} .= "Bad WordNet hash-code in $file, $1, should be $wnver.";
397             $self->{error} = 2;
398             return 0;
399             }
400              
401             # If we are using a root node, then we need to slightly adjust all
402             # the synset depths. Thus, the correction will be 1 if the root node
403             # is on and 0 otherwise.
404             my $correction = $self->{rootNode} ? 1 : 0;
405              
406             while ($line = ) {
407             my ($pos, $offset, @depths) = split /\s+/, $line;
408             # convert the offset string to a number. When we make the number
409             # into a string again, there won't be any leading zeros.
410             $offset = 0 + $offset;
411              
412             # We assume the the first depth listed is the smallest.
413             # The wnDepths.pl program should guarantee this behavior.
414             my @refs;
415             foreach (@depths) {
416             my ($depth, $root) = split /:/;
417             # make root a number; see above for why. If the root node
418             # is on, then all roots are the root node, so adjust for that.
419             $root = $self->{rootNode} ? 0 : $root + 0;
420             $depth += $correction;
421             push @refs, [$depth, $root];
422             }
423             $self->{depths}->{$pos}->{$offset} = [@refs];
424             }
425              
426             if ($self->{rootNode}) {
427             # set the depth of the root nodes to be one
428             $self->{depths}->{n}->{0} = [[1, 0]];
429             $self->{depths}->{v}->{0} = [[1, 0]];
430             }
431              
432             return 1;
433             }
434              
435             =item $obj->_processTaxonomyFile ($filename)
436              
437             Reads and processes a taxonomies file as produced by wnDepths.pl
438              
439             =cut
440              
441             sub _processTaxonomyFile
442             {
443             my $self = shift;
444             my $filename = shift;
445             my $class = ref $self || $self;
446              
447             unless (open FH, '<', $filename) {
448             $self->{errorString} .= "Error (${class}::_processTaxonomyFile()) - ";
449             $self->{errorString} .= "Could not open '$filename' for reading: $!.";
450             $self->{error} = 2;
451             return 0;
452             }
453              
454             my $line = ;
455              
456             unless ($line =~ /^wnver::(\S+)$/) {
457             $self->{errorString} .= "Error (${class}::_processTaxonomyFile()) - ";
458             $self->{errorString} .= "Bad file format for $filename.";
459             $self->{error} = 2;
460             return 0;
461             }
462              
463             while ($line = ) {
464             my ($p, $o, $d) = split /\s+/, $line;
465              
466             # add 0 to offset to make it a number; see above for why
467             $o = $o + 0;
468              
469             $self->{taxonomyDepths}->{$p}->{$o} = $d;
470             }
471              
472             close FH;
473             return 1;
474             }
475              
476             1;
477              
478             __END__