| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # WordNet::get_wn_info.pm version 2.04 | 
| 2 |  |  |  |  |  |  | # (Last updated $Id: get_wn_info.pm,v 1.1 2008/03/27 05:13:01 sidz1979 Exp $) | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Package used by WordNet::Similarity::lesk module that | 
| 5 |  |  |  |  |  |  | # computes semantic relatedness of word senses in WordNet | 
| 6 |  |  |  |  |  |  | # using gloss overlaps. | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # Copyright (c) 2005, | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # Ted Pedersen, University of Minnesota, Duluth | 
| 11 |  |  |  |  |  |  | # tpederse at d.umn.edu | 
| 12 |  |  |  |  |  |  | # | 
| 13 |  |  |  |  |  |  | # Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh | 
| 14 |  |  |  |  |  |  | # banerjee+ at cs.cmu.edu | 
| 15 |  |  |  |  |  |  | # | 
| 16 |  |  |  |  |  |  | # This program is free software; you can redistribute it and/or | 
| 17 |  |  |  |  |  |  | # modify it under the terms of the GNU General Public License | 
| 18 |  |  |  |  |  |  | # as published by the Free Software Foundation; either version 2 | 
| 19 |  |  |  |  |  |  | # of the License, or (at your option) any later version. | 
| 20 |  |  |  |  |  |  | # | 
| 21 |  |  |  |  |  |  | # This program is distributed in the hope that it will be useful, | 
| 22 |  |  |  |  |  |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 23 |  |  |  |  |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 24 |  |  |  |  |  |  | # GNU General Public License for more details. | 
| 25 |  |  |  |  |  |  | # | 
| 26 |  |  |  |  |  |  | # You should have received a copy of the GNU General Public License | 
| 27 |  |  |  |  |  |  | # along with this program; if not, write to | 
| 28 |  |  |  |  |  |  | # | 
| 29 |  |  |  |  |  |  | # The Free Software Foundation, Inc., | 
| 30 |  |  |  |  |  |  | # 59 Temple Place - Suite 330, | 
| 31 |  |  |  |  |  |  | # Boston, MA  02111-1307, USA. | 
| 32 |  |  |  |  |  |  | # | 
| 33 |  |  |  |  |  |  | # ------------------------------------------------------------------ | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | package WordNet::get_wn_info; | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | =head1 NAME | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | WordNet::get_wn_info - Provides access to glosses related to a concept in WordNet | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | use WordNet::get_wn_info; | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | my $gwn = WordNet::get_wn_info->new($wn, 0); | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | my $hypHash = $gwn->hype($synsHash); | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | my $holHash = $gwn->holo($synsHash); | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | This module provides a layer between the user and the WordNet::QueryData | 
| 54 |  |  |  |  |  |  | module, by providing easy access to glosses and synsets related to | 
| 55 |  |  |  |  |  |  | particular concepts in WordNet. | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | =head2 Methods | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =over | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | =cut | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 1 |  |  | 1 |  | 552 | use WordNet::stem; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 64 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 26 |  | 
| 65 | 1 |  |  | 1 |  | 5 | use Exporter; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 36 |  | 
| 66 | 1 |  |  | 1 |  | 10 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 4514 |  | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | %EXPORT_TAGS = (); | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | @EXPORT_OK = (); | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | @EXPORT = (); | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | $VERSION = '2.04'; | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | =item new | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | Creates a new WordNet::get_wn_info object. | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | Parameters: $wn, $stemFlag | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | Returns: $gwn | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | =cut | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | # function to set up the wordnet object and the various boundary indices | 
| 89 |  |  |  |  |  |  | sub new | 
| 90 |  |  |  |  |  |  | { | 
| 91 | 0 |  |  | 0 | 1 |  | my $className; | 
| 92 | 0 |  |  |  |  |  | my $self = {}; | 
| 93 | 0 |  |  |  |  |  | my $wn; | 
| 94 |  |  |  |  |  |  | my $stemmingReqd; | 
| 95 | 0 |  |  |  |  |  | my $stemmer; | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | # get the class name | 
| 98 | 0 |  |  |  |  |  | $className = shift; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | # get wordnet object | 
| 101 | 0 |  |  |  |  |  | $wn = shift; | 
| 102 | 0 |  |  |  |  |  | $self->{'wn'} = $wn; | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | # check WordNet::QueryData version | 
| 105 | 0 |  |  |  |  |  | $wn->VERSION(1.39); | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | # check if stemming called for | 
| 108 | 0 |  |  |  |  |  | $stemmingReqd = shift; | 
| 109 | 0 |  |  |  |  |  | $self->{'stem'} = $stemmingReqd; | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 0 | 0 |  |  |  |  | if($stemmingReqd) | 
| 113 |  |  |  |  |  |  | { | 
| 114 | 0 |  |  |  |  |  | $stemmer = WordNet::stem->new($wn); | 
| 115 | 0 |  |  |  |  |  | $self->{'stemmer'} = $stemmer; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | # set up various boundaries. | 
| 119 | 0 |  |  |  |  |  | $self->{'glosBoundaryIndex'} = 0; | 
| 120 | 0 |  |  |  |  |  | $self->{'exampleBoundaryIndex'} = 0; | 
| 121 | 0 |  |  |  |  |  | $self->{'synonymBoundaryIndex'} = 0; | 
| 122 | 0 |  |  |  |  |  | bless($self, $className); | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 0 |  |  |  |  |  | return $self; | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | =item hype | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | Returns the hypernyms of a synset. | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | Returns: $hypeHash | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | =cut | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # NOTE: Thanks to Wybo Wiersma for contributing optimizations | 
| 138 |  |  |  |  |  |  | #       in the following code. | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | # function to take a set of synsets and to return their | 
| 141 |  |  |  |  |  |  | # hypernyms. both input and output are hash refs keyed by fully qualified | 
| 142 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 143 |  |  |  |  |  |  | sub hype | 
| 144 |  |  |  |  |  |  | { | 
| 145 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 146 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 147 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 150 |  |  |  |  |  |  | # function | 
| 151 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 0 |  |  |  |  |  | my %newsynsetsh; | 
| 154 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | { | 
| 156 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 157 |  |  |  |  |  |  | # TODO: Return error code instead of "exit" | 
| 158 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 159 |  |  |  |  |  |  | { | 
| 160 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 161 | 0 |  |  |  |  |  | exit; | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | # get the hypernyms | 
| 165 | 0 |  |  |  |  |  | my @hypernyms = $wn->querySense($syns, "hypes"); | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | # put the hypernyms in a hash. this way we will avoid multiple | 
| 168 |  |  |  |  |  |  | # copies of the same hypernym | 
| 169 | 0 |  |  |  |  |  | my $temp; | 
| 170 | 0 |  |  |  |  |  | foreach $temp (@hypernyms) | 
| 171 |  |  |  |  |  |  | { | 
| 172 | 0 |  |  |  |  |  | $newsynsetsh{$temp} = 1; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | # return the hypernyms in an hash ref | 
| 177 | 0 |  |  |  |  |  | return(\%newsynsetsh); | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | =item hypo | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | Returns the hyponyms of a synset. | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | Returns: $hypoHash | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | =cut | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | # function to take a set of synsets and to return their | 
| 191 |  |  |  |  |  |  | # hyponyms. both input and output are hash refs keyed by fully qualified | 
| 192 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 193 |  |  |  |  |  |  | sub hypo | 
| 194 |  |  |  |  |  |  | { | 
| 195 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 196 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 197 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 200 |  |  |  |  |  |  | # function | 
| 201 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 0 |  |  |  |  |  | my %hyponymHash; | 
| 204 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | { | 
| 206 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 207 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 208 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 209 |  |  |  |  |  |  | { | 
| 210 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 211 | 0 |  |  |  |  |  | exit; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | # get the hyponyms | 
| 215 | 0 |  |  |  |  |  | my @hyponyms = $wn->querySense($syns, "hypos"); | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | # put the hyponyms in a hash. this way we will avoid multiple | 
| 218 |  |  |  |  |  |  | # copies of the same hyponym | 
| 219 | 0 |  |  |  |  |  | my $temp; | 
| 220 | 0 |  |  |  |  |  | foreach $temp (@hyponyms) | 
| 221 |  |  |  |  |  |  | { | 
| 222 | 0 |  |  |  |  |  | $hyponymHash{$temp} = 1; | 
| 223 |  |  |  |  |  |  | } | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | # return the hyponyms in a hash ref | 
| 227 | 0 |  |  |  |  |  | return(\%hyponymHash); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | =item holo | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | Returns the holonyms of a synset. | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | Returns: $holoHash | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | =cut | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | # function to take a set of synsets and to return their | 
| 241 |  |  |  |  |  |  | # holonyms. both input and output are hash refs keyed by fully qualified | 
| 242 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 243 |  |  |  |  |  |  | sub holo | 
| 244 |  |  |  |  |  |  | { | 
| 245 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 246 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 247 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 248 | 0 |  |  |  |  |  | my %holonymHash = (); | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 251 |  |  |  |  |  |  | # function | 
| 252 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | { | 
| 256 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 257 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 258 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 259 |  |  |  |  |  |  | { | 
| 260 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 261 | 0 |  |  |  |  |  | exit; | 
| 262 |  |  |  |  |  |  | } | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | # get the holonyms | 
| 265 | 0 |  |  |  |  |  | my @holonyms = $wn->querySense($syns, "holo"); | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | # put the holonyms in a hash. this way we will avoid multiple | 
| 268 |  |  |  |  |  |  | # copies of the same holonym | 
| 269 | 0 |  |  |  |  |  | my $temp; | 
| 270 | 0 |  |  |  |  |  | foreach $temp (@holonyms) | 
| 271 |  |  |  |  |  |  | { | 
| 272 | 0 |  |  |  |  |  | $holonymHash{$temp} = 1; | 
| 273 |  |  |  |  |  |  | } | 
| 274 |  |  |  |  |  |  | } | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | # return the holonyms in a hash ref | 
| 277 | 0 |  |  |  |  |  | return(\%holonymHash); | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | =item mero | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | Returns the meronyms of a synset. | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | Returns: $meroHash | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | =cut | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | # function to take a set of synsets and to return their | 
| 291 |  |  |  |  |  |  | # meronyms. both input and output are hash refs keyed by fully qualified | 
| 292 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 293 |  |  |  |  |  |  | sub mero | 
| 294 |  |  |  |  |  |  | { | 
| 295 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 296 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 297 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 298 | 0 |  |  |  |  |  | my %meronymHash = (); | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 301 |  |  |  |  |  |  | # function | 
| 302 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | { | 
| 306 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 307 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 308 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 309 |  |  |  |  |  |  | { | 
| 310 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 311 | 0 |  |  |  |  |  | exit; | 
| 312 |  |  |  |  |  |  | } | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | # get the meronyms | 
| 315 | 0 |  |  |  |  |  | my @meronyms = $wn->querySense($syns, "mero"); | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | # put the meronyms in a hash. this way we will avoid multiple | 
| 318 |  |  |  |  |  |  | # copies of the same meronym | 
| 319 | 0 |  |  |  |  |  | my $temp; | 
| 320 | 0 |  |  |  |  |  | foreach $temp (@meronyms) | 
| 321 |  |  |  |  |  |  | { | 
| 322 | 0 |  |  |  |  |  | $meronymHash{$temp} = 1; | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | # return the meronyms in a hash ref | 
| 327 | 0 |  |  |  |  |  | return(\%meronymHash); | 
| 328 |  |  |  |  |  |  | } | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | =item attr | 
| 331 |  |  |  |  |  |  |  | 
| 332 |  |  |  |  |  |  | Returns the attributes of a synset. | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 335 |  |  |  |  |  |  |  | 
| 336 |  |  |  |  |  |  | Returns: $attrHash | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | =cut | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | # function to take a set of synsets and to return their | 
| 341 |  |  |  |  |  |  | # attributes. both input and output are hash refs keyed by fully qualified | 
| 342 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 343 |  |  |  |  |  |  | sub attr | 
| 344 |  |  |  |  |  |  | { | 
| 345 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 346 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 347 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 348 | 0 |  |  |  |  |  | my %attrHash = (); | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 351 |  |  |  |  |  |  | # function | 
| 352 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 353 |  |  |  |  |  |  |  | 
| 354 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | { | 
| 356 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 357 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 358 |  |  |  |  |  |  | { | 
| 359 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 360 | 0 |  |  |  |  |  | exit; | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # get the attrs | 
| 364 | 0 |  |  |  |  |  | my @attrs = $wn->querySense($syns, "attr"); | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | # put the attrs in a hash. this way we will avoid multiple | 
| 367 |  |  |  |  |  |  | # copies of the same attr | 
| 368 | 0 |  |  |  |  |  | my $temp; | 
| 369 | 0 |  |  |  |  |  | foreach $temp (@attrs) | 
| 370 |  |  |  |  |  |  | { | 
| 371 | 0 |  |  |  |  |  | $attrHash{$temp} = 1; | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  | } | 
| 374 |  |  |  |  |  |  |  | 
| 375 |  |  |  |  |  |  | # return the attrs in a hash ref | 
| 376 | 0 |  |  |  |  |  | return(\%attrHash); | 
| 377 |  |  |  |  |  |  | } | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | =item also | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | Returns the also-see of a synset. | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 384 |  |  |  |  |  |  |  | 
| 385 |  |  |  |  |  |  | Returns: $alsoHash | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | =cut | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | # function to take a set of synsets and to return their also-see | 
| 390 |  |  |  |  |  |  | # synsets. both input and output are hash refs keyed by fully qualified | 
| 391 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 392 |  |  |  |  |  |  | sub also | 
| 393 |  |  |  |  |  |  | { | 
| 394 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 395 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 396 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 397 | 0 |  |  |  |  |  | my %alsoSeeHash = (); | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 400 |  |  |  |  |  |  | # function | 
| 401 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 402 |  |  |  |  |  |  |  | 
| 403 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | { | 
| 405 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 406 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 407 |  |  |  |  |  |  | { | 
| 408 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 409 | 0 |  |  |  |  |  | exit; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  | # get the also see synsets | 
| 413 | 0 |  |  |  |  |  | my @alsoSees = $wn->queryWord($syns, "also"); | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | # put the synsets in a hash. this way we will avoid multiple | 
| 416 |  |  |  |  |  |  | # copies of the same synset | 
| 417 | 0 |  |  |  |  |  | my $temp; | 
| 418 | 0 |  |  |  |  |  | foreach $temp (@alsoSees) | 
| 419 |  |  |  |  |  |  | { | 
| 420 | 0 |  |  |  |  |  | $alsoSeeHash{$temp} = 1; | 
| 421 |  |  |  |  |  |  | } | 
| 422 |  |  |  |  |  |  | } | 
| 423 |  |  |  |  |  |  |  | 
| 424 |  |  |  |  |  |  | # return the synsets in a hash ref | 
| 425 | 0 |  |  |  |  |  | return(\%alsoSeeHash); | 
| 426 |  |  |  |  |  |  | } | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | =item deri | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | Returns the derived forms of a synset. | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | Returns: $deriHash | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | =cut | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | # function to take a set of words and to return their derived forms. | 
| 439 |  |  |  |  |  |  | # both input and output are hash refs keyed by fully qualified | 
| 440 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 441 |  |  |  |  |  |  | sub deri | 
| 442 |  |  |  |  |  |  | { | 
| 443 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 444 | 0 |  |  |  |  |  | my $wn = $self->{wn}; | 
| 445 | 0 |  |  |  |  |  | my ($wordsh, $outprep) = @_; | 
| 446 | 0 |  |  |  |  |  | my %deriHash = (); | 
| 447 |  |  |  |  |  |  |  | 
| 448 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 449 |  |  |  |  |  |  |  | 
| 450 | 0 |  |  |  |  |  | foreach my $word (keys %{$wordsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | { | 
| 452 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 453 | 0 | 0 |  |  |  |  | if($word !~ m/\#\w+\#\d+/) | 
| 454 |  |  |  |  |  |  | { | 
| 455 | 0 |  |  |  |  |  | print STDERR "$word is not in WORD#POS#SENSE format!\n"; | 
| 456 | 0 |  |  |  |  |  | exit 1; | 
| 457 |  |  |  |  |  |  | } | 
| 458 | 0 |  |  |  |  |  | my @deris = $wn->queryWord($word, "deri"); | 
| 459 |  |  |  |  |  |  |  | 
| 460 | 0 |  |  |  |  |  | foreach my $temp (@deris) | 
| 461 |  |  |  |  |  |  | { | 
| 462 | 0 |  |  |  |  |  | $deriHash{$temp} = 1; | 
| 463 |  |  |  |  |  |  | } | 
| 464 |  |  |  |  |  |  | } | 
| 465 | 0 |  |  |  |  |  | return(\%deriHash); | 
| 466 |  |  |  |  |  |  | } | 
| 467 |  |  |  |  |  |  |  | 
| 468 |  |  |  |  |  |  | =item domn | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | Returns the domains of a synset. | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 473 |  |  |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | Returns: $domnHash | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  | =cut | 
| 477 |  |  |  |  |  |  |  | 
| 478 |  |  |  |  |  |  | # function to take a set of synsets and to return their domain | 
| 479 |  |  |  |  |  |  | # synsets. both input and output are hash refs keyed by fully qualified | 
| 480 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 481 |  |  |  |  |  |  | sub domn | 
| 482 |  |  |  |  |  |  | { | 
| 483 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 484 | 0 |  |  |  |  |  | my $wn = $self->{wn}; | 
| 485 | 0 |  |  |  |  |  | my ($wordsh, $outprep) = @_; | 
| 486 | 0 |  |  |  |  |  | my %domnHash = (); | 
| 487 |  |  |  |  |  |  |  | 
| 488 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 0 |  |  |  |  |  | foreach my $word (keys %{$wordsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 491 |  |  |  |  |  |  | { | 
| 492 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 493 | 0 | 0 |  |  |  |  | if($word !~ m/\#\w+\#\d+/) | 
| 494 |  |  |  |  |  |  | { | 
| 495 | 0 |  |  |  |  |  | print STDERR "$word is not in WORD#POS#SENSE format!\n"; | 
| 496 | 0 |  |  |  |  |  | exit 1; | 
| 497 |  |  |  |  |  |  | } | 
| 498 | 0 |  |  |  |  |  | my @domns = $wn->queryWord($word, "domn"); | 
| 499 |  |  |  |  |  |  |  | 
| 500 | 0 |  |  |  |  |  | foreach my $temp (@domns) | 
| 501 |  |  |  |  |  |  | { | 
| 502 | 0 |  |  |  |  |  | $domnHash{$temp} = 1; | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  | } | 
| 505 | 0 |  |  |  |  |  | return (\%domnHash); | 
| 506 |  |  |  |  |  |  | } | 
| 507 |  |  |  |  |  |  |  | 
| 508 |  |  |  |  |  |  | =item domt | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | Returns the domain terms of a synset. | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 513 |  |  |  |  |  |  |  | 
| 514 |  |  |  |  |  |  | Returns: $domtHash | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | =cut | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | # function to take a set of synsets and to return their domain term | 
| 519 |  |  |  |  |  |  | # synsets. both input and output are hash refs keyed by fully qualified | 
| 520 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 521 |  |  |  |  |  |  | sub domt | 
| 522 |  |  |  |  |  |  | { | 
| 523 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 524 | 0 |  |  |  |  |  | my $wn = $self->{wn}; | 
| 525 | 0 |  |  |  |  |  | my ($wordsh, $outprep) = @_; | 
| 526 | 0 |  |  |  |  |  | my %domtHash = (); | 
| 527 |  |  |  |  |  |  |  | 
| 528 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 529 |  |  |  |  |  |  |  | 
| 530 | 0 |  |  |  |  |  | foreach my $word (keys %{$wordsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | { | 
| 532 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 533 | 0 | 0 |  |  |  |  | if($word != m/\#\w+\#\d+/) | 
| 534 |  |  |  |  |  |  | { | 
| 535 | 0 |  |  |  |  |  | print STDERR "$word is not in WORD#POS#SENSE format!\n"; | 
| 536 | 0 |  |  |  |  |  | exit 1; | 
| 537 |  |  |  |  |  |  | } | 
| 538 | 0 |  |  |  |  |  | my @domts = $wn->queryWord ($word, "domt"); | 
| 539 |  |  |  |  |  |  |  | 
| 540 | 0 |  |  |  |  |  | foreach my $temp (@domts) | 
| 541 |  |  |  |  |  |  | { | 
| 542 | 0 |  |  |  |  |  | $domtHash{$temp} = 1; | 
| 543 |  |  |  |  |  |  | } | 
| 544 |  |  |  |  |  |  | } | 
| 545 | 0 |  |  |  |  |  | return (\%domtHash); | 
| 546 |  |  |  |  |  |  |  | 
| 547 |  |  |  |  |  |  | } | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  | =item sim | 
| 550 |  |  |  |  |  |  |  | 
| 551 |  |  |  |  |  |  | Returns the similar-to synsets. | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 554 |  |  |  |  |  |  |  | 
| 555 |  |  |  |  |  |  | Returns: $simHash | 
| 556 |  |  |  |  |  |  |  | 
| 557 |  |  |  |  |  |  | =cut | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | # function to take a set of synsets and to return their similar-to | 
| 560 |  |  |  |  |  |  | # synsets. both input and output are hash refs keyed by fully qualified | 
| 561 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 562 |  |  |  |  |  |  | sub sim | 
| 563 |  |  |  |  |  |  | { | 
| 564 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 565 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 566 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 567 | 0 |  |  |  |  |  | my %simHash = (); | 
| 568 |  |  |  |  |  |  |  | 
| 569 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 570 |  |  |  |  |  |  | # function | 
| 571 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 572 |  |  |  |  |  |  |  | 
| 573 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | { | 
| 575 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 576 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 577 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 578 |  |  |  |  |  |  | { | 
| 579 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 580 | 0 |  |  |  |  |  | exit; | 
| 581 |  |  |  |  |  |  | } | 
| 582 |  |  |  |  |  |  |  | 
| 583 |  |  |  |  |  |  | # get the sim synsets | 
| 584 | 0 |  |  |  |  |  | my @sims = $wn->querySense($syns, "sim"); | 
| 585 |  |  |  |  |  |  |  | 
| 586 |  |  |  |  |  |  | # put the synsets in a hash. this way we will avoid multiple | 
| 587 |  |  |  |  |  |  | # copies of the same synset | 
| 588 | 0 |  |  |  |  |  | my $temp; | 
| 589 | 0 |  |  |  |  |  | foreach $temp (@sims) | 
| 590 |  |  |  |  |  |  | { | 
| 591 | 0 |  |  |  |  |  | $simHash{$temp} = 1; | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  | } | 
| 594 |  |  |  |  |  |  |  | 
| 595 |  |  |  |  |  |  | # return the synsets in a hash ref | 
| 596 | 0 |  |  |  |  |  | return(\%simHash); | 
| 597 |  |  |  |  |  |  | } | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | =item enta | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | Returns the entailment of a synset. | 
| 602 |  |  |  |  |  |  |  | 
| 603 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 604 |  |  |  |  |  |  |  | 
| 605 |  |  |  |  |  |  | Returns: $entaHash | 
| 606 |  |  |  |  |  |  |  | 
| 607 |  |  |  |  |  |  | =cut | 
| 608 |  |  |  |  |  |  |  | 
| 609 |  |  |  |  |  |  | # function to take a set of synsets and to return their entailment | 
| 610 |  |  |  |  |  |  | # synsets. both input and output are hash refs keyed by fully qualified | 
| 611 |  |  |  |  |  |  | # WordNet senses (in WORD#POS#SENSE format). | 
| 612 |  |  |  |  |  |  | sub enta | 
| 613 |  |  |  |  |  |  | { | 
| 614 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 615 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 616 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 617 | 0 |  |  |  |  |  | my %entailsHash = (); | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 620 |  |  |  |  |  |  | # function | 
| 621 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 622 |  |  |  |  |  |  |  | 
| 623 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | { | 
| 625 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 626 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 627 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 628 |  |  |  |  |  |  | { | 
| 629 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 630 | 0 |  |  |  |  |  | exit; | 
| 631 |  |  |  |  |  |  | } | 
| 632 |  |  |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | # get the entails synsets | 
| 634 | 0 |  |  |  |  |  | my @entails = $wn->querySense($syns, "enta"); | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | # put the entails synsets in a hash. this way we will avoid | 
| 637 |  |  |  |  |  |  | # multiple copies of the same entails synset | 
| 638 | 0 |  |  |  |  |  | my $temp; | 
| 639 | 0 |  |  |  |  |  | foreach $temp (@entails) | 
| 640 |  |  |  |  |  |  | { | 
| 641 | 0 |  |  |  |  |  | $entailsHash{$temp} = 1; | 
| 642 |  |  |  |  |  |  | } | 
| 643 |  |  |  |  |  |  | } | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | # return the entailment synsets in a hash ref | 
| 646 | 0 |  |  |  |  |  | return(\%entailsHash); | 
| 647 |  |  |  |  |  |  | } | 
| 648 |  |  |  |  |  |  |  | 
| 649 |  |  |  |  |  |  | =item caus | 
| 650 |  |  |  |  |  |  |  | 
| 651 |  |  |  |  |  |  | Returns the cause of a synset. | 
| 652 |  |  |  |  |  |  |  | 
| 653 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 654 |  |  |  |  |  |  |  | 
| 655 |  |  |  |  |  |  | Returns: $causHash | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | =cut | 
| 658 |  |  |  |  |  |  |  | 
| 659 |  |  |  |  |  |  | # function to take a set of synsets and to return their cause | 
| 660 |  |  |  |  |  |  | # synsets. both input and output are hash refs whose keys are fully | 
| 661 |  |  |  |  |  |  | # qualified WordNet senses (in WORD#POS#SENSE format). | 
| 662 |  |  |  |  |  |  | sub caus | 
| 663 |  |  |  |  |  |  | { | 
| 664 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 665 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 666 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 667 | 0 |  |  |  |  |  | my %causeHash = (); | 
| 668 |  |  |  |  |  |  |  | 
| 669 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 670 |  |  |  |  |  |  | # function | 
| 671 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 672 |  |  |  |  |  |  |  | 
| 673 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 674 |  |  |  |  |  |  | { | 
| 675 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 676 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 677 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 678 |  |  |  |  |  |  | { | 
| 679 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 680 | 0 |  |  |  |  |  | exit; | 
| 681 |  |  |  |  |  |  | } | 
| 682 |  |  |  |  |  |  |  | 
| 683 |  |  |  |  |  |  | # get the cause synsets | 
| 684 | 0 |  |  |  |  |  | my @cause = $wn->querySense($syns, "caus"); | 
| 685 |  |  |  |  |  |  |  | 
| 686 |  |  |  |  |  |  | # put the cause synsets in a hash. this way we will avoid | 
| 687 |  |  |  |  |  |  | # multiple copies of the same cause synset | 
| 688 | 0 |  |  |  |  |  | my $temp; | 
| 689 | 0 |  |  |  |  |  | foreach $temp (@cause) | 
| 690 |  |  |  |  |  |  | { | 
| 691 | 0 |  |  |  |  |  | $causeHash{$temp} = 1; | 
| 692 |  |  |  |  |  |  | } | 
| 693 |  |  |  |  |  |  | } | 
| 694 |  |  |  |  |  |  |  | 
| 695 |  |  |  |  |  |  | # return the cause synsets in a hash ref | 
| 696 | 0 |  |  |  |  |  | return(\%causeHash); | 
| 697 |  |  |  |  |  |  | } | 
| 698 |  |  |  |  |  |  |  | 
| 699 |  |  |  |  |  |  | =item part | 
| 700 |  |  |  |  |  |  |  | 
| 701 |  |  |  |  |  |  | Returns the participles of a synset. | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 704 |  |  |  |  |  |  |  | 
| 705 |  |  |  |  |  |  | Returns: $partHash | 
| 706 |  |  |  |  |  |  |  | 
| 707 |  |  |  |  |  |  | =cut | 
| 708 |  |  |  |  |  |  |  | 
| 709 |  |  |  |  |  |  | # function to take a set of synsets and to return their participle | 
| 710 |  |  |  |  |  |  | # synsets. both input and output are hash refs whose keys are fully | 
| 711 |  |  |  |  |  |  | # qualified WordNet senses (in WORD#POS#SENSE format). | 
| 712 |  |  |  |  |  |  | sub part | 
| 713 |  |  |  |  |  |  | { | 
| 714 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 715 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 716 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 717 | 0 |  |  |  |  |  | my %partHash = (); | 
| 718 |  |  |  |  |  |  |  | 
| 719 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 720 |  |  |  |  |  |  | # function | 
| 721 | 0 | 0 |  |  |  |  | return(1, 1) if(defined($outprep)); | 
| 722 |  |  |  |  |  |  |  | 
| 723 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | { | 
| 725 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 726 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 727 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 728 |  |  |  |  |  |  | { | 
| 729 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 730 | 0 |  |  |  |  |  | exit; | 
| 731 |  |  |  |  |  |  | } | 
| 732 |  |  |  |  |  |  |  | 
| 733 |  |  |  |  |  |  | # get the part synsets | 
| 734 | 0 |  |  |  |  |  | my @part = $wn->queryWord($syns, "part"); | 
| 735 |  |  |  |  |  |  |  | 
| 736 |  |  |  |  |  |  | # put the part synsets in a hash. this way we will avoid | 
| 737 |  |  |  |  |  |  | # multiple copies of the same part synset | 
| 738 | 0 |  |  |  |  |  | my $temp; | 
| 739 | 0 |  |  |  |  |  | foreach $temp (@part) | 
| 740 |  |  |  |  |  |  | { | 
| 741 | 0 |  |  |  |  |  | $partHash{$temp} = 1; | 
| 742 |  |  |  |  |  |  | } | 
| 743 |  |  |  |  |  |  | } | 
| 744 |  |  |  |  |  |  |  | 
| 745 |  |  |  |  |  |  | # return the participle synsets in a hash ref | 
| 746 | 0 |  |  |  |  |  | return(\%partHash); | 
| 747 |  |  |  |  |  |  | } | 
| 748 |  |  |  |  |  |  |  | 
| 749 |  |  |  |  |  |  | =item pert | 
| 750 |  |  |  |  |  |  |  | 
| 751 |  |  |  |  |  |  | Returns the pertainyms of a synset. | 
| 752 |  |  |  |  |  |  |  | 
| 753 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 754 |  |  |  |  |  |  |  | 
| 755 |  |  |  |  |  |  | Returns: $pertHash | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  | =cut | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | # function to take a set of synsets and to return their pertainym | 
| 760 |  |  |  |  |  |  | # synsets. both input and output are hash refs whose keys are fully | 
| 761 |  |  |  |  |  |  | # qualified WordNet senses (in WORD#POS#SENSE format). | 
| 762 |  |  |  |  |  |  | sub pert | 
| 763 |  |  |  |  |  |  | { | 
| 764 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 765 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 766 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 767 | 0 |  |  |  |  |  | my %pertHash = (); | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 770 |  |  |  |  |  |  | # function | 
| 771 | 0 | 0 |  |  |  |  | return (1, 1) if(defined($outprep)); | 
| 772 |  |  |  |  |  |  |  | 
| 773 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 774 |  |  |  |  |  |  | { | 
| 775 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 776 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 777 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 778 |  |  |  |  |  |  | { | 
| 779 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 780 | 0 |  |  |  |  |  | exit; | 
| 781 |  |  |  |  |  |  | } | 
| 782 |  |  |  |  |  |  |  | 
| 783 |  |  |  |  |  |  | # get the pert synsets | 
| 784 | 0 |  |  |  |  |  | my @pert = $wn->queryWord($syns, "pert"); | 
| 785 |  |  |  |  |  |  |  | 
| 786 |  |  |  |  |  |  | # put the pert synsets in a hash. this way we will avoid | 
| 787 |  |  |  |  |  |  | # multiple copies of the same pert synset | 
| 788 | 0 |  |  |  |  |  | my $temp; | 
| 789 | 0 |  |  |  |  |  | foreach $temp (@pert) | 
| 790 |  |  |  |  |  |  | { | 
| 791 | 0 |  |  |  |  |  | $pertHash{$temp} = 1; | 
| 792 |  |  |  |  |  |  | } | 
| 793 |  |  |  |  |  |  | } | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  | # return the pertainym synsets in a hash ref | 
| 796 | 0 |  |  |  |  |  | return(\%pertHash); | 
| 797 |  |  |  |  |  |  | } | 
| 798 |  |  |  |  |  |  |  | 
| 799 |  |  |  |  |  |  | =item glos | 
| 800 |  |  |  |  |  |  |  | 
| 801 |  |  |  |  |  |  | Returns the gloss of a synset. | 
| 802 |  |  |  |  |  |  |  | 
| 803 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 804 |  |  |  |  |  |  |  | 
| 805 |  |  |  |  |  |  | Returns: $glossString | 
| 806 |  |  |  |  |  |  |  | 
| 807 |  |  |  |  |  |  | =cut | 
| 808 |  |  |  |  |  |  |  | 
| 809 |  |  |  |  |  |  | # function to take a set of synsets and to return the concatenation of | 
| 810 |  |  |  |  |  |  | # their glosses | 
| 811 |  |  |  |  |  |  | sub glos | 
| 812 |  |  |  |  |  |  | { | 
| 813 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 814 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 815 | 0 |  |  |  |  |  | my $stemmer = $self->{'stemmer'}; | 
| 816 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 817 | 0 |  |  |  |  |  | my $returnString = ""; | 
| 818 |  |  |  |  |  |  |  | 
| 819 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 820 |  |  |  |  |  |  | # function | 
| 821 | 0 | 0 |  |  |  |  | return (1, 2) if(defined($outprep)); | 
| 822 |  |  |  |  |  |  |  | 
| 823 | 0 |  |  |  |  |  | my @synshkeys = keys %{$synsetsh}; | 
|  | 0 |  |  |  |  |  |  | 
| 824 | 0 |  |  |  |  |  | my $i = 0; | 
| 825 | 0 |  |  |  |  |  | foreach my $syns (@synshkeys) | 
| 826 |  |  |  |  |  |  | { | 
| 827 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 828 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 829 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 830 |  |  |  |  |  |  | { | 
| 831 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 832 | 0 |  |  |  |  |  | exit; | 
| 833 |  |  |  |  |  |  | } | 
| 834 |  |  |  |  |  |  |  | 
| 835 |  |  |  |  |  |  | # get the glos | 
| 836 | 0 |  |  |  |  |  | my $glosString; | 
| 837 | 0 |  |  |  |  |  | ($glosString) = $wn->querySense($syns, "glos"); | 
| 838 |  |  |  |  |  |  |  | 
| 839 |  |  |  |  |  |  | # regularize the glos | 
| 840 | 0 |  |  |  |  |  | $glosString =~ s/\".*//; | 
| 841 |  |  |  |  |  |  |  | 
| 842 |  |  |  |  |  |  | # get rid of most punctuation | 
| 843 | 0 |  |  |  |  |  | $glosString =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /; | 
| 844 |  |  |  |  |  |  | # get rid of apostrophes not surrounded by word chars | 
| 845 | 0 |  |  |  |  |  | $glosString =~ s/(?<!\w)\x27/ /g; | 
| 846 | 0 |  |  |  |  |  | $glosString =~ s/\x27(?!\w)/ /g; | 
| 847 |  |  |  |  |  |  | # remove dashes, but not hyphens | 
| 848 | 0 |  |  |  |  |  | $glosString =~ s/--/ /g; | 
| 849 |  |  |  |  |  |  |  | 
| 850 |  |  |  |  |  |  | # this causes "plane's" to become "plane s" | 
| 851 |  |  |  |  |  |  | # $glosString =~ s/[^\w]/ /g; | 
| 852 |  |  |  |  |  |  |  | 
| 853 | 0 |  |  |  |  |  | $glosString =~ s/\s+/ /g; | 
| 854 | 0 |  |  |  |  |  | $glosString = lc $glosString; | 
| 855 |  |  |  |  |  |  |  | 
| 856 |  |  |  |  |  |  | # stem the glos if asked for | 
| 857 | 0 | 0 |  |  |  |  | $glosString = $stemmer->stemString($glosString, 1) if($self->{stem}); | 
| 858 |  |  |  |  |  |  |  | 
| 859 | 0 |  |  |  |  |  | $glosString =~ s/^\s*/ /; | 
| 860 | 0 |  |  |  |  |  | $glosString =~ s/\s*$/ /; | 
| 861 |  |  |  |  |  |  |  | 
| 862 |  |  |  |  |  |  | # append to return string | 
| 863 | 0 |  |  |  |  |  | $returnString .= $glosString; | 
| 864 |  |  |  |  |  |  |  | 
| 865 |  |  |  |  |  |  | # put in boundary if more glosses coming! | 
| 866 | 0 | 0 |  |  |  |  | if($i < $#synshkeys) | 
| 867 |  |  |  |  |  |  | { | 
| 868 | 0 |  |  |  |  |  | my $boundary = sprintf("GGG%05dGGG", $self->{'glosBoundaryIndex'}); | 
| 869 | 0 |  |  |  |  |  | $returnString .= $boundary; | 
| 870 | 0 |  |  |  |  |  | ($self->{'glosBoundaryIndex'})++; | 
| 871 |  |  |  |  |  |  | } | 
| 872 | 0 |  |  |  |  |  | $i++; | 
| 873 |  |  |  |  |  |  | } | 
| 874 |  |  |  |  |  |  |  | 
| 875 |  |  |  |  |  |  | # and we are done! | 
| 876 | 0 |  |  |  |  |  | return($returnString); | 
| 877 |  |  |  |  |  |  | } | 
| 878 |  |  |  |  |  |  |  | 
| 879 |  |  |  |  |  |  | =item example | 
| 880 |  |  |  |  |  |  |  | 
| 881 |  |  |  |  |  |  | Returns the example of a synset. | 
| 882 |  |  |  |  |  |  |  | 
| 883 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 884 |  |  |  |  |  |  |  | 
| 885 |  |  |  |  |  |  | Returns: $example | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | =cut | 
| 888 |  |  |  |  |  |  |  | 
| 889 |  |  |  |  |  |  | # function to take a set of synsets and to return the concatenation of | 
| 890 |  |  |  |  |  |  | # their example strings | 
| 891 |  |  |  |  |  |  | sub example | 
| 892 |  |  |  |  |  |  | { | 
| 893 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 894 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 895 | 0 |  |  |  |  |  | my $stemmer = $self->{'stemmer'}; | 
| 896 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 897 | 0 |  |  |  |  |  | my @exampleStrings = (); | 
| 898 |  |  |  |  |  |  |  | 
| 899 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 900 |  |  |  |  |  |  | # function | 
| 901 | 0 | 0 |  |  |  |  | return (1, 2) if(defined($outprep)); | 
| 902 |  |  |  |  |  |  |  | 
| 903 |  |  |  |  |  |  | # first get all the example strings into an array | 
| 904 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 905 |  |  |  |  |  |  | { | 
| 906 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 907 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 908 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 909 |  |  |  |  |  |  | { | 
| 910 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 911 | 0 |  |  |  |  |  | exit; | 
| 912 |  |  |  |  |  |  | } | 
| 913 |  |  |  |  |  |  |  | 
| 914 |  |  |  |  |  |  | # get the glos (the examples are the quoted strings inside it) | 
| 915 | 0 |  |  |  |  |  | my $exampleString; | 
| 916 | 0 |  |  |  |  |  | ($exampleString) = $wn->querySense($syns, "glos"); | 
| 917 |  |  |  |  |  |  |  | 
| 918 |  |  |  |  |  |  | # check if this has any examples | 
| 919 | 0 | 0 |  |  |  |  | if($exampleString !~ /\"/) {next;} | 
|  | 0 |  |  |  |  |  |  | 
| 920 |  |  |  |  |  |  |  | 
| 921 | 0 |  |  |  |  |  | while($exampleString =~ /\"([^\"]*)\"/g) | 
| 922 |  |  |  |  |  |  | { | 
| 923 | 0 |  |  |  |  |  | push @exampleStrings, $1; | 
| 924 |  |  |  |  |  |  | } | 
| 925 |  |  |  |  |  |  | } | 
| 926 |  |  |  |  |  |  |  | 
| 927 |  |  |  |  |  |  | # now put the example strings together to form the return | 
| 928 |  |  |  |  |  |  | # string. separate examples with the example boundary | 
| 929 |  |  |  |  |  |  |  | 
| 930 | 0 |  |  |  |  |  | my $returnString = ""; | 
| 931 | 0 |  |  |  |  |  | my $i; | 
| 932 | 0 |  |  |  |  |  | for ($i = 0; $i <= $#exampleStrings; $i++) | 
| 933 |  |  |  |  |  |  | { | 
| 934 |  |  |  |  |  |  | # preprocess | 
| 935 |  |  |  |  |  |  |  | 
| 936 |  |  |  |  |  |  | ### | 
| 937 |  |  |  |  |  |  | # get rid of most punctuation | 
| 938 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /; | 
| 939 |  |  |  |  |  |  | # get rid of apostrophes not surrounded by word chars | 
| 940 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/(?<!\w)\x27/ /g; | 
| 941 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/\x27(?!\w)/ /g; | 
| 942 |  |  |  |  |  |  | # remove dashes, but not hyphens | 
| 943 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/--/ /g; | 
| 944 |  |  |  |  |  |  | ###$exampleStrings[$i] =~ s/[^\w]/ /g; | 
| 945 |  |  |  |  |  |  |  | 
| 946 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/\s+/ /g; | 
| 947 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/^\s*/ /; | 
| 948 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/\s*$/ /; | 
| 949 |  |  |  |  |  |  |  | 
| 950 | 0 |  |  |  |  |  | $exampleStrings[$i] = lc($exampleStrings[$i]); | 
| 951 |  |  |  |  |  |  |  | 
| 952 |  |  |  |  |  |  | # stem if so required | 
| 953 | 0 | 0 |  |  |  |  | $exampleStrings[$i] = $stemmer->stemString($exampleStrings[$i], 1) | 
| 954 |  |  |  |  |  |  | if($self->{'stem'}); | 
| 955 |  |  |  |  |  |  |  | 
| 956 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/^\s*/ /; | 
| 957 | 0 |  |  |  |  |  | $exampleStrings[$i] =~ s/\s*$/ /; | 
| 958 |  |  |  |  |  |  |  | 
| 959 |  |  |  |  |  |  | # append to $returnString | 
| 960 | 0 |  |  |  |  |  | $returnString .= $exampleStrings[$i]; | 
| 961 |  |  |  |  |  |  |  | 
| 962 |  |  |  |  |  |  | # put in boundary if more examples coming! | 
| 963 | 0 | 0 |  |  |  |  | if($i < $#exampleStrings) | 
| 964 |  |  |  |  |  |  | { | 
| 965 | 0 |  |  |  |  |  | my $boundary = sprintf("EEE%05dEEE", $self->{'exampleBoundaryIndex'}); | 
| 966 | 0 |  |  |  |  |  | $returnString .= $boundary; | 
| 967 | 0 |  |  |  |  |  | ($self->{'exampleBoundaryIndex'})++; | 
| 968 |  |  |  |  |  |  | } | 
| 969 |  |  |  |  |  |  | } | 
| 970 |  |  |  |  |  |  |  | 
| 971 |  |  |  |  |  |  | # and we are done! | 
| 972 | 0 |  |  |  |  |  | return($returnString); | 
| 973 |  |  |  |  |  |  | } | 
| 974 |  |  |  |  |  |  |  | 
| 975 |  |  |  |  |  |  | =item syns | 
| 976 |  |  |  |  |  |  |  | 
| 977 |  |  |  |  |  |  | Returns the words in the synset. | 
| 978 |  |  |  |  |  |  |  | 
| 979 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 980 |  |  |  |  |  |  |  | 
| 981 |  |  |  |  |  |  | Returns: $wordString | 
| 982 |  |  |  |  |  |  |  | 
| 983 |  |  |  |  |  |  | =cut | 
| 984 |  |  |  |  |  |  |  | 
| 985 |  |  |  |  |  |  | # function to take a set of synsets and to return the concatenation of | 
| 986 |  |  |  |  |  |  | # all the words in them. repeated words are returned only once. | 
| 987 |  |  |  |  |  |  | sub syns | 
| 988 |  |  |  |  |  |  | { | 
| 989 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 990 | 0 |  |  |  |  |  | my $wn = $self->{'wn'}; | 
| 991 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 992 | 0 |  |  |  |  |  | my $returnString = ""; | 
| 993 |  |  |  |  |  |  |  | 
| 994 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 995 |  |  |  |  |  |  | # function | 
| 996 | 0 | 0 |  |  |  |  | return (1, 2) if(defined($outprep)); | 
| 997 |  |  |  |  |  |  |  | 
| 998 | 0 |  |  |  |  |  | my %synonymHash = (); | 
| 999 | 0 |  |  |  |  |  | foreach my $syns (keys %{$synsetsh}) | 
|  | 0 |  |  |  |  |  |  | 
| 1000 |  |  |  |  |  |  | { | 
| 1001 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 1002 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 1003 | 0 | 0 |  |  |  |  | if($syns !~ /\#\w\#\d+/) | 
| 1004 |  |  |  |  |  |  | { | 
| 1005 | 0 |  |  |  |  |  | print STDERR "$syns is not in WORD\#POS\#SENSE format!\n"; | 
| 1006 | 0 |  |  |  |  |  | exit; | 
| 1007 |  |  |  |  |  |  | } | 
| 1008 |  |  |  |  |  |  |  | 
| 1009 |  |  |  |  |  |  | # get the words | 
| 1010 | 0 |  |  |  |  |  | my @synsetWords = $wn->querySense($syns, "syns"); | 
| 1011 |  |  |  |  |  |  |  | 
| 1012 |  |  |  |  |  |  | # for each word, remove the POS and SENSE, and put only the | 
| 1013 |  |  |  |  |  |  | # word in a hash | 
| 1014 | 0 |  |  |  |  |  | my $temp; | 
| 1015 | 0 |  |  |  |  |  | foreach $temp (@synsetWords) | 
| 1016 |  |  |  |  |  |  | { | 
| 1017 | 0 |  |  |  |  |  | $temp =~ s/\#.*//; | 
| 1018 | 0 |  |  |  |  |  | $synonymHash{$temp} = 1; | 
| 1019 |  |  |  |  |  |  | } | 
| 1020 |  |  |  |  |  |  | } | 
| 1021 |  |  |  |  |  |  |  | 
| 1022 |  |  |  |  |  |  | # now get hold of all the words in sorted order | 
| 1023 | 0 |  |  |  |  |  | my @synonymArray = sort keys %synonymHash; | 
| 1024 |  |  |  |  |  |  |  | 
| 1025 |  |  |  |  |  |  | # concatenate them, using the synonym boundary | 
| 1026 | 0 |  |  |  |  |  | for(my $i = 0; $i <= $#synonymArray; $i++) | 
| 1027 |  |  |  |  |  |  | { | 
| 1028 | 0 |  |  |  |  |  | $synonymArray[$i] =~ s/ /_/g; | 
| 1029 | 0 |  |  |  |  |  | $returnString .= " $synonymArray[$i] "; | 
| 1030 |  |  |  |  |  |  |  | 
| 1031 |  |  |  |  |  |  | # put in boundary if more synonyms coming! | 
| 1032 | 0 | 0 |  |  |  |  | if($i < $#synonymArray) | 
| 1033 |  |  |  |  |  |  | { | 
| 1034 | 0 |  |  |  |  |  | my $boundary = sprintf("SSS%05dSSS", $self->{synonymBoundaryIndex}); | 
| 1035 | 0 |  |  |  |  |  | $returnString .= $boundary; | 
| 1036 | 0 |  |  |  |  |  | ($self->{synonymBoundaryIndex})++; | 
| 1037 |  |  |  |  |  |  | } | 
| 1038 |  |  |  |  |  |  | } | 
| 1039 |  |  |  |  |  |  |  | 
| 1040 |  |  |  |  |  |  | # and we are done! | 
| 1041 | 0 |  |  |  |  |  | return($returnString); | 
| 1042 |  |  |  |  |  |  | } | 
| 1043 |  |  |  |  |  |  |  | 
| 1044 |  |  |  |  |  |  | =item glosexample | 
| 1045 |  |  |  |  |  |  |  | 
| 1046 |  |  |  |  |  |  | Returns the gloss and example of a synset. | 
| 1047 |  |  |  |  |  |  |  | 
| 1048 |  |  |  |  |  |  | Parameters: $synsHash, $ipType | 
| 1049 |  |  |  |  |  |  |  | 
| 1050 |  |  |  |  |  |  | Returns: $glosExampleString | 
| 1051 |  |  |  |  |  |  |  | 
| 1052 |  |  |  |  |  |  | =back | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | =cut | 
| 1055 |  |  |  |  |  |  |  | 
| 1056 |  |  |  |  |  |  | # function to take a set of synsets and to return the concatenation of | 
| 1057 |  |  |  |  |  |  | # their glosses (including the examples) | 
| 1058 |  |  |  |  |  |  | sub glosexample | 
| 1059 |  |  |  |  |  |  | { | 
| 1060 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 1061 | 0 |  |  |  |  |  | my $wn = $self->{wn}; | 
| 1062 | 0 |  |  |  |  |  | my $stemmer = $self->{stemmer}; | 
| 1063 | 0 |  |  |  |  |  | my ($synsetsh, $outprep) = @_; | 
| 1064 | 0 |  |  |  |  |  | my $returnString = ""; | 
| 1065 |  |  |  |  |  |  |  | 
| 1066 |  |  |  |  |  |  | # check if this is a request for the input-output types of this | 
| 1067 |  |  |  |  |  |  | # function | 
| 1068 | 0 | 0 |  |  |  |  | return (1, 2) if(defined($outprep)); | 
| 1069 |  |  |  |  |  |  |  | 
| 1070 | 0 |  |  |  |  |  | my @synshkeys = keys %{$synsetsh}; | 
|  | 0 |  |  |  |  |  |  | 
| 1071 | 0 |  |  |  |  |  | for(my $i = 0; $i < scalar(@synshkeys); $i++) | 
| 1072 |  |  |  |  |  |  | { | 
| 1073 |  |  |  |  |  |  | # check if in word-pos-sense format | 
| 1074 |  |  |  |  |  |  | # TODO: Replace error message and exit with return error code. | 
| 1075 | 0 | 0 |  |  |  |  | if($synshkeys[$i] !~ /\#\w\#\d+/) | 
| 1076 |  |  |  |  |  |  | { | 
| 1077 | 0 |  |  |  |  |  | print STDERR "$synshkeys[$i] is not in WORD\#POS\#SENSE format!\n"; | 
| 1078 | 0 |  |  |  |  |  | exit; | 
| 1079 |  |  |  |  |  |  | } | 
| 1080 |  |  |  |  |  |  |  | 
| 1081 |  |  |  |  |  |  | # get the glos | 
| 1082 | 0 |  |  |  |  |  | my $glosString; | 
| 1083 | 0 |  |  |  |  |  | ($glosString) = $wn->querySense($synshkeys[$i], "glos"); | 
| 1084 |  |  |  |  |  |  |  | 
| 1085 |  |  |  |  |  |  | # regularize the glos | 
| 1086 |  |  |  |  |  |  | ###$glosString =~ s/\'//g; | 
| 1087 |  |  |  |  |  |  | ###$glosString =~ s/[^\w]/ /g; | 
| 1088 |  |  |  |  |  |  |  | 
| 1089 |  |  |  |  |  |  | # get rid of most punctuation | 
| 1090 | 0 |  |  |  |  |  | $glosString =~ tr/.;:,?!(){}\x22\x60\x24\x25\x40<>/ /; | 
| 1091 |  |  |  |  |  |  | # get rid of apostrophes not surrounded by word chars | 
| 1092 | 0 |  |  |  |  |  | $glosString =~ s/(?<!\w)\x27/ /g; | 
| 1093 | 0 |  |  |  |  |  | $glosString =~ s/\x27(?!\w)/ /g; | 
| 1094 |  |  |  |  |  |  | # remove dashes, but not hyphens | 
| 1095 | 0 |  |  |  |  |  | $glosString =~ s/--/ /g; | 
| 1096 |  |  |  |  |  |  | ### | 
| 1097 |  |  |  |  |  |  |  | 
| 1098 | 0 |  |  |  |  |  | $glosString =~ s/\s+/ /g; | 
| 1099 | 0 |  |  |  |  |  | $glosString = lc $glosString; | 
| 1100 |  |  |  |  |  |  |  | 
| 1101 |  |  |  |  |  |  | # stem the glos if asked for | 
| 1102 | 0 | 0 |  |  |  |  | $glosString = $stemmer->stemString($glosString, 1) if($self->{stem}); | 
| 1103 |  |  |  |  |  |  |  | 
| 1104 | 0 |  |  |  |  |  | $glosString =~ s/^\s*/ /; | 
| 1105 | 0 |  |  |  |  |  | $glosString =~ s/\s*$/ /; | 
| 1106 |  |  |  |  |  |  |  | 
| 1107 |  |  |  |  |  |  | # append to return string | 
| 1108 | 0 |  |  |  |  |  | $returnString .= $glosString; | 
| 1109 |  |  |  |  |  |  |  | 
| 1110 |  |  |  |  |  |  | # put in boundary if more glosses coming! | 
| 1111 | 0 | 0 |  |  |  |  | if($i < $#synshkeys) | 
| 1112 |  |  |  |  |  |  | { | 
| 1113 | 0 |  |  |  |  |  | my $boundary = sprintf("XXX%05dXXX", $self->{glosBoundaryIndex}); | 
| 1114 | 0 |  |  |  |  |  | $returnString .= $boundary; | 
| 1115 | 0 |  |  |  |  |  | ($self->{glosBoundaryIndex})++; | 
| 1116 |  |  |  |  |  |  | } | 
| 1117 |  |  |  |  |  |  | } | 
| 1118 |  |  |  |  |  |  |  | 
| 1119 |  |  |  |  |  |  | # and we are done! | 
| 1120 | 0 |  |  |  |  |  | return($returnString); | 
| 1121 |  |  |  |  |  |  | } | 
| 1122 |  |  |  |  |  |  |  | 
| 1123 |  |  |  |  |  |  | 1; | 
| 1124 |  |  |  |  |  |  |  | 
| 1125 |  |  |  |  |  |  | __END__ |