| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # UMLS::Interface::ICFinder | 
| 2 |  |  |  |  |  |  | # (Last Updated $Id: ICFinder.pm,v 1.37 2014/06/27 13:23:47 btmcinnes Exp $) | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Perl module that provides a perl interface to the | 
| 5 |  |  |  |  |  |  | # Unified Medical Language System (UMLS) | 
| 6 |  |  |  |  |  |  | # | 
| 7 |  |  |  |  |  |  | # Copyright (c) 2004-2010, | 
| 8 |  |  |  |  |  |  | # | 
| 9 |  |  |  |  |  |  | # Bridget T. McInnes, University of Minnesota Twin Cities | 
| 10 |  |  |  |  |  |  | # bthomson at cs.umn.edu | 
| 11 |  |  |  |  |  |  | # | 
| 12 |  |  |  |  |  |  | # Siddharth Patwardhan, University of Utah, Salt Lake City | 
| 13 |  |  |  |  |  |  | # sidd at cs.utah.edu | 
| 14 |  |  |  |  |  |  | # | 
| 15 |  |  |  |  |  |  | # Serguei Pakhomov, University of Minnesota Twin Cities | 
| 16 |  |  |  |  |  |  | # pakh0002 at umn.edu | 
| 17 |  |  |  |  |  |  | # | 
| 18 |  |  |  |  |  |  | # Ted Pedersen, University of Minnesota, Duluth | 
| 19 |  |  |  |  |  |  | # tpederse at d.umn.edu | 
| 20 |  |  |  |  |  |  | # | 
| 21 |  |  |  |  |  |  | # Ying Liu, University of Minnesota | 
| 22 |  |  |  |  |  |  | # liux0935 at umn.edu | 
| 23 |  |  |  |  |  |  | # | 
| 24 |  |  |  |  |  |  | # This program is free software; you can redistribute it and/or | 
| 25 |  |  |  |  |  |  | # modify it under the terms of the GNU General Public License | 
| 26 |  |  |  |  |  |  | # as published by the Free Software Foundation; either version 2 | 
| 27 |  |  |  |  |  |  | # of the License, or (at your option) any later version. | 
| 28 |  |  |  |  |  |  | # | 
| 29 |  |  |  |  |  |  | # This program is distributed in the hope that it will be useful, | 
| 30 |  |  |  |  |  |  | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 31 |  |  |  |  |  |  | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 32 |  |  |  |  |  |  | # GNU General Public License for more details. | 
| 33 |  |  |  |  |  |  | # | 
| 34 |  |  |  |  |  |  | # You should have received a copy of the GNU General Public License | 
| 35 |  |  |  |  |  |  | # along with this program; if not, write to | 
| 36 |  |  |  |  |  |  | # | 
| 37 |  |  |  |  |  |  | # The Free Software Foundation, Inc., | 
| 38 |  |  |  |  |  |  | # 59 Temple Place - Suite 330, | 
| 39 |  |  |  |  |  |  | # Boston, MA  02111-1307, USA. | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | package UMLS::Interface::ICFinder; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 24 |  |  | 24 |  | 90 | use Fcntl; | 
|  | 24 |  |  |  |  | 31 |  | 
|  | 24 |  |  |  |  | 4595 |  | 
| 44 | 24 |  |  | 24 |  | 110 | use strict; | 
|  | 24 |  |  |  |  | 25 |  | 
|  | 24 |  |  |  |  | 485 |  | 
| 45 | 24 |  |  | 24 |  | 84 | use warnings; | 
|  | 24 |  |  |  |  | 27 |  | 
|  | 24 |  |  |  |  | 652 |  | 
| 46 | 24 |  |  | 24 |  | 77 | use DBI; | 
|  | 24 |  |  |  |  | 27 |  | 
|  | 24 |  |  |  |  | 763 |  | 
| 47 | 24 |  |  | 24 |  | 82 | use bytes; | 
|  | 24 |  |  |  |  | 28 |  | 
|  | 24 |  |  |  |  | 142 |  | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | my $pkg = "UMLS::Interface::ICFinder"; | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | my $root = ""; | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | my $debug = 0; | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | my %propagationFreq  = (); | 
| 56 |  |  |  |  |  |  | my %propagationHash  = (); | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | my $propagationFile  = ""; | 
| 59 |  |  |  |  |  |  | my $frequencyFile    = ""; | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | my %frequencyHash = (); | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | my $option_realtime      = undef; | 
| 64 |  |  |  |  |  |  | my $option_icpropagation = undef; | 
| 65 |  |  |  |  |  |  | my $option_icfrequency   = undef; | 
| 66 |  |  |  |  |  |  | my $option_t             = undef; | 
| 67 |  |  |  |  |  |  | my $smooth               = 0; | 
| 68 |  |  |  |  |  |  | my $configN              = 0; | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | my $errorhandler = ""; | 
| 71 |  |  |  |  |  |  | my $cuifinder    = ""; | 
| 72 |  |  |  |  |  |  | my $pathfinder   = ""; | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | my $max_leaves = 0; | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | # UMLS-specific stuff ends ---------- | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | # -------------------- Class methods start here -------------------- | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | #  method to create a new UMLS::Interface::PathFinder object | 
| 81 |  |  |  |  |  |  | sub new { | 
| 82 | 0 |  |  | 0 | 0 |  | my $self = {}; | 
| 83 | 0 |  |  |  |  |  | my $className = shift; | 
| 84 | 0 |  |  |  |  |  | my $params    = shift; | 
| 85 | 0 |  |  |  |  |  | my $chandler  = shift; | 
| 86 | 0 |  |  |  |  |  | my $phandler  = shift; | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | # initialize error handler | 
| 89 | 0 |  |  |  |  |  | $errorhandler = UMLS::Interface::ErrorHandler->new(); | 
| 90 | 0 | 0 |  |  |  |  | if(! defined $errorhandler) { | 
| 91 | 0 |  |  |  |  |  | print STDERR "The error handler did not get passed properly.\n"; | 
| 92 | 0 |  |  |  |  |  | exit; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | #  initialize the cuifinder | 
| 96 | 0 |  |  |  |  |  | $cuifinder = $chandler; | 
| 97 | 0 | 0 |  |  |  |  | if(! (defined $chandler)) { | 
| 98 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, | 
| 99 |  |  |  |  |  |  | "new", | 
| 100 |  |  |  |  |  |  | "The CuiFinder handler did not get passed properly", | 
| 101 |  |  |  |  |  |  | 8); | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | #  initialize the pathfinder | 
| 105 | 0 |  |  |  |  |  | $pathfinder = $phandler; | 
| 106 | 0 | 0 |  |  |  |  | if(! (defined $phandler)) { | 
| 107 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, | 
| 108 |  |  |  |  |  |  | "new", | 
| 109 |  |  |  |  |  |  | "The PathFinder handler did not get passed properly", | 
| 110 |  |  |  |  |  |  | 8); | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | # bless the object. | 
| 114 | 0 |  |  |  |  |  | bless($self, $className); | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 0 |  |  |  |  |  | return $self; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | # Method to initialize the UMLS::Interface::ICFinder object. | 
| 120 |  |  |  |  |  |  | sub _setPropagationParameters | 
| 121 |  |  |  |  |  |  | { | 
| 122 | 0 |  |  | 0 |  |  | my $self      = shift; | 
| 123 | 0 |  |  |  |  |  | my $params    = shift; | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | #  set function name | 
| 126 | 0 |  |  |  |  |  | my $function = "_setPropagationParameters"; | 
| 127 | 0 |  |  |  |  |  | &_debug($function); | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | #  check self | 
| 130 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 131 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | #  get the umlsinterfaceindex database from CuiFinder | 
| 135 | 0 |  |  |  |  |  | my $sdb = $cuifinder->_getIndexDB(); | 
| 136 | 0 | 0 |  |  |  |  | if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); } | 
|  | 0 |  |  |  |  |  |  | 
| 137 | 0 |  |  |  |  |  | $self->{'sdb'} = $sdb; | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | #  get the root | 
| 140 | 0 |  |  |  |  |  | $root = $cuifinder->_root(); | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | #  set up the options | 
| 143 | 0 |  |  |  |  |  | $self->_setOptions($params); | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | #  load the propagation hash if the option is specified | 
| 146 | 0 | 0 |  |  |  |  | if($option_icpropagation) { | 
| 147 | 0 |  |  |  |  |  | $self->_loadPropagationHashFromFile(); | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | #  load the frequency hash if hte option is specified | 
| 151 | 0 | 0 |  |  |  |  | if($option_icfrequency) { | 
| 152 | 0 |  |  |  |  |  | $self->_loadFrequencyHash(); | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | #  print out the function name to standard error | 
| 158 |  |  |  |  |  |  | #  input : $function <- string containing function name | 
| 159 |  |  |  |  |  |  | #  output: | 
| 160 |  |  |  |  |  |  | sub _debug { | 
| 161 | 0 |  |  | 0 |  |  | my $function = shift; | 
| 162 | 0 | 0 |  |  |  |  | if($debug) { print STDERR "In UMLS::Interface::ICFinder::$function\n"; } | 
|  | 0 |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | #  method to set the global parameter options | 
| 166 |  |  |  |  |  |  | #  input : $params <- reference to a hash | 
| 167 |  |  |  |  |  |  | #  output: | 
| 168 |  |  |  |  |  |  | sub _setOptions | 
| 169 |  |  |  |  |  |  | { | 
| 170 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 171 | 0 |  |  |  |  |  | my $params = shift; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 0 |  |  |  |  |  | my $function = "_setOptions"; | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | #  check self | 
| 176 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 177 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | #  get all the parameters | 
| 181 | 0 |  |  |  |  |  | my $debugoption   = $params->{'debug'}; | 
| 182 | 0 |  |  |  |  |  | my $t             = $params->{'t'}; | 
| 183 | 0 |  |  |  |  |  | my $icpropagation = $params->{'icpropagation'}; | 
| 184 | 0 |  |  |  |  |  | my $icfrequency   = $params->{'icfrequency'}; | 
| 185 | 0 |  |  |  |  |  | my $icsmooth      = $params->{'smooth'}; | 
| 186 | 0 |  |  |  |  |  | my $realtime      = $params->{'realtime'}; | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 0 |  |  |  |  |  | my $output = ""; | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | #  check if options have been defined | 
| 191 | 0 | 0 | 0 |  |  |  | if(defined $icpropagation || defined $icfrequency || defined $realtime || | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 192 |  |  |  |  |  |  | defined $debugoption   || defined $icsmooth) { | 
| 193 | 0 |  |  |  |  |  | $output .= "\nICFinder User Options:\n"; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | #  check if the debug option has been been defined | 
| 197 | 0 | 0 |  |  |  |  | if(defined $debugoption) { | 
| 198 | 0 |  |  |  |  |  | $debug = 1; | 
| 199 | 0 |  |  |  |  |  | $output .= "   --debug option set\n"; | 
| 200 |  |  |  |  |  |  | } | 
| 201 |  |  |  |  |  |  |  | 
| 202 | 0 | 0 |  |  |  |  | if(defined $icsmooth) { | 
| 203 | 0 |  |  |  |  |  | $smooth = $icsmooth; | 
| 204 | 0 |  |  |  |  |  | $output .= "  --smooth\n"; | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | #  check if the propagation option has been identified | 
| 208 | 0 | 0 |  |  |  |  | if(defined $icpropagation) { | 
| 209 | 0 |  |  |  |  |  | $option_icpropagation = 1; | 
| 210 | 0 |  |  |  |  |  | $propagationFile    = $icpropagation; | 
| 211 | 0 |  |  |  |  |  | $output .= "  --icpropagation $icpropagation\n"; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | #  check if the frequency option has been identified | 
| 215 | 0 | 0 |  |  |  |  | if(defined $icfrequency) { | 
| 216 | 0 |  |  |  |  |  | $option_icfrequency = 1; | 
| 217 | 0 |  |  |  |  |  | $frequencyFile    = $icfrequency; | 
| 218 | 0 |  |  |  |  |  | $output .= "  --icfrequency $icfrequency\n"; | 
| 219 |  |  |  |  |  |  | } | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | #  check if the realtime option has been identified | 
| 222 | 0 | 0 |  |  |  |  | if(defined $realtime) { | 
| 223 | 0 |  |  |  |  |  | $option_realtime = 1; | 
| 224 | 0 |  |  |  |  |  | $output .= "  --realtime option set\n"; | 
| 225 |  |  |  |  |  |  | } | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 0 |  |  |  |  |  | &_debug($function); | 
| 228 |  |  |  |  |  |  |  | 
| 229 | 0 | 0 |  |  |  |  | if(defined $t) { | 
| 230 | 0 |  |  |  |  |  | $option_t = 1; | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  | else { | 
| 233 | 0 |  |  |  |  |  | print STDERR "$output\n"; | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | #  method to set the realtime global parameter options | 
| 239 |  |  |  |  |  |  | #  input : bool <- 1 (turn on) 0 (turn off) | 
| 240 |  |  |  |  |  |  | #  output: | 
| 241 |  |  |  |  |  |  | sub _setRealtimeOption { | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 244 | 0 |  |  |  |  |  | my $option = shift; | 
| 245 |  |  |  |  |  |  |  | 
| 246 | 0 |  |  |  |  |  | my $function = "_setRealtimeOption"; | 
| 247 | 0 |  |  |  |  |  | &_debug($function); | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | #  check self | 
| 250 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 251 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 252 |  |  |  |  |  |  | } | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 0 | 0 |  |  |  |  | if($option == 1) { | 
| 255 | 0 |  |  |  |  |  | $option_realtime = 1; | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | else { | 
| 258 | 0 |  |  |  |  |  | $option_realtime = 0; | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | #  method returns the configN - the total number of CUIs | 
| 263 |  |  |  |  |  |  | #  input: | 
| 264 |  |  |  |  |  |  | #  output: int | 
| 265 |  |  |  |  |  |  | sub _getN | 
| 266 |  |  |  |  |  |  | { | 
| 267 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 268 |  |  |  |  |  |  |  | 
| 269 | 0 |  |  |  |  |  | my $function = "_getN"; | 
| 270 | 0 |  |  |  |  |  | &_debug($function); | 
| 271 |  |  |  |  |  |  |  | 
| 272 | 0 | 0 |  |  |  |  | if($configN == 0) { | 
| 273 | 0 |  |  |  |  |  | my $hash = $cuifinder->_getCuiList(); | 
| 274 | 0 |  |  |  |  |  | $configN = keys %{$hash}; | 
|  | 0 |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | } | 
| 276 | 0 |  |  |  |  |  | return $configN; | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | #  returns the intrinsic information content (IC) of a cui | 
| 281 |  |  |  |  |  |  | #  input : $concept <- string containing a cui | 
| 282 |  |  |  |  |  |  | #  output: $double  <- double containing its IC | 
| 283 |  |  |  |  |  |  | sub _getSecoIntrinsicIC | 
| 284 |  |  |  |  |  |  | { | 
| 285 | 0 |  |  | 0 |  |  | my $self     = shift; | 
| 286 | 0 |  |  |  |  |  | my $concept  = shift; | 
| 287 |  |  |  |  |  |  |  | 
| 288 | 0 |  |  |  |  |  | my $function = "_getSecoInrinsicIC"; | 
| 289 | 0 |  |  |  |  |  | &_debug($function); | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | #  check self | 
| 292 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 293 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | #  check concept was obtained | 
| 297 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 298 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | #  check if valid concept | 
| 302 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 303 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 | 0 |  |  |  |  |  | my $children = $cuifinder->_getChildren($concept); | 
| 307 |  |  |  |  |  |  |  | 
| 308 | 0 |  |  |  |  |  | my $n = _getN(); | 
| 309 |  |  |  |  |  |  |  | 
| 310 | 0 |  |  |  |  |  | my $children_num = ($#{$children}) + 2; | 
|  | 0 |  |  |  |  |  |  | 
| 311 | 0 |  |  |  |  |  | my $ic = 1 - ( (log($children_num)/log(10)) / (log($n)/log(10)) ); | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 0 |  |  |  |  |  | return $ic; | 
| 314 |  |  |  |  |  |  | } | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | sub _getDecendents | 
| 318 |  |  |  |  |  |  | { | 
| 319 | 0 |  |  | 0 |  |  | my $concept   = shift; | 
| 320 | 0 |  |  |  |  |  | my $array     = shift; | 
| 321 |  |  |  |  |  |  |  | 
| 322 | 0 | 0 |  |  |  |  | if($concept=~/^\s*$/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | #  if concept is one of the following just return | 
| 325 |  |  |  |  |  |  | #C1274012|Ambiguous concept (inactive concept) | 
| 326 | 0 | 0 |  |  |  |  | if($concept=~/C1274012/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | #C1274013|Duplicate concept (inactive concept) | 
| 328 | 0 | 0 |  |  |  |  | if($concept=~/C1274013/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  | #C1276325|Reason not stated concept (inactive concept) | 
| 330 | 0 | 0 |  |  |  |  | if($concept=~/C1276325/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | #C1274014|Outdated concept (inactive concept) | 
| 332 | 0 | 0 |  |  |  |  | if($concept=~/C1274014/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | #C1274015|Erroneous concept (inactive concept) | 
| 334 | 0 | 0 |  |  |  |  | if($concept=~/C1274015/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | #C1274021|Moved elsewhere (inactive concept) | 
| 336 | 0 | 0 |  |  |  |  | if($concept=~/C1274021/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | #C2733115|limited status concept | 
| 338 | 0 | 0 |  |  |  |  | if($concept=~/C2733115/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | #C1443286| | 
| 340 | 0 | 0 |  |  |  |  | if($concept=~/C1443286/) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | #  set the new path | 
| 343 | 0 |  |  |  |  |  | my @path = @{$array}; | 
|  | 0 |  |  |  |  |  |  | 
| 344 | 0 |  |  |  |  |  | push @path, $concept; | 
| 345 |  |  |  |  |  |  |  | 
| 346 | 0 |  |  |  |  |  | my $series = join " ", @path; | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  | #  get all the children | 
| 349 | 0 |  |  |  |  |  | my $children = $cuifinder->_getChildren($concept); | 
| 350 |  |  |  |  |  |  |  | 
| 351 | 0 |  |  |  |  |  | my %subsumers = (); my %leaves = (); | 
|  | 0 |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | #  search through the children | 
| 354 | 0 |  |  |  |  |  | foreach my $child (@{$children}) { | 
|  | 0 |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | #  check if child cui has already in the path | 
| 357 | 0 |  |  |  |  |  | my $flag = 0; | 
| 358 | 0 |  |  |  |  |  | foreach my $cui (@path) { | 
| 359 | 0 | 0 |  |  |  |  | if($cui eq $child) { | 
| 360 | 0 |  |  |  |  |  | $flag = 1; | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  | } | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | #  if it isn't continue on with the depth first search | 
| 365 | 0 | 0 |  |  |  |  | if($flag == 0) { | 
| 366 | 0 |  |  |  |  |  | my ($s, $l) = &_getDecendents($child, \@path); | 
| 367 | 0 |  |  |  |  |  | %subsumers = (%subsumers, %{$s}); %leaves = (%leaves, %{$l}); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | } | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 0 | 0 |  |  |  |  | if($#{$children} < 0) { $leaves{$concept}++; } $subsumers{$concept}++; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  |  | 
| 373 | 0 |  |  |  |  |  | return (\%subsumers, \%leaves); | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | #  returns the intrinsic information content (IC) of a cui | 
| 377 |  |  |  |  |  |  | #  input : $concept <- string containing a cui | 
| 378 |  |  |  |  |  |  | #  output: $double  <- double containing its IC | 
| 379 |  |  |  |  |  |  | sub _getSanchezIntrinsicIC | 
| 380 |  |  |  |  |  |  | { | 
| 381 | 0 |  |  | 0 |  |  | my $self     = shift; | 
| 382 | 0 |  |  |  |  |  | my $concept  = shift; | 
| 383 |  |  |  |  |  |  |  | 
| 384 | 0 |  |  |  |  |  | my $function = "_getSanchezIntrinsicIC"; | 
| 385 | 0 |  |  |  |  |  | &_debug($function); | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | #  check self | 
| 388 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 389 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 390 |  |  |  |  |  |  | } | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | #  check concept was obtained | 
| 393 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 394 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | #  check if valid concept | 
| 398 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 399 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 400 |  |  |  |  |  |  | } | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | #  get the leaves | 
| 403 | 0 |  |  |  |  |  | my $leaves = 0; my $maxleaves = 0; | 
|  | 0 |  |  |  |  |  |  | 
| 404 | 0 | 0 |  |  |  |  | if($option_realtime) { | 
| 405 | 0 |  |  |  |  |  | my @path = (); | 
| 406 | 0 |  |  |  |  |  | my ($d, $l) = _getDecendents($concept, \@path); | 
| 407 | 0 |  |  |  |  |  | $leaves = keys %{$l}; | 
|  | 0 |  |  |  |  |  |  | 
| 408 | 0 |  |  |  |  |  | $maxleaves = _getMaxLeaves(); | 
| 409 |  |  |  |  |  |  | } | 
| 410 |  |  |  |  |  |  | else { | 
| 411 |  |  |  |  |  |  | #  get the umlsinterfaceindex database from CuiFinder | 
| 412 | 0 |  |  |  |  |  | my $sdb = $cuifinder->_getIndexDB(); | 
| 413 | 0 | 0 |  |  |  |  | if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); } | 
|  | 0 |  |  |  |  |  |  | 
| 414 | 0 |  |  |  |  |  | $self->{'sdb'} = $sdb; | 
| 415 |  |  |  |  |  |  |  | 
| 416 |  |  |  |  |  |  | #  get the intrinsic table name | 
| 417 | 0 |  |  |  |  |  | my $intrinsicTableName = $cuifinder->_getIntrinsicTableName(); | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | #  check that it exists in the index | 
| 420 | 0 |  |  |  |  |  | my $arrRefCheck = $sdb->selectcol_arrayref("select count(*) from tableindex where HEX=\'$intrinsicTableName\'"); | 
| 421 | 0 |  |  |  |  |  | $errorhandler->_checkDbError($pkg, $function, $sdb); | 
| 422 | 0 |  |  |  |  |  | my $check = shift @{$arrRefCheck}; | 
|  | 0 |  |  |  |  |  |  | 
| 423 |  |  |  |  |  |  |  | 
| 424 | 0 | 0 |  |  |  |  | if($check != 1) { | 
| 425 | 0 |  |  |  |  |  | print STDERR "The index does not contain the intrinsic table for the\n"; | 
| 426 | 0 |  |  |  |  |  | print STDERR "sources/relations in the configration file. It must have\n"; | 
| 427 | 0 |  |  |  |  |  | print STDERR "been created with an earlier version of UMLS-Interface.\n"; | 
| 428 | 0 |  |  |  |  |  | print STDERR "Please either recreate the index by removing it using the\n"; | 
| 429 | 0 |  |  |  |  |  | print STDERR "removeConfigData.pl or run with the --realtime option.\n\n"; | 
| 430 | 0 |  |  |  |  |  | exit; | 
| 431 |  |  |  |  |  |  | } | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  | #  get subsumers and leaves from the intrinsic table | 
| 434 | 0 |  |  |  |  |  | my $arrRefLeaves = $sdb->selectcol_arrayref("select LEAVES from $intrinsicTableName where CUI=\'$concept\'"); | 
| 435 | 0 |  |  |  |  |  | $errorhandler->_checkDbError($pkg, $function, $sdb); | 
| 436 |  |  |  |  |  |  |  | 
| 437 | 0 |  |  |  |  |  | my $arrRefMaxLeaves = $sdb->selectcol_arrayref("select LEAVES from $intrinsicTableName where CUI=\'$root\'"); | 
| 438 | 0 |  |  |  |  |  | $errorhandler->_checkDbError($pkg, $function, $sdb); | 
| 439 |  |  |  |  |  |  |  | 
| 440 | 0 |  |  |  |  |  | $maxleaves = shift @{$arrRefMaxLeaves}; | 
|  | 0 |  |  |  |  |  |  | 
| 441 | 0 |  |  |  |  |  | $leaves    = shift @{$arrRefLeaves}; | 
|  | 0 |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | } | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | #  get the subsumers | 
| 445 | 0 |  |  |  |  |  | my $paths = $pathfinder->_pathsToRoot($concept); | 
| 446 | 0 |  |  |  |  |  | my %subhash = (); | 
| 447 | 0 |  |  |  |  |  | foreach my $path (@{$paths}) { | 
|  | 0 |  |  |  |  |  |  | 
| 448 | 0 |  |  |  |  |  | my @array = split/\s+/, $path; | 
| 449 | 0 |  |  |  |  |  | foreach my $element (@array) { $subhash{$element}++; } | 
|  | 0 |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | } | 
| 451 | 0 |  |  |  |  |  | my $subsumers = keys %subhash; | 
| 452 |  |  |  |  |  |  |  | 
| 453 | 0 |  |  |  |  |  | my $a = 0; | 
| 454 | 0 | 0 |  |  |  |  | if(defined $leaves) { | 
| 455 | 0 | 0 | 0 |  |  |  | if($leaves != 0 && $subsumers != 0) { | 
| 456 | 0 |  |  |  |  |  | $a = $leaves/$subsumers; | 
| 457 |  |  |  |  |  |  | } | 
| 458 | 0 |  |  |  |  |  | }$a++; | 
| 459 |  |  |  |  |  |  |  | 
| 460 | 0 |  |  |  |  |  | my $b = $maxleaves; $b++; | 
|  | 0 |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  |  | 
| 462 | 0 |  |  |  |  |  | my $ic = -1 * ( (log( $a/$b )/log(2)) ); | 
| 463 |  |  |  |  |  |  |  | 
| 464 | 0 |  |  |  |  |  | return $ic; | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | sub _getMaxLeaves { | 
| 468 |  |  |  |  |  |  |  | 
| 469 | 0 | 0 |  | 0 |  |  | if($max_leaves == 0) { | 
| 470 |  |  |  |  |  |  |  | 
| 471 | 0 |  |  |  |  |  | my @path = (); | 
| 472 | 0 |  |  |  |  |  | my ($s, $l) = _getDecendents($cuifinder->_root(), \@path); | 
| 473 |  |  |  |  |  |  |  | 
| 474 | 0 |  |  |  |  |  | $max_leaves = keys %{$l}; | 
|  | 0 |  |  |  |  |  |  | 
| 475 |  |  |  |  |  |  | } | 
| 476 |  |  |  |  |  |  |  | 
| 477 | 0 |  |  |  |  |  | return $max_leaves; | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | #  returns the information content (IC) of a cui | 
| 481 |  |  |  |  |  |  | #  input : $concept <- string containing a cui | 
| 482 |  |  |  |  |  |  | #  output: $double  <- double containing its IC | 
| 483 |  |  |  |  |  |  | sub _getIC | 
| 484 |  |  |  |  |  |  | { | 
| 485 | 0 |  |  | 0 |  |  | my $self     = shift; | 
| 486 | 0 |  |  |  |  |  | my $concept  = shift; | 
| 487 |  |  |  |  |  |  |  | 
| 488 | 0 |  |  |  |  |  | my $function = "_getIC"; | 
| 489 | 0 |  |  |  |  |  | &_debug($function); | 
| 490 |  |  |  |  |  |  |  | 
| 491 |  |  |  |  |  |  | #  check self | 
| 492 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 493 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 494 |  |  |  |  |  |  | } | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | #  check concept was obtained | 
| 497 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 498 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 499 |  |  |  |  |  |  | } | 
| 500 |  |  |  |  |  |  |  | 
| 501 |  |  |  |  |  |  | #  check if valid concept | 
| 502 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 503 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | #  if option frequency then the propagation hash | 
| 507 |  |  |  |  |  |  | #  hash has not been loaded and we should determine | 
| 508 |  |  |  |  |  |  | #  the information content of the concept using the | 
| 509 |  |  |  |  |  |  | #  frequency information in the file in realtime | 
| 510 | 0 | 0 |  |  |  |  | if($option_icfrequency) { | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | #  initialize the propagation hash | 
| 513 | 0 |  |  |  |  |  | $self->_initializePropagationHash(); | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | #  load the propagation frequency hash | 
| 516 | 0 |  |  |  |  |  | $self->_loadPropagationFreq(\%frequencyHash); | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | #  propogate the counts | 
| 519 | 0 |  |  |  |  |  | &_debug("_propagation"); | 
| 520 | 0 |  |  |  |  |  | my @array = (); | 
| 521 | 0 |  |  |  |  |  | $self->_propagation($concept, \@array); | 
| 522 |  |  |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | #  tally up the propagation counts | 
| 524 | 0 |  |  |  |  |  | $self->_tallyCounts(); | 
| 525 |  |  |  |  |  |  | } | 
| 526 |  |  |  |  |  |  |  | 
| 527 | 0 |  |  |  |  |  | my $prob = $propagationHash{$concept}; | 
| 528 |  |  |  |  |  |  |  | 
| 529 | 0 | 0 |  |  |  |  | if(!defined $prob) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  |  | 
| 531 | 0 |  |  |  |  |  | my $ic = 0; | 
| 532 | 0 | 0 | 0 |  |  |  | if($prob > 0 and $prob < 1) { $ic = -1 * (log($prob) / log(10)); } | 
|  | 0 |  |  |  |  |  |  | 
| 533 |  |  |  |  |  |  |  | 
| 534 | 0 |  |  |  |  |  | return $ic; | 
| 535 |  |  |  |  |  |  | } | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  |  | 
| 538 |  |  |  |  |  |  | #  returns the probability | 
| 539 |  |  |  |  |  |  | #  input : $concept <- string containing a cui | 
| 540 |  |  |  |  |  |  | #  output: $double  <- double containing its probability | 
| 541 |  |  |  |  |  |  | sub _getProbability | 
| 542 |  |  |  |  |  |  | { | 
| 543 | 0 |  |  | 0 |  |  | my $self     = shift; | 
| 544 | 0 |  |  |  |  |  | my $concept  = shift; | 
| 545 |  |  |  |  |  |  |  | 
| 546 | 0 |  |  |  |  |  | my $function = "_getProbability"; | 
| 547 | 0 |  |  |  |  |  | &_debug($function); | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  | #  check self | 
| 550 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 551 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 552 |  |  |  |  |  |  | } | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | #  check concept was obtained | 
| 555 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 556 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 557 |  |  |  |  |  |  | } | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | #  check if valid concept | 
| 560 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 561 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 562 |  |  |  |  |  |  | } | 
| 563 |  |  |  |  |  |  |  | 
| 564 |  |  |  |  |  |  | #  if option frequency then the propagation hash | 
| 565 |  |  |  |  |  |  | #  hash has not been loaded and we should determine | 
| 566 |  |  |  |  |  |  | #  the information content of the concept using the | 
| 567 |  |  |  |  |  |  | #  frequency information in the file in realtime | 
| 568 | 0 | 0 |  |  |  |  | if($option_icfrequency) { | 
| 569 |  |  |  |  |  |  |  | 
| 570 |  |  |  |  |  |  | #  initialize the propagation hash | 
| 571 | 0 |  |  |  |  |  | $self->_initializePropagationHash(); | 
| 572 |  |  |  |  |  |  |  | 
| 573 |  |  |  |  |  |  | #  load the propagation frequency hash | 
| 574 | 0 |  |  |  |  |  | $self->_loadPropagationFreq(\%frequencyHash); | 
| 575 |  |  |  |  |  |  |  | 
| 576 |  |  |  |  |  |  | #  propogate the counts | 
| 577 | 0 |  |  |  |  |  | &_debug("_propagation"); | 
| 578 | 0 |  |  |  |  |  | my @array = (); | 
| 579 | 0 |  |  |  |  |  | $self->_propagation($concept, \@array); | 
| 580 |  |  |  |  |  |  |  | 
| 581 |  |  |  |  |  |  | #  tally up the propagation counts | 
| 582 | 0 |  |  |  |  |  | $self->_tallyCounts(); | 
| 583 |  |  |  |  |  |  | } | 
| 584 |  |  |  |  |  |  |  | 
| 585 | 0 |  |  |  |  |  | my $prob = $propagationHash{$concept}; | 
| 586 |  |  |  |  |  |  |  | 
| 587 |  |  |  |  |  |  |  | 
| 588 | 0 | 0 |  |  |  |  | if(!defined $prob) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 589 |  |  |  |  |  |  |  | 
| 590 | 0 |  |  |  |  |  | return $prob; | 
| 591 |  |  |  |  |  |  | } | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  | #  returns the propagation count (frequency)  of a cui | 
| 594 |  |  |  |  |  |  | #  input : $concept <- string containing a cui | 
| 595 |  |  |  |  |  |  | #  output: $double  <- frequency | 
| 596 |  |  |  |  |  |  | sub _getFrequency | 
| 597 |  |  |  |  |  |  | { | 
| 598 | 0 |  |  | 0 |  |  | my $self     = shift; | 
| 599 | 0 |  |  |  |  |  | my $concept  = shift; | 
| 600 |  |  |  |  |  |  |  | 
| 601 | 0 |  |  |  |  |  | my $function = "_getFrequency"; | 
| 602 | 0 |  |  |  |  |  | &_debug($function); | 
| 603 |  |  |  |  |  |  |  | 
| 604 |  |  |  |  |  |  | #  check self | 
| 605 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 606 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 607 |  |  |  |  |  |  | } | 
| 608 |  |  |  |  |  |  |  | 
| 609 |  |  |  |  |  |  | #  check concept was obtained | 
| 610 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 611 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 612 |  |  |  |  |  |  | } | 
| 613 |  |  |  |  |  |  |  | 
| 614 |  |  |  |  |  |  | #  check if valid concept | 
| 615 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 616 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 617 |  |  |  |  |  |  | } | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | #  if option frequency then the propagation hash | 
| 620 |  |  |  |  |  |  | #  hash has not been loaded and we should determine | 
| 621 |  |  |  |  |  |  | #  the information content of the concept using the | 
| 622 |  |  |  |  |  |  | #  frequency information in the file in realtime | 
| 623 | 0 | 0 |  |  |  |  | if($option_icfrequency) { | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  | #  initialize the propagation hash | 
| 626 | 0 |  |  |  |  |  | $self->_initializePropagationHash(); | 
| 627 |  |  |  |  |  |  |  | 
| 628 |  |  |  |  |  |  | #  load the propagation frequency hash | 
| 629 | 0 |  |  |  |  |  | $self->_loadPropagationFreq(\%frequencyHash); | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | #  propogate the counts | 
| 632 | 0 |  |  |  |  |  | &_debug("_propagation"); | 
| 633 | 0 |  |  |  |  |  | my @array = (); | 
| 634 | 0 |  |  |  |  |  | $self->_propagation($concept, \@array); | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | #  tally up the propagation counts | 
| 637 | 0 |  |  |  |  |  | $self->_tallyCounts(); | 
| 638 |  |  |  |  |  |  | } | 
| 639 |  |  |  |  |  |  |  | 
| 640 | 0 |  |  |  |  |  | my $freq = int($propagationHash{$concept} * $configN); | 
| 641 |  |  |  |  |  |  |  | 
| 642 | 0 |  |  |  |  |  | return $freq; | 
| 643 |  |  |  |  |  |  | } | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | #  this method obtains the CUIs in the sources which | 
| 646 |  |  |  |  |  |  | #  are going to be propagated | 
| 647 |  |  |  |  |  |  | #  input : | 
| 648 |  |  |  |  |  |  | #  output: $hash <- reference to hash containing the cuis | 
| 649 |  |  |  |  |  |  | sub _loadFrequencyHash { | 
| 650 |  |  |  |  |  |  |  | 
| 651 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 652 |  |  |  |  |  |  |  | 
| 653 | 0 |  |  |  |  |  | my $function = "_loadFrequencyHash"; | 
| 654 | 0 |  |  |  |  |  | &_debug($function); | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | #  check self | 
| 658 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 659 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 660 |  |  |  |  |  |  | } | 
| 661 |  |  |  |  |  |  |  | 
| 662 |  |  |  |  |  |  | #  open the frequency file | 
| 663 | 0 | 0 |  |  |  |  | open(FILE, $frequencyFile) || die "Could not open file $frequencyFile\n"; | 
| 664 |  |  |  |  |  |  |  | 
| 665 |  |  |  |  |  |  | #  get the source and relations associated with the propagation file | 
| 666 | 0 |  |  |  |  |  | my $sab  = ; chomp $sab; | 
|  | 0 |  |  |  |  |  |  | 
| 667 | 0 |  |  |  |  |  | my $rel  = ; chomp $rel; | 
|  | 0 |  |  |  |  |  |  | 
| 668 |  |  |  |  |  |  |  | 
| 669 |  |  |  |  |  |  | #  get the rela realtions associated with the propagation file if one exists | 
| 670 | 0 |  |  |  |  |  | my $rela = ; chomp $rela; | 
|  | 0 |  |  |  |  |  |  | 
| 671 |  |  |  |  |  |  |  | 
| 672 |  |  |  |  |  |  | #  if it does exist in then get N otherwise we got N already. | 
| 673 | 0 |  |  |  |  |  | my $ninfo = $rela; | 
| 674 | 0 | 0 |  |  |  |  | if($rela=~/RELA/) { | 
| 675 | 0 |  |  |  |  |  | $ninfo = ; chomp $ninfo; | 
|  | 0 |  |  |  |  |  |  | 
| 676 |  |  |  |  |  |  | } | 
| 677 |  |  |  |  |  |  | else { | 
| 678 | 0 |  |  |  |  |  | $rela = ""; | 
| 679 |  |  |  |  |  |  | } | 
| 680 |  |  |  |  |  |  |  | 
| 681 | 0 |  |  |  |  |  | $ninfo=~/N\s*\:\:\s*([0-9]+)/; | 
| 682 | 0 |  |  |  |  |  | $configN = $1; | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | #  get the source and relations from config file or the defaults | 
| 685 | 0 |  |  |  |  |  | my $configsab = $cuifinder->_getSabString(); | 
| 686 | 0 |  |  |  |  |  | my $configrel = $cuifinder->_getRelString(); | 
| 687 |  |  |  |  |  |  |  | 
| 688 |  |  |  |  |  |  | #  check the source information is correct | 
| 689 | 0 | 0 |  |  |  |  | if(! ($self->_checkParameters($configsab, $sab)) ) { | 
| 690 | 0 |  |  |  |  |  | my $str = "SAB information ($sab) does not match the config file ($configsab)."; | 
| 691 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 692 |  |  |  |  |  |  | } | 
| 693 |  |  |  |  |  |  |  | 
| 694 |  |  |  |  |  |  | #  check that that the relation information is correct | 
| 695 | 0 | 0 |  |  |  |  | if(! ($self->_checkParameters($configrel, $rel)) ) { | 
| 696 | 0 |  |  |  |  |  | my $str = "REL information ($rel) does not match the config file ($configrel)."; | 
| 697 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 698 |  |  |  |  |  |  | } | 
| 699 |  |  |  |  |  |  |  | 
| 700 |  |  |  |  |  |  | #  check if rela information was used | 
| 701 | 0 | 0 |  |  |  |  | if($rela ne "") { | 
| 702 | 0 | 0 |  |  |  |  | if(!($self->_checkParameters($_, $cuifinder->_getRelaString()))) { | 
| 703 | 0 |  |  |  |  |  | my $str = "RELA information does not match the config file ($_)."; | 
| 704 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 705 |  |  |  |  |  |  | } | 
| 706 |  |  |  |  |  |  | } | 
| 707 |  |  |  |  |  |  | #  check that the relations used are acceptable for propagation | 
| 708 |  |  |  |  |  |  | #  the only acceptable relations are RB/RN and PAR/CHD | 
| 709 | 0 | 0 |  |  |  |  | if(! ($self->_checkHierarchicalRelations ($configrel)) ) { | 
| 710 | 0 |  |  |  |  |  | my $str = "REL information ($rel) contains relations other than RB/RN and PAR/CHD."; | 
| 711 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 11); | 
| 712 |  |  |  |  |  |  | } | 
| 713 |  |  |  |  |  |  |  | 
| 714 |  |  |  |  |  |  | #  obtain the frequency counts storing them in the frequency hash table | 
| 715 | 0 |  |  |  |  |  | while() { | 
| 716 | 0 |  |  |  |  |  | chomp; | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | #  if blank line more on | 
| 719 | 0 | 0 |  |  |  |  | if($_=~/^\s*$/) { next; } | 
|  | 0 |  |  |  |  |  |  | 
| 720 |  |  |  |  |  |  |  | 
| 721 |  |  |  |  |  |  | #  get the cui and its frequency count | 
| 722 | 0 |  |  |  |  |  | my($cui, $freq) = split/<>/; | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | #  make certain that it is a cui and a frequency | 
| 725 |  |  |  |  |  |  | #  and if it is load it into the frequency hash | 
| 726 | 0 | 0 | 0 |  |  |  | if( ($cui=~/C[0-9]/) && ($freq=~/[0-9]+/) ) { | 
| 727 | 0 | 0 |  |  |  |  | if(exists $frequencyHash{$cui}) { | 
| 728 | 0 |  |  |  |  |  | $frequencyHash{$cui} += $freq; | 
| 729 |  |  |  |  |  |  | } | 
| 730 |  |  |  |  |  |  | else { | 
| 731 | 0 |  |  |  |  |  | $frequencyHash{$cui} = $freq; | 
| 732 |  |  |  |  |  |  | } | 
| 733 |  |  |  |  |  |  | } | 
| 734 |  |  |  |  |  |  | } | 
| 735 |  |  |  |  |  |  |  | 
| 736 | 0 |  |  |  |  |  | close(FILE); | 
| 737 |  |  |  |  |  |  | } | 
| 738 |  |  |  |  |  |  |  | 
| 739 |  |  |  |  |  |  | #  this method obtains the CUIs in the sources which | 
| 740 |  |  |  |  |  |  | #  are going to be propagated | 
| 741 |  |  |  |  |  |  | #  input : | 
| 742 |  |  |  |  |  |  | #  output: $hash <- reference to hash containing the cuis | 
| 743 |  |  |  |  |  |  | sub _getPropagationCuis { | 
| 744 |  |  |  |  |  |  |  | 
| 745 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 746 |  |  |  |  |  |  |  | 
| 747 | 0 |  |  |  |  |  | my $function = "_getPropagationCuis"; | 
| 748 | 0 |  |  |  |  |  | &_debug($function); | 
| 749 |  |  |  |  |  |  |  | 
| 750 |  |  |  |  |  |  | #  check self | 
| 751 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 752 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 753 |  |  |  |  |  |  | } | 
| 754 |  |  |  |  |  |  |  | 
| 755 |  |  |  |  |  |  | #  return the reference to a hash | 
| 756 | 0 |  |  |  |  |  | return $cuifinder->_getCuiList(); | 
| 757 |  |  |  |  |  |  | } | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | #  initialize the propgation hash | 
| 760 |  |  |  |  |  |  | #  input : | 
| 761 |  |  |  |  |  |  | #  output: | 
| 762 |  |  |  |  |  |  | sub _initializePropagationHash { | 
| 763 |  |  |  |  |  |  |  | 
| 764 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 765 |  |  |  |  |  |  |  | 
| 766 | 0 |  |  |  |  |  | my $function = "_initializePropagationHash"; | 
| 767 | 0 |  |  |  |  |  | &_debug($function); | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | #  check self | 
| 770 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 771 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 772 |  |  |  |  |  |  | } | 
| 773 |  |  |  |  |  |  |  | 
| 774 |  |  |  |  |  |  | #  clear out the hash just in case | 
| 775 | 0 |  |  |  |  |  | my $hash = $self->_getPropagationCuis(); | 
| 776 |  |  |  |  |  |  |  | 
| 777 |  |  |  |  |  |  | #  add the cuis to the propagation hash | 
| 778 | 0 |  |  |  |  |  | foreach my $cui (sort keys %{$hash}) { | 
|  | 0 |  |  |  |  |  |  | 
| 779 | 0 |  |  |  |  |  | $propagationHash{$cui} = ""; | 
| 780 | 0 |  |  |  |  |  | $propagationFreq{$cui} = $smooth; | 
| 781 |  |  |  |  |  |  | } | 
| 782 |  |  |  |  |  |  | } | 
| 783 |  |  |  |  |  |  |  | 
| 784 |  |  |  |  |  |  | #  load the propagation frequency has with the frequency counts | 
| 785 |  |  |  |  |  |  | #  input : $hash <- reference to hash containing frequency counts | 
| 786 |  |  |  |  |  |  | #  output: | 
| 787 |  |  |  |  |  |  | sub _loadPropagationFreq { | 
| 788 |  |  |  |  |  |  |  | 
| 789 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 790 | 0 |  |  |  |  |  | my $fhash = shift; | 
| 791 |  |  |  |  |  |  |  | 
| 792 | 0 |  |  |  |  |  | my $function = "_loadPropagationFreq"; | 
| 793 | 0 |  |  |  |  |  | &_debug($function); | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  | #  check self | 
| 796 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 797 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 798 |  |  |  |  |  |  | } | 
| 799 |  |  |  |  |  |  |  | 
| 800 |  |  |  |  |  |  | #  loop through and set the frequency count | 
| 801 | 0 |  |  |  |  |  | my $N = 0; | 
| 802 | 0 |  |  |  |  |  | foreach my $cui (sort keys %{$fhash}) { | 
|  | 0 |  |  |  |  |  |  | 
| 803 | 0 | 0 |  |  |  |  | if($cui=~/^\s*$/) { next; } | 
|  | 0 |  |  |  |  |  |  | 
| 804 |  |  |  |  |  |  |  | 
| 805 | 0 |  |  |  |  |  | my $freq = ${$fhash}{$cui}; | 
|  | 0 |  |  |  |  |  |  | 
| 806 | 0 | 0 |  |  |  |  | if(exists $propagationFreq{$cui}) { | 
| 807 | 0 |  |  |  |  |  | $propagationFreq{$cui} += $freq; | 
| 808 |  |  |  |  |  |  | } | 
| 809 | 0 |  |  |  |  |  | $N = $N + $freq; | 
| 810 |  |  |  |  |  |  | } | 
| 811 |  |  |  |  |  |  |  | 
| 812 |  |  |  |  |  |  | #  check if something has been set | 
| 813 | 0 | 0 |  |  |  |  | if($smooth == 1) { | 
| 814 | 0 |  |  |  |  |  | my $pkeys = keys %propagationFreq; | 
| 815 | 0 |  |  |  |  |  | $N += $pkeys; | 
| 816 |  |  |  |  |  |  | } | 
| 817 |  |  |  |  |  |  |  | 
| 818 |  |  |  |  |  |  | #  set N for the config file | 
| 819 | 0 |  |  |  |  |  | $configN = $N; | 
| 820 |  |  |  |  |  |  |  | 
| 821 |  |  |  |  |  |  | #  loop through again and set the probability | 
| 822 | 0 |  |  |  |  |  | foreach my $cui (sort keys %propagationFreq) { | 
| 823 | 0 |  |  |  |  |  | $propagationFreq{$cui} = ($propagationFreq{$cui}) / $N; | 
| 824 |  |  |  |  |  |  |  | 
| 825 |  |  |  |  |  |  | } | 
| 826 |  |  |  |  |  |  | } | 
| 827 |  |  |  |  |  |  |  | 
| 828 |  |  |  |  |  |  | #  check that the parameters in config file match | 
| 829 |  |  |  |  |  |  | #  input : $string1 <- string containing parameter | 
| 830 |  |  |  |  |  |  | #          $string2 <- string containing configuratation parameter | 
| 831 |  |  |  |  |  |  | #  output: 0|1      <- true or false | 
| 832 |  |  |  |  |  |  | sub _checkParameters { | 
| 833 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 834 | 0 |  |  |  |  |  | my $string1 = shift; | 
| 835 | 0 |  |  |  |  |  | my $string2 = shift; | 
| 836 |  |  |  |  |  |  |  | 
| 837 | 0 |  |  |  |  |  | my $function = "_checkParameters"; | 
| 838 | 0 |  |  |  |  |  | &_debug($function); | 
| 839 |  |  |  |  |  |  |  | 
| 840 | 0 | 0 | 0 |  |  |  | if( !(defined $string1) && !(defined $string2) ) { return 1; } | 
|  | 0 |  |  |  |  |  |  | 
| 841 | 0 | 0 | 0 |  |  |  | if( ($string1=~/^\s*$/) && ($string2=~/^\s*$/) ) { return 1; } | 
|  | 0 |  |  |  |  |  |  | 
| 842 | 0 | 0 | 0 |  |  |  | if( ($string1=~/^\s*$/) && !($string2=~/^\s*$/) ) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 843 | 0 | 0 | 0 |  |  |  | if( !($string1=~/^\s*$/) && ($string2=~/^\s*$/) ) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 844 |  |  |  |  |  |  |  | 
| 845 | 0 | 0 |  |  |  |  | if(!($string1=~/([A-Z]+) :: (include|exclude) (.*?)$/)) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 846 | 0 | 0 |  |  |  |  | if(!($string2=~/([A-Z]+) :: (include|exclude) (.*?)$/)) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 847 |  |  |  |  |  |  |  | 
| 848 | 0 |  |  |  |  |  | $string1=~/([A-Z]+) :: (include|exclude) (.*?)$/; | 
| 849 | 0 |  |  |  |  |  | my $option1 = $1; | 
| 850 | 0 |  |  |  |  |  | my $type1   = $2; | 
| 851 | 0 |  |  |  |  |  | my $param1  = $3; | 
| 852 |  |  |  |  |  |  |  | 
| 853 | 0 |  |  |  |  |  | $string2=~/([A-Z]+) :: (include|exclude) (.*?)$/; | 
| 854 | 0 |  |  |  |  |  | my $option2 = $1; | 
| 855 | 0 |  |  |  |  |  | my $type2   = $2; | 
| 856 | 0 |  |  |  |  |  | my $param2  = $3; | 
| 857 |  |  |  |  |  |  |  | 
| 858 | 0 | 0 |  |  |  |  | if($option1 ne $option2) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 859 | 0 | 0 |  |  |  |  | if($type1 ne $type2)     { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 860 |  |  |  |  |  |  |  | 
| 861 | 0 |  |  |  |  |  | my @array1 = split/\,/, $param1; | 
| 862 | 0 |  |  |  |  |  | my @array2 = split/\,/, $param2; | 
| 863 |  |  |  |  |  |  |  | 
| 864 |  |  |  |  |  |  |  | 
| 865 | 0 |  |  |  |  |  | my %hash = (); | 
| 866 |  |  |  |  |  |  |  | 
| 867 | 0 |  |  |  |  |  | foreach my $element (@array1) { $element=~s/\s+//g; $hash{$element}++; } | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 868 | 0 |  |  |  |  |  | foreach my $element (@array2) { $element=~s/\s+//g; $hash{$element}++; } | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 869 |  |  |  |  |  |  |  | 
| 870 | 0 |  |  |  |  |  | foreach my $element (sort keys %hash) { | 
| 871 | 0 | 0 |  |  |  |  | if($hash{$element} != 2) { return 0; } | 
|  | 0 |  |  |  |  |  |  | 
| 872 |  |  |  |  |  |  | } | 
| 873 |  |  |  |  |  |  |  | 
| 874 | 0 |  |  |  |  |  | return 1; | 
| 875 |  |  |  |  |  |  | } | 
| 876 |  |  |  |  |  |  |  | 
| 877 |  |  |  |  |  |  | #  check that the relations used are only RB/RN and/or PAR/CHD relations | 
| 878 |  |  |  |  |  |  | #  input : string  <- contain the relation line from config file | 
| 879 |  |  |  |  |  |  | #  output: 1|0     <- indicating if the string contains relations other | 
| 880 |  |  |  |  |  |  | #                     than RB/RN or PAR/CHD relations | 
| 881 |  |  |  |  |  |  | sub _checkHierarchicalRelations { | 
| 882 |  |  |  |  |  |  |  | 
| 883 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 884 | 0 |  |  |  |  |  | my $string  = shift; | 
| 885 |  |  |  |  |  |  |  | 
| 886 | 0 |  |  |  |  |  | my $function = "_checkHierarchicalRelations"; | 
| 887 | 0 |  |  |  |  |  | &_debug($function); | 
| 888 |  |  |  |  |  |  |  | 
| 889 | 0 |  |  |  |  |  | $string=~/([A-Z]+) :: (include|exclude) (.*?)$/; | 
| 890 | 0 |  |  |  |  |  | my $option = $1; | 
| 891 | 0 |  |  |  |  |  | my $type   = $2; | 
| 892 | 0 |  |  |  |  |  | my $param  = $3; | 
| 893 |  |  |  |  |  |  |  | 
| 894 | 0 |  |  |  |  |  | $param=~s/\s+//g; | 
| 895 |  |  |  |  |  |  |  | 
| 896 | 0 |  |  |  |  |  | my @rels = split/\s*\,\s*/, $param; | 
| 897 |  |  |  |  |  |  |  | 
| 898 | 0 |  |  |  |  |  | foreach my $rel (@rels) { | 
| 899 | 0 | 0 |  |  |  |  | if( !($rel=~/(PAR|CHD|RB|RN)/) ) { | 
| 900 | 0 |  |  |  |  |  | return 0; | 
| 901 |  |  |  |  |  |  | } | 
| 902 |  |  |  |  |  |  | } | 
| 903 |  |  |  |  |  |  |  | 
| 904 | 0 |  |  |  |  |  | return 1; | 
| 905 |  |  |  |  |  |  | } | 
| 906 |  |  |  |  |  |  |  | 
| 907 |  |  |  |  |  |  | #  load the propagation hash | 
| 908 |  |  |  |  |  |  | #  input : | 
| 909 |  |  |  |  |  |  | #  output: | 
| 910 |  |  |  |  |  |  | sub _loadPropagationHashFromFile { | 
| 911 |  |  |  |  |  |  |  | 
| 912 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 913 |  |  |  |  |  |  |  | 
| 914 | 0 |  |  |  |  |  | my $function = "_loadPropagationHashFromFile"; | 
| 915 | 0 |  |  |  |  |  | &_debug($function); | 
| 916 |  |  |  |  |  |  |  | 
| 917 |  |  |  |  |  |  | #  check self | 
| 918 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 919 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 920 |  |  |  |  |  |  | } | 
| 921 |  |  |  |  |  |  |  | 
| 922 |  |  |  |  |  |  | #  open the propagation file | 
| 923 | 0 | 0 |  |  |  |  | open(FILE, $propagationFile) || die "Could not open file $propagationFile\n"; | 
| 924 |  |  |  |  |  |  | #  check if smoothing was set | 
| 925 | 0 |  |  |  |  |  | my $psmooth = ; | 
| 926 |  |  |  |  |  |  |  | 
| 927 |  |  |  |  |  |  | #  get the source and relations associated with the propagation file | 
| 928 | 0 |  |  |  |  |  | my $sab  = ; chomp $sab; | 
|  | 0 |  |  |  |  |  |  | 
| 929 | 0 |  |  |  |  |  | my $rel  = ; chomp $rel; | 
|  | 0 |  |  |  |  |  |  | 
| 930 |  |  |  |  |  |  |  | 
| 931 |  |  |  |  |  |  | #  get the rela realtions associated with the propagation file if one exists | 
| 932 | 0 |  |  |  |  |  | my $rela = ; chomp $rela; | 
|  | 0 |  |  |  |  |  |  | 
| 933 |  |  |  |  |  |  |  | 
| 934 |  |  |  |  |  |  | #  if it does exist in then get N otherwise we got N already. | 
| 935 | 0 |  |  |  |  |  | my $ninfo = $rela; | 
| 936 | 0 | 0 |  |  |  |  | if($rela=~/RELA/) { | 
| 937 | 0 |  |  |  |  |  | $ninfo = ; chomp $ninfo; | 
|  | 0 |  |  |  |  |  |  | 
| 938 |  |  |  |  |  |  | } | 
| 939 |  |  |  |  |  |  | else { | 
| 940 | 0 |  |  |  |  |  | $rela = ""; | 
| 941 |  |  |  |  |  |  | } | 
| 942 |  |  |  |  |  |  |  | 
| 943 | 0 |  |  |  |  |  | $ninfo=~/N\s*\:\:\s*([0-9]+)/; | 
| 944 | 0 |  |  |  |  |  | $configN = $1; | 
| 945 |  |  |  |  |  |  |  | 
| 946 |  |  |  |  |  |  | #  get the source and relations from config file or the defaults | 
| 947 | 0 |  |  |  |  |  | my $configsab = $cuifinder->_getSabString(); | 
| 948 | 0 |  |  |  |  |  | my $configrel = $cuifinder->_getRelString(); | 
| 949 |  |  |  |  |  |  |  | 
| 950 |  |  |  |  |  |  | #  check the source information is correct | 
| 951 | 0 | 0 |  |  |  |  | if(! ($self->_checkParameters($configsab, $sab)) ) { | 
| 952 | 0 |  |  |  |  |  | my $str = "SAB information ($sab) does not match the config file ($configsab)."; | 
| 953 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 954 |  |  |  |  |  |  | } | 
| 955 |  |  |  |  |  |  |  | 
| 956 |  |  |  |  |  |  | #  check that that the relation information is correct | 
| 957 | 0 | 0 |  |  |  |  | if(! ($self->_checkParameters($configrel, $rel)) ) { | 
| 958 | 0 |  |  |  |  |  | my $str = "REL information ($rel) does not match the config file ($configrel)."; | 
| 959 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 960 |  |  |  |  |  |  | } | 
| 961 |  |  |  |  |  |  |  | 
| 962 |  |  |  |  |  |  | #  check if rela information was used | 
| 963 | 0 | 0 |  |  |  |  | if($rela ne "") { | 
| 964 | 0 | 0 |  |  |  |  | if(!($self->_checkParameters($_, $cuifinder->_getRelaString()))) { | 
| 965 | 0 |  |  |  |  |  | my $str = "RELA information does not match the config file ($_)."; | 
| 966 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 5); | 
| 967 |  |  |  |  |  |  | } | 
| 968 |  |  |  |  |  |  | } | 
| 969 |  |  |  |  |  |  | #  check that the relations used are acceptable for propagation | 
| 970 |  |  |  |  |  |  | #  the only acceptable relations are RB/RN and PAR/CHD | 
| 971 | 0 | 0 |  |  |  |  | if(! ($self->_checkHierarchicalRelations ($configrel)) ) { | 
| 972 | 0 |  |  |  |  |  | my $str = "REL information ($rel) contains relations other than RB/RN and PAR/CHD."; | 
| 973 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, $str, 11); | 
| 974 |  |  |  |  |  |  | } | 
| 975 |  |  |  |  |  |  |  | 
| 976 | 0 |  |  |  |  |  | while() { | 
| 977 | 0 |  |  |  |  |  | chomp; | 
| 978 |  |  |  |  |  |  |  | 
| 979 |  |  |  |  |  |  | #  if blank line move on | 
| 980 | 0 | 0 |  |  |  |  | if($_=~/^\s*$/) { next; } | 
|  | 0 |  |  |  |  |  |  | 
| 981 |  |  |  |  |  |  |  | 
| 982 |  |  |  |  |  |  | #  get the cui and its frequency count | 
| 983 | 0 |  |  |  |  |  | my ($cui, $freq) = split/<>/; | 
| 984 |  |  |  |  |  |  |  | 
| 985 |  |  |  |  |  |  | #  load it into the propagation hash | 
| 986 | 0 |  |  |  |  |  | $propagationHash{$cui} = $freq; | 
| 987 |  |  |  |  |  |  | } | 
| 988 |  |  |  |  |  |  | } | 
| 989 |  |  |  |  |  |  |  | 
| 990 |  |  |  |  |  |  | #  DEBUNKED FUNCTION? CHECK | 
| 991 |  |  |  |  |  |  | #  get the propagation count for a given cui | 
| 992 |  |  |  |  |  |  | #  input : $concept   <- string containing the cui | 
| 993 |  |  |  |  |  |  | #  output: $double|-1 <- the propagation count otherwise | 
| 994 |  |  |  |  |  |  | #                        a -1 if none existed for that cui | 
| 995 |  |  |  |  |  |  | sub _getPropagationCount { | 
| 996 |  |  |  |  |  |  |  | 
| 997 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 998 | 0 |  |  |  |  |  | my $concept = shift; | 
| 999 |  |  |  |  |  |  |  | 
| 1000 | 0 |  |  |  |  |  | my $function = "_getPropagationCount"; | 
| 1001 | 0 |  |  |  |  |  | &_debug($function); | 
| 1002 |  |  |  |  |  |  |  | 
| 1003 |  |  |  |  |  |  | #  check self | 
| 1004 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 1005 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 1006 |  |  |  |  |  |  | } | 
| 1007 |  |  |  |  |  |  |  | 
| 1008 |  |  |  |  |  |  | #  check concept was obtained | 
| 1009 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 1010 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 1011 |  |  |  |  |  |  | } | 
| 1012 |  |  |  |  |  |  |  | 
| 1013 |  |  |  |  |  |  | #  check if valid concept | 
| 1014 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 1015 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 |  |  |  |  |  |  |  | 
| 1018 |  |  |  |  |  |  | #  propagate the counts | 
| 1019 | 0 |  |  |  |  |  | $self->_propagateCounts(); | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 |  |  |  |  |  |  | #  if the concept exists in the propagation hash | 
| 1022 |  |  |  |  |  |  | #  return the probability otherwise return a -1 | 
| 1023 | 0 | 0 |  |  |  |  | if(exists $propagationHash{$concept}) { | 
| 1024 | 0 |  |  |  |  |  | return $propagationHash{$concept}; | 
| 1025 |  |  |  |  |  |  | } | 
| 1026 |  |  |  |  |  |  | else { | 
| 1027 | 0 |  |  |  |  |  | return -1; | 
| 1028 |  |  |  |  |  |  | } | 
| 1029 |  |  |  |  |  |  |  | 
| 1030 |  |  |  |  |  |  | } | 
| 1031 |  |  |  |  |  |  |  | 
| 1032 |  |  |  |  |  |  | #  method which actually propagates the counts | 
| 1033 |  |  |  |  |  |  | #  input : $hash <- reference to the hash containing | 
| 1034 |  |  |  |  |  |  | #                   the frequency counts | 
| 1035 |  |  |  |  |  |  | #  output: | 
| 1036 |  |  |  |  |  |  | sub _propagateCounts { | 
| 1037 |  |  |  |  |  |  |  | 
| 1038 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 1039 | 0 |  |  |  |  |  | my $fhash = shift; | 
| 1040 |  |  |  |  |  |  |  | 
| 1041 | 0 |  |  |  |  |  | my $function = "_propagateCounts"; | 
| 1042 | 0 |  |  |  |  |  | &_debug($function); | 
| 1043 |  |  |  |  |  |  |  | 
| 1044 |  |  |  |  |  |  | #  check self | 
| 1045 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 1046 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 1047 |  |  |  |  |  |  | } | 
| 1048 |  |  |  |  |  |  |  | 
| 1049 |  |  |  |  |  |  | #  check the parameters | 
| 1050 | 0 | 0 |  |  |  |  | if(!defined $fhash) { | 
| 1051 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Input variable \%fhash  not defined.", 4); | 
| 1052 |  |  |  |  |  |  | } | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | #  initialize the propagation hash | 
| 1055 | 0 |  |  |  |  |  | $self->_initializePropagationHash(); | 
| 1056 |  |  |  |  |  |  |  | 
| 1057 |  |  |  |  |  |  | #  load the propagation frequency hash | 
| 1058 | 0 |  |  |  |  |  | $self->_loadPropagationFreq($fhash); | 
| 1059 |  |  |  |  |  |  |  | 
| 1060 |  |  |  |  |  |  | #  propagate the counts | 
| 1061 | 0 |  |  |  |  |  | my @array = (); | 
| 1062 | 0 |  |  |  |  |  | $self->_propagation($root, \@array); | 
| 1063 |  |  |  |  |  |  |  | 
| 1064 |  |  |  |  |  |  | #  tally up the propagation counts | 
| 1065 | 0 |  |  |  |  |  | $self->_tallyCounts(); | 
| 1066 |  |  |  |  |  |  |  | 
| 1067 | 0 |  |  |  |  |  | my $k = keys %propagationHash; | 
| 1068 |  |  |  |  |  |  |  | 
| 1069 |  |  |  |  |  |  | #  return the propagation counts | 
| 1070 | 0 |  |  |  |  |  | return \%propagationHash; | 
| 1071 |  |  |  |  |  |  | } | 
| 1072 |  |  |  |  |  |  |  | 
| 1073 |  |  |  |  |  |  | #  method that tallys up the probability counts of the | 
| 1074 |  |  |  |  |  |  | #  cui and its decendants and then calculates the ic | 
| 1075 |  |  |  |  |  |  | #  input : | 
| 1076 |  |  |  |  |  |  | #  output: | 
| 1077 |  |  |  |  |  |  | sub _tallyCounts { | 
| 1078 |  |  |  |  |  |  |  | 
| 1079 | 0 |  |  | 0 |  |  | my $self = shift; | 
| 1080 |  |  |  |  |  |  |  | 
| 1081 | 0 |  |  |  |  |  | my $function = "_tallyCounts"; | 
| 1082 | 0 |  |  |  |  |  | &_debug($function); | 
| 1083 |  |  |  |  |  |  |  | 
| 1084 |  |  |  |  |  |  | #  check self | 
| 1085 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 1086 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 1087 |  |  |  |  |  |  | } | 
| 1088 |  |  |  |  |  |  |  | 
| 1089 | 0 |  |  |  |  |  | foreach my $cui (sort keys %propagationHash) { | 
| 1090 | 0 |  |  |  |  |  | my $set    = $propagationHash{$cui}; | 
| 1091 | 0 |  |  |  |  |  | my $pcount = $propagationFreq{$cui}; | 
| 1092 |  |  |  |  |  |  |  | 
| 1093 | 0 | 0 |  |  |  |  | if(defined $set) { | 
| 1094 | 0 |  |  |  |  |  | my %hash = (); | 
| 1095 | 0 |  |  |  |  |  | while($set=~/(C[0-9][0-9][0-9][0-9][0-9][0-9][0-9])/g) { | 
| 1096 | 0 |  |  |  |  |  | my $c = $1; | 
| 1097 | 0 | 0 |  |  |  |  | if(! (exists $hash{$c}) ) { | 
| 1098 | 0 |  |  |  |  |  | $pcount += $propagationFreq{$c}; | 
| 1099 | 0 |  |  |  |  |  | $hash{$c}++; | 
| 1100 |  |  |  |  |  |  | } | 
| 1101 |  |  |  |  |  |  | } | 
| 1102 |  |  |  |  |  |  | } | 
| 1103 | 0 |  |  |  |  |  | $propagationHash{$cui} = $pcount; | 
| 1104 |  |  |  |  |  |  | } | 
| 1105 |  |  |  |  |  |  | } | 
| 1106 |  |  |  |  |  |  |  | 
| 1107 |  |  |  |  |  |  | #  recursive method that acuatlly performs the propagation | 
| 1108 |  |  |  |  |  |  | #  input : $concept <- string containing the cui | 
| 1109 |  |  |  |  |  |  | #          $array   <- reference to the array containing | 
| 1110 |  |  |  |  |  |  | #                      the cui's decendants | 
| 1111 |  |  |  |  |  |  | #  output: $concept <- string containing the cui | 
| 1112 |  |  |  |  |  |  | #          $array   <- reference to the array containing | 
| 1113 |  |  |  |  |  |  | #                      the cui's decendants | 
| 1114 |  |  |  |  |  |  | sub _propagation { | 
| 1115 |  |  |  |  |  |  |  | 
| 1116 | 0 |  |  | 0 |  |  | my $self    = shift; | 
| 1117 | 0 |  |  |  |  |  | my $concept = shift; | 
| 1118 | 0 |  |  |  |  |  | my $array   = shift; | 
| 1119 |  |  |  |  |  |  |  | 
| 1120 | 0 |  |  |  |  |  | my $function = "_propagation"; | 
| 1121 |  |  |  |  |  |  |  | 
| 1122 |  |  |  |  |  |  | #  check self | 
| 1123 | 0 | 0 | 0 |  |  |  | if(!defined $self || !ref $self) { | 
| 1124 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "", 2); | 
| 1125 |  |  |  |  |  |  | } | 
| 1126 |  |  |  |  |  |  |  | 
| 1127 |  |  |  |  |  |  | #  check concept was obtained | 
| 1128 | 0 | 0 |  |  |  |  | if(!$concept) { | 
| 1129 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4); | 
| 1130 |  |  |  |  |  |  | } | 
| 1131 |  |  |  |  |  |  |  | 
| 1132 |  |  |  |  |  |  | #  check if valid concept | 
| 1133 | 0 | 0 |  |  |  |  | if(! ($errorhandler->_validCui($concept)) ) { | 
| 1134 | 0 |  |  |  |  |  | $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6); | 
| 1135 |  |  |  |  |  |  | } | 
| 1136 |  |  |  |  |  |  |  | 
| 1137 |  |  |  |  |  |  | #  if the concept is inactive | 
| 1138 | 0 | 0 |  |  |  |  | if($cuifinder->_forbiddenConcept($concept)) { return; } | 
|  | 0 |  |  |  |  |  |  | 
| 1139 |  |  |  |  |  |  |  | 
| 1140 |  |  |  |  |  |  | #  set up the new path | 
| 1141 | 0 |  |  |  |  |  | my @intermediate = @{$array}; | 
|  | 0 |  |  |  |  |  |  | 
| 1142 | 0 |  |  |  |  |  | push @intermediate, $concept; | 
| 1143 | 0 |  |  |  |  |  | my $series = join " ", @intermediate; | 
| 1144 |  |  |  |  |  |  |  | 
| 1145 |  |  |  |  |  |  | #  initialize the set | 
| 1146 | 0 |  |  |  |  |  | my $set = $propagationHash{$concept}; | 
| 1147 |  |  |  |  |  |  |  | 
| 1148 |  |  |  |  |  |  | #  if the propagation hash already contains a list of CUIs it | 
| 1149 |  |  |  |  |  |  | #  is from its decendants so it has been here before so all we | 
| 1150 |  |  |  |  |  |  | #  have to do is return the list of ancestors with it added | 
| 1151 | 0 | 0 |  |  |  |  | if(defined $set) { | 
| 1152 | 0 | 0 |  |  |  |  | if(! ($set=~/^\s*$/)) { | 
| 1153 | 0 |  |  |  |  |  | $set .= " $concept"; | 
| 1154 | 0 |  |  |  |  |  | return $set; | 
| 1155 |  |  |  |  |  |  | } | 
| 1156 |  |  |  |  |  |  | } | 
| 1157 |  |  |  |  |  |  |  | 
| 1158 |  |  |  |  |  |  | #  get all the children | 
| 1159 | 0 |  |  |  |  |  | my $children = $cuifinder->_getChildren($concept); | 
| 1160 |  |  |  |  |  |  |  | 
| 1161 |  |  |  |  |  |  | #  search through the children | 
| 1162 | 0 |  |  |  |  |  | foreach my $child (@{$children}) { | 
|  | 0 |  |  |  |  |  |  | 
| 1163 |  |  |  |  |  |  |  | 
| 1164 | 0 |  |  |  |  |  | my $flag = 0; | 
| 1165 |  |  |  |  |  |  |  | 
| 1166 |  |  |  |  |  |  | #  check that the concept is not one of the forbidden concepts | 
| 1167 | 0 | 0 |  |  |  |  | if($cuifinder->_forbiddenConcept($child)) { $flag = 1; } | 
|  | 0 |  |  |  |  |  |  | 
| 1168 |  |  |  |  |  |  |  | 
| 1169 |  |  |  |  |  |  | #  check if child cui has already in the path | 
| 1170 | 0 |  |  |  |  |  | foreach my $cui (@intermediate) { | 
| 1171 | 0 | 0 |  |  |  |  | if($cui eq $child) { $flag = 1; } | 
|  | 0 |  |  |  |  |  |  | 
| 1172 |  |  |  |  |  |  | } | 
| 1173 |  |  |  |  |  |  |  | 
| 1174 |  |  |  |  |  |  | #  if it isn't continue on with the depth first search | 
| 1175 | 0 | 0 |  |  |  |  | if($flag == 0) { | 
| 1176 | 0 |  |  |  |  |  | $set .= " "; | 
| 1177 | 0 |  |  |  |  |  | $set .= $self->_propagation($child, \@intermediate); | 
| 1178 |  |  |  |  |  |  | } | 
| 1179 |  |  |  |  |  |  | } | 
| 1180 |  |  |  |  |  |  |  | 
| 1181 |  |  |  |  |  |  | #  remove duplicates from the set | 
| 1182 | 0 |  |  |  |  |  | my $rset; | 
| 1183 | 0 | 0 |  |  |  |  | if(defined $set) { | 
| 1184 | 0 |  |  |  |  |  | $rset = _breduce($set); | 
| 1185 |  |  |  |  |  |  | } | 
| 1186 |  |  |  |  |  |  | #  store the set in the propagation hash | 
| 1187 | 0 |  |  |  |  |  | $propagationHash{$concept} = $rset; | 
| 1188 |  |  |  |  |  |  |  | 
| 1189 |  |  |  |  |  |  | #  add the concept to the set | 
| 1190 | 0 |  |  |  |  |  | $rset .= " $concept"; | 
| 1191 |  |  |  |  |  |  |  | 
| 1192 |  |  |  |  |  |  | #  return the set | 
| 1193 | 0 |  |  |  |  |  | return $rset; | 
| 1194 |  |  |  |  |  |  | } | 
| 1195 |  |  |  |  |  |  |  | 
| 1196 |  |  |  |  |  |  | #  removes duplicates in an array | 
| 1197 |  |  |  |  |  |  | #  input : $array <- reference to an array | 
| 1198 |  |  |  |  |  |  | #  output: | 
| 1199 |  |  |  |  |  |  | sub _breduce { | 
| 1200 |  |  |  |  |  |  |  | 
| 1201 | 0 |  |  | 0 |  |  | local($_)= @_; | 
| 1202 | 0 |  |  |  |  |  | my (@words)= split; | 
| 1203 | 0 |  |  |  |  |  | my (%newwords); | 
| 1204 | 0 |  |  |  |  |  | for (@words) { $newwords{$_}=1 } | 
|  | 0 |  |  |  |  |  |  | 
| 1205 | 0 |  |  |  |  |  | join ' ', keys(%newwords); | 
| 1206 |  |  |  |  |  |  | } | 
| 1207 |  |  |  |  |  |  |  | 
| 1208 |  |  |  |  |  |  |  | 
| 1209 |  |  |  |  |  |  | 1; | 
| 1210 |  |  |  |  |  |  |  | 
| 1211 |  |  |  |  |  |  | __END__ |