line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# UMLS::Interface::STFinder |
2
|
|
|
|
|
|
|
# (Last Updated $Id: STFinder.pm,v 1.5 2011/05/12 17:21:24 btmcinnes Exp $) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Perl module that provides a perl interface to the |
5
|
|
|
|
|
|
|
# Unified Medical Language System (UMLS) |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright (c) 2004-2010, |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Bridget T. McInnes, University of Minnesota Twin Cities |
10
|
|
|
|
|
|
|
# bthomson at cs.umn.edu |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# Serguei Pakhomov, University of Minnesota Twin Cities |
13
|
|
|
|
|
|
|
# pakh0002 at umn.edu |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# Ted Pedersen, University of Minnesota, Duluth |
16
|
|
|
|
|
|
|
# tpederse at d.umn.edu |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
# Ying Liu, University of Minnesota |
19
|
|
|
|
|
|
|
# liux0935 at umn.edu |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
22
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
23
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
24
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
25
|
|
|
|
|
|
|
# |
26
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
27
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
28
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
29
|
|
|
|
|
|
|
# GNU General Public License for more details. |
30
|
|
|
|
|
|
|
# |
31
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
32
|
|
|
|
|
|
|
# along with this program; if not, write to |
33
|
|
|
|
|
|
|
# |
34
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
35
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
36
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
package UMLS::Interface::STFinder; |
39
|
|
|
|
|
|
|
|
40
|
24
|
|
|
24
|
|
131
|
use Fcntl; |
|
24
|
|
|
|
|
48
|
|
|
24
|
|
|
|
|
6813
|
|
41
|
24
|
|
|
24
|
|
128
|
use strict; |
|
24
|
|
|
|
|
46
|
|
|
24
|
|
|
|
|
556
|
|
42
|
24
|
|
|
24
|
|
126
|
use warnings; |
|
24
|
|
|
|
|
39
|
|
|
24
|
|
|
|
|
667
|
|
43
|
24
|
|
|
24
|
|
122
|
use bytes; |
|
24
|
|
|
|
|
38
|
|
|
24
|
|
|
|
|
146
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $pkg = "UMLS::Interface::STFinder"; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $debug = 0; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
my $root = "T000"; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my $stN = 0; |
52
|
|
|
|
|
|
|
my $smooth = 0; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
my $option_verbose = 0; |
55
|
|
|
|
|
|
|
my $option_debugpath = 0; |
56
|
|
|
|
|
|
|
my $option_t = 0; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my %children = (); |
59
|
|
|
|
|
|
|
my %parents = (); |
60
|
|
|
|
|
|
|
my %propagationFreq = (); |
61
|
|
|
|
|
|
|
my %propagationHash = (); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
my %maxDepth = (); |
64
|
|
|
|
|
|
|
my %minDepth = (); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my $errorhandler = ""; |
67
|
|
|
|
|
|
|
my $cuifinder = ""; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
local(*DEBUG_FILE); |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# UMLS-specific stuff ends ---------- |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# -------------------- Class methods start here -------------------- |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# method to create a new UMLS::Interface::STFinder object |
76
|
|
|
|
|
|
|
sub new { |
77
|
|
|
|
|
|
|
|
78
|
0
|
|
|
0
|
0
|
|
my $self = {}; |
79
|
0
|
|
|
|
|
|
my $className = shift; |
80
|
0
|
|
|
|
|
|
my $params = shift; |
81
|
0
|
|
|
|
|
|
my $handler = shift; |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
my $function = "new"; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# bless the object. |
86
|
0
|
|
|
|
|
|
bless($self, $className); |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# initialize the global variables |
89
|
0
|
|
|
|
|
|
$self->_initializeGlobalVariables(); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# initialize error handler |
92
|
0
|
|
|
|
|
|
$errorhandler = UMLS::Interface::ErrorHandler->new(); |
93
|
0
|
0
|
|
|
|
|
if(! defined $errorhandler) { |
94
|
0
|
|
|
|
|
|
print STDERR "The error handler did not get passed properly.\n"; |
95
|
0
|
|
|
|
|
|
exit; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# initialize the cuifinder |
99
|
0
|
|
|
|
|
|
$cuifinder = $handler; |
100
|
0
|
0
|
|
|
|
|
if(! (defined $handler)) { |
101
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, |
102
|
|
|
|
|
|
|
"new", |
103
|
|
|
|
|
|
|
"The CuiFinder handler did not get passed properly", |
104
|
|
|
|
|
|
|
8); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# set up the options |
108
|
0
|
|
|
|
|
|
$self->_setOptions($params); |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# get the umls database from CuiFinder |
111
|
0
|
|
|
|
|
|
my $db = $cuifinder->_getDB(); |
112
|
0
|
0
|
|
|
|
|
if(!$db) { $errorhandler->_error($pkg, $function, "Error with db.", 3); } |
|
0
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
$self->{'db'} = $db; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# load the semantic network |
116
|
0
|
|
|
|
|
|
$self->_loadSemanticNetwork(); |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
return $self; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# returns the information content (IC) of a semantic type |
123
|
|
|
|
|
|
|
# input : $semantic type <- string containing a semantic type |
124
|
|
|
|
|
|
|
# output: $double <- double containing its IC |
125
|
|
|
|
|
|
|
sub _getStIC |
126
|
|
|
|
|
|
|
{ |
127
|
0
|
|
|
0
|
|
|
my $self = shift; |
128
|
0
|
|
|
|
|
|
my $st = shift; |
129
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $function = "_getIC"; |
131
|
0
|
|
|
|
|
|
&_debug($function); |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# check self |
134
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
135
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# check st was obtained |
139
|
0
|
0
|
|
|
|
|
if(!$st) { |
140
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st.", 4); |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# check if valid semantic type |
144
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st)) ) { |
145
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($st) in not valid.", 6); |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
my $prob = $propagationHash{$st}; |
149
|
|
|
|
|
|
|
|
150
|
0
|
0
|
|
|
|
|
if(!defined $prob) { return 0; } |
|
0
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
0
|
|
|
|
|
|
my $ic = 0; |
153
|
0
|
0
|
0
|
|
|
|
if($prob > 0 and $prob < 1) { $ic = -1 * (log($prob) / log(10)); } |
|
0
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
|
return $ic; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# returns the probability of the semantic type |
159
|
|
|
|
|
|
|
# input : $semantic type <- string containing a semantic type |
160
|
|
|
|
|
|
|
# output: $double <- double containing its probability |
161
|
|
|
|
|
|
|
sub _getStProbability |
162
|
|
|
|
|
|
|
{ |
163
|
0
|
|
|
0
|
|
|
my $self = shift; |
164
|
0
|
|
|
|
|
|
my $st = shift; |
165
|
|
|
|
|
|
|
|
166
|
0
|
|
|
|
|
|
my $function = "_getStProbability"; |
167
|
0
|
|
|
|
|
|
&_debug($function); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# check self |
170
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
171
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# check st was obtained |
175
|
0
|
0
|
|
|
|
|
if(!$st) { |
176
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st.", 4); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# check if valid semantic type |
180
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st)) ) { |
181
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($st) in not valid.", 6); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
0
|
|
|
|
|
|
my $prob = $propagationHash{$st}; |
185
|
|
|
|
|
|
|
|
186
|
0
|
0
|
|
|
|
|
if(!defined $prob) { return 0; } |
|
0
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
|
return $prob; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# method to set the smoothing parameter and increments the frequency |
192
|
|
|
|
|
|
|
# count to one |
193
|
|
|
|
|
|
|
# input : |
194
|
|
|
|
|
|
|
# output: |
195
|
|
|
|
|
|
|
sub _setStSmoothing |
196
|
|
|
|
|
|
|
{ |
197
|
0
|
|
|
0
|
|
|
my $self = shift; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# set function name |
200
|
0
|
|
|
|
|
|
my $function = "_setSmoothing"; |
201
|
0
|
|
|
|
|
|
&_debug($function); |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# check self |
204
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
205
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
|
foreach my $st (sort keys %propagationFreq) { |
209
|
0
|
|
|
|
|
|
$propagationFreq{$st} = 1; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
0
|
|
|
|
|
|
$smooth = 1; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# propagates the given frequency counts of the semantic types |
216
|
|
|
|
|
|
|
# input : $hash <- reference to the hash containing |
217
|
|
|
|
|
|
|
# the frequency counts |
218
|
|
|
|
|
|
|
# output: $hash <- containing the propagation counts of all |
219
|
|
|
|
|
|
|
# the semantic types |
220
|
|
|
|
|
|
|
sub _propagateStCounts |
221
|
|
|
|
|
|
|
{ |
222
|
0
|
|
|
0
|
|
|
my $self = shift; |
223
|
0
|
|
|
|
|
|
my $hash = shift; |
224
|
|
|
|
|
|
|
|
225
|
0
|
|
|
|
|
|
my $function = "_propagateStCounts"; |
226
|
0
|
|
|
|
|
|
&_debug($function); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# check self |
229
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
230
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# check the parameters |
234
|
0
|
0
|
|
|
|
|
if(!defined $hash) { |
235
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Input variable \%hash not defined.", 4); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# load the propagation frequency hash |
239
|
0
|
|
|
|
|
|
$self->_loadStPropagationFreq($hash); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# propagate the counts |
242
|
0
|
|
|
|
|
|
my @array = (); |
243
|
0
|
|
|
|
|
|
$self->_propagation($root, \@array); |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# tally up the propagation counts |
246
|
0
|
|
|
|
|
|
$self->_tallyCounts(); |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# return the propagation counts |
249
|
0
|
|
|
|
|
|
return \%propagationHash; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# method that tallys up the probability counts of the |
254
|
|
|
|
|
|
|
# cui and its decendants and then calculates the ic |
255
|
|
|
|
|
|
|
# input : |
256
|
|
|
|
|
|
|
# output: |
257
|
|
|
|
|
|
|
sub _tallyCounts { |
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
0
|
|
|
my $self = shift; |
260
|
|
|
|
|
|
|
|
261
|
0
|
|
|
|
|
|
my $function = "_tallyCounts"; |
262
|
0
|
|
|
|
|
|
&_debug($function); |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# check self |
265
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
266
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
0
|
|
|
|
|
|
foreach my $st (sort keys %propagationHash) { |
270
|
0
|
|
|
|
|
|
my $set = $propagationHash{$st}; |
271
|
0
|
|
|
|
|
|
my $pcount = $propagationFreq{$st}; |
272
|
|
|
|
|
|
|
|
273
|
0
|
0
|
|
|
|
|
if(defined $set) { |
274
|
0
|
|
|
|
|
|
print "$st : $set\n"; |
275
|
0
|
|
|
|
|
|
my %hash = (); |
276
|
0
|
|
|
|
|
|
while($set=~/(T[0-9][0-9][0-9])/g) { |
277
|
0
|
|
|
|
|
|
my $s = $1; |
278
|
0
|
0
|
|
|
|
|
if(! (exists $hash{$s}) ) { |
279
|
0
|
|
|
|
|
|
$pcount += $propagationFreq{$s}; |
280
|
0
|
|
|
|
|
|
$hash{$s}++; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
0
|
|
|
|
|
|
$propagationHash{$st} = $pcount; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# returns the maximum depth of a semantic type in the network |
289
|
|
|
|
|
|
|
# input : $st <- string containing the semantic type |
290
|
|
|
|
|
|
|
# output: $int <- maximum depth of hte semantic type |
291
|
|
|
|
|
|
|
sub _getMaxStDepth { |
292
|
|
|
|
|
|
|
|
293
|
0
|
|
|
0
|
|
|
my $self = shift; |
294
|
0
|
|
|
|
|
|
my $st = shift; |
295
|
|
|
|
|
|
|
|
296
|
0
|
|
|
|
|
|
my $function = "_getMaxStDepth"; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# check self |
299
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
300
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# check st was obtained |
304
|
0
|
0
|
|
|
|
|
if(!$st) { |
305
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st.", 4); |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# check if valid semantic type |
309
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st)) ) { |
310
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($st) in not valid.", 6); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
0
|
0
|
|
|
|
|
if(exists $maxDepth{$st}) { return $maxDepth{$st}; } |
|
0
|
|
|
|
|
|
|
314
|
0
|
|
|
|
|
|
else { return -1; } |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# returns the minimum depth of a semantic type in the network |
318
|
|
|
|
|
|
|
# input : $st <- string containing the semantic type |
319
|
|
|
|
|
|
|
# output: $int <- minimum depth of hte semantic type |
320
|
|
|
|
|
|
|
sub _getMinStDepth { |
321
|
|
|
|
|
|
|
|
322
|
0
|
|
|
0
|
|
|
my $self = shift; |
323
|
0
|
|
|
|
|
|
my $st = shift; |
324
|
|
|
|
|
|
|
|
325
|
0
|
|
|
|
|
|
my $function = "_getMinStDepth"; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# check self |
328
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
329
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# check st was obtained |
333
|
0
|
0
|
|
|
|
|
if(!$st) { |
334
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st.", 4); |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# check if valid semantic type |
338
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st)) ) { |
339
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($st) in not valid.", 6); |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
0
|
0
|
|
|
|
|
if(exists $minDepth{$st}) { return $minDepth{$st}; } |
|
0
|
|
|
|
|
|
|
343
|
0
|
|
|
|
|
|
else { return -1; } |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# recursive method that actually performs the propagation |
348
|
|
|
|
|
|
|
# input : $st <- string containing the semantic type |
349
|
|
|
|
|
|
|
# $array <- reference to the array containing |
350
|
|
|
|
|
|
|
# the semantic type's decendants |
351
|
|
|
|
|
|
|
# output: $st <- string containing the semantic type |
352
|
|
|
|
|
|
|
# $array <- reference to the array containing |
353
|
|
|
|
|
|
|
# the semantic type's decendants |
354
|
|
|
|
|
|
|
sub _propagation { |
355
|
0
|
|
|
0
|
|
|
my $self = shift; |
356
|
0
|
|
|
|
|
|
my $st = shift; |
357
|
0
|
|
|
|
|
|
my $array = shift; |
358
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
my $function = "_propagation"; |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# check self |
362
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
363
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# check st was obtained |
367
|
0
|
0
|
|
|
|
|
if(!$st) { |
368
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st.", 4); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# check if valid semantic type |
372
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st)) ) { |
373
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($st) in not valid.", 6); |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
# set up the new path |
377
|
0
|
|
|
|
|
|
my @intermediate = @{$array}; |
|
0
|
|
|
|
|
|
|
378
|
0
|
|
|
|
|
|
push @intermediate, $st; |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# get depth |
381
|
0
|
|
|
|
|
|
my $depth = $#intermediate; |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# set the maximum depth |
384
|
0
|
0
|
|
|
|
|
if(exists $maxDepth{$st}) { |
385
|
0
|
0
|
|
|
|
|
if($maxDepth{$st} < $depth) { $maxDepth{$st} = $depth; } |
|
0
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
} else { $maxDepth{$st} = $depth; } |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# set the minimum depth |
389
|
0
|
0
|
|
|
|
|
if(exists $minDepth{$st}) { |
390
|
0
|
0
|
|
|
|
|
if($minDepth{$st} > $depth) { $minDepth{$st} = $depth; } |
|
0
|
|
|
|
|
|
|
391
|
0
|
|
|
|
|
|
} else { $minDepth{$st} = $depth; } |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# initialize the set |
395
|
0
|
|
|
|
|
|
my $set = $propagationHash{$st}; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# if the propagation hash already contains a list of CUIs it |
398
|
|
|
|
|
|
|
# is from its decendants so it has been here before so all we |
399
|
|
|
|
|
|
|
# have to do is return the list of ancestors with it added |
400
|
0
|
0
|
|
|
|
|
if(defined $set) { |
401
|
0
|
0
|
|
|
|
|
if(! ($set=~/^\s*$/)) { |
402
|
0
|
|
|
|
|
|
$set .= " $st"; |
403
|
0
|
|
|
|
|
|
return $set; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# search through the children |
408
|
0
|
|
|
|
|
|
foreach my $child (@{$children{$st}}) { |
|
0
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
my $flag = 0; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# check if child semantic type has already in the path |
413
|
0
|
|
|
|
|
|
foreach my $s (@intermediate) { |
414
|
0
|
0
|
|
|
|
|
if($s eq $child) { $flag = 1; } |
|
0
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# if it isn't continue on with the depth first search |
418
|
0
|
0
|
|
|
|
|
if($flag == 0) { |
419
|
0
|
|
|
|
|
|
$set .= " "; |
420
|
0
|
|
|
|
|
|
$set .= $self->_propagation($child, \@intermediate); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# remove duplicates from the set |
425
|
0
|
|
|
|
|
|
my $rset; |
426
|
0
|
0
|
|
|
|
|
if(defined $set) { |
427
|
0
|
|
|
|
|
|
$rset = _breduce($set); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
# store the set in the propagation hash |
430
|
0
|
|
|
|
|
|
$propagationHash{$st} = $rset; |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# add the concept to the set |
433
|
0
|
|
|
|
|
|
$rset .= " $st"; |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# return the set |
436
|
0
|
|
|
|
|
|
return $rset; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# removes duplicates in an array |
440
|
|
|
|
|
|
|
# input : $array <- reference to an array |
441
|
|
|
|
|
|
|
# output: |
442
|
|
|
|
|
|
|
sub _breduce { |
443
|
|
|
|
|
|
|
|
444
|
0
|
|
|
0
|
|
|
local($_)= @_; |
445
|
0
|
|
|
|
|
|
my (@words)= split; |
446
|
0
|
|
|
|
|
|
my (%newwords); |
447
|
0
|
|
|
|
|
|
for (@words) { $newwords{$_}=1 } |
|
0
|
|
|
|
|
|
|
448
|
0
|
|
|
|
|
|
join ' ', keys(%newwords); |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# load the propagation frequency has with the frequency counts |
452
|
|
|
|
|
|
|
# input : $hash <- reference to hash containing frequency counts |
453
|
|
|
|
|
|
|
# output: |
454
|
|
|
|
|
|
|
sub _loadStPropagationFreq { |
455
|
|
|
|
|
|
|
|
456
|
0
|
|
|
0
|
|
|
my $self = shift; |
457
|
0
|
|
|
|
|
|
my $fhash = shift; |
458
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
|
my $function = "_loadStPropagationFreq"; |
460
|
0
|
|
|
|
|
|
&_debug($function); |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# check self |
463
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
464
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# loop through and set the frequency count |
468
|
0
|
|
|
|
|
|
my $N = 0; |
469
|
0
|
|
|
|
|
|
foreach my $st (sort keys %{$fhash}) { |
|
0
|
|
|
|
|
|
|
470
|
0
|
0
|
|
|
|
|
if($st=~/^\s*$/) { next; } |
|
0
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
|
my $freq = ${$fhash}{$st}; |
|
0
|
|
|
|
|
|
|
472
|
0
|
0
|
|
|
|
|
if(exists $propagationFreq{$st}) { |
473
|
0
|
|
|
|
|
|
$propagationFreq{$st} += $freq; |
474
|
|
|
|
|
|
|
} |
475
|
0
|
|
|
|
|
|
$N = $N + $freq; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
0
|
0
|
|
|
|
|
if($smooth == 1) { |
479
|
0
|
|
|
|
|
|
my $k = keys %propagationFreq; |
480
|
0
|
|
|
|
|
|
$N += $k; |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# set N for the config file |
484
|
0
|
|
|
|
|
|
$stN = $N; |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# loop through again and set the probability |
487
|
0
|
|
|
|
|
|
foreach my $st (sort keys %propagationFreq) { |
488
|
0
|
|
|
|
|
|
$propagationFreq{$st} = ($propagationFreq{$st}) / $N; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
# load the propagation hash has with the probability counts |
492
|
|
|
|
|
|
|
# input : $hash <- reference to hash containing the probability counts |
493
|
|
|
|
|
|
|
# output: |
494
|
|
|
|
|
|
|
sub _loadStPropagationHash { |
495
|
|
|
|
|
|
|
|
496
|
0
|
|
|
0
|
|
|
my $self = shift; |
497
|
0
|
|
|
|
|
|
my $fhash = shift; |
498
|
|
|
|
|
|
|
|
499
|
0
|
|
|
|
|
|
my $function = "_loadStPropagationHash"; |
500
|
0
|
|
|
|
|
|
&_debug($function); |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# check self |
503
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
504
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# load the propagation hash with the probabilities |
508
|
0
|
|
|
|
|
|
%propagationHash = (); |
509
|
0
|
|
|
|
|
|
foreach my $st (sort keys %{$fhash}) { |
|
0
|
|
|
|
|
|
|
510
|
0
|
0
|
|
|
|
|
if($st=~/^\s*$/) { next; } |
|
0
|
|
|
|
|
|
|
511
|
0
|
|
|
|
|
|
my $prob = ${$fhash}{$st}; |
|
0
|
|
|
|
|
|
|
512
|
0
|
|
|
|
|
|
$propagationHash{$st} = $prob; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# returns the stN - the total number of semantic types |
517
|
|
|
|
|
|
|
# input : |
518
|
|
|
|
|
|
|
# output: integer <- total number of semantic types |
519
|
|
|
|
|
|
|
sub _getStN |
520
|
|
|
|
|
|
|
{ |
521
|
0
|
|
|
0
|
|
|
my $self = shift; |
522
|
|
|
|
|
|
|
|
523
|
0
|
|
|
|
|
|
my $function = "_getStN"; |
524
|
0
|
|
|
|
|
|
&_debug($function); |
525
|
|
|
|
|
|
|
|
526
|
0
|
|
|
|
|
|
return $stN; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# method to load the semantic network |
530
|
|
|
|
|
|
|
# input : |
531
|
|
|
|
|
|
|
# output: |
532
|
|
|
|
|
|
|
sub _loadSemanticNetwork { |
533
|
|
|
|
|
|
|
|
534
|
0
|
|
|
0
|
|
|
my $self = shift; |
535
|
|
|
|
|
|
|
|
536
|
0
|
|
|
|
|
|
my $function = "_loadSemanticNetwork"; |
537
|
0
|
|
|
|
|
|
&_debug($function); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# check self |
540
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
541
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# set the index DB handler |
545
|
0
|
|
|
|
|
|
my $db = $self->{'db'}; |
546
|
0
|
0
|
|
|
|
|
if(!$db) { $errorhandler->_error($pkg, $function, "Error with db.", 3); } |
|
0
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
0
|
|
|
|
|
|
my %upper = (); |
549
|
|
|
|
|
|
|
# get the is-a relations (T186) between the semantic types |
550
|
|
|
|
|
|
|
# set the parent taxonomy |
551
|
0
|
|
|
|
|
|
my $sql = qq{ SELECT UI1, UI3 FROM SRSTRE1 where UI2='T186'}; |
552
|
0
|
|
|
|
|
|
my $sth = $db->prepare( $sql ); |
553
|
0
|
|
|
|
|
|
$sth->execute(); |
554
|
0
|
|
|
|
|
|
my($st1, $st2); |
555
|
0
|
|
|
|
|
|
$sth->bind_columns( undef, \$st1, \$st2 ); |
556
|
0
|
|
|
|
|
|
while( $sth->fetch() ) { |
557
|
0
|
|
|
|
|
|
push @{$children{$st2}}, $st1; |
|
0
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
|
push @{$parents{$st1}}, $st2; |
|
0
|
|
|
|
|
|
|
559
|
0
|
|
|
|
|
|
$upper{$st1}++; |
560
|
0
|
|
|
|
|
|
$propagationFreq{$st1} = 0; |
561
|
0
|
|
|
|
|
|
$propagationFreq{$st2} = 0; |
562
|
|
|
|
|
|
|
} |
563
|
0
|
|
|
|
|
|
$errorhandler->_checkDbError($pkg, $function, $sth); |
564
|
0
|
|
|
|
|
|
$sth->finish(); |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# set the upper level taxonomy |
567
|
0
|
|
|
|
|
|
foreach my $st (sort keys %propagationFreq) { |
568
|
0
|
0
|
|
|
|
|
if(! (exists $upper{$st})) { |
569
|
0
|
|
|
|
|
|
push@{$children{$root}}, $st; |
|
0
|
|
|
|
|
|
|
570
|
0
|
|
|
|
|
|
push@{$parents{$st}}, $root; |
|
0
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
# add the root to the propagationFreq |
574
|
0
|
|
|
|
|
|
$propagationFreq{$root} = 0; |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# initialize package variables |
578
|
|
|
|
|
|
|
# input : |
579
|
|
|
|
|
|
|
# output: |
580
|
|
|
|
|
|
|
sub _initializeGlobalVariables { |
581
|
|
|
|
|
|
|
|
582
|
0
|
|
|
0
|
|
|
$debug = 0; |
583
|
|
|
|
|
|
|
|
584
|
0
|
|
|
|
|
|
$option_verbose = 0; |
585
|
0
|
|
|
|
|
|
$option_debugpath = 0; |
586
|
0
|
|
|
|
|
|
$option_t = 0; |
587
|
|
|
|
|
|
|
|
588
|
0
|
|
|
|
|
|
$errorhandler = ""; |
589
|
0
|
|
|
|
|
|
$cuifinder = ""; |
590
|
|
|
|
|
|
|
|
591
|
0
|
|
|
|
|
|
%propagationFreq = (); |
592
|
0
|
|
|
|
|
|
%propagationHash = (); |
593
|
0
|
|
|
|
|
|
%children = (); |
594
|
0
|
|
|
|
|
|
%parents = (); |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# method to set the global parameter options |
600
|
|
|
|
|
|
|
# input : $params <- reference to a hash |
601
|
|
|
|
|
|
|
# output: |
602
|
|
|
|
|
|
|
sub _setOptions { |
603
|
|
|
|
|
|
|
|
604
|
0
|
|
|
0
|
|
|
my $self = shift; |
605
|
0
|
|
|
|
|
|
my $params = shift; |
606
|
|
|
|
|
|
|
|
607
|
0
|
|
|
|
|
|
my $function = "_setOptions"; |
608
|
0
|
|
|
|
|
|
&_debug($function); |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# check self |
611
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
612
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# get all the parameters |
616
|
0
|
|
|
|
|
|
my $verbose = $params->{'verbose'}; |
617
|
0
|
|
|
|
|
|
my $t = $params->{'t'}; |
618
|
0
|
|
|
|
|
|
my $debugoption = $params->{'debug'}; |
619
|
0
|
|
|
|
|
|
my $debugpath = $params->{'debugpath'}; |
620
|
|
|
|
|
|
|
|
621
|
0
|
0
|
|
|
|
|
if(defined $debugoption) { |
622
|
0
|
|
|
|
|
|
$debug = 1; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
my $output = ""; |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# check if debugpath option |
628
|
0
|
0
|
|
|
|
|
if(defined $debugpath) { |
629
|
0
|
|
|
|
|
|
$option_debugpath = 1; |
630
|
0
|
|
|
|
|
|
$output .= " --debugpath $debugpath\n"; |
631
|
0
|
0
|
|
|
|
|
open(DEBUG_FILE, ">$debugpath") || |
632
|
|
|
|
|
|
|
die "Could not open depthpath file $debugpath\n"; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# check verbose option |
636
|
0
|
0
|
|
|
|
|
if(defined $verbose) { |
637
|
0
|
|
|
|
|
|
$output .= "\nSTFinder User Options:\n"; |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
# check that this is not a test case |
641
|
0
|
0
|
|
|
|
|
if(defined $t) { $option_t = 1; } |
|
0
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
# check if verbose run has been identified |
644
|
0
|
0
|
|
|
|
|
if(defined $verbose) { |
645
|
0
|
|
|
|
|
|
$option_verbose = 1; |
646
|
|
|
|
|
|
|
|
647
|
0
|
|
|
|
|
|
$output .= " --verbose option set\n"; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
0
|
0
|
|
|
|
|
if($option_t == 0) { |
651
|
0
|
|
|
|
|
|
print STDERR "$output\n"; |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# This is like a reverse DFS only it is not recursive |
656
|
|
|
|
|
|
|
# due to the stack overflow errors I received when it was |
657
|
|
|
|
|
|
|
# input : $tui <- string containing the semantic type TUI |
658
|
|
|
|
|
|
|
# output: $array <- reference to an array containing the path information |
659
|
|
|
|
|
|
|
sub _pathsToRoot { |
660
|
|
|
|
|
|
|
|
661
|
0
|
|
|
0
|
|
|
my $self = shift; |
662
|
0
|
|
|
|
|
|
my $tui = shift; |
663
|
|
|
|
|
|
|
|
664
|
0
|
0
|
0
|
|
|
|
return () if(!defined $self || !ref $self); |
665
|
|
|
|
|
|
|
|
666
|
0
|
|
|
|
|
|
my $function = "_pathsToRoot($tui)"; |
667
|
0
|
|
|
|
|
|
&_debug($function); |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
# check self |
670
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
671
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# check st was obtained |
675
|
0
|
0
|
|
|
|
|
if(!$tui) { |
676
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$tui.", 4); |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
# check if valid semantic type |
680
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($tui)) ) { |
681
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Semantic Type ($tui) in not valid.", 6); |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# set the storage |
685
|
0
|
|
|
|
|
|
my @path_storage = (); |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# set the stack |
688
|
0
|
|
|
|
|
|
my @stack = (); |
689
|
0
|
|
|
|
|
|
push @stack, $tui; |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
# set the count |
692
|
0
|
|
|
|
|
|
my %visited = (); |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# set the paths |
695
|
0
|
|
|
|
|
|
my @paths = (); |
696
|
0
|
|
|
|
|
|
my @empty = (); |
697
|
0
|
|
|
|
|
|
push @paths, \@empty; |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# now loop through the stack |
700
|
0
|
|
|
|
|
|
while($#stack >= 0) { |
701
|
|
|
|
|
|
|
|
702
|
0
|
|
|
|
|
|
my $st = $stack[$#stack]; |
703
|
0
|
|
|
|
|
|
my $path = $paths[$#paths]; |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
# set up the new path |
706
|
0
|
|
|
|
|
|
my @intermediate = @{$path}; |
|
0
|
|
|
|
|
|
|
707
|
0
|
|
|
|
|
|
push @intermediate, $st; |
708
|
0
|
|
|
|
|
|
my $series = join " ", @intermediate; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# check if st has been visited already |
711
|
0
|
0
|
|
|
|
|
if(exists $visited{$series}) { |
712
|
0
|
|
|
|
|
|
pop @stack; pop @paths; |
|
0
|
|
|
|
|
|
|
713
|
0
|
|
|
|
|
|
next; |
714
|
|
|
|
|
|
|
} |
715
|
0
|
|
|
|
|
|
else { $visited{$series}++; } |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
# print information into the file if debugpath option is set |
718
|
0
|
0
|
|
|
|
|
if($option_debugpath) { |
719
|
0
|
|
|
|
|
|
my $d = $#intermediate+1; |
720
|
0
|
|
|
|
|
|
print DEBUG_FILE "$st\t$d\t@intermediate\n"; |
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
# if the st is the umls root - we are done |
724
|
0
|
0
|
|
|
|
|
if($st eq $root) { |
725
|
|
|
|
|
|
|
# this is a complete path to the root so push it on the paths |
726
|
0
|
|
|
|
|
|
my @reversed = reverse(@intermediate); |
727
|
0
|
|
|
|
|
|
my $rseries = join " ", @reversed; |
728
|
0
|
|
|
|
|
|
push @path_storage, $rseries; |
729
|
0
|
|
|
|
|
|
next; |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# if there are no parents we are finished with this semantic type |
733
|
0
|
0
|
|
|
|
|
if($#{$parents{$st}} < 0) { |
|
0
|
|
|
|
|
|
|
734
|
0
|
|
|
|
|
|
pop @stack; pop @paths; |
|
0
|
|
|
|
|
|
|
735
|
0
|
|
|
|
|
|
next; |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# search through the parents |
739
|
0
|
|
|
|
|
|
my $stackflag = 0; |
740
|
0
|
|
|
|
|
|
foreach my $parent (@{$parents{$st}}) { |
|
0
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# check if concept is already in the path |
743
|
0
|
0
|
|
|
|
|
if($series=~/$parent/) { next; } |
|
0
|
|
|
|
|
|
|
744
|
0
|
0
|
|
|
|
|
if($st eq $parent) { next; } |
|
0
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
# if it isn't continue on with the depth first search |
747
|
0
|
|
|
|
|
|
push @stack, $parent; |
748
|
0
|
|
|
|
|
|
push @paths, \@intermediate; |
749
|
0
|
|
|
|
|
|
$stackflag++; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
# check to make certain there were actually children |
753
|
0
|
0
|
|
|
|
|
if($stackflag == 0) { |
754
|
0
|
|
|
|
|
|
pop @stack; pop @paths; |
|
0
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
} |
757
|
|
|
|
|
|
|
|
758
|
0
|
|
|
|
|
|
return \@path_storage; |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
# this function finds the shortest path between two semantic types and returns the |
762
|
|
|
|
|
|
|
# path. in the process it determines the least common subsumer for that path so it |
763
|
|
|
|
|
|
|
# returns both |
764
|
|
|
|
|
|
|
# input : $st1 <- string containing the first TUI |
765
|
|
|
|
|
|
|
# $st2 <- string containing the second TUI |
766
|
|
|
|
|
|
|
# output: $hash <- reference to a hash containing the |
767
|
|
|
|
|
|
|
# lcs as the key and the path as the |
768
|
|
|
|
|
|
|
# value |
769
|
|
|
|
|
|
|
sub _shortestPath { |
770
|
|
|
|
|
|
|
|
771
|
0
|
|
|
0
|
|
|
my $self = shift; |
772
|
0
|
|
|
|
|
|
my $st1 = shift; |
773
|
0
|
|
|
|
|
|
my $st2 = shift; |
774
|
|
|
|
|
|
|
|
775
|
0
|
|
|
|
|
|
my $function = "_shortestPath"; |
776
|
0
|
|
|
|
|
|
&_debug($function); |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
# check self |
779
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
780
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# check parameter exists |
784
|
0
|
0
|
|
|
|
|
if(!defined $st1) { |
785
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st1.", 4); |
786
|
|
|
|
|
|
|
} |
787
|
0
|
0
|
|
|
|
|
if(!defined $st2) { |
788
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st2.", 4); |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
# check if valid semantic type TUI |
792
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st1)) ) { |
793
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "TUI ($st1) in not valid.", 12); |
794
|
|
|
|
|
|
|
} |
795
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st2)) ) { |
796
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "TUI ($st2) in not valid.", 12); |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
# Get the paths to root for each ofhte concepts |
800
|
0
|
|
|
|
|
|
my $lTrees = $self->_pathsToRoot($st1); |
801
|
|
|
|
|
|
|
|
802
|
0
|
|
|
|
|
|
my $rTrees = $self->_pathsToRoot($st2); |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Find the shortest path in these trees. |
805
|
0
|
|
|
|
|
|
my %lcsLengths = (); |
806
|
0
|
|
|
|
|
|
my %lcsPaths = (); |
807
|
0
|
|
|
|
|
|
my $lcs = ""; |
808
|
0
|
|
|
|
|
|
foreach my $lTree (@{$lTrees}) { |
|
0
|
|
|
|
|
|
|
809
|
0
|
|
|
|
|
|
foreach my $rTree (@{$rTrees}) { |
|
0
|
|
|
|
|
|
|
810
|
0
|
|
|
|
|
|
$lcs = $self->_getLCSfromTrees($lTree, $rTree); |
811
|
0
|
0
|
|
|
|
|
if(defined $lcs) { |
812
|
|
|
|
|
|
|
|
813
|
0
|
|
|
|
|
|
my $lCount = 0; |
814
|
0
|
|
|
|
|
|
my $rCount = 0; |
815
|
0
|
|
|
|
|
|
my $length = 0; |
816
|
0
|
|
|
|
|
|
my $st = ""; |
817
|
|
|
|
|
|
|
|
818
|
0
|
|
|
|
|
|
my @lArray = (); |
819
|
0
|
|
|
|
|
|
my @rArray = (); |
820
|
|
|
|
|
|
|
|
821
|
0
|
|
|
|
|
|
my @lTreeArray = split/\s+/, $lTree; |
822
|
0
|
|
|
|
|
|
my @rTreeArray = split/\s+/, $rTree; |
823
|
|
|
|
|
|
|
|
824
|
0
|
|
|
|
|
|
foreach $st (reverse @lTreeArray) { |
825
|
0
|
|
|
|
|
|
$lCount++; |
826
|
0
|
|
|
|
|
|
push @lArray, $st; |
827
|
0
|
0
|
|
|
|
|
last if($st eq $lcs); |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
} |
830
|
0
|
|
|
|
|
|
foreach $st (reverse @rTreeArray) { |
831
|
0
|
|
|
|
|
|
$rCount++; |
832
|
0
|
0
|
|
|
|
|
last if($st eq $lcs); |
833
|
0
|
|
|
|
|
|
push @rArray, $st; |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
} |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
# length of the path |
838
|
0
|
0
|
|
|
|
|
if(exists $lcsLengths{$lcs}) { |
839
|
0
|
0
|
|
|
|
|
if($lcsLengths{$lcs} >= ($rCount + $lCount - 1)) { |
840
|
0
|
|
|
|
|
|
$lcsLengths{$lcs} = $rCount + $lCount - 1; |
841
|
0
|
|
|
|
|
|
my @fullpath = (@lArray, (reverse @rArray)); |
842
|
0
|
|
|
|
|
|
push @{$lcsPaths{$lcs}}, \@fullpath; |
|
0
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
else { |
846
|
0
|
|
|
|
|
|
$lcsLengths{$lcs} = $rCount + $lCount - 1; |
847
|
0
|
|
|
|
|
|
my @fullpath = (@lArray, (reverse @rArray)); |
848
|
0
|
|
|
|
|
|
push @{$lcsPaths{$lcs}}, \@fullpath; |
|
0
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
# If no paths exist |
857
|
0
|
0
|
|
|
|
|
if(!scalar(keys(%lcsPaths))) { |
858
|
0
|
|
|
|
|
|
return undef; |
859
|
|
|
|
|
|
|
} |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
# get the lcses and their associated path(s) |
862
|
0
|
|
|
|
|
|
my %rhash = (); |
863
|
0
|
|
|
|
|
|
my $prev_len = -1; |
864
|
0
|
|
|
|
|
|
foreach my $lcs (sort {$lcsLengths{$a} <=> $lcsLengths{$b}} keys(%lcsLengths)) { |
|
0
|
|
|
|
|
|
|
865
|
0
|
0
|
0
|
|
|
|
if( ($prev_len == -1) or ($prev_len == $lcsLengths{$lcs}) ) { |
866
|
0
|
|
|
|
|
|
foreach my $pathref (@{$lcsPaths{$lcs}}) { |
|
0
|
|
|
|
|
|
|
867
|
0
|
0
|
|
|
|
|
if( ($#{$pathref}+1) == $lcsLengths{$lcs}) { |
|
0
|
|
|
|
|
|
|
868
|
0
|
|
|
|
|
|
my $path = join " ", @{$pathref}; |
|
0
|
|
|
|
|
|
|
869
|
0
|
|
|
|
|
|
$rhash{$path} = $lcs; |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
} |
873
|
0
|
|
|
|
|
|
else { last; } |
874
|
0
|
|
|
|
|
|
$prev_len = $lcsLengths{$lcs}; |
875
|
|
|
|
|
|
|
} |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
# return a reference to the hash containing the lcses and their path(s) |
878
|
0
|
|
|
|
|
|
return \%rhash; |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# method to get the Least Common Subsumer of two |
882
|
|
|
|
|
|
|
# paths to the root of a taxonomy |
883
|
|
|
|
|
|
|
# input : $array1 <- reference to an array containing |
884
|
|
|
|
|
|
|
# the paths to the root for tui1 |
885
|
|
|
|
|
|
|
# $array2 <- same thing for tui2 |
886
|
|
|
|
|
|
|
# output: $hash <- reference to a hash containing the |
887
|
|
|
|
|
|
|
# lcs as the key and the path as the hash |
888
|
|
|
|
|
|
|
sub _getLCSfromTrees { |
889
|
|
|
|
|
|
|
|
890
|
0
|
|
|
0
|
|
|
my $self = shift; |
891
|
0
|
|
|
|
|
|
my $arrayref1 = shift; |
892
|
0
|
|
|
|
|
|
my $arrayref2 = shift; |
893
|
|
|
|
|
|
|
|
894
|
0
|
|
|
|
|
|
my $function = "_getLCSfromTrees"; |
895
|
0
|
|
|
|
|
|
&_debug($function); |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
# check self |
898
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
899
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
# check parameter exists |
903
|
0
|
0
|
|
|
|
|
if(!defined $arrayref1) { |
904
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$arrayref1.", 4); |
905
|
|
|
|
|
|
|
} |
906
|
0
|
0
|
|
|
|
|
if(!defined $arrayref2) { |
907
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$arrayref2.", 4); |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
# get the arrays |
911
|
0
|
|
|
|
|
|
my @array1 = split/\s+/, $arrayref1; |
912
|
0
|
|
|
|
|
|
my @array2 = split/\s+/, $arrayref2; |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
# reverse them |
915
|
0
|
|
|
|
|
|
my @tree1 = reverse @array1; |
916
|
0
|
|
|
|
|
|
my @tree2 = reverse @array2; |
917
|
0
|
|
|
|
|
|
my $tmpString = " ".join(" ", @tree2)." "; |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# find the lcs |
920
|
0
|
|
|
|
|
|
foreach my $element (@tree1) { |
921
|
0
|
0
|
|
|
|
|
if($tmpString =~ / $element /) { |
922
|
0
|
|
|
|
|
|
return $element; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
|
926
|
0
|
|
|
|
|
|
return undef; |
927
|
|
|
|
|
|
|
} |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
# this function returns the shortest path between two semantic type TUIs |
931
|
|
|
|
|
|
|
# input : $st1 <- string containing the first cui |
932
|
|
|
|
|
|
|
# $st2 <- string containing the second |
933
|
|
|
|
|
|
|
# output: $array <- reference to an array containing the lcs(es) |
934
|
|
|
|
|
|
|
sub _findShortestPath { |
935
|
|
|
|
|
|
|
|
936
|
0
|
|
|
0
|
|
|
my $self = shift; |
937
|
0
|
|
|
|
|
|
my $st1 = shift; |
938
|
0
|
|
|
|
|
|
my $st2 = shift; |
939
|
|
|
|
|
|
|
|
940
|
0
|
|
|
|
|
|
my $function = "_findShortestPath"; |
941
|
0
|
|
|
|
|
|
&_debug($function); |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# check self |
944
|
0
|
0
|
0
|
|
|
|
if(!defined $self || !ref $self) { |
945
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "", 2); |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
# check parameter exists |
949
|
0
|
0
|
|
|
|
|
if(!defined $st1) { |
950
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st1.", 4); |
951
|
|
|
|
|
|
|
} |
952
|
0
|
0
|
|
|
|
|
if(!defined $st2) { |
953
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "Error with input variable \$st2.", 4); |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
# check if valid semantic type TUI |
957
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st1)) ) { |
958
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "TUI ($st1) in not valid.", 12); |
959
|
|
|
|
|
|
|
} |
960
|
0
|
0
|
|
|
|
|
if(! ($errorhandler->_validTui($st2)) ) { |
961
|
0
|
|
|
|
|
|
$errorhandler->_error($pkg, $function, "TUI ($st2) in not valid.", 12); |
962
|
|
|
|
|
|
|
} |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
# find the shortest path(s) and lcs - there may be more than one |
965
|
0
|
|
|
|
|
|
my $hash = $self->_shortestPath($st1, $st2); |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
# remove the blanks from the paths |
968
|
0
|
|
|
|
|
|
my @paths = (); my $output = ""; |
|
0
|
|
|
|
|
|
|
969
|
0
|
|
|
|
|
|
foreach my $path (sort keys %{$hash}) { |
|
0
|
|
|
|
|
|
|
970
|
0
|
0
|
|
|
|
|
if($path=~/T[0-9]+/) { |
971
|
0
|
|
|
|
|
|
push @paths, $path; |
972
|
|
|
|
|
|
|
} |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
# return the shortest paths (all of them) |
976
|
0
|
|
|
|
|
|
return \@paths; |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
# print out the function name to standard error |
981
|
|
|
|
|
|
|
# input : $function <- string containing function name |
982
|
|
|
|
|
|
|
# output: |
983
|
|
|
|
|
|
|
sub _debug { |
984
|
0
|
|
|
0
|
|
|
my $function = shift; |
985
|
0
|
0
|
|
|
|
|
if($debug) { print STDERR "In UMLS::Interface::STFinder::$function\n"; } |
|
0
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
} |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
1; |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
__END__ |