line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::WSA |
2
|
|
|
|
|
|
|
# Computes the Weighted Set Association (WSA) between two sets of terms. |
3
|
|
|
|
|
|
|
# WSA finds the shared linking terms between A and C and weights those |
4
|
|
|
|
|
|
|
# terms based on their association to A. Each B term therefore has a |
5
|
|
|
|
|
|
|
# weight relative to its association with A, which is multiplied by |
6
|
|
|
|
|
|
|
# its n11,n1p,np1 to make more associated terms more or less important. |
7
|
|
|
|
|
|
|
# The shared B to C set associaiton is then found using the weighted B |
8
|
|
|
|
|
|
|
# terms to produce the final association score. |
9
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
10
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
808
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
package UMLS::Association::Measures::WSA; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList |
16
|
|
|
|
|
|
|
# using linking set association (LSA) |
17
|
|
|
|
|
|
|
# Input: |
18
|
|
|
|
|
|
|
# $pairHashListRef - ref to an array of pairHashes |
19
|
|
|
|
|
|
|
# $matrixFileName - the fileName of the co-occurrence matrix |
20
|
|
|
|
|
|
|
# $noOrder - 1 if order is enforced, 0 if not |
21
|
|
|
|
|
|
|
# $paramsRef - the params used to create UMLS::Association which |
22
|
|
|
|
|
|
|
# are used when finding the A to B weights |
23
|
|
|
|
|
|
|
# Output: |
24
|
|
|
|
|
|
|
# \@statsList - ref to an array of \@stats, refs to arrays |
25
|
|
|
|
|
|
|
# containing the ordered values: n11, n1p, np1, npp |
26
|
|
|
|
|
|
|
# for each of the pair hashes. The index of the |
27
|
|
|
|
|
|
|
# \@statsList corresponds to the index of the pairHash |
28
|
|
|
|
|
|
|
# in the input $pairHashListRef |
29
|
|
|
|
|
|
|
sub getStats { |
30
|
2
|
|
|
2
|
0
|
2
|
my $pairHashListRef = shift; |
31
|
2
|
|
|
|
|
2
|
my $matrixFileName = shift; |
32
|
2
|
|
|
|
|
3
|
my $noOrder = shift; |
33
|
2
|
|
|
|
|
2
|
my $paramsRef = shift; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
################## STEP 1 ####################### |
36
|
|
|
|
|
|
|
######### Find the linking (B) terms ########### |
37
|
|
|
|
|
|
|
################################################# |
38
|
|
|
|
|
|
|
#read in the matrix - FILE READ ONE |
39
|
2
|
|
|
|
|
3
|
my ($matrixRef, $vocabSize) = &UMLS::Association::StatFinder::readInMatrix($pairHashListRef, $matrixFileName); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
#construct A to shared B pair hashListRef |
42
|
2
|
|
|
|
|
3
|
my @newPairHashList = (); |
43
|
2
|
|
|
|
|
2
|
my @sharedCoocs = (); |
44
|
2
|
|
|
|
|
2
|
foreach my $pairHashRef (@{$pairHashListRef}) { |
|
2
|
|
|
|
|
2
|
|
45
|
|
|
|
|
|
|
#get the linking terms and shared linking terms |
46
|
|
|
|
|
|
|
#MATRIX PASS 1 |
47
|
2
|
|
|
|
|
4
|
my ($set1CoocRef, $set2CoocRef) = &_getLinkingTermSets( |
48
|
|
|
|
|
|
|
$pairHashRef, $matrixRef, $noOrder); |
49
|
2
|
|
|
|
|
3
|
my $sharedCoocRef = &_getSharedLinkingTerms( |
50
|
|
|
|
|
|
|
$set1CoocRef, $set2CoocRef); |
51
|
2
|
|
|
|
|
2
|
my @sharedTerms = keys %{$sharedCoocRef}; |
|
2
|
|
|
|
|
3
|
|
52
|
2
|
|
|
|
|
3
|
push @sharedCoocs, \@sharedTerms; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
#construct pair hashes |
55
|
2
|
|
|
|
|
2
|
foreach my $term (keys %{$sharedCoocRef}) { |
|
2
|
|
|
|
|
2
|
|
56
|
5
|
|
|
|
|
5
|
my %newPairHash = (); |
57
|
5
|
|
|
|
|
5
|
$newPairHash{'set1'} = ${$pairHashRef}{'set1'}; |
|
5
|
|
|
|
|
7
|
|
58
|
|
|
|
|
|
|
|
59
|
5
|
|
|
|
|
4
|
my @set2 = (); |
60
|
5
|
|
|
|
|
5
|
push @set2, $term; |
61
|
5
|
|
|
|
|
6
|
$newPairHash{'set2'} = \@set2; |
62
|
5
|
|
|
|
|
7
|
push @newPairHashList, \%newPairHash; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
#Now we have a new pair hash ref which we will get associations |
66
|
|
|
|
|
|
|
# for. This is setA to each B linking term. It is arranged |
67
|
|
|
|
|
|
|
# such that you iterate over the shared terms of each pair hash |
68
|
|
|
|
|
|
|
# to get A to each B for that term (e.g. pairHash1 has 10 linking |
69
|
|
|
|
|
|
|
# terms, the first 10 pairHashes are A a single B. The B terms |
70
|
|
|
|
|
|
|
# are ordered in the sharedCoocs Array of Arrays |
71
|
|
|
|
|
|
|
#Doing it in this manner allows for WSA to be calculated in 3 |
72
|
|
|
|
|
|
|
# file reads and number pair hashes + 2 passes of the matrix |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
################## STEP 2 ####################### |
75
|
|
|
|
|
|
|
####### Find the Weight of each B term ######## |
76
|
|
|
|
|
|
|
################################################# |
77
|
|
|
|
|
|
|
#get A to shared B associations for all possible linking B terms |
78
|
|
|
|
|
|
|
# FILE READ 2 - MATRIX PASS 2 (calculateAssociation_pairHashList) |
79
|
2
|
|
|
|
|
2
|
my %optionsHash = (); |
80
|
2
|
100
|
66
|
|
|
8
|
if (defined $noOrder && $noOrder > 0) { $optionsHash{'noorder'} = 1; } |
|
1
|
|
|
|
|
1
|
|
81
|
2
|
|
|
|
|
2
|
$optionsHash{'measure'} = ${$paramsRef}{'measure'}; |
|
2
|
|
|
|
|
3
|
|
82
|
2
|
|
|
|
|
2
|
$optionsHash{'matrix'} = ${$paramsRef}{'matrix'}; |
|
2
|
|
|
|
|
3
|
|
83
|
2
|
|
|
|
|
11
|
my $assoc = UMLS::Association->new(\%optionsHash); |
84
|
2
|
|
|
|
|
3
|
my $aToBScoresRef = $assoc->_calculateAssociation_pairHashList(\@newPairHashList, ${$paramsRef}{'measure'}); |
|
2
|
|
|
|
|
5
|
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
#Normalize the weights unless told not to |
87
|
2
|
|
|
|
|
2
|
my $weightIterator = 0; |
88
|
2
|
|
|
|
|
3
|
my $reweightIterator = 0; |
89
|
2
|
50
|
|
|
|
4
|
if (!$paramsRef->{'nonorm'}) { |
90
|
|
|
|
|
|
|
#normalize the weights for each pairhash |
91
|
2
|
|
|
|
|
2
|
for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) { |
|
4
|
|
|
|
|
5
|
|
92
|
|
|
|
|
|
|
#get all set B term weights as a hash{term}=weight |
93
|
2
|
|
|
|
|
3
|
my %pairWeights = (); |
94
|
2
|
|
|
|
|
2
|
foreach my $term (@{$sharedCoocs[$i]}) { |
|
2
|
|
|
|
|
2
|
|
95
|
5
|
|
|
|
|
4
|
$pairWeights{$term} = ${$aToBScoresRef}[$weightIterator]; |
|
5
|
|
|
|
|
5
|
|
96
|
5
|
|
|
|
|
5
|
$weightIterator++; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
#scale the weights between 0 and 1 (weight/sum), so weight |
100
|
|
|
|
|
|
|
# becomes a percentage of the total weights |
101
|
|
|
|
|
|
|
# I need to keep weights <= 1 to maintain correctness |
102
|
|
|
|
|
|
|
# of stats (npp in particular, but others as well?) |
103
|
2
|
|
|
|
|
3
|
my $sum = 0; |
104
|
2
|
|
|
|
|
3
|
foreach my $cui (keys %pairWeights) { |
105
|
5
|
|
|
|
|
6
|
$sum += $pairWeights{$cui}; |
106
|
|
|
|
|
|
|
} |
107
|
2
|
|
|
|
|
3
|
foreach my $cui (keys %pairWeights) { |
108
|
5
|
|
|
|
|
5
|
${$aToBScoresRef}[$reweightIterator] /= $sum; |
|
5
|
|
|
|
|
4
|
|
109
|
5
|
|
|
|
|
7
|
$reweightIterator++; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
##### Now we have the normalized weights |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
#So now we have the weights for all B terms and for each pair hash. Next |
116
|
|
|
|
|
|
|
# step is to weight the subgraph using these weights for each |
117
|
|
|
|
|
|
|
# pairhash and then calculate the B to C direct assocition |
118
|
|
|
|
|
|
|
# using each of those re-weighted sub graphs |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
################## STEP 3 ####################### |
121
|
|
|
|
|
|
|
####### Find the WSA between B and C ########## |
122
|
|
|
|
|
|
|
################################################# |
123
|
|
|
|
|
|
|
# Create the B to C pair hash and read in the matrix of B to C terms |
124
|
|
|
|
|
|
|
# MATRIX READ 3 - reqiured because of links between the linking set terms |
125
|
|
|
|
|
|
|
# (e.g. edge 3->4 in sample4. This becomes a source sink if matrix isnt |
126
|
|
|
|
|
|
|
# read in again |
127
|
2
|
|
|
|
|
3
|
my @bToCPairHashList = (); |
128
|
2
|
|
|
|
|
2
|
for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) { |
|
4
|
|
|
|
|
5
|
|
129
|
|
|
|
|
|
|
#construct the B to C pair Hash |
130
|
2
|
|
|
|
|
2
|
my %pairHash = (); |
131
|
2
|
|
|
|
|
2
|
$pairHash{'set1'} = $sharedCoocs[$i]; |
132
|
2
|
|
|
|
|
2
|
$pairHash{'set2'} = ${${$pairHashListRef}[$i]}{'set2'}; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
2
|
|
133
|
2
|
|
|
|
|
4
|
push @bToCPairHashList, \%pairHash; |
134
|
|
|
|
|
|
|
} |
135
|
2
|
|
|
|
|
3
|
($matrixRef, $vocabSize) = &UMLS::Association::StatFinder::readInMatrix(\@bToCPairHashList, $matrixFileName); |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# MATRIX PASS +numPairHashes - to calculate WSA we need to |
138
|
|
|
|
|
|
|
# reweight the matrix differently for each pairHash |
139
|
|
|
|
|
|
|
#get WSA Stats (n11,n1p,np1,npp) for each pairHash |
140
|
2
|
|
|
|
|
6
|
$weightIterator = 0; |
141
|
2
|
|
|
|
|
3
|
my @statsList = (); |
142
|
2
|
|
|
|
|
2
|
for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) { |
|
4
|
|
|
|
|
7
|
|
143
|
|
|
|
|
|
|
#get all set B term weights as a hash{term}=weight |
144
|
2
|
|
|
|
|
2
|
my %weights = (); |
145
|
2
|
|
|
|
|
2
|
foreach my $term (@{$sharedCoocs[$i]}) { |
|
2
|
|
|
|
|
4
|
|
146
|
5
|
|
|
|
|
3
|
$weights{$term} = ${$aToBScoresRef}[$weightIterator]; |
|
5
|
|
|
|
|
7
|
|
147
|
5
|
|
|
|
|
5
|
$weightIterator++; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
#get the weighted subgraph |
151
|
2
|
|
|
|
|
4
|
my $weightedSubGraphRef = &_constructWeightedSubGraph($matrixRef, $bToCPairHashList[$i], \%weights); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
#calculate n11, n1p, np1, npp, using the weights specific to |
154
|
|
|
|
|
|
|
# this pair hash, and save the results |
155
|
2
|
|
|
|
|
4
|
my ($n1pRef, $np1Ref, $npp) = &UMLS::Association::Measures::Direct::_getAllCounts($weightedSubGraphRef); |
156
|
2
|
|
|
|
|
5
|
push @statsList, &UMLS::Association::Measures::Direct::_statsFromAllCounts( |
157
|
|
|
|
|
|
|
$weightedSubGraphRef, $n1pRef, $np1Ref, $npp, $noOrder, $bToCPairHashList[$i], \%weights); |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
#return the stats list, an array of array refs |
161
|
|
|
|
|
|
|
# each array ref conatins four values: |
162
|
|
|
|
|
|
|
# n11, n1p, np1, and npp for the pair hash at |
163
|
|
|
|
|
|
|
# the corresponding index in the pairHashList |
164
|
2
|
|
|
|
|
10
|
return \@statsList; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
################################################## |
170
|
|
|
|
|
|
|
# Sub Graph Construction |
171
|
|
|
|
|
|
|
################################################## |
172
|
|
|
|
|
|
|
#builds a subgraph relevant to this pair hash this includes adding |
173
|
|
|
|
|
|
|
# cuis in other pair hashes to the universal source/sink, and collapsing |
174
|
|
|
|
|
|
|
# edges to create set-nodes rather than cui nodes This also takes care of |
175
|
|
|
|
|
|
|
# noOrder weights contains - hash{term} = weight |
176
|
|
|
|
|
|
|
# Input: |
177
|
|
|
|
|
|
|
# $matrixRef - ref to a matrix from which we construct a subgraph |
178
|
|
|
|
|
|
|
# $pairHashRef - ref to a pairHash |
179
|
|
|
|
|
|
|
# $weightsRef - ref to a hash{cui} = weight of that cui |
180
|
|
|
|
|
|
|
# Output: |
181
|
|
|
|
|
|
|
# \%subGraph - a weighted subgraph for this pairHash |
182
|
|
|
|
|
|
|
sub _constructWeightedSubGraph { |
183
|
2
|
|
|
2
|
|
3
|
my $matrixRef = shift; |
184
|
2
|
|
|
|
|
2
|
my $pairHashRef = shift; |
185
|
2
|
|
|
|
|
2
|
my $weightsRef = shift; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
#convert the pair hash to two hashes of cuis |
188
|
2
|
|
|
|
|
2
|
my %set1 = (); |
189
|
2
|
|
|
|
|
2
|
foreach my $key (@{${$pairHashRef}{'set1'}}) { |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
4
|
|
190
|
5
|
|
|
|
|
5
|
$set1{$key} = 1; |
191
|
|
|
|
|
|
|
} |
192
|
2
|
|
|
|
|
3
|
my %set2 = (); |
193
|
2
|
|
|
|
|
2
|
foreach my $key (@{${$pairHashRef}{'set2'}}) { |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
4
|
|
194
|
4
|
|
|
|
|
5
|
$set2{$key} = 1; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Restrict graph to nodes in this pairhash. That is, |
198
|
|
|
|
|
|
|
# set any nodes outside of sets1 and 2 to be the |
199
|
|
|
|
|
|
|
# universal source and sink |
200
|
|
|
|
|
|
|
#initalize the sub graph |
201
|
2
|
|
|
|
|
2
|
my %subGraph = (); |
202
|
2
|
|
|
|
|
3
|
my %emptyHash = (); |
203
|
2
|
|
|
|
|
3
|
$subGraph{'source'} = \%emptyHash; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
#loop through all source and targets, and if not in |
206
|
|
|
|
|
|
|
# either of the sets, replace with the universal |
207
|
|
|
|
|
|
|
# sink or source |
208
|
2
|
|
|
|
|
3
|
foreach my $source (keys %{$matrixRef}) { |
|
2
|
|
|
|
|
5
|
|
209
|
|
|
|
|
|
|
#convert source to the universal source |
210
|
|
|
|
|
|
|
# node if it is not in this pair hash |
211
|
15
|
|
|
|
|
15
|
my $newSource = $source; |
212
|
15
|
100
|
100
|
|
|
28
|
if (!exists $set1{$source} && !exists $set2{$source}) { |
213
|
6
|
|
|
|
|
6
|
$newSource = 'source'; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
#go through all targets for this source |
217
|
15
|
|
|
|
|
16
|
foreach my $target (keys %{${$matrixRef}{$source}}) { |
|
15
|
|
|
|
|
10
|
|
|
15
|
|
|
|
|
23
|
|
218
|
|
|
|
|
|
|
#convert to universal sink if node is |
219
|
|
|
|
|
|
|
# not in this pair hash |
220
|
18
|
|
|
|
|
17
|
my $newTarget = $target; |
221
|
18
|
100
|
100
|
|
|
28
|
if (!exists $set1{$target} && !exists $set2{$target}) { |
222
|
5
|
|
|
|
|
11
|
$newTarget = 'sink'; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
#weights the value (if both source and target |
226
|
|
|
|
|
|
|
# have weights, then weight is the their product) |
227
|
18
|
|
|
|
|
17
|
my $value = ${${$matrixRef}{$source}}{$target}; |
|
18
|
|
|
|
|
13
|
|
|
18
|
|
|
|
|
20
|
|
228
|
18
|
100
|
|
|
|
16
|
if (defined ${$weightsRef}{$source}){ |
|
18
|
|
|
|
|
20
|
|
229
|
7
|
|
|
|
|
8
|
$value *= ${$weightsRef}{$source}; |
|
7
|
|
|
|
|
7
|
|
230
|
|
|
|
|
|
|
} |
231
|
18
|
100
|
|
|
|
16
|
if (defined ${$weightsRef}{$target}) { |
|
18
|
|
|
|
|
20
|
|
232
|
9
|
|
|
|
|
7
|
$value *= ${$weightsRef}{$target}; |
|
9
|
|
|
|
|
10
|
|
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
#add the value to the subgraph |
236
|
18
|
|
|
|
|
18
|
${$subGraph{$newSource}}{$newTarget} += $value; |
|
18
|
|
|
|
|
23
|
|
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
#At this point, the sub graph has been converted, such that |
240
|
|
|
|
|
|
|
# it contains only the nodes in this pair hash. All other nodes |
241
|
|
|
|
|
|
|
# have been converted to the universal source and univerals sink |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#return the subgraph |
244
|
2
|
|
|
|
|
5
|
return \%subGraph; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
################################################## |
249
|
|
|
|
|
|
|
# Linking Set Acquisition |
250
|
|
|
|
|
|
|
################################################## |
251
|
|
|
|
|
|
|
# Find the linking terms (direct co-occurrences) between sets 1 and |
252
|
|
|
|
|
|
|
# sets 2 and outputs them as co-occurrence hashes (hash{cui}=1) |
253
|
|
|
|
|
|
|
# Input: |
254
|
|
|
|
|
|
|
# $pairHashRef - ref to a pairHash |
255
|
|
|
|
|
|
|
# $matrixRef - ref to the read in co-occurrence matrix |
256
|
|
|
|
|
|
|
# $noOrder - 1 if order is enforced, 0 if not |
257
|
|
|
|
|
|
|
# Output: |
258
|
|
|
|
|
|
|
# \%set1Cooc - a hash{cui}=1 of all of set 1's direct co-occurrences |
259
|
|
|
|
|
|
|
# (order/noOrder is accounted for) |
260
|
|
|
|
|
|
|
# \%set2Cooc - a hash{cui}=1 of all of set 2's direct co-occurrences |
261
|
|
|
|
|
|
|
# (order/noOrder is accounted for) |
262
|
|
|
|
|
|
|
sub _getLinkingTermSets { |
263
|
2
|
|
|
2
|
|
3
|
my $pairHashRef = shift; |
264
|
2
|
|
|
|
|
2
|
my $matrixRef = shift; |
265
|
2
|
|
|
|
|
2
|
my $noOrder = shift; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
#convert pair hash to sets 1 and 2 hashes |
268
|
2
|
|
|
|
|
3
|
my %set1 = (); |
269
|
2
|
|
|
|
|
2
|
foreach my $node (@{${$pairHashRef}{'set1'}}) { |
|
2
|
|
|
|
|
1
|
|
|
2
|
|
|
|
|
4
|
|
270
|
4
|
|
|
|
|
6
|
$set1{$node} = 1; |
271
|
|
|
|
|
|
|
} |
272
|
2
|
|
|
|
|
2
|
my %set2 = (); |
273
|
2
|
|
|
|
|
2
|
foreach my $node (@{${$pairHashRef}{'set2'}}) { |
|
2
|
|
|
|
|
1
|
|
|
2
|
|
|
|
|
3
|
|
274
|
4
|
|
|
|
|
4
|
$set2{$node} = 1; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
#get all co-occurring terms with set1 and set2 |
278
|
2
|
|
|
|
|
3
|
my %set1Cooc = (); |
279
|
2
|
|
|
|
|
3
|
my %set2Cooc = (); |
280
|
|
|
|
|
|
|
#check all nodes in the dataset |
281
|
2
|
|
|
|
|
2
|
foreach my $source (keys %{$matrixRef}) { |
|
2
|
|
|
|
|
4
|
|
282
|
16
|
|
|
|
|
16
|
foreach my $target (keys %{${$matrixRef}{$source}}) { |
|
16
|
|
|
|
|
12
|
|
|
16
|
|
|
|
|
23
|
|
283
|
|
|
|
|
|
|
#add co-occurrences to set1 and set2 |
284
|
18
|
100
|
|
|
|
21
|
if (exists $set1{$source}) { |
285
|
6
|
|
|
|
|
7
|
$set1Cooc{$target} = 1; |
286
|
|
|
|
|
|
|
} |
287
|
18
|
100
|
|
|
|
20
|
if (exists $set2{$target}) { |
288
|
4
|
|
|
|
|
4
|
$set2Cooc{$source} = 1; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
#if noorder, add co-occurrences |
292
|
|
|
|
|
|
|
# to set1 and set2 |
293
|
18
|
100
|
|
|
|
21
|
if ($noOrder) { |
294
|
9
|
100
|
|
|
|
11
|
if (exists $set1{$target}) { |
295
|
1
|
|
|
|
|
1
|
$set1Cooc{$source} = 1; |
296
|
|
|
|
|
|
|
} |
297
|
9
|
100
|
|
|
|
11
|
if (exists $set2{$source}) { |
298
|
2
|
|
|
|
|
2
|
$set2Cooc{$target} = 1; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
#return the two co-occurring sets |
305
|
2
|
|
|
|
|
4
|
return (\%set1Cooc, \%set2Cooc); |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# Finds the shared co-occurrences between the two input co-occurrence hashes |
309
|
|
|
|
|
|
|
# Input: |
310
|
|
|
|
|
|
|
# \%set1Cooc - a hash{cui}=1 of all of set 1's direct co-occurrences |
311
|
|
|
|
|
|
|
# (order/noOrder is accounted for) |
312
|
|
|
|
|
|
|
# \%set2Cooc - a hash{cui}=1 of all of set 2's direct co-occurrences |
313
|
|
|
|
|
|
|
# (order/noOrder is accounted for) |
314
|
|
|
|
|
|
|
# Output: |
315
|
|
|
|
|
|
|
# \%sharedCooc - a hash{cui}=1 of the shared co-occurrences between |
316
|
|
|
|
|
|
|
# the input co-occurrence hashes |
317
|
|
|
|
|
|
|
sub _getSharedLinkingTerms { |
318
|
2
|
|
|
2
|
|
2
|
my $set1CoocRef = shift; |
319
|
2
|
|
|
|
|
2
|
my $set2CoocRef = shift; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
#get the shared linking terms between |
322
|
|
|
|
|
|
|
# set1 and set2 co-occurrences |
323
|
2
|
|
|
|
|
2
|
my %sharedCooc = (); |
324
|
2
|
|
|
|
|
2
|
foreach my $node (keys %{$set1CoocRef}) { |
|
2
|
|
|
|
|
3
|
|
325
|
6
|
100
|
|
|
|
6
|
if (defined ${$set2CoocRef}{$node}) { |
|
6
|
|
|
|
|
7
|
|
326
|
5
|
|
|
|
|
6
|
$sharedCooc{$node} = 1; |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} |
329
|
2
|
|
|
|
|
3
|
return \%sharedCooc; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
1; |