| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::Direct |
|
2
|
|
|
|
|
|
|
# Computes the association between two sets of terms |
|
3
|
|
|
|
|
|
|
# using Direct association, which is the association |
|
4
|
|
|
|
|
|
|
# between sets A and C using direct co-occurrences |
|
5
|
1
|
|
|
1
|
|
12
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
18
|
|
|
6
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
546
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package UMLS::Association::Measures::Direct; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList |
|
12
|
|
|
|
|
|
|
# using direct association |
|
13
|
|
|
|
|
|
|
# Input: |
|
14
|
|
|
|
|
|
|
# $pairHashListRef - ref to an array of pairHashes |
|
15
|
|
|
|
|
|
|
# $matrixFileName - the fileName of the co-occurrence matrix |
|
16
|
|
|
|
|
|
|
# $noOrder - 0 if order is enforced, 1 if not |
|
17
|
|
|
|
|
|
|
# Output: |
|
18
|
|
|
|
|
|
|
# \@statsList - ref to an array of \@stats, refs to arrays |
|
19
|
|
|
|
|
|
|
# containing the ordered values: n11, n1p, np1, npp |
|
20
|
|
|
|
|
|
|
# for each of the pair hashes. The index of the |
|
21
|
|
|
|
|
|
|
# \@statsList corresponds to the index of the pairHash |
|
22
|
|
|
|
|
|
|
# in the input $pairHashListRef |
|
23
|
|
|
|
|
|
|
sub getStats {
    # Produces one stats array ref (n11, n1p, np1, npp) per pair hash.
    #   $pairHashListRef - ref to an array of pairHashes
    #   $matrixFileName  - file name of the co-occurrence matrix
    #   $noOrder         - passed through to _statsFromAllCounts, which
    #                      also counts the reverse direction when it is true
    # Returns a ref to an array whose i-th element is the stats array ref
    # for the i-th pair hash of the input list.
    my ($pairHashListRef, $matrixFileName, $noOrder) = @_;

    # Read the matrix once for every pair set in the list. The call also
    # returns a second value (a vocabulary size) that nothing here uses.
    # The '&' form is kept so the external call is made exactly as before.
    my ($matrixRef)
        = &UMLS::Association::StatFinder::readInMatrix($pairHashListRef,
                                                       $matrixFileName);

    # Marginal totals (n1p, np1) and the grand total (npp) are computed
    # once and shared by every pair hash.
    my ($n1pRef, $np1Ref, $npp) = _getAllCounts($matrixRef);

    # One stats array ref per pair hash, in input order.
    my @statsList = map {
        _statsFromAllCounts($matrixRef, $n1pRef, $np1Ref, $npp, $noOrder, $_)
    } @{$pairHashListRef};

    return \@statsList;
}
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Computes n1p, np1, and npp for every CUI in the subgraph |
|
52
|
|
|
|
|
|
|
# Input: |
|
53
|
|
|
|
|
|
|
# $subGraphRef - ref to the subgraph or matrix read in |
|
54
|
|
|
|
|
|
|
# Output: |
|
55
|
|
|
|
|
|
|
# \%n1p - ref to a hash{$cui}=n1p for that cui, order enforced |
|
56
|
|
|
|
|
|
|
# \%np1 - ref to a hash{$cui}=np1 for that cui, order enforced |
|
57
|
|
|
|
|
|
|
# $npp - npp for the subGraphRef |
|
58
|
|
|
|
|
|
|
sub _getAllCounts {
    # Sums the matrix by row, by column and overall.
    #   $subGraphRef - ref to a hash of hashes: {$cui1}{$cui2} = count
    # Returns (\%rowTotals, \%colTotals, $grandTotal), i.e. n1p per first
    # key, np1 per second key, and npp for the whole matrix.
    my ($subGraphRef) = @_;

    my (%rowTotals, %colTotals);
    my $grandTotal = 0;

    for my $rowCui (keys %{$subGraphRef}) {
        for my $colCui (keys %{ $subGraphRef->{$rowCui} }) {
            my $count = $subGraphRef->{$rowCui}{$colCui};

            # every cell contributes to its row, its column and the total
            $rowTotals{$rowCui} += $count;
            $colTotals{$colCui} += $count;
            $grandTotal         += $count;
        }
    }

    return (\%rowTotals, \%colTotals, $grandTotal);
}
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Computes n11, n1p, np1,and npp for the pairHash using |
|
80
|
|
|
|
|
|
|
# the allCounts calculated from the _getAllCounts function |
|
81
|
|
|
|
|
|
|
# Input: |
|
82
|
|
|
|
|
|
|
# $subGraphRef - ref to the subgraph or matrix read in |
|
83
|
|
|
|
|
|
|
# $n1pRef - ref to a hash{$cui}=n1p for that cui, order enforced |
|
84
|
|
|
|
|
|
|
# $np1Ref - ref to a hash{$cui}=np1 for that cui, order enforced |
|
85
|
|
|
|
|
|
|
# $npp - npp for the subGraphRef |
|
86
|
|
|
|
|
|
|
# $noOrder - 0 if order is enforced, 1 if not |
|
87
|
|
|
|
|
|
|
# $pairHashRef - ref to a pairHash |
|
88
|
|
|
|
|
|
|
# Output: |
|
89
|
|
|
|
|
|
|
# \@stats - ref to an array of (n11,n1p,np1,npp) |
|
90
|
|
|
|
|
|
|
sub _statsFromAllCounts {
    # Computes n11, n1p, np1 and npp for one pair hash from the matrix and
    # the totals produced by _getAllCounts.
    #   $subGraphRef - ref to the matrix: {$cui1}{$cui2} = count
    #   $n1pRef      - ref to a hash{$cui} = row total, order enforced
    #   $np1Ref      - ref to a hash{$cui} = column total, order enforced
    #   $npp         - grand total of the matrix (returned unchanged)
    #   $noOrder     - 0 if order is enforced, 1 if not
    #   $pairHashRef - ref to a hash with array refs under 'set1' and 'set2'
    # Returns a ref to the array (n11, n1p, np1, npp).
    #
    # The matrix and the pair hash are only read: every lookup fetches the
    # row first and tests it, so no empty rows are autovivified into the
    # caller's (shared) matrix.
    my ($subGraphRef, $n1pRef, $np1Ref, $npp, $noOrder, $pairHashRef) = @_;

    # A missing set is treated as empty.
    my @set1 = @{ $pairHashRef->{'set1'} || [] };
    my @set2 = @{ $pairHashRef->{'set2'} || [] };

    ############ calculate n11
    # NOTE: this nested loop is the bottleneck; the row for $key1 is
    # fetched once per outer iteration instead of once per pair.
    my $n11 = 0;
    foreach my $key1 (@set1) {
        my $row1 = $subGraphRef->{$key1};
        foreach my $key2 (@set2) {
            if (defined $row1 && defined $row1->{$key2}) {
                $n11 += $row1->{$key2};
            }

            # Reverse direction only when order is ignored. For a self pair
            # the reverse cell is the same cell, so it is added only once.
            # A self loop on a key that is not in both sets is never added
            # and is therefore never subtracted either.
            next if !$noOrder || $key1 eq $key2;
            my $row2 = $subGraphRef->{$key2};
            if (defined $row2 && defined $row2->{$key1}) {
                $n11 += $row2->{$key1};
            }
        }
    }

    ############ calculate n1p and np1
    # Same computation with the two marginal hashes swapped.
    my $n1p = _marginalTotal($subGraphRef, $n1pRef, $np1Ref, $noOrder, \@set1);
    my $np1 = _marginalTotal($subGraphRef, $np1Ref, $n1pRef, $noOrder, \@set2);

    #pack and return the stats
    return [$n11, $n1p, $np1, $npp];
}

# Sums the marginal totals of every key in one set.
#   $subGraphRef  - ref to the matrix: {$cui1}{$cui2} = count
#   $sameSideRef  - totals always added (n1p for set1, np1 for set2)
#   $otherSideRef - totals added only when $noOrder is true
#   $noOrder      - 0 if order is enforced, 1 if not
#   $setRef       - ref to the array of keys of the set
# Returns the summed total. With $noOrder, a cell whose two keys are both in
# the set is counted in a row total and in a column total, so it is
# subtracted once.
sub _marginalTotal {
    my ($subGraphRef, $sameSideRef, $otherSideRef, $noOrder, $setRef) = @_;

    my $total = 0;
    foreach my $key (@{$setRef}) {
        if (defined $sameSideRef->{$key}) {
            $total += $sameSideRef->{$key};
        }
        if ($noOrder && defined $otherSideRef->{$key}) {
            $total += $otherSideRef->{$key};
        }
    }
    return $total unless $noOrder;

    #remove noorder double counts
    foreach my $key1 (@{$setRef}) {
        my $row = $subGraphRef->{$key1};
        next unless defined $row;
        foreach my $key2 (@{$setRef}) {
            if (defined $row->{$key2}) {
                $total -= $row->{$key2};
            }
        }
    }
    return $total;
}
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
1; |