line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::Direct |
2
|
|
|
|
|
|
|
# Computes the association between two sets of terms |
3
|
|
|
|
|
|
|
# using Direct association, which is the association |
4
|
|
|
|
|
|
|
# between sets A and C using direct co-occurrences |
5
|
1
|
|
|
1
|
|
12
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
6
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
546
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package UMLS::Association::Measures::Direct; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList |
12
|
|
|
|
|
|
|
# using direct association |
13
|
|
|
|
|
|
|
# Input: |
14
|
|
|
|
|
|
|
# $pairHashListRef - ref to an array of pairHashes |
15
|
|
|
|
|
|
|
# $matrixFileName - the fileName of the co-occurrence matrix |
16
|
|
|
|
|
|
|
#       $noOrder - 0 if order is enforced, 1 if not |
17
|
|
|
|
|
|
|
# Output: |
18
|
|
|
|
|
|
|
# \@statsList - ref to an array of \@stats, refs to arrays |
19
|
|
|
|
|
|
|
# containing the ordered values: n11, n1p, np1, npp |
20
|
|
|
|
|
|
|
# for each of the pair hashes. The index of the |
21
|
|
|
|
|
|
|
# \@statsList corresponds to the index of the pairHash |
22
|
|
|
|
|
|
|
# in the input $pairHashListRef |
23
|
|
|
|
|
|
|
sub getStats {
    # Arguments, in order:
    #   $pairHashListRef - ref to an array of pairHash refs
    #   $matrixFileName  - file name of the co-occurrence matrix
    #   $noOrder         - true when co-occurrence order is ignored
    #                      (the flag is passed through unchanged to
    #                      _statsFromAllCounts)
    my ($pairHashListRef, $matrixFileName, $noOrder) = @_;

    # Load the matrix once for the whole pair hash list. readInMatrix
    # returns a second value (assigned to $vocabSize in earlier versions
    # of this sub) which is not needed here, so only the first is kept.
    my ($matrixRef) = UMLS::Association::StatFinder::readInMatrix(
        $pairHashListRef, $matrixFileName
    );

    # Marginal totals (n1p, np1 per CUI) and the grand total (npp) are
    # computed a single time and shared by every pair hash below.
    my ($n1pRef, $np1Ref, $npp) = _getAllCounts($matrixRef);

    # Build the result as an array parallel to the input list: the entry
    # at index i is a ref to (n11, n1p, np1, npp) for the pair hash at
    # index i.
    my @statsList;
    for my $currentPairHashRef (@{$pairHashListRef}) {
        push @statsList,
            _statsFromAllCounts(
                $matrixRef, $n1pRef, $np1Ref,
                $npp,       $noOrder, $currentPairHashRef
            );
    }

    return \@statsList;
}
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Computes n1p, np1, and npp for every CUI in the subgraph |
52
|
|
|
|
|
|
|
# Input: |
53
|
|
|
|
|
|
|
# $subGraphRef - ref to the subgraph or matrix read in |
54
|
|
|
|
|
|
|
# Output: |
55
|
|
|
|
|
|
|
# \%n1p - ref to a hash{$cui}=n1p for that cui, order enforced |
56
|
|
|
|
|
|
|
# \%np1 - ref to a hash{$cui}=np1 for that cui, order enforced |
57
|
|
|
|
|
|
|
# $npp - npp for the subGraphRef |
58
|
|
|
|
|
|
|
sub _getAllCounts {
    # $subGraphRef is a two-level hash ref: {$cui1}{$cui2} = value.
    my ($subGraphRef) = @_;

    my %rowTotals;         # n1p: sum of values where the CUI is the first key
    my %colTotals;         # np1: sum of values where the CUI is the second key
    my $grandTotal = 0;    # npp: sum of every value in the sub graph

    # Visit every (first key, second key) cell exactly once and add its
    # value to the three running totals.
    for my $rowCui (keys %{$subGraphRef}) {
        for my $colCui (keys %{ $subGraphRef->{$rowCui} }) {
            my $cellValue = $subGraphRef->{$rowCui}{$colCui};

            $rowTotals{$rowCui} += $cellValue;
            $colTotals{$colCui} += $cellValue;
            $grandTotal         += $cellValue;
        }
    }

    # Returned as a three element list: \%n1p, \%np1, $npp
    return (\%rowTotals, \%colTotals, $grandTotal);
}
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Computes n11, n1p, np1,and npp for the pairHash using |
80
|
|
|
|
|
|
|
# the allCounts calculated from the _getAllCounts function |
81
|
|
|
|
|
|
|
# Input: |
82
|
|
|
|
|
|
|
# $subGraphRef - ref to the subgraph or matrix read in |
83
|
|
|
|
|
|
|
# $n1pRef - ref to a hash{$cui}=n1p for that cui, order enforced |
84
|
|
|
|
|
|
|
# $np1Ref - ref to a hash{$cui}=np1 for that cui, order enforced |
85
|
|
|
|
|
|
|
# $npp - npp for the subGraphRef |
86
|
|
|
|
|
|
|
# $noOrder - 0 if order is enforced, 1 if not |
87
|
|
|
|
|
|
|
# $pairHashRef - ref to a pairHash |
88
|
|
|
|
|
|
|
# Output: |
89
|
|
|
|
|
|
|
# \@stats - ref to an array of (n11,n1p,np1,npp) |
90
|
|
|
|
|
|
|
sub _statsFromAllCounts {
    # Computes (n11, n1p, np1, npp) for one pair hash.
    # Arguments, in order:
    #   $subGraphRef - ref to the matrix, {$cui1}{$cui2} = value
    #   $n1pRef      - ref to hash{$cui} = n1p for that cui (order enforced)
    #   $np1Ref      - ref to hash{$cui} = np1 for that cui (order enforced)
    #   $npp         - npp for the sub graph; returned unchanged
    #   $noOrder     - false if order is enforced, true if not
    #   $pairHashRef - ref to a hash with array refs under 'set1' and 'set2'
    # Returns a ref to the array (n11, n1p, np1, npp).
    # The matrix and the pair hash are only read, never modified.
    my ($subGraphRef, $n1pRef, $np1Ref, $npp, $noOrder, $pairHashRef) = @_;

    # A missing set is treated as an empty one
    my @set1 = @{ $pairHashRef->{'set1'} || [] };
    my @set2 = @{ $pairHashRef->{'set2'} || [] };

    #NOTE: finding N11 is the bottleneck
    ############ calculate n11
    # Rows are fetched into a lexical before being subscripted so that
    # CUIs absent from the matrix do not get empty rows created for them.
    my $n11 = 0;
    foreach my $key1 (@set1) {
        my $forwardRow = $subGraphRef->{$key1};
        foreach my $key2 (@set2) {
            #set1 -> set2 direction
            if (defined $forwardRow && defined $forwardRow->{$key2}) {
                $n11 += $forwardRow->{$key2};
            }
            #set2 -> set1 direction, only when order is ignored
            if ($noOrder) {
                my $reverseRow = $subGraphRef->{$key2};
                if (defined $reverseRow && defined $reverseRow->{$key1}) {
                    $n11 += $reverseRow->{$key1};
                }
            }
        }
    }
    #remove noorder double counts (nodes pointing at themselves)
    # A self loop is added twice above (once per direction) only when the
    # CUI is a member of BOTH sets, so only then is one copy removed.
    # Removing it for a CUI that is not in set2 would subtract a value
    # that was never added.
    # NOTE(review): distinct CUIs that appear in both sets still have the
    # edges between them counted twice - confirm whether that is intended.
    if ($noOrder) {
        my %inSet2 = map { $_ => 1 } @set2;
        foreach my $key (@set1) {
            next unless $inSet2{$key};
            my $row = $subGraphRef->{$key};
            if (defined $row && defined $row->{$key}) {
                $n11 -= $row->{$key};
            }
        }
    }

    ##################################
    ############## calculate n1p
    my $n1p = 0;
    foreach my $key1 (@set1) {
        if (defined $n1pRef->{$key1}) {
            $n1p += $n1pRef->{$key1};
        }
        #when order is ignored, occurrences as the second CUI count too
        if ($noOrder && defined $np1Ref->{$key1}) {
            $n1p += $np1Ref->{$key1};
        }
    }
    #remove noorder double counts
    # An edge with both ends in set1 was added once through n1p and once
    # through np1, so one copy is taken out again.
    if ($noOrder) {
        foreach my $key1 (@set1) {
            my $row = $subGraphRef->{$key1};
            next unless defined $row;
            foreach my $key2 (@set1) {
                if (defined $row->{$key2}) {
                    $n1p -= $row->{$key2};
                }
            }
        }
    }

    #####################################
    ############## calculate np1
    my $np1 = 0;
    foreach my $key2 (@set2) {
        if (defined $np1Ref->{$key2}) {
            $np1 += $np1Ref->{$key2};
        }
        #when order is ignored, occurrences as the first CUI count too
        if ($noOrder && defined $n1pRef->{$key2}) {
            $np1 += $n1pRef->{$key2};
        }
    }
    #remove noorder double counts (same reasoning as for n1p, within set2)
    if ($noOrder) {
        foreach my $key1 (@set2) {
            my $row = $subGraphRef->{$key1};
            next unless defined $row;
            foreach my $key2 (@set2) {
                if (defined $row->{$key2}) {
                    $np1 -= $row->{$key2};
                }
            }
        }
    }
    ##############################

    #pack and return the stats
    my @stats = ($n11, $n1p, $np1, $npp);
    return \@stats;
}
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
1; |