line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::LTA |
2
|
|
|
|
|
|
|
# Computes the Linking Term Association (LTA) between two sets of terms. |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# LTA works by first finding the sets of linking terms for the A terms |
5
|
|
|
|
|
|
|
# and C terms to form sets B_A and B_C. It then uses these sets to |
6
|
|
|
|
|
|
|
# compute N11 - the count of unique shared linking terms, N1P, the count |
7
|
|
|
|
|
|
|
# of unique terms in B_A, NP1, the count of unique terms in B_C, and NPP, |
8
|
|
|
|
|
|
|
# the total number of unique terms in the dataset (the vocabulary size). |
9
|
|
|
|
|
|
|
# The association is then found using these counts. |
10
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
19
|
|
11
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
227
|
|
12
|
|
|
|
|
|
|
package UMLS::Association::Measures::LTA; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList |
15
|
|
|
|
|
|
|
# using linking term association (LTA) |
16
|
|
|
|
|
|
|
# Input: |
17
|
|
|
|
|
|
|
# $pairHashListRef - ref to an array of pairHashes |
18
|
|
|
|
|
|
|
# $matrixFileName - the fileName of the co-occurrence matrix |
19
|
|
|
|
|
|
|
#  $noOrder - 1 if term order is ignored (not enforced), 0 if order is enforced |
20
|
|
|
|
|
|
|
# Output: |
21
|
|
|
|
|
|
|
# \@statsList - ref to an array of \@stats, refs to arrays |
22
|
|
|
|
|
|
|
# containing the ordered values: n11, n1p, np1, npp |
23
|
|
|
|
|
|
|
# for each of the pair hashes. The index of the |
24
|
|
|
|
|
|
|
# \@statsList corresponds to the index of the pairHash |
25
|
|
|
|
|
|
|
# in the input $pairHashListRef |
26
|
|
|
|
|
|
|
sub getStats {
    # Computes the LTA contingency counts (n11, n1p, np1, npp) for every
    # pair hash in the input list.
    # Input:
    #  $pairHashListRef - ref to an array of pairHashes
    #  $matrixFileName - the fileName of the co-occurrence matrix
    #  $noOrder - ordering flag, passed through unchanged to StatFinder
    # Output:
    #  \@statsList - ref to an array of array refs, one per pair hash and
    #                in the same order as the input list; each holds
    #                (n11, n1p, np1, npp)
    my $pairHashListRef = shift;
    my $matrixFileName = shift;
    my $noOrder = shift;

    # Get the linking term info. The call returns five values; only the
    # third (npp) and the fifth (the list of linking-term pair hashes,
    # parallel to the input list) are needed here, so the rest are skipped.
    my (undef, undef, $npp, undef, $linkingPairHashListRef)
        = UMLS::Association::StatFinder::getLinkingTermsPairHashList(
            $pairHashListRef, $matrixFileName, $noOrder, 1, 1);

    # Build the stats list as a parallel array: entry $i holds the stats
    # of the pair hash at index $i of the input list.
    my @statsList = ();
    my $pairCount = scalar @{$pairHashListRef};
    foreach my $i (0 .. $pairCount - 1) {
        push @statsList, _statsFromAllLinkingInfo(
            ${$pairHashListRef}[$i],
            ${$linkingPairHashListRef}[$i],
            $npp);
    }

    return \@statsList;
}
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for a single pairHash using the |
53
|
|
|
|
|
|
|
# precomputed linkingPairHash (from StatFinder::getLinkingTermsPairHashList) |
54
|
|
|
|
|
|
|
# Input: |
55
|
|
|
|
|
|
|
#  $pairHashRef - ref to a pairHash |
56
|
|
|
|
|
|
|
# $linkingPairHashRef - ref to the linking terms pair hash for this pairHash |
57
|
|
|
|
|
|
|
# $npp - npp for the subGraphRef |
58
|
|
|
|
|
|
|
# Output: |
59
|
|
|
|
|
|
|
# \@stats - ref to an array of (n11,n1p,np1,npp) |
60
|
|
|
|
|
|
|
sub _statsFromAllLinkingInfo {
    # Computes (n11, n1p, np1, npp) for a single pair hash from its
    # precomputed linking-term pair hash.
    # Input:
    #  $pairHashRef - ref to the pairHash (not read here; kept so the
    #                 calling convention stays unchanged)
    #  $linkingPairHashRef - ref to a hash whose 'set1' and 'set2' entries
    #                        are refs to arrays of linking terms
    #  $npp - npp value, returned unchanged as the fourth stat
    # Output:
    #  \@stats - ref to an array of (n11, n1p, np1, npp)
    my $pairHashRef = shift;
    my $linkingPairHashRef = shift;
    my $npp = shift;

    # Read the two linking-term lists without autovivifying missing keys
    # in the caller's hash; a missing set is treated as empty.
    my $aLinkingTermsRef = ${$linkingPairHashRef}{'set1'} || [];
    my $cLinkingTermsRef = ${$linkingPairHashRef}{'set2'} || [];

    # Collapse each list to its unique terms so that a repeated term is
    # counted once in every statistic. This is a no-op when the lists are
    # already free of duplicates.
    # NOTE: noorder is taken care of when constructing the linking set
    my %uniqueATerms = map { ($_ => 1) } @{$aLinkingTermsRef};
    my %uniqueCTerms = map { ($_ => 1) } @{$cLinkingTermsRef};

    # n11: number of unique linking terms present in both sets
    my $n11 = scalar(grep { exists $uniqueCTerms{$_} } keys %uniqueATerms);

    # n1p / np1: number of unique linking terms in set1 / set2
    my $n1p = scalar(keys %uniqueATerms);
    my $np1 = scalar(keys %uniqueCTerms);

    # pack and return the stats
    my @stats = ($n11, $n1p, $np1, $npp);
    return \@stats;
}
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1; |
95
|
|
|
|
|
|
|
|