line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::SBC |
2
|
|
|
|
|
|
|
# Computes the shared B to C set association (SBC) between two sets of terms |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# SBC works by first finding the set of linking terms for the A terms |
5
|
|
|
|
|
|
|
# and C terms to form sets B_A and B_C. It then finds the overlap |
6
|
|
|
|
|
|
|
# between these sets, the set of shared B terms, B_S. It then finds |
7
|
|
|
|
|
|
|
# the dirst association between sets B_S and C |
8
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
9
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
167
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package UMLS::Association::Measures::SBC; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList |
14
|
|
|
|
|
|
|
# using shared B to C association (SBC) |
15
|
|
|
|
|
|
|
# Input: |
16
|
|
|
|
|
|
|
# $pairHashListRef - ref to an array of pairHashes |
17
|
|
|
|
|
|
|
# $matrixFileName - the fileName of the co-occurrence matrix |
18
|
|
|
|
|
|
|
# $noOrder - 1 if order is enforced, 0 if not |
19
|
|
|
|
|
|
|
# Output: |
20
|
|
|
|
|
|
|
# \@statsList - ref to an array of \@stats, refs to arrays |
21
|
|
|
|
|
|
|
# containing the ordered values: n11, n1p, np1, npp |
22
|
|
|
|
|
|
|
# for each of the pair hashes. The index of the |
23
|
|
|
|
|
|
|
# \@statsList corresponds to the index of the pairHash |
24
|
|
|
|
|
|
|
# in the input $pairHashListRef |
25
|
|
|
|
|
|
|
sub getStats { |
26
|
4
|
|
|
4
|
0
|
3
|
my $pairHashListRef = shift; |
27
|
4
|
|
|
|
|
4
|
my $matrixFileName = shift; |
28
|
4
|
|
|
|
|
4
|
my $noOrder = shift; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#convert the pairHashes to linking term pairHashes |
31
|
4
|
|
|
|
|
12
|
my $linkingTermsPairHashListRef = &UMLS::Association::StatFinder::getLinkingTermsPairHashList( |
32
|
|
|
|
|
|
|
$pairHashListRef, $matrixFileName, $noOrder); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#find the overlapping linking terms, and set |
35
|
|
|
|
|
|
|
# the pairHashes to shared B (overlapping linking terms) |
36
|
|
|
|
|
|
|
# to C (original set 2 of the pair hash) |
37
|
4
|
|
|
|
|
5
|
my @sharedBToCPairHashList = (); |
38
|
4
|
|
|
|
|
5
|
my $start = time(); |
39
|
4
|
|
|
|
|
4
|
for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) { |
|
10
|
|
|
|
|
15
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
#grab terms from sets 1 and 2 of this pair hash |
42
|
6
|
|
|
|
|
5
|
my %set1Terms = (); |
43
|
6
|
|
|
|
|
7
|
foreach my $cui (@{${${$linkingTermsPairHashListRef}[$i]}{'set1'}}) { |
|
6
|
|
|
|
|
5
|
|
|
6
|
|
|
|
|
3
|
|
|
6
|
|
|
|
|
11
|
|
44
|
16
|
|
|
|
|
15
|
$set1Terms{$cui} = 1; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#find the overlapping B terms and save as an array |
48
|
6
|
|
|
|
|
5
|
my @sharedBTerms = (); |
49
|
6
|
|
|
|
|
6
|
foreach my $cui (@{${${$linkingTermsPairHashListRef}[$i]}{'set2'}}) { |
|
6
|
|
|
|
|
3
|
|
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
8
|
|
50
|
20
|
100
|
|
|
|
23
|
if (exists $set1Terms{$cui}) { |
51
|
10
|
|
|
|
|
11
|
push @sharedBTerms, $cui; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
#create and save the pair hash |
56
|
6
|
|
|
|
|
5
|
my %pairHash = (); |
57
|
6
|
|
|
|
|
9
|
$pairHash{'set1'} = \@sharedBTerms; |
58
|
6
|
|
|
|
|
5
|
$pairHash{'set2'} = ${${$pairHashListRef}[$i]}{'set2'}; |
|
6
|
|
|
|
|
5
|
|
|
6
|
|
|
|
|
8
|
|
59
|
6
|
|
|
|
|
7
|
push @sharedBToCPairHashList, \%pairHash; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
#Compute and return the direct association for shared |
63
|
|
|
|
|
|
|
# B to C set associations |
64
|
4
|
|
|
|
|
7
|
return &UMLS::Association::Measures::Direct::getStats(\@sharedBToCPairHashList, $matrixFileName, $noOrder); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
1; |