File Coverage

blib/lib/UMLS/Association/Measures/LTA.pm
Criterion Covered Total %
statement 41 41 100.0
branch 2 2 100.0
condition n/a
subroutine 4 4 100.0
pod 0 1 0.0
total 47 48 97.9


line stmt bran cond sub pod time code
1             #UMLS::Association::Measures::LTA
2             # Computes the Linking Term Association (LTA) between two sets of terms.
3             #
4             # LTA works by first finding the sets of linking terms for the A terms
5             # and C terms to form sets B_A and B_C. It then uses these sets to
6             # compute N11, the count of unique shared linking terms; N1P, the count
7             # of unique terms in B_A; NP1, the count of unique terms in B_C; and NPP,
8             # the total number of unique terms in the dataset (the vocabulary size).
9             # The association is then found using these counts.
10 1     1   4 use strict;
  1         1  
  1         19  
11 1     1   3 use warnings;
  1         1  
  1         227  
12             package UMLS::Association::Measures::LTA;
13              
14             # Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList
15             # using linking term association (LTA)
16             # Input:
17             # $pairHashListRef - ref to an array of pairHashes
18             # $matrixFileName - the fileName of the co-occurrence matrix
19             # $noOrder - 1 if order is ignored, 0 if order is enforced
20             # Output:
21             # \@statsList - ref to an array of \@stats, refs to arrays
22             # containing the ordered values: n11, n1p, np1, npp
23             # for each of the pair hashes. The index of the
24             # \@statsList corresponds to the index of the pairHash
25             # in the input $pairHashListRef
26             sub getStats {
27 4     4 0 5 my $pairHashListRef = shift;
28 4         4 my $matrixFileName = shift;
29 4         4 my $noOrder = shift;
30            
31             # get the linking term info
32 4         6 my ($n1pRef, $np1Ref, $npp, $matrixRef, $linkingPairHashListRef) = &UMLS::Association::StatFinder::getLinkingTermsPairHashList($pairHashListRef, $matrixFileName, $noOrder, 1, 1);
33            
34             #compute n11,n1p,np1, and npp for all pair hashes
35             # and place into the statsList, a parallel array
36             # of stats for that pair hash
37 4         5 my @statsList = ();
38 4         5 for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) {
  10         19  
39 6         7 my $pairHashRef = ${$pairHashListRef}[$i];
  6         6  
40 6         6 my $linkingPairHashRef = ${$linkingPairHashListRef}[$i];
  6         5  
41 6         15 push @statsList, &_statsFromAllLinkingInfo($pairHashRef, $linkingPairHashRef, $npp);
42             }
43              
44             #return the stats list, an array of array refs
45             # each array ref contains four values:
46             # n11, n1p, np1, and npp for the pair hash at
47             # the corresponding index in the pairHashList
48 4         18 return \@statsList;
49             }
50              
51              
52             # Gets stats (n11,n1p,np1,npp) for a single pairHash using the
53             # precomputed linkingPairHash (from StatFinder::getLinkingTermsPairHashList)
54             # Input:
55             # $pairHashRef - ref to the pairHash
56             # $linkingPairHashRef - ref to the linking terms pair hash for this pairHash
57             # $npp - the npp value returned by StatFinder::getLinkingTermsPairHashList
58             # Output:
59             # \@stats - ref to an array of (n11,n1p,np1,npp)
60             sub _statsFromAllLinkingInfo {
61 6     6   6 my $pairHashRef = shift;
62 6         5 my $linkingPairHashRef = shift;
63 6         6 my $npp = shift;
64              
65             ##################################
66             ############## calculate n11
67             #find n11, the count of shared linking terms
68             # NOTE: noorder is taken care of when constructing the linking set
69 6         4 my $n11 = 0;
70             #Find the B to C linking terms
71 6         5 my %bToCLinkingTerms = ();
72 6         5 foreach my $key (@{${$linkingPairHashRef}{'set2'}}) {
  6         6  
  6         7  
73 20         21 $bToCLinkingTerms{$key} = 1;
74             }
75             #iterate over all A to B terms and increment for each
76             # term that is also a B to C shared linking term
77 6         5 foreach my $key (@{${$linkingPairHashRef}{'set1'}}) {
  6         5  
  6         6  
78 16 100       17 if (defined $bToCLinkingTerms{$key}) {
79 10         10 $n11++;
80             }
81             }
82              
83             ##################################
84             ############## calculate n1p and np1
85 6         6 my $n1p = scalar @{${$linkingPairHashRef}{'set1'}};
  6         6  
  6         6  
86 6         6 my $np1 = scalar @{${$linkingPairHashRef}{'set2'}};
  6         5  
  6         6  
87              
88             ##############################
89             #pack and return the stats
90 6         7 my @stats = ($n11, $n1p, $np1, $npp);
91 6         11 return \@stats;
92             }
93              
94             1;
95