File Coverage

blib/lib/Text/NSP/Measures/2D/MI/ll.pm
Criterion Covered Total %
statement 24 25 96.0
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 32 34 94.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::MI::ll - Perl module that implements the Loglikelihood
4             measure of association for bigrams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::MI::ll;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $ll_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage();
22             }
23             else
24             {
25             print getStatisticName()." value for bigram is ".$ll_value;
26             }
27              
28             =head1 DESCRIPTION
29              
30             The log-likelihood ratio measures the deviation between the observed data
31             and what would be expected if <word1> and <word2> were independent. The
32             higher the score, the less evidence there is in favor of concluding that
33             the words are independent.
34              
35             Assume that the frequency count data associated with a bigram
36             <word1><word2> is as shown by a 2x2 contingency table:
37              
38                       word2   ~word2
39               word1    n11      n12 | n1p
40              ~word1    n21      n22 | n2p
41                        --------------
42                        np1      np2   npp
43              
44             where n11 is the number of times <word1><word2> occur together, and
45             n12 is the number of times <word1> occurs with some word other than
46             word2, and n1p is the number of times in total that word1 occurs as
47             the first word in a bigram.
48              
49             The expected values for the internal cells are calculated by taking the
50             product of their associated marginals and dividing by the sample size,
51             for example:
52              
53                        np1 * n1p
54                 m11 =  ---------
55                           npp
56              
57             Then the deviation between observed and expected values for each internal
58             cell is computed to arrive at the log-likelihood value.
59              
60             Log-Likelihood = 2 * [n11 * log(n11/m11) + n12 * log(n12/m12) +
61                                   n21 * log(n21/m21) + n22 * log(n22/m22)]
62              
63             =head2 Methods
64              
65             =over
66              
67             =cut
68              
69              
70             package Text::NSP::Measures::2D::MI::ll;
71              
72              
73 2     2   5507 use Text::NSP::Measures::2D::MI;
  2         5  
  2         679  
74 2     2   13 use strict;
  2         4  
  2         56  
75 2     2   11 use Carp;
  2         6  
  2         118  
76 2     2   12 use warnings;
  2         4  
  2         63  
77 2     2   9 no warnings 'redefine';
  2         4  
  2         610  
78             require Exporter;
79              
80             our ($VERSION, @EXPORT, @ISA);
81              
82             @ISA = qw(Exporter);
83              
84             @EXPORT = qw(initializeStatistic calculateStatistic
85             getErrorCode getErrorMessage getStatisticName);
86              
87             $VERSION = '0.97';
88              
89             =item calculateStatistic() - This method calculates the ll value
90              
91             INPUT PARAMS  : %values       .. Hash, passed as a key => value list,
92                                              containing the count values computed by the
93                                              count.pl program (e.g. n11, n1p, np1, npp).
94              
95             RETURN VALUES : $loglikelihood .. Loglikelihood value for this bigram.
96              
97             =cut
98              
99             sub calculateStatistic
100             {
101 31     31   6409 my %values = @_;
102              
103             # computes and sets the observed and expected values from
104             # the frequency combination values. returns 0 if there is an
105             # error in the computation or the values are inconsistent.
106 31 100       181 if( !Text::NSP::Measures::2D::MI::getValues(\%values) )
107             {
108 10         28 return;
109             }
110              
111             # Now for the actual calculation of Loglikelihood!
112 21         33 my $logLikelihood = 0;
113              
114             # don't want ($nxy / $mxy) to be 0 or less! flag error if so!
115 21         59 $logLikelihood += $n11 * Text::NSP::Measures::2D::MI::computePMI( $n11, $m11 );
116 21         123 $logLikelihood += $n12 * Text::NSP::Measures::2D::MI::computePMI( $n12, $m12 );
117 21         87 $logLikelihood += $n21 * Text::NSP::Measures::2D::MI::computePMI( $n21, $m21 );
118 21         53 $logLikelihood += $n22 * Text::NSP::Measures::2D::MI::computePMI( $n22, $m22 );
119              
120 21         157 return ( 2 * $logLikelihood );
121             }
122              
123              
124             =item getStatisticName() - Returns the name of this statistic
125              
126             INPUT PARAMS : none
127              
128             RETURN VALUES : $name .. Name of the measure.
129              
130             =cut
131              
132             sub getStatisticName
133             {
134 0     0     return "Log-likelihood";
135             }
136              
137              
138              
139             1;
140             __END__