File Coverage

blib/lib/Text/NSP/Measures/4D/MI/ll.pm
Criterion Covered Total %
statement 36 37 97.3
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 44 46 95.6


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::4D::MI::ll - Perl module that implements Loglikelihood
4             measure of association for 4-grams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9             use Text::NSP::Measures::4D::MI::ll;
10              
11             $ll_value = calculateStatistic(
12             n1111=>8,
13             n1ppp=>306,
14             np1pp=>83,
15             npp1p=>83,
16             nppp1=>57,
17             n11pp=>8,
18             n1p1p=>8,
19             n1pp1=>8,
20             np11p=>83,
21             np1p1=>56,
22             npp11=>56,
23             n111p=>8,
24             n11p1=>8,
25             n1p11=>8,
26             np111=>56,
27             npppp=>15180);
28              
29             if( ($errorCode = getErrorCode()))
30             {
31             print STDERR $errorCode." - ".getErrorMessage()."\n";
32             }
33             else
34             {
35             print getStatisticName." value for 4-gram is ".$ll_value."\n";
36             }
37              
38             =head1 DESCRIPTION
39              
40             The log-likelihood ratio measures the deviation between the observed data
41             and what would be expected if word1, word2, word3 and word4 were
42             independent. The higher the score, the less evidence there is in favor of
43             concluding that the words are independent.
44              
45             The expected values for the internal cells are calculated by taking the
46             product of their associated marginals and dividing by the sample size,
47             for example:
48              
49             n1ppp * np1pp * npp1p * nppp1
50             m1111= -------------------------------
51             npppp ^ 3
52              
53             Then the deviation between observed and expected values for each internal
54             cell is computed to arrive at the log-likelihood value.
55              
56             Log-Likelihood = 2 * [n1111 * log ( n1111 / m1111 ) + n1112 * log ( n1112 / m1112 ) +
57             n1121 * log ( n1121 / m1121 ) + n1122 * log ( n1122 / m1122 ) +
58             n1211 * log ( n1211 / m1211 ) + n1212 * log ( n1212 / m1212 ) +
59             n1221 * log ( n1221 / m1221 ) + n1222 * log ( n1222 / m1222 ) +
60             n2111 * log ( n2111 / m2111 ) + n2112 * log ( n2112 / m2112 ) +
61             n2121 * log ( n2121 / m2121 ) + n2122 * log ( n2122 / m2122 ) +
62             n2211 * log ( n2211 / m2211 ) + n2212 * log ( n2212 / m2212 ) +
63             n2221 * log ( n2221 / m2221 ) + n2222 * log ( n2222 / m2222 )];
64            
65             =head2 Methods
66              
67             =over
68              
69             =cut
70              
71              
72             package Text::NSP::Measures::4D::MI::ll;
73              
74              
75 1     1   1429 use Text::NSP::Measures::4D::MI;
  1         4  
  1         512  
76 1     1   5 use strict;
  1         1  
  1         30  
77 1     1   4 use Carp;
  1         1  
  1         49  
78 1     1   4 use warnings;
  1         18  
  1         22  
79 1     1   4 no warnings 'redefine';
  1         2  
  1         409  
80             require Exporter;
81              
82             our ($VERSION, @EXPORT, @ISA);
83              
84             @ISA = qw(Exporter);
85              
86             @EXPORT = qw(initializeStatistic calculateStatistic
87             getErrorCode getErrorMessage getStatisticName);
88              
89             $VERSION = '0.97';
90              
91             =item calculateStatistic(%count_values) - This method calculates
92             the ll value
93              
94             INPUT PARAMS : %count_values .. Hash (list of key => value pairs)
95             containing the count values computed by
96             the count.pl program.
97              
98             RETURN VALUES : $loglikelihood .. Loglikelihood value for this 4-gram.
99              
100             =cut
101              
102             sub calculateStatistic
103             {
                # Computes the log-likelihood score for one 4-gram from its
                # frequency counts, passed as a flat key => value list.
104 16     16   2754 my %values = @_;
105              
106             # getValues() computes and sets the observed and expected values from
107             # the frequency combination values. A false result is treated as an
108             # error or inconsistent counts, and this sub then returns nothing.
109 16 100       50 if( !(Text::NSP::Measures::4D::MI::getValues(\%values)) ) {
                # Bare return: undef in scalar context, empty list in list context.
110 15         31 return;
111             }
112              
113             # Running sum of n * computePMI(n, m) over the 16 cells.
114 1         2 my $logLikelihood = 0;
115              
116            
117             # Each term pairs an observed cell count $nXXXX with its expected value $mXXXX.
                # NOTE(review): these variables are not declared in this sub; presumably
                # they are exported by Text::NSP::Measures::4D::MI and set by getValues()
                # - confirm.
                # NOTE(review): no check that ($nxy / $mxy) is positive is visible here;
                # presumably computePMI() handles that case - confirm.
118 1         5 $logLikelihood += $n1111 * Text::NSP::Measures::4D::MI::computePMI ( $n1111, $m1111 );
119 1         3 $logLikelihood += $n1112 * Text::NSP::Measures::4D::MI::computePMI ( $n1112, $m1112 );
120 1         3 $logLikelihood += $n1121 * Text::NSP::Measures::4D::MI::computePMI ( $n1121, $m1121 );
121 1         4 $logLikelihood += $n1122 * Text::NSP::Measures::4D::MI::computePMI ( $n1122, $m1122 );
122 1         4 $logLikelihood += $n1211 * Text::NSP::Measures::4D::MI::computePMI ( $n1211, $m1211 );
123 1         8 $logLikelihood += $n1212 * Text::NSP::Measures::4D::MI::computePMI ( $n1212, $m1212 );
124 1         4 $logLikelihood += $n1221 * Text::NSP::Measures::4D::MI::computePMI ( $n1221, $m1221 );
125 1         3 $logLikelihood += $n1222 * Text::NSP::Measures::4D::MI::computePMI ( $n1222, $m1222 );
126 1         4 $logLikelihood += $n2111 * Text::NSP::Measures::4D::MI::computePMI ( $n2111, $m2111 );
127 1         3 $logLikelihood += $n2112 * Text::NSP::Measures::4D::MI::computePMI ( $n2112, $m2112 );
128 1         3 $logLikelihood += $n2121 * Text::NSP::Measures::4D::MI::computePMI ( $n2121, $m2121 );
129 1         3 $logLikelihood += $n2122 * Text::NSP::Measures::4D::MI::computePMI ( $n2122, $m2122 );
130 1         9 $logLikelihood += $n2211 * Text::NSP::Measures::4D::MI::computePMI ( $n2211, $m2211 );
131 1         3 $logLikelihood += $n2212 * Text::NSP::Measures::4D::MI::computePMI ( $n2212, $m2212 );
132 1         3 $logLikelihood += $n2221 * Text::NSP::Measures::4D::MI::computePMI ( $n2221, $m2221 );
133 1         3 $logLikelihood += $n2222 * Text::NSP::Measures::4D::MI::computePMI ( $n2222, $m2222 );
                # The reported statistic is twice the accumulated sum.
134 1         5 return ( 2 * $logLikelihood );
135             }
136              
137              
138             =item getStatisticName() - Returns the name of this statistic
139              
140             INPUT PARAMS : none
141              
142             RETURN VALUES : $name .. Name of the measure.
143              
144             =cut
145              
146             sub getStatisticName
147             {
                # Returns the fixed display name of this measure; takes no arguments.
148 0     0     return "Loglikelihood";
149             }
150              
151              
152              
153             1;
154             __END__