File Coverage

blib/lib/Text/NSP/Measures/4D/MI/ll.pm

Criterion	Covered	Total	%
statement	36	37	97.3
branch	2	2	100.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	44	46	95.6

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::4D::MI::ll - Perl module that implements Loglikelihood
4					measure of association for 4-grams.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9					use Text::NSP::Measures::4D::MI::ll;
10
11					$ll_value = calculateStatistic(
12					n1111=>8,
13					n1ppp=>306,
14					np1pp=>83,
15					npp1p=>83,
16					nppp1=>57,
17					n11pp=>8,
18					n1p1p=>8,
19					n1pp1=>8,
20					np11p=>83,
21					np1p1=>56,
22					npp11=>56,
23					n111p=>8,
24					n11p1=>8,
25					n1p11=>8,
26					np111=>56,
27					npppp=>15180);
28
29					if( ($errorCode = getErrorCode()))
30					{
31					print STDERR $errorCode." - ".getErrorMessage()."\n";
32					}
33					else
34					{
35					print getStatisticName()." value for 4-gram is ".$ll_value."\n";
36					}
37
38					=head1 DESCRIPTION
39
40					The log-likelihood ratio measures the deviation between the observed data
41					and what would be expected if word1, word2, word3 and word4 were
42					independent. The higher the score, the less evidence there is in favor of
43					concluding that the words are independent.
44
45					The expected values for the internal cells are calculated by taking the
46					product of their associated marginals and dividing by the sample size,
47					for example:
48
49					        n1ppp * np1pp * npp1p * nppp1
50					m1111 = -------------------------------
51					                  npppp ^ 3
52
53					Then the deviation between observed and expected values for each internal
54					cell is computed to arrive at the log-likelihood value.
55
56					Log-Likelihood = 2 * [n1111 * log ( n1111 / m1111 ) + n1112 * log ( n1112 / m1112 ) +
57					n1121 * log ( n1121 / m1121 ) + n1122 * log ( n1122 / m1122 ) +
58					n1211 * log ( n1211 / m1211 ) + n1212 * log ( n1212 / m1212 ) +
59					n1221 * log ( n1221 / m1221 ) + n1222 * log ( n1222 / m1222 ) +
60					n2111 * log ( n2111 / m2111 ) + n2112 * log ( n2112 / m2112 ) +
61					n2121 * log ( n2121 / m2121 ) + n2122 * log ( n2122 / m2122 ) +
62					n2211 * log ( n2211 / m2211 ) + n2212 * log ( n2212 / m2212 ) +
63					n2221 * log ( n2221 / m2221 ) + n2222 * log ( n2222 / m2222 )];
64
65					=head2 Methods
66
67					=over
68
69					=cut
70
71
72					package Text::NSP::Measures::4D::MI::ll;
73
74
75	1		1	3030	use Text::NSP::Measures::4D::MI;
	1			3
	1			591
76	1		1	5	use strict;
	1			2
	1			31
77	1		1	5	use Carp;
	1			2
	1			56
78	1		1	6	use warnings;
	1			25
	1			30
79	1		1	4	no warnings 'redefine';
	1			1
	1			2652
80					require Exporter;
81
82					our ($VERSION, @EXPORT, @ISA);
83
84					@ISA = qw(Exporter);
85
86					@EXPORT = qw(initializeStatistic calculateStatistic
87					getErrorCode getErrorMessage getStatisticName);
88
89					$VERSION = '0.97';
90
91					=item calculateStatistic(%count_values) - This method calculates
92					the ll value
93
94					INPUT PARAMS : %count_values .. Hash (passed as a key/value list)
95					containing the count values computed by the
96					count.pl program.
97
98					RETURN VALUES : $loglikelihood .. Loglikelihood value for this 4-gram.
99
100					=cut
101
102					sub calculateStatistic
103					{
104	16		16	11150	my %values = @_;
105
106					# getValues() is handed a reference to the named counts; per the original
107					# note it computes and sets the observed and expected values. A false
108					# result (computation error or inconsistent counts) makes us return nothing.
109	16	100		64	if( !(Text::NSP::Measures::4D::MI::getValues(\%values)) ) {
110	15			116	return;
111					}
112
113					# Accumulate observed * computePMI(observed, expected) over all 16 cells.
114	1			3	my $logLikelihood = 0;
115
116
117					# NOTE(review): a non-positive (nxy / mxy) ratio is presumably flagged inside computePMI -- confirm.
118	1			11	$logLikelihood += $n1111 * Text::NSP::Measures::4D::MI::computePMI ( $n1111, $m1111 );
119	1			4	$logLikelihood += $n1112 * Text::NSP::Measures::4D::MI::computePMI ( $n1112, $m1112 );
120	1			5	$logLikelihood += $n1121 * Text::NSP::Measures::4D::MI::computePMI ( $n1121, $m1121 );
121	1			4	$logLikelihood += $n1122 * Text::NSP::Measures::4D::MI::computePMI ( $n1122, $m1122 );
122	1			5	$logLikelihood += $n1211 * Text::NSP::Measures::4D::MI::computePMI ( $n1211, $m1211 );
123	1			4	$logLikelihood += $n1212 * Text::NSP::Measures::4D::MI::computePMI ( $n1212, $m1212 );
124	1			4	$logLikelihood += $n1221 * Text::NSP::Measures::4D::MI::computePMI ( $n1221, $m1221 );
125	1			4	$logLikelihood += $n1222 * Text::NSP::Measures::4D::MI::computePMI ( $n1222, $m1222 );
126	1			4	$logLikelihood += $n2111 * Text::NSP::Measures::4D::MI::computePMI ( $n2111, $m2111 );
127	1			16	$logLikelihood += $n2112 * Text::NSP::Measures::4D::MI::computePMI ( $n2112, $m2112 );
128	1			4	$logLikelihood += $n2121 * Text::NSP::Measures::4D::MI::computePMI ( $n2121, $m2121 );
129	1			4	$logLikelihood += $n2122 * Text::NSP::Measures::4D::MI::computePMI ( $n2122, $m2122 );
130	1			4	$logLikelihood += $n2211 * Text::NSP::Measures::4D::MI::computePMI ( $n2211, $m2211 );
131	1			4	$logLikelihood += $n2212 * Text::NSP::Measures::4D::MI::computePMI ( $n2212, $m2212 );
132	1			3	$logLikelihood += $n2221 * Text::NSP::Measures::4D::MI::computePMI ( $n2221, $m2221 );
133	1			5	$logLikelihood += $n2222 * Text::NSP::Measures::4D::MI::computePMI ( $n2222, $m2222 );
134	1			6	return ( 2 * $logLikelihood );
135					}
136
137
138					=item getStatisticName() - Returns the name of this statistic
139
140					INPUT PARAMS : none
141
142					RETURN VALUES : $name .. Name of the measure.
143
144					=cut
145
146					sub getStatisticName
147					{
148	0		0		return "Loglikelihood";  # fixed display name of this measure; takes no arguments
149					}
150
151
152
153					1;
154					__END__