File Coverage

blib/lib/Text/NSP/Measures/3D/MI/ll.pm

Criterion	Covered	Total	%
statement	28	29	96.5
branch	2	2	100.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	36	38	94.7

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::3D::MI::ll - Perl module that implements Loglikelihood
4					measure of association for trigrams.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::3D::MI::ll;
11
12					$ll_value = calculateStatistic( n111=>10,
13					n1pp=>40,
14					np1p=>45,
15					npp1=>42,
16					n11p=>20,
17					n1p1=>23,
18					np11=>21,
19					nppp=>100);
20
21					if( ($errorCode = getErrorCode()))
22					{
23					print STDERR $errorCode." - ".getErrorMessage()."\n";
24					}
25					else
26					{
27					print getStatisticName()." value for trigram is ".$ll_value."\n";
28					}
29
30
31					=head1 DESCRIPTION
32
33					The log-likelihood ratio measures the deviation between the observed data
34					and what would be expected if word1, word2 and word3 were independent.
35					The higher the score, the less evidence there is in favor of concluding that
36					the words are independent.
37
38					The expected values for the internal cells are calculated by taking the
39					product of their associated marginals and dividing by the square of the
40					sample size, for example:
41
42					              n1pp * np1p * npp1
43					       m111 = --------------------
44					                    nppp^2
45
46					Then the deviation between observed and expected values for each internal
47					cell is computed to arrive at the log-likelihood value.
48
49					Log-Likelihood = 2 * [n111 * log(n111/m111) + n112 * log(n112/m112) +
50					n121 * log(n121/m121) + n122 * log(n122/m122) +
51					n211 * log(n211/m211) + n212 * log(n212/m212) +
52					n221 * log(n221/m221) + n222 * log(n222/m222)]
53
54					=over
55
56					=cut
57
58
59					package Text::NSP::Measures::3D::MI::ll;
60
61
62	2		2	2678	use Text::NSP::Measures::3D::MI;
	2			5
	2			584
63	2		2	10	use strict;
	2			3
	2			36
64	2		2	10	use Carp;
	2			2
	2			104
65	2		2	9	use warnings;
	2			4
	2			45
66	2		2	8	no warnings 'redefine';
	2			3
	2			705
67					require Exporter;
68
69					our ($VERSION, @EXPORT, @ISA);
70
71					@ISA = qw(Exporter);
72
73					@EXPORT = qw(initializeStatistic calculateStatistic
74					getErrorCode getErrorMessage getStatisticName);
75
76					$VERSION = '0.97';
77
78					=item calculateStatistic(%count_values) - This method calculates
79					the ll value
80
81					INPUT PARAMS : %count_values .. Hash (list of key => value pairs, e.g. n111=>10) containing
82					the count values computed by the
83					count.pl program.
84
85					RETURN VALUES : $loglikelihood .. Loglikelihood value for this trigram. Returns nothing if getValues() rejects the counts.
86
87					=cut
88
89					sub calculateStatistic
90					{
91	19		19	340	my %values = @_;
92
93					# computes and sets the observed and expected values from
94					# the frequency combination values. returns 0 if there is an
95					# error in the computation or the values are inconsistent.
96	19	100		47	if( !(Text::NSP::Measures::3D::MI::getValues(\%values)) ) {
97	15			31	return;
98					}
99
100					# Now for the actual calculation of Loglikelihood!
101	4			7	my $logLikelihood = 0;
102
103					# we don't want ($nxy / $mxy) to be 0 or less; flag an error if so.
104	4			12	$logLikelihood += $n111 * Text::NSP::Measures::3D::MI::computePMI( $n111, $m111 );
105	4			12	$logLikelihood += $n112 * Text::NSP::Measures::3D::MI::computePMI( $n112, $m112 );
106	4			12	$logLikelihood += $n121 * Text::NSP::Measures::3D::MI::computePMI( $n121, $m121 );
107	4			11	$logLikelihood += $n122 * Text::NSP::Measures::3D::MI::computePMI( $n122, $m122 );
108	4			17	$logLikelihood += $n211 * Text::NSP::Measures::3D::MI::computePMI( $n211, $m211 );
109	4			12	$logLikelihood += $n212 * Text::NSP::Measures::3D::MI::computePMI( $n212, $m212 );
110	4			11	$logLikelihood += $n221 * Text::NSP::Measures::3D::MI::computePMI( $n221, $m221 );
111	4			11	$logLikelihood += $n222 * Text::NSP::Measures::3D::MI::computePMI( $n222, $m222 );
112
113	4			14	return ( 2 * $logLikelihood );
114					}
115
116
117					=item getStatisticName() - Returns the name of this statistic
118
119					INPUT PARAMS : none
120
121					RETURN VALUES : $name .. Name of the measure.
122
123					=cut
124
125					sub getStatisticName
126					{
127	0		0		return "Loglikelihood";
128					}
129
130
131
132					1;
133					__END__