File Coverage

blib/lib/Text/NSP/Measures/2D/MI.pm

Criterion	Covered	Total	%
statement	37	62	59.6
branch	22	32	68.7
condition			n/a
subroutine	6	6	100.0
pod	2	2	100.0
total	67	102	65.6

line	stmt	bran	sub	pod	time	code
1						=head1 NAME
2
3						Text::NSP::Measures::2D::MI - Perl module that provides error checks
4						for Loglikelihood, Total Mutual
5						Information, Pointwise Mutual Information
6						and Poisson-Stirling Measure.
7
8						=head1 SYNOPSIS
9
10						=head3 Basic Usage
11
12						use Text::NSP::Measures::2D::MI::ll;
13
14						my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
15
16						$ll_value = calculateStatistic( n11=>$n11,
17						n1p=>$n1p,
18						np1=>$np1,
19						npp=>$npp);
20
21						if( ($errorCode = getErrorCode()))
22						{
23						print STDERR $errorCode." - ".getErrorMessage()."\n";
24						}
25						else
26						{
27						print getStatisticName()." value for bigram is ".$ll_value."\n";
28						}
29
30						=head1 DESCRIPTION
31
32						This module is the base class for the Loglikelihood, Total Mutual
33						Information, Pointwise Mutual Information and Poisson-Stirling
34						measures. All these measures are similar. This module provides error
35						checks specific to these measures; it also implements the
36						computations that are common to them.
37
38						=over
39
40						=item Log-Likelihood measure is computed as
41
42						Log-Likelihood = 2 * [n11 * log(n11/m11) + n12 * log(n12/m12) +
43						n21 * log(n21/m21) + n22 * log(n22/m22)]
44
45						=item Total Mutual Information
46
47						TMI = (1/npp) * [n11 * log(n11/m11)/log 2 + n12 * log(n12/m12)/log 2 +
48						n21 * log(n21/m21)/log 2 + n22 * log(n22/m22)/log 2]
49
50						=item Pointwise Mutual Information
51
52						PMI = log (n11/m11)/log 2
53
54						=item Poisson-Stirling Measure
55
56						PS = n11*(log (n11/m11)-1)
57
58						=back
59
60						All these measures use the ratio of the observed values to the expected
61						values in their computations and thus share common error checks, so they
62						have been grouped together.
63
64						=head2 Methods
65
66						=over
67
68						=cut
69
70
71						package Text::NSP::Measures::2D::MI;
72
73
74	6		6		3666	use Text::NSP::Measures::2D;
	6				13
	6				1269
75	6		6		31	use strict;
	6				8
	6				119
76	6		6		30	use Carp;
	6				11
	6				296
77	6		6		28	use warnings;
	6				15
	6				2704
78						# use subs(calculateStatistic);
79						require Exporter;
80
81						our ($VERSION, @EXPORT, @ISA);
82
83						@ISA = qw(Exporter);
84
85						@EXPORT = qw(initializeStatistic calculateStatistic
86						getErrorCode getErrorMessage getStatisticName
87						$errorCodeNumber $errorMessage
88						$n11 $n12 $n21 $n22 $m11 $m12 $m21 $m22
89						$npp $np1 $np2 $n2p $n1p);
90
91						$VERSION = '1.03';
92
93
94						=item getValues() - This method calls the computeMarginalTotals(),
95						computeObservedValues() and the computeExpectedValues() methods to
96						compute the observed and expected values. It checks these values for
97						any errors that might cause the Loglikelihood, TMI & PMI measures to
98						fail.
99
100
101						INPUT PARAMS : $count_values .. Reference to a hash containing
102						the count values computed by the
103						count.pl program.
104
105
106						RETURN VALUES : 1/undef ..returns '1' to indicate success
107						and an undefined(NULL) value to indicate
108						failure.
109
110						=cut
111
112						sub getValues
113						{
						# Validates the counts in $values before a measure is computed.
						# Returns 1 on success; every failure path is a bare return.
						# NOTE(review): $n11..$n22, $m11..$m22, $errorMessage and
						# $errorCodeNumber are not declared in this sub; presumably they are
						# package variables imported from Text::NSP::Measures::2D and filled
						# in by the three helpers called below - confirm.
114	98		98	1	127	my ($values)=@_;
115
						# Each helper signals failure with a false value; stop at the first
						# one that fails. This sub sets no error code itself on these paths.
116	98	100			310	if(!(Text::NSP::Measures::2D::computeMarginalTotals($values)) ){
117	20				57	return;
118						}
119
120	78	100			313	if( !(Text::NSP::Measures::2D::computeObservedValues($values)) ) {
121	20				62	return;
122						}
123
124	58	50			140	if( !(Text::NSP::Measures::2D::computeExpectedValues($values)) ) {
125	0				0	return;
126						}
127
128						# The ratio ($nxy / $mxy) must be computable: for every non-zero observed count the expected value must not be zero. Flag error 211 and return if it is.
129	58	50			156	if ( $n11 )
130						{
131	58	50			142	if ($m11 == 0)
132						{
133	0				0	$errorMessage = "Expected value in cell (1,1) must not be zero";
134	0				0	$errorCodeNumber = 211;
135	0				0	return;
136						}
137						}
138	58	100			128	if ( $n12 )
139						{
140	55	50			119	if ($m12 == 0)
141						{
142	0				0	$errorMessage = "Expected value in cell (1,2) must not be zero";
143	0				0	$errorCodeNumber = 211;
144	0				0	return;
145						}
146						}
147	58	100			121	if ( $n21 )
148						{
149	57	50			115	if ($m21 == 0)
150						{
151	0				0	$errorMessage = "Expected value in cell (2,1) must not be zero";
152	0				0	$errorCodeNumber = 211;
153	0				0	return;
154						}
155						}
156	58	100			116	if ( $n22 )
157						{
158	53	50			107	if ($m22 == 0)
159						{
160	0				0	$errorMessage = "Expected value in cell (2,2) must not be zero";
161	0				0	$errorCodeNumber = 211;
162	0				0	return;
163						}
164						}
						# A negative expected value is rejected for every cell, whatever the
						# observed count is (error 212).
165	58	50			113	if ($m11 < 0)
166						{
167	0				0	$errorMessage = "Expected value for cell (1,1) should not be negative";
168	0				0	$errorCodeNumber = 212;
169	0				0	return;
170						}
171	58	50			126	if ($m12 < 0)
172						{
173	0				0	$errorMessage = "Expected value for cell (1,2) should not be negative";
174	0				0	$errorCodeNumber = 212;
175	0				0	return;
176						}
177	58	50			165	if ($m21 < 0)
178						{
179	0				0	$errorMessage = "Expected value for cell (2,1) should not be negative";
180	0				0	$errorCodeNumber = 212;
181	0				0	return;
182						}
183	58	50			105	if ($m22 < 0)
184						{
185	0				0	$errorMessage = "Expected value for cell (2,2) should not be negative";
186	0				0	$errorCodeNumber = 212;
187	0				0	return;
188						}
189
190						# Everything looks good so we can return 1
191	58				168	return 1;
192						}
193
194
195
196
197						=item computePMI() - Computes the pmi of a given observed and expected
198						value pair.
199
200						INPUT PARAMS : $n ..Observed value
201						$m ..Expected value
202
203						RETURN VALUES : log(n/m) ..the natural log of the ratio of the
204						observed value to the expected
205						value, or 0 if the observed value is 0.
206
207						=cut
208
209						sub computePMI
210						{
						# Returns log($n/$m) for a true (non-zero) observed value $n, else 0.
						# NOTE(review): $m is not checked here; callers presumably run
						# getValues() first so that $m is positive whenever $n is non-zero -
						# confirm.
211	175		175	1	196	my $n = shift;
212	175				183	my $m = shift;
						# A false $n (0 or undef) skips the division and the log entirely.
213	175	100			252	if($n)
214						{
215	167				191	my $val = $n/$m;
216	167				470	return log($val);
217						}
218						else
219						{
220	8				21	return 0;
221						}
222						}
223
224
225
226						1;
227						__END__