File Coverage

blib/lib/Text/NSP/Measures/2D/MI/tmi.pm

Criterion	Covered	Total	%
statement	24	25	96.0
branch	2	2	100.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	32	34	94.1

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::MI::tmi - Perl module that implements True Mutual
4					Information.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::2D::MI::tmi;
11
12					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13
14					$tmi_value = calculateStatistic( n11=>$n11,
15					n1p=>$n1p,
16					np1=>$np1,
17					npp=>$npp);
18
19					if( ($errorCode = getErrorCode()))
20					{
21					print STDERR $errorCode." - ".getErrorMessage()."\n";
22					}
23					else
24					{
25					print getStatisticName()." value for bigram is ".$tmi_value."\n";
26					}
27
28					=head1 DESCRIPTION
29
30					Assume that the frequency count data associated with a bigram
31					is stored in a 2x2 contingency table:
32
33					          word2   ~word2
34					  word1    n11      n12 | n1p
35					 ~word1    n21      n22 | n2p
36					           --------------
37					           np1      np2   npp
38
39					where n11 is the number of times word1 and word2 occur together, and
40					n12 is the number of times word1 occurs with some word other than
41					word2, and n1p is the number of times in total that word1 occurs as
42					the first word in a bigram.
43
44					The expected values for the internal cells are calculated by taking the
45					product of their associated marginals and dividing by the sample size,
46					for example:
47
48					np1 * n1p
49					m11= ---------
50					npp
51
52					True Mutual Information (tmi) is defined as the weighted average of the
53					pointwise mutual information values for all the observed and expected value pairs.
54
55					tmi = [n11/npp * log(n11/m11) + n12/npp * log(n12/m12) +
56					n21/npp * log(n21/m21) + n22/npp * log(n22/m22)]
57
58
59					PMI = log (n11/m11)
60
61					=head2 Methods
62
63					=over
64
65					=cut
66
67					package Text::NSP::Measures::2D::MI::tmi;
68
69
70	1		1	1706	use Text::NSP::Measures::2D::MI;
	1			2
	1			176
71	1		1	4	use strict;
	1			1
	1			23
72	1		1	4	use Carp;
	1			2
	1			47
73	1		1	5	use warnings;
	1			1
	1			20
74	1		1	4	no warnings 'redefine';
	1			2
	1			274
75					require Exporter;
76
77					our ($VERSION, @EXPORT, @ISA);
78
79					@ISA = qw(Exporter);
80
81					@EXPORT = qw(initializeStatistic calculateStatistic
82					getErrorCode getErrorMessage getStatisticName);
83
84					$VERSION = '0.97';
85
86
87					=item calculateStatistic() - This method calculates the tmi value
88
89					INPUT PARAMS : %values .. A hash, passed as a key => value list
90					(n11, n1p, np1, npp), containing the count values
91					computed by the count.pl program.
92
93					RETURN VALUES : $tmi .. TMI value for this bigram.
94
95					=cut
96
97					sub calculateStatistic
98					{
99	28		28	1924	my %values = @_;
100
101					# Per the original notes, getValues() computes the observed and expected
102					# values from the frequency counts; a false return signals an error or
103					# inconsistent counts, in which case 0 is returned as the statistic.
104	28	100		131	if( !(Text::NSP::Measures::2D::MI::getValues(\%values)) ) {
105	10			40	return(0);
106					}
107
108					#my $marginals = $self->computeMarginalTotals(@_);
109
110					# Sum the four cell terms (nxy/npp) * computePMI(nxy, mxy) / log 2. NOTE(review): $nxy, $mxy and $npp are not declared in this sub - presumably package variables set by getValues(); confirm.
111	18			49	my $tmi = 0;
112
113					# NOTE(review): ($nxy / $mxy) must not be 0 or less, but no such check is visible in this sub - presumably computePMI() flags the error; confirm.
114	18			90	$tmi += $n11/$npp * Text::NSP::Measures::2D::MI::computePMI( $n11, $m11 )/ log 2;
115	18			84	$tmi += $n12/$npp * Text::NSP::Measures::2D::MI::computePMI( $n12, $m12 )/ log 2;
116	18			84	$tmi += $n21/$npp * Text::NSP::Measures::2D::MI::computePMI( $n21, $m21 )/ log 2;
117	18			84	$tmi += $n22/$npp * Text::NSP::Measures::2D::MI::computePMI( $n22, $m22 )/ log 2;
118
119	18			94	return ($tmi);
120					}
121
122
123					=item getStatisticName() - Returns the name of this statistic
124
125					INPUT PARAMS : none
126
127					RETURN VALUES : $name .. Name of the measure.
128
129					=cut
130
131					sub getStatisticName
132					{
133	0		0		return "True Mutual Information";  # fixed display name of this measure; this report records 0 executions of the sub
134					}
135
136
137
138					1;
139					__END__