File Coverage

blib/lib/Text/NSP/Measures/2D/CHI.pm

Criterion	Covered	Total	%
statement	32	45	71.1
branch	18	24	75.0
condition			n/a
subroutine	6	6	100.0
pod	2	2	100.0
total	58	77	75.3

line	stmt	bran	sub	pod	time	code
1						=head1 NAME
2
3						Text::NSP::Measures::2D::CHI - Perl module that provides error checks
4						for the Pearson's chi squared, phi coefficient
5						and the Tscore measures.
6
7						=head1 SYNOPSIS
8
9						=head3 Basic Usage
10
11						use Text::NSP::Measures::2D::CHI::x2;
12
13						my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
14
15						$x2_value = calculateStatistic( n11=>$n11,
16						n1p=>$n1p,
17						np1=>$np1,
18						npp=>$npp);
19
20						if( ($errorCode = getErrorCode()))
21						{
22						print STDERR $errorCode." - ".getErrorMessage()."\n";
23						}
24						else
25						{
26						print getStatisticName()." value for bigram is ".$x2_value."\n";
27						}
28
29						=head1 DESCRIPTION
30
31						This module is the base class for the Chi-squared, Phi coefficient
32						and T-score measures. It provides error checks specific to these
33						measures; it also implements the computations that are common to them.
34
35						=over
36
37						=item Pearson's Chi-Squared
38
39						x2 = (n11 - m11)^2/m11 + (n12 - m12)^2/m12 +
40						(n21 - m21)^2/m21 + (n22 - m22)^2/m22
41
42						=item Phi Coefficient
43
44						PHI^2 = ((n11 * n22) - (n12 * n21))^2/(n1p * np1 * np2 * n2p)
45
46						=item T-Score
47
48						tscore = (n11 - m11)/sqrt(n11)
49
50						=back
51
52						Note that the value of Pearson's Chi-Squared test is equivalent to
53						PHI^2 multiplied by the sample size, that is:
54
55						Chi-Squared = npp * PHI^2
56
57						Although T-score seems quite different from the other two measures we
58						have put it in the CHI family because like the other two measures it
59						uses the difference between the observed and expected values and is also
60						quite similar in ranking the bigrams.
61
62						=over
63
64						=cut
65
66
67						package Text::NSP::Measures::2D::CHI;
68
69
70	5		5		3697	use Text::NSP::Measures::2D;
	5				12
	5				1031
71	5		5		24	use strict;
	5				7
	5				91
72	5		5		22	use Carp;
	5				9
	5				229
73	5		5		23	use warnings;
	5				10
	5				1788
74						# use subs(calculateStatistic);
75						require Exporter;
76
77						our ($VERSION, @EXPORT, @ISA);
78
79						@ISA = qw(Exporter);
80
81						@EXPORT = qw(initializeStatistic calculateStatistic
82						getErrorCode getErrorMessage getStatisticName
83						$n11 $n12 $n21 $n22 $m11 $m12 $m21 $m22
84						$npp $np1 $np2 $n2p $n1p $errorCodeNumber
85						$errorMessage);
86
87						$VERSION = '1.03';
88
89						=item getValues() - This method calls the computeMarginalTotals(),
90						computeObservedValues() and the computeExpectedValues() methods to
91						compute the observed and expected values. It checks these values for
92						any errors that might cause the PHI and x2 measures to fail.
93
94						INPUT PARAMS : $count_values .. Reference to a hash containing
95						the count values computed by the
96						count.pl program.
97
98						RETURN VALUES : 1/undef ..returns '1' to indicate success
99						and an undefined(NULL) value to indicate
100						failure.
101
102						=cut
103
104						sub getValues
105						{
106	55		55	1	67	my ($values)=@_;
107
108	55	100			130	if(!(Text::NSP::Measures::2D::computeMarginalTotals($values)) ) {
109	15				44	return;
110						}
111
112	40	100			97	if( !(Text::NSP::Measures::2D::computeObservedValues($values)) ) {
113	15				46	return;
114						}
115
116	25	50			63	if( !(Text::NSP::Measures::2D::computeExpectedValues($values)) ) {
117	0				0	return;
118						}
119
120						# an expected value ($mxy) must not be zero when the observed count ($nxy) is non-zero; flag an error and return if so
121	25	50			56	if ( $n11 )
122						{
123	25	50			64	if ($m11 == 0)
124						{
125	0				0	$errorMessage = "Expected value in cell (1,1) must not be zero";
126	0				0	$errorCodeNumber = 221;
127	0				0	return;
128						}
129						}
130	25	100			46	if ( $n12 )
131						{
132	22	50			46	if ($m12 == 0)
133						{
134	0				0	$errorMessage = "Expected value in cell (1,2) must not be zero";
135	0				0	$errorCodeNumber = 221;
136	0				0	return;
137						}
138						}
139	25	100			61	if ( $n21 )
140						{
141	24	50			45	if ($m21 == 0)
142						{
143	0				0	$errorMessage = "Expected value in cell (2,1) must not be zero";
144	0				0	$errorCodeNumber = 221;
145	0				0	return;
146						}
147						}
148	25	100			50	if ( $n22 )
149						{
150	20	50			41	if ($m22 == 0)
151						{
152	0				0	$errorMessage = "Expected value in cell (2,2) must not be zero";
153	0				0	$errorCodeNumber = 221;
154	0				0	return;
155						}
156						}
157						# Everything looks good so we can return 1
158	25				83	return 1;
159						}
160
161
162
163
164						=item computeVal() - Computes the deviation in observed value with respect
165						to the expected values
166
167						INPUT PARAMS : $n ..Observed value
168						$m ..Expected value
169
170						RETURN VALUES : (n-m)^2/m ..the squared difference between the
171						observed and expected values, divided by the
172						expected value (0 if the expected value is 0).
173
174						=cut
175
176						sub computeVal
177						{
178	28		28	1	37	my $n = shift;
179	28				32	my $m = shift;
180	28	100			52	if($m)
181						{
182	23				96	return (($n-$m)**2)/$m;
183						}
184						else
185						{
186	5				10	return 0;
187						}
188						}
189
190
191
192						1;
193						__END__