File Coverage

blib/lib/Text/NSP/Measures/2D/MI/ps.pm

Criterion	Covered	Total	%
statement	21	22	95.4
branch	2	2	100.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	29	31	93.5

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::MI::ps - Perl module that implements Poisson-Stirling
4					measure of association for bigrams.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::2D::MI::ps;
11
12					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13
14					$ps_value = calculateStatistic( n11=>$n11,
15					n1p=>$n1p,
16					np1=>$np1,
17					npp=>$npp);
18
19					if( ($errorCode = getErrorCode()))
20					{
21					print STDERR $errorCode." - ".getErrorMessage()."\n";
22					}
23					else
24					{
25					print getStatisticName()." value for bigram is ".$ps_value."\n";
26					}
27
28					=head1 DESCRIPTION
29
30					The log-likelihood ratio measures the deviation between the observed data
31					and what would be expected if word1 and word2 were independent. The
32					higher the score, the less evidence there is in favor of concluding that
33					the words are independent.
34
35					Assume that the frequency count data associated with the bigram
36					word1 word2 is as shown by a 2x2 contingency table:
37
38					          word2   ~word2
39					  word1    n11      n12 | n1p
40					 ~word1    n21      n22 | n2p
41					           --------------
42					           np1      np2   npp
43
44					where n11 is the number of times word1 and word2 occur together, and
45					n12 is the number of times word1 occurs with some word other than
46					word2, and n1p is the number of times in total that word1 occurs as
47					the first word in a bigram.
48
49					The expected values for the internal cells are calculated by taking the
50					product of their associated marginals and dividing by the sample size,
51					for example:
52
53					       np1 * n1p
54					m11 =  ---------
55					          npp
56
57					The Poisson-Stirling measure is a negative logarithmic approximation
58					of the Poisson-likelihood measure. It uses Stirling's formula to
59					approximate the factorial in the Poisson-likelihood measure.
60
61					Poisson-Stirling = n11 * ( log(n11) - log(m11) - 1)
62
63					which is the same as
64
65					Poisson-Stirling = n11 * ( log(n11/m11) - 1)
66
67
68					=head2 Methods
69
70					=over
71
72					=cut
73
74
75					package Text::NSP::Measures::2D::MI::ps;
76
77
78	1		1	1312	use Text::NSP::Measures::2D::MI;
	1			3
	1			293
79	1		1	4	use strict;
	1			2
	1			361
80	1		1	5	use Carp;
	1			2
	1			48
81	1		1	5	use warnings;
	1			2
	1			45
82	1		1	4	no warnings 'redefine';
	1			1
	1			191
83					require Exporter;
84
85					our ($VERSION, @EXPORT, @ISA);
86
87					@ISA = qw(Exporter);
88
89					@EXPORT = qw(initializeStatistic calculateStatistic
90					getErrorCode getErrorMessage getStatisticName);
91
92					$VERSION = '0.97';
93
94					=item calculateStatistic() - This method calculates the ps value
95
96					INPUT PARAMS : %values .. Hash of name => count pairs (n11, n1p,
97					np1, npp) holding the count values computed
98					by the count.pl program.
99
100					RETURN VALUES : $poissonStirling .. Poisson-Stirling value for this bigram, or an empty return (undef in scalar context) when getValues() reports an error.
101
102					=cut
103
104					sub calculateStatistic
105					{
106	11		11	146	my %values = @_;
107
108					# getValues() (in the parent MI module, not shown here) is expected to
109					# validate the counts and populate $n11/$m11 - TODO confirm. On a false
110					# result we bail out with a bare return (undef / empty list), not 0.
111	11	100		29	if( !(Text::NSP::Measures::2D::MI::getValues(\%values)) ) {
112	10			19	return;
113					}
114
115					# Now for the actual calculation of the Poisson-Stirling measure.
116	1			1	my $poissonStirling = 0;
117
118					# PS = n11 * (computePMI(n11, m11) - 1). No guard here against n11/m11 <= 0 - presumably computePMI handles it; TODO confirm.
119	1			4	$poissonStirling = $n11 * (Text::NSP::Measures::2D::MI::computePMI($n11,$m11) - 1);
120
121	1			4	return $poissonStirling;
122					}
123
124
125					=item getStatisticName() - Returns the name of this statistic
126
127					INPUT PARAMS : none
128
129					RETURN VALUES : $name .. Name of the measure.
130
131					=cut
132
133					sub getStatisticName
134					{
135	0		0		return "Poisson-Stirling Measure";  # constant display name; any arguments are ignored
136					}
137
138
139
140					1;
141					__END__