File Coverage

blib/lib/Text/NSP/Measures/2D/CHI/tscore.pm

Criterion	Covered	Total	%
statement	20	21	95.2
branch	2	2	100.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	28	30	93.3

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::CHI::tscore - Perl module that implements T-score
4					measure of association for bigrams.
5
6
7					=head1 SYNOPSIS
8
9					=head3 Basic Usage
10
11					use Text::NSP::Measures::2D::CHI::tscore;
12
13					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
14
15					$tscore_value = calculateStatistic( n11=>$n11,
16					n1p=>$n1p,
17					np1=>$np1,
18					npp=>$npp);
19
20					if( ($errorCode = getErrorCode()))
21					{
22					print STDERR $errorCode." - ".getErrorMessage()."\n";
23					}
24					else
25					{
26					print getStatisticName()." value for bigram is ".$tscore_value."\n";
27					}
28
29					=head1 DESCRIPTION
30
31					Assume that the frequency count data associated with a bigram
32					is stored in a 2x2 contingency table:
33
34					          word2   ~word2
35					  word1    n11      n12 | n1p
36					 ~word1    n21      n22 | n2p
37					           --------------
38					           np1      np2   npp
39
40					where n11 is the number of times word1 and word2 occur together, and
41					n12 is the number of times word1 occurs with some word other than
42					word2, and n1p is the number of times in total that word1 occurs as
43					the first word in a bigram.
44
45					The T-score is defined as the ratio of the difference between the
46					observed and the expected mean to the standard deviation of the sample,
47					which is taken here as sqrt(n11). Note that this is a variant of the
48					standard t-test that was proposed for use in the identification of collocations in large samples of text.
49
50					Thus, the T-score is defined as follows:
51
52					m11 = n1p * np1 / npp
53
54					T-score = (n11 - m11)/sqrt(n11)
55
56					=over
57
58					=cut
59
60
61					package Text::NSP::Measures::2D::CHI::tscore;
62
63
64	1		1	4671	use Text::NSP::Measures::2D::CHI;
	1			3
	1			235
65	1		1	5	use strict;
	1			3
	1			33
66	1		1	6	use Carp;
	1			2
	1			53
67	1		1	6	use warnings;
	1			2
	1			27
68	1		1	5	no warnings 'redefine';
	1			3
	1			1535
69					require Exporter;
70
71					our ($VERSION, @EXPORT, @ISA);
72
73					@ISA = qw(Exporter);
74
75					@EXPORT = qw(initializeStatistic calculateStatistic
76					getErrorCode getErrorMessage getStatisticName);
77
78					$VERSION = '0.97';
79
80
81					=item calculateStatistic() - method to calculate the tscore Coefficient
82
83					INPUT PARAMS : %values .. Hash (passed as a key => value list)
84					containing the count values n11, n1p, np1 and npp
85					computed by the count.pl program.
86
87					RETURN VALUES : $tscore .. tscore value for this bigram.
88
89					=cut
90
91					sub calculateStatistic
92					{
93	28		28	4624	my %values = @_;
94
95					# computes and returns the observed and expected values from
96					# the frequency combination values. returns 0 if there is an
97					# error in the computation or the values are inconsistent.
98	28	100		83	if( !(Text::NSP::Measures::2D::CHI::getValues(\%values)) ) {
99	10			25	return;
100					}
101					# Now calculate the tscore
102
103	18			79	my $tscore = (($n11-$m11)/($n11**0.5));
104
105	18			58	return ( $tscore );
106					}
107
108
109
110					=item getStatisticName() - Returns the name of this statistic
111
112					INPUT PARAMS : none
113
114					RETURN VALUES : $name .. Name of the measure.
115
116					=cut
117
118					sub getStatisticName
119					{
120	0		0		return "T-score";
121					}
122
123
124
125					1;
126					__END__