|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Text::NSP::Measures::2D::Fisher::twotailed - Perl module implementation of the two-sided  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                              Fisher's exact test.  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head3 Basic Usage  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Text::NSP::Measures::2D::Fisher::twotailed;  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $npp = 60; my $n1p = 20; my $np1 = 20;  my $n11 = 10;  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $twotailed_value = calculateStatistic( n11=>$n11,  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                       n1p=>$n1p,  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                       np1=>$np1,  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                       npp=>$npp);  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if( ($errorCode = getErrorCode()))  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR $errorCode." - ".getErrorMessage();  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print getStatisticName."value for bigram is ".$twotailed_value;  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Assume that the frequency count data associated with a bigram  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  is stored in a 2x2 contingency table:  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           word2   ~word2  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   word1    n11      n12 | n1p  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  ~word1    n21      n22 | n2p  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            --------------  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            np1      np2   npp  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 where n11 is the number of times  occur together, and  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 n12 is the number of times  occurs with some word other than  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 word2, and n1p is the number of times in total that word1 occurs as  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the first word in a bigram.  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The fishers exact tests are calculated by fixing the marginal totals  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and computing the hypergeometric probabilities for all the possible  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 contingency tables,  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A twotailed fishers test is calculated by adding the probabilities of  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all the contingency tables with probabilities less than the probability  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the observed table. The twotailed fishers test tells us how likely  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it would be to observe an contingency table which is less probable than  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the current table.  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Methods  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Text::NSP::Measures::2D::Fisher::twotailed;  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1371
 | 
 use Text::NSP::Measures::2D::Fisher;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
184
 | 
    | 
| 
64
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
65
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use Carp;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
66
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
    | 
| 
67
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 no warnings 'redefine';  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
335
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require Exporter;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our ($VERSION, @EXPORT, @ISA);  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @ISA  = qw(Exporter);  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @EXPORT = qw(initializeStatistic calculateStatistic  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              getErrorCode getErrorMessage getStatisticName);  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $VERSION = '0.97';  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item calculateStatistic() - This method calculates the twotailed  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Fisher value  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 INPUT PARAMS  : $count_values       .. Reference of an array containing  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                        the count values computed by the  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                        count.pl program.  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 RETURN VALUES : $twotailed          .. Twotailed Fisher value.  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub calculateStatistic  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
93
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
 
 | 
187
 | 
   my %values = @_;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
   my $probabilities;  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # computes and returns the observed and marginal values from  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # the frequency combination values. returns 0 if there is an  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # error in the computation or the values are inconsistent.  | 
| 
100
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
   if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
102
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     return;  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   my $final_limit = ($n1p < $np1) ? $n1p : $np1;  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $n11_org = $n11;  | 
| 
108
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $n11_start = $n1p + $np1 - $npp;  | 
| 
109
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   if($n11_start<0)  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
111
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     $n11_start = 0;  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
116
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       return;  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $value;  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $ttfisher=0;  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   foreach $value (sort { $a <=> $b } values %$probabilities)  | 
| 
 
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
104
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
125
 | 
32
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
67
 | 
     if($value > $probabilities->{$n11_org})  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
127
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
       next;  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
129
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     $ttfisher += exp($value);  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   return $ttfisher;  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item getStatisticName() - Returns the name of this statistic  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 INPUT PARAMS  : none  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 RETURN VALUES : $name      .. Name of the measure.  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getStatisticName  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
146
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     return "Two Tailed Fisher";  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |