|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::DOOP::Util::Run::GeneMerge;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
912
 | 
 use POSIX;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8147
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::DOOP::Util::Run::GeneMerge - GeneMerge based GO analyzer  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Version 0.02  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.02';  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #!/usr/bin/perl -w  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::DOOP::DOOP;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $test = Bio::DOOP::Util::Run::GeneMerge->new();  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($test->getDescFile("GO/use/GO.BP.use") < 0){  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      print"Desc error\n"  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($test->getAssocFile("GO/assoc/A_thaliana.converted.BP") < 0){  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      print"Assoc error\n"  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($test->getPopFile("GO/pop.500") < 0){  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      print"Pop error\n"  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($test->getStudyFile("GO/study.500/combined1314.list") < 0){  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      print"Study error\n"  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $results = $test->getResults();  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   foreach $res (@{$results}) {  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      print $$res{'GOterm'}," ",$$res{'RawEs'},"\n";  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is a module based on GeneMerge v1.2.  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Original program described in:  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cristian I. Castillo-Davis and Daniel L. Hartl  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 GeneMerge - post-genomic analysis, data mining, and hypothesis testing  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bioinformatics Vol. 19 no. 7 2003, Pages 891-892  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The original program is not really good for large scale analysis,   | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 because the design uses a lot of I/O processes. This version fetches  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 everything into memory at start.  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHORS  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tibor Nagy, Godollo, Endre Sebestyen, Martonvasar,  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Create new GeneMerge object.  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $genemerge = Bio::DOOP::Util::Run::GeneMerge->new;  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
78
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my $self                       = {};  | 
| 
79
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $dummy                      = shift;  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoAssoc}               = ();  | 
| 
82
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoPopAssocCount}       = ();  | 
| 
83
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoPopAssocFreq}        = ();  | 
| 
84
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{PopGeneNo}             = 0;  | 
| 
85
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoDesc}                = ();  | 
| 
86
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{StudyGeneNo}           = 0;  | 
| 
87
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{StudyGeneNoAssoc}      = 0;  | 
| 
88
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoStudyGeneAssocCount} = ();  | 
| 
89
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoAssocStudyGene}      = ();  | 
| 
90
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{StudyGeneUniqAssoc}    = 0;  | 
| 
91
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{BonferroniCorr}        = 0;  | 
| 
92
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{HoStudyGeneAssocPVal}  = ();  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bless $self;  | 
| 
95
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return ($self);  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getAssocFile  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method loads the GO association file and stores it in memory.  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file format is the following. Each line starts with a cluster id, and after some whitespace  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the associated GO ids are enumerated, separated by semicolons.  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001020 GO:0016020;GO:0003674;GO:0008150                                                                                                                      | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001110 GO:0005739;GO:0003674  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $genemerge->getAssocFile('/tmp/assoc.txt');  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getAssocFile {  | 
| 
112
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
113
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $filename            = shift;  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    open ASSOC, $filename or return(-1);  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    while(){  | 
| 
118
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       chomp;  | 
| 
119
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my @assoc_line = split;  | 
| 
120
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $assoc_gene = $assoc_line[0];  | 
| 
121
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my @assoc_go   = ();  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ($assoc_line[1]) {  | 
| 
124
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          @assoc_go = split /;/, $assoc_line[1];  | 
| 
125
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          @{$self->{HoAssoc}{$assoc_gene}} = @assoc_go;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
129
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    close ASSOC;  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return(0);  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getPopFile  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method loads the population file and stores it in memory.  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file format is the following. Each line contains one and only one  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cluster id.  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001020  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001110  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $genemerge->getPopFile('/tmp/pop.txt');  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getPopFile {  | 
| 
148
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
149
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $filename            = shift;  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    open POP, $filename or return(-1);  | 
| 
152
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    while () {  | 
| 
153
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       chomp;  | 
| 
154
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $PopGene = $_;  | 
| 
155
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{PopGeneNo}++;  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (exists $self->{HoAssoc}{$PopGene}) {  | 
| 
158
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          foreach my $AssocGO (@{$self->{HoAssoc}{$PopGene}})  {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
159
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{HoPopAssocCount}{$AssocGO}++;  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          }  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
163
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    close POP;  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $self->popFreq();  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
167
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return(0);  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 popFreq  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method calculates the population frequency. Do not use it directly.  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub popFreq {  | 
| 
177
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    foreach my $PopAssocCountKey (keys %{$self->{HoPopAssocCount}}) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $freq = $self->{HoPopAssocCount}{$PopAssocCountKey} / $self->{PopGeneNo};  | 
| 
181
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{HoPopAssocFreq}{$PopAssocCountKey} = $freq;  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getDescFile  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method loads the GO description file.  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file format is the following. Each line starts with the GO id, and separated by a tab,  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the description of the GO id.  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 GO:0000007      low-affinity zinc ion transporter activity                                                                                                     | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 GO:0000008      thioredoxin  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $genemerge->getDescFile('/tmp/desc.txt');  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getDescFile {  | 
| 
199
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
200
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $filename            = shift;  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    open DESC, $filename or return(-1);  | 
| 
203
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    while () {  | 
| 
204
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       chomp;  | 
| 
205
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my @desc_line = split /\s/, $_, 2;  | 
| 
206
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{HoDesc}{$desc_line[0]} = $desc_line[1];  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
208
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    close DESC;  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return(0);  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getStudyFile  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method loads the study data set, counts GO frequencies, calculates P values  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 based on the hypergeometric distribution, and corrects P values, based on the  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bonferroni method.  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file format of the study file is the following. Each line contains one and only one  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cluster id.  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001020  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 81001110  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $genemerge->getStudyFile('/tmp/study.txt');  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getStudyFile {  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # TODO we should split this in 2 or 3.  | 
| 
231
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
232
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $filename            = shift;  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    open STUDY, $filename or return(-1);  | 
| 
235
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    while() {  | 
| 
236
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       chomp;  | 
| 
237
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{StudyGeneNo}++;  | 
| 
238
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $StudyGene = $_;  | 
| 
239
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (exists $self->{HoAssoc}{$StudyGene}) {  | 
| 
240
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          foreach my $StudyGeneGO (@{$self->{HoAssoc}{$StudyGene}}) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{HoStudyGeneAssocCount}{$StudyGeneGO}++;  | 
| 
242
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push @{$self->{HoAssocStudyGene}{$StudyGeneGO}}, $StudyGene;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          }  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $self->{StudyGeneNoAssoc}++;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
249
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    close STUDY;  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    #Bonferroni correction  | 
| 
252
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    foreach my $StudyGeneAssocCountKey (keys %{$self->{HoStudyGeneAssocCount}}){  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{StudyGeneUniqAssoc}++;  | 
| 
254
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if($self->{HoPopAssocFreq}{$StudyGeneAssocCountKey} > (1 / $self->{PopGeneNo})) {  | 
| 
255
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $self->{BonferroniCorr}++;  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    #Calculate P-values based on hypergeometric distribution  | 
| 
260
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $PVal = 0;  | 
| 
261
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $PValC = 0;  | 
| 
262
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $N = $self->{PopGeneNo};  | 
| 
263
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $K = $self->{StudyGeneNo};  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
265
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    foreach my $StudyGeneAssocCountKey (keys %{$self->{HoStudyGeneAssocCount}}){  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $P = $self->{HoPopAssocFreq}{$StudyGeneAssocCountKey};  | 
| 
267
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $R = $self->{HoStudyGeneAssocCount}{$StudyGeneAssocCountKey};  | 
| 
268
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ($R != 1) {  | 
| 
269
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $PVal  = $self->hypergeometric($N,$P,$K,$R);  | 
| 
270
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $PValC = ($PVal * $self->{BonferroniCorr} >= 1) ? 1 : $PVal * $self->{BonferroniCorr};  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
272
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $PVal  = 'NA';  | 
| 
273
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $PValC = 'NA';  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
275
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ${$self->{HoStudyGeneAssocPVal}{$StudyGeneAssocCountKey}}[0] = $PVal;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ${$self->{HoStudyGeneAssocPVal}{$StudyGeneAssocCountKey}}[1] = $PValC;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return(0);  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getResults  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method gives back all the results as an arrayref of hashes.  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $results = $genemerge->getResults();  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   foreach $result (@{$results}) {  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $goterm       = $$result{'GOterm'};  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $popfreq      = $$result{'PopFreq'};  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $popfrac      = $$result{'PopFrac'};  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $studyfrac    = $$result{'StudyFrac'};  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $studyfracall = $$result{'StudyFracAll'};  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $raw_escore   = $$result{'RawEs'};  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $escore       = $$result{'EScore'};  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $desc         = $$result{'Desc'};  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     @contrib      = @{$$result{'Contrib'}};  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getResults {  | 
| 
302
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self                = shift;  | 
| 
303
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my @results;  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    foreach my $goterm (sort keys %{$self->{HoStudyGeneAssocCount}}) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my %result;  | 
| 
307
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'GOterm'}       = $goterm;  | 
| 
308
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'PopFreq'}      = $self->{HoPopAssocFreq}{$goterm};  | 
| 
309
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'PopFrac'}      = $self->{HoPopAssocCount}{$goterm};  | 
| 
310
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'PopFracAll'}   = $self->{PopGeneNo};  | 
| 
311
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'StudyFrac'}    = $self->{HoStudyGeneAssocCount}{$goterm};  | 
| 
312
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'StudyFracAll'} = $self->{StudyGeneNo};  | 
| 
313
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'RawEs'}        = ${$self->{HoStudyGeneAssocPVal}{$goterm}}[0];  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
314
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'EScore'}       = ${$self->{HoStudyGeneAssocPVal}{$goterm}}[1];  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'Desc'}         = $self->{HoDesc}{$goterm};  | 
| 
316
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $result{'Contrib'}      = \@{$self->{HoAssocStudyGene}{$goterm}};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       push @results, \%result;  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return(\@results);  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 hypergeometric  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is an internal function to calculate the hypergeometric distribution. Do not use it directly.  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub hypergeometric {  | 
| 
330
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    my $self             = shift;  | 
| 
331
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $n                = shift;  | 
| 
332
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $p                = shift;  | 
| 
333
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $k                = shift;  | 
| 
334
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $r                = shift;  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $i    = '0';  | 
| 
337
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $q    = '0';  | 
| 
338
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $np   = '0';  | 
| 
339
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $nq   = '0';  | 
| 
340
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $top  = '0';  | 
| 
341
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $sum  = '0';  | 
| 
342
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $lfoo = '0';  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
344
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $logNchooseK = '0';  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
346
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $q = 1 - $p;  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
348
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $np = floor( $n * $p + 0.5 );  | 
| 
349
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $nq = floor( $n * $q + 0.5 );  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
351
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $logNchooseK = &logNchooseK( $n, $k );  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
353
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $top = ($np < $k) ? $np : $k;  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $lfoo = &logNchooseK($np, $top) + &logNchooseK($n * (1 - $p), $k - $top);  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
357
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    for ($i = $top ; $i >= $r ; $i--) {  | 
| 
358
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $sum += exp($lfoo - $logNchooseK);  | 
| 
359
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ($i > $r) { $lfoo = $lfoo + log($i / ($np - $i + 1)) + log(($nq - $k + $i) / ($k - $i + 1)) }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
361
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    return $sum;  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 logNchooseK  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Another internal function for the correct statistical results. Do not use it directly.  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub logNchooseK {  | 
| 
371
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
 	my $n = shift;  | 
| 
372
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $k = shift;  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
374
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $i = '0';  | 
| 
375
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $result = '0';  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$k = ($k > ($n - $k)) ? $n - $k : $k;  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
379
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for ($i = $n ; $i > ($n - $k) ; $i--) { $result += log($i) }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$result -= &lFactorial($k);  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $result;  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 lFactorial  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Factorial calculating function. Do not use it directly.  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lFactorial {  | 
| 
393
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
 	my $number = shift;  | 
| 
394
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $result = 0;  | 
| 
395
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $i;  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for ($i = 2 ; $i <= $number ; $i++) { $result += log($i) }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $result;  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |