line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Candidates; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
4
|
|
|
|
|
|
|
## Author: |
5
|
|
|
|
|
|
|
## Hugo WL ter Doest terdoest@cs.utwente.nl |
6
|
|
|
|
|
|
|
## Description: |
7
|
|
|
|
|
|
|
## Object/methods for candidate features |
8
|
|
|
|
|
|
|
## |
9
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
10
|
|
|
|
|
|
|
## Copyright (C) 1998 Hugo WL ter Doest terdoest@cs.utwente.nl |
11
|
|
|
|
|
|
|
## |
12
|
|
|
|
|
|
|
## This library is free software; you can redistribute it and/or modify |
13
|
|
|
|
|
|
|
## it under the terms of the GNU General Public License as published by |
14
|
|
|
|
|
|
|
## the Free Software Foundation; either version 2 of the License, or |
15
|
|
|
|
|
|
|
## (at your option) any later version. |
16
|
|
|
|
|
|
|
## |
17
|
|
|
|
|
|
|
## This library is distributed in the hope that it will be useful, |
18
|
|
|
|
|
|
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of |
19
|
|
|
|
|
|
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
20
|
|
|
|
|
|
|
## GNU General Public License for more details. |
21
|
|
|
|
|
|
|
## |
22
|
|
|
|
|
|
|
## You should have received a copy of the GNU Library General Public |
23
|
|
|
|
|
|
|
## License along with this program; if not, write to the Free Software |
24
|
|
|
|
|
|
|
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
25
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
29
|
|
|
|
|
|
|
## Globals |
30
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
31
|
2
|
|
|
|
|
143
|
use vars qw($VERSION |
32
|
|
|
|
|
|
|
@ISA |
33
|
|
|
|
|
|
|
@EXPORT |
34
|
2
|
|
|
2
|
|
1617
|
$VECTOR_PACKAGE); |
|
2
|
|
|
|
|
5
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
38
|
|
|
|
|
|
|
## Require libraries |
39
|
|
|
|
|
|
|
##---------------------------------------------------------------------------## |
40
|
2
|
|
|
2
|
|
14
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
82
|
|
41
|
2
|
|
|
2
|
|
9
|
use diagnostics -verbose; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
18
|
|
42
|
2
|
|
|
2
|
|
53
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
109
|
|
43
|
2
|
|
|
2
|
|
10
|
use Statistics::SparseVector; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
1739
|
|
44
|
|
|
|
|
|
|
$VECTOR_PACKAGE = "Statistics::SparseVector"; |
45
|
|
|
|
|
|
|
require Exporter; |
46
|
|
|
|
|
|
|
require AutoLoader; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
@ISA = qw(Exporter AutoLoader); |
49
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
50
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
51
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
52
|
|
|
|
|
|
|
@EXPORT = qw( |
53
|
|
|
|
|
|
|
); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub new { |
57
|
1
|
|
|
1
|
1
|
13
|
my($this, $arg) = @_; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# for calling $self->new($someth): |
60
|
1
|
|
33
|
|
|
9
|
my $class = ref($this) || $this; |
61
|
1
|
|
|
|
|
3
|
my $self = {}; |
62
|
1
|
|
|
|
|
4
|
bless $self, $class; |
63
|
1
|
50
|
|
|
|
4
|
if ($arg) { |
64
|
1
|
|
|
|
|
8
|
$self->read($arg); |
65
|
|
|
|
|
|
|
} |
66
|
1
|
|
|
|
|
5
|
return($self); |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
|
70
|
0
|
|
|
0
|
|
0
|
sub DESTROY { |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# reads a candidates file |
76
|
|
|
|
|
|
|
# dies if insufficient events or inconsistent lines |
77
|
|
|
|
|
|
|
# syntax first line: ... |
78
|
|
|
|
|
|
|
# syntax other lines: |
79
|
|
|
|
|
|
|
sub read { |
80
|
1
|
|
|
1
|
0
|
208
|
my($self, $file) = @_; |
81
|
|
|
|
|
|
|
|
82
|
1
|
|
|
|
|
1
|
my($features, |
83
|
|
|
|
|
|
|
$sum, |
84
|
|
|
|
|
|
|
$event, |
85
|
|
|
|
|
|
|
$candidate_names); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# prologue |
88
|
1
|
50
|
|
|
|
56
|
open(CANDS, $file) || |
89
|
|
|
|
|
|
|
die "Could not open $file\n"; |
90
|
1
|
|
|
|
|
6
|
print "Opened $file\n"; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# read candidate names, skip comments |
93
|
1
|
|
|
|
|
2
|
$candidate_names = ""; |
94
|
1
|
|
|
|
|
2
|
do { |
95
|
1
|
|
|
|
|
25
|
$candidate_names = ; |
96
|
|
|
|
|
|
|
} until ($candidate_names !~ /\#.*/); |
97
|
1
|
|
|
|
|
3
|
chomp $candidate_names; |
98
|
1
|
|
|
|
|
18
|
$self->{CANDIDATE_NAMES} = [split(/\t/,$candidate_names)]; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# read the candidate bitvectors |
101
|
1
|
|
|
|
|
3
|
$self->{NR_CANDIDATES} = 0; |
102
|
1
|
|
|
|
|
3
|
$self->{NR_CLASSES} = 0; |
103
|
1
|
|
|
|
|
4
|
while () { |
104
|
100
|
50
|
|
|
|
217
|
if (!/\#.*/) { |
105
|
100
|
|
|
|
|
113
|
chomp; |
106
|
100
|
|
|
|
|
115
|
$features = $_; |
107
|
100
|
100
|
|
|
|
166
|
if ($self->{NR_CLASSES} == 0) { |
108
|
1
|
|
|
|
|
3
|
$self->{NR_CANDIDATES} = length($features); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
else { |
111
|
99
|
50
|
|
|
|
205
|
if ($self->{NR_CANDIDATES} != length($features)) { |
112
|
0
|
|
|
|
|
0
|
croak "Candidate file corrupt ". |
113
|
|
|
|
|
|
|
"(line $self->{NR_CLASSES} has insufficient features)\n"; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
} |
116
|
100
|
|
|
|
|
301
|
$self->{CANDIDATES}[$self->{NR_CLASSES}++] = |
117
|
|
|
|
|
|
|
$VECTOR_PACKAGE->new_Bin($self->{NR_CANDIDATES}, $features); |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# epilogue |
122
|
1
|
|
|
|
|
12
|
close(CANDS); |
123
|
|
|
|
|
|
|
# check the candidates for constant functions |
124
|
1
|
|
|
|
|
6
|
$self->check(); |
125
|
1
|
|
|
|
|
7
|
print "Read $self->{NR_CANDIDATES} candidates for $self->{NR_CLASSES} events; "; |
126
|
1
|
|
|
|
|
5
|
print "closed $file\n"; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# check whether for all features f, \sum_x f(x) > 0, and |
131
|
|
|
|
|
|
|
# \sum_x f(x) != nr_classes |
132
|
|
|
|
|
|
|
sub check { |
133
|
1
|
|
|
1
|
1
|
3
|
my($self) = @_; |
134
|
|
|
|
|
|
|
|
135
|
1
|
|
|
|
|
1
|
my($x, |
136
|
|
|
|
|
|
|
$f, |
137
|
|
|
|
|
|
|
$sum); |
138
|
|
|
|
|
|
|
|
139
|
1
|
|
|
|
|
11
|
for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) { |
140
|
3
|
|
|
|
|
5
|
$sum = 0; |
141
|
3
|
|
|
|
|
9
|
for ($x = 0; $x < $self->{NR_CLASSES}; $x++) { |
142
|
300
|
|
|
|
|
776
|
$sum += $self->{CANDIDATES}[$x]->bit_test($f); |
143
|
|
|
|
|
|
|
} |
144
|
3
|
50
|
33
|
|
|
24
|
if (!$sum || ($sum == $self->{NR_CLASSES})) { |
145
|
0
|
|
|
|
|
0
|
croak "Candidate ",$f+1, " is constant, remove it\n"; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# writes remaining candidates to a file |
152
|
|
|
|
|
|
|
# syntax: same as input candidates file |
153
|
|
|
|
|
|
|
sub write { |
154
|
1
|
|
|
1
|
1
|
15629
|
my($self, $file) = @_; |
155
|
|
|
|
|
|
|
|
156
|
1
|
|
|
|
|
1082
|
my($x, |
157
|
|
|
|
|
|
|
$f); |
158
|
|
|
|
|
|
|
|
159
|
1
|
50
|
33
|
|
|
910
|
if (($self->{NR_CANDIDATES} > 0) && ($self->{NR_CLASSES})) { |
160
|
1
|
50
|
|
|
|
209
|
open(CANDIDATES,">$file") || |
161
|
|
|
|
|
|
|
die "Could not open $file\n"; |
162
|
1
|
|
|
|
|
12
|
print "Opened $file\n"; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# write the list of candidate names that were not added |
165
|
1
|
|
|
|
|
13
|
for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) { |
166
|
3
|
100
|
|
|
|
31
|
if (!$self->{ADDED}{$f}) { |
167
|
1
|
|
|
|
|
18
|
print CANDIDATES "$self->{CANDIDATE_NAMES}[$f]\t"; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
1
|
|
|
|
|
1862
|
print CANDIDATES "\n"; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# write candidates that were not added |
173
|
1
|
|
|
|
|
16
|
for ($x = 0; $x < $self->{NR_CLASSES}; $x++) { |
174
|
100
|
|
|
|
|
310
|
for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) { |
175
|
300
|
100
|
|
|
|
1623
|
if (!$self->{ADDED}{$f}) { |
176
|
100
|
|
|
|
|
534
|
print CANDIDATES $self->{CANDIDATES}[$x]->bit_test($f); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
100
|
|
|
|
|
478
|
print CANDIDATES "\n"; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
1
|
|
|
|
|
89
|
close CANDIDATES; |
183
|
1
|
|
|
|
|
12
|
print "Closed $file\n"; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub clear { |
189
|
4
|
|
|
4
|
1
|
59
|
my($self) = @_; |
190
|
|
|
|
|
|
|
|
191
|
4
|
|
|
|
|
19
|
undef $self->{ADDED}; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
1; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
__END__ |