line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::SDRS; |
2
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
138438
|
use 5.008; |
|
9
|
|
|
|
|
27
|
|
|
9
|
|
|
|
|
360
|
|
4
|
9
|
|
|
9
|
|
63
|
use strict; |
|
9
|
|
|
|
|
9
|
|
|
9
|
|
|
|
|
414
|
|
5
|
9
|
|
|
9
|
|
45
|
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $AUTOLOAD); |
|
9
|
|
|
|
|
36
|
|
|
9
|
|
|
|
|
738
|
|
6
|
9
|
|
|
9
|
|
27
|
use warnings; |
|
9
|
|
|
|
|
18
|
|
|
9
|
|
|
|
|
261
|
|
7
|
9
|
|
|
9
|
|
36
|
use Carp qw(cluck croak carp); |
|
9
|
|
|
|
|
0
|
|
|
9
|
|
|
|
|
657
|
|
8
|
9
|
|
|
9
|
|
4176
|
use POSIX; |
|
9
|
|
|
|
|
43002
|
|
|
9
|
|
|
|
|
54
|
|
9
|
9
|
|
|
9
|
|
31158
|
use Math::NumberCruncher; |
|
9
|
|
|
|
|
666387
|
|
|
9
|
|
|
|
|
594
|
|
10
|
9
|
|
|
9
|
|
6030
|
use Statistics::Distributions; |
|
9
|
|
|
|
|
25641
|
|
|
9
|
|
|
|
|
11214
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require Exporter; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
17
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
18
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# This allows declaration |
21
|
|
|
|
|
|
|
# use Bio::SDRS ':all'; |
22
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
23
|
|
|
|
|
|
|
# will save memory. |
24
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw() ] ); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
our @EXPORT = qw(); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
our $AUTOLOAD; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my %component = map { ($_ => 1) } ('MULTIPLE', 'LDOSE', 'HDOSE', 'STEP', |
33
|
|
|
|
|
|
|
'MAXPROC', 'TRIM', 'SIGNIFICANCE', |
34
|
|
|
|
|
|
|
'TMPDIR', 'DEBUG'); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our $VERSION = '0.11'; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
1; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 NAME |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Bio::SDRS - Perl extension for Sigmoidal Dose Response Search, a tool |
43
|
|
|
|
|
|
|
for characterizing biological responses to compounds. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 SYNOPSIS |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
use Bio::SDRS; |
48
|
|
|
|
|
|
|
my $sdrs = new Bio::SDRS; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$sdrs->doses(0.423377, 1.270132, 3.810395, 11.431184, 34.293553, |
51
|
|
|
|
|
|
|
102.880658, 308.641975, 925.925926, 2777.777778, 8333.333333, |
52
|
|
|
|
|
|
|
25000); |
53
|
|
|
|
|
|
|
$sdrs->set_assay('C8-BMS-208882', |
54
|
|
|
|
|
|
|
1.885, 1.82, 2.2, 2.205, 2.78, |
55
|
|
|
|
|
|
|
4.965, 9.21, 31.275, 74.445, 99.03, |
56
|
|
|
|
|
|
|
100); |
57
|
|
|
|
|
|
|
$sdrs->calculate; |
58
|
|
|
|
|
|
|
foreach my $assay ($sdrs->assays) { |
59
|
|
|
|
|
|
|
print "$assay\n"; |
60
|
|
|
|
|
|
|
foreach my $prop (('MAX', 'MIN', 'LOW', 'HIGH', 'EC50', |
61
|
|
|
|
|
|
|
'PVALUE', 'EC50RANGE', 'PEAK', 'A', 'B', |
62
|
|
|
|
|
|
|
'D', 'FOLD')) { |
63
|
|
|
|
|
|
|
printf " %s = %s\n", $prop, $sdrs->ec50data($assay, $prop); |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
print "\n"; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 Constructor |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$obj = new Bio::SDRS; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# You can also use $obj = Bio::SDRS->new(); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Object Methods |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head3 Input Methods |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
$sdrs->multiple([$new_multiple]); |
79
|
|
|
|
|
|
|
$sdrs->ldose([$new_ldose]); |
80
|
|
|
|
|
|
|
$sdrs->hdose([$new_hdose]); |
81
|
|
|
|
|
|
|
$sdrs->step([$new_step]); |
82
|
|
|
|
|
|
|
$sdrs->maxproc([$new_maxproc]); |
83
|
|
|
|
|
|
|
$sdrs->trim([$new_trim]); |
84
|
|
|
|
|
|
|
$sdrs->significance([$new_significance]); |
85
|
|
|
|
|
|
|
$sdrs->tmpdir([$new_tmpdir]); |
86
|
|
|
|
|
|
|
$sdrs->debug([$new_debug]); |
87
|
|
|
|
|
|
|
$sdrs->doses(doses...); |
88
|
|
|
|
|
|
|
$sdrs->set_assay(assay, {response}...) |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head3 Output Methods |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
$sdrs->assays; |
93
|
|
|
|
|
|
|
$sdrs->scandata; |
94
|
|
|
|
|
|
|
$sdrs->score_doses; |
95
|
|
|
|
|
|
|
$sdrs->sorted_assays_by_dose([$dose]); |
96
|
|
|
|
|
|
|
$sdrs->pvalues_by_dose([$dose]) |
97
|
|
|
|
|
|
|
$sdrs->ec50data([$assay[, $property[, $precision]]]); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head3 Other Methods |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$sdrs->calculate; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 DESCRIPTION |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Bio::SDRS implements the Sigmoidal Dose Response Search of assay responses |
106
|
|
|
|
|
|
|
described in the paper by |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Rui-Ru Ji, Nathan O. Siemers, Lei Ming, Liang Schweizer, and Robert E |
109
|
|
|
|
|
|
|
Bruccoleri. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The module is implemented using a simple object oriented paradigm |
112
|
|
|
|
|
|
|
where the object stores all the information needed for a calculation |
113
|
|
|
|
|
|
|
along with a state variable, C. The state variable has three |
114
|
|
|
|
|
|
|
possible values, C<'setup'>, C<'calculating'> and C<'calculated'>. The |
115
|
|
|
|
|
|
|
value of C<'setup'> indicates that the object is being setup with |
116
|
|
|
|
|
|
|
data, and any results in the object are inconsistent with the data. |
117
|
|
|
|
|
|
|
The value of C<'calculating'> indicates the object's computations are |
118
|
|
|
|
|
|
|
in progress and tells the code not to delete intermediate files. This |
119
|
|
|
|
|
|
|
object runs in parallel, and the object destruction code gets called |
120
|
|
|
|
|
|
|
when each thread exits. Intermediate files must be protected at that |
121
|
|
|
|
|
|
|
time. The value of C<'calculated'> indicates that the object's |
122
|
|
|
|
|
|
|
computational results are consistent with the data, and may be |
123
|
|
|
|
|
|
|
returned to a calling program. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The C<'calculate'> method is used to update all the calculated values |
126
|
|
|
|
|
|
|
from the input data. It checks the state variable first, and only does |
127
|
|
|
|
|
|
|
the calculation if the state is C<'setup'>. Once the calculations are |
128
|
|
|
|
|
|
|
complete, then the state variable is set to C<'calculated'>. Thus, the |
129
|
|
|
|
|
|
|
C method can be called whenever a calculated value is |
130
|
|
|
|
|
|
|
needed, and there is no performance penalty. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The module initializes the C object with a state of |
133
|
|
|
|
|
|
|
C<'setup'>. Any data input sets the state to C<'setup'>. Any requests |
134
|
|
|
|
|
|
|
for calculated data, calls C<'calculate'>, which updates the state |
135
|
|
|
|
|
|
|
variable so futures requests for calculated data return quickly. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
B This module uses parallel programming via a fork call to get |
138
|
|
|
|
|
|
|
high performance. I
|
139
|
|
|
|
|
|
|
calling the C method, and reopen them afterwards. In |
140
|
|
|
|
|
|
|
addition, you must ensure that any automated DESTROY methods take in |
141
|
|
|
|
|
|
|
account their execution when the child processes terminated.> |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 METHODS |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The following methods are provided: |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=over 4 |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=cut |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item C |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Creates a new Bio::SDRS object. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=cut |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub new { |
159
|
14
|
|
|
14
|
1
|
484
|
my $pkg; |
160
|
14
|
|
|
|
|
43
|
my $class = shift; |
161
|
14
|
|
|
|
|
39
|
eval {($pkg) = caller(0);}; |
|
14
|
|
|
|
|
162
|
|
162
|
14
|
50
|
|
|
|
62
|
if ($class ne $pkg) { |
163
|
0
|
|
|
|
|
0
|
unshift @_, $class; |
164
|
|
|
|
|
|
|
} |
165
|
14
|
|
|
|
|
33
|
my $self = {}; |
166
|
14
|
|
|
|
|
28
|
bless $self; |
167
|
14
|
|
|
|
|
52
|
$self->_init_data; |
168
|
14
|
|
|
|
|
37
|
$self->{MULTIPLE} = 1.18; |
169
|
14
|
|
|
|
|
28
|
$self->{LDOSE} = 0.17; |
170
|
14
|
|
|
|
|
28
|
$self->{HDOSE} = 30000; |
171
|
14
|
|
|
|
|
28
|
$self->{STEP} = 60; |
172
|
14
|
|
|
|
|
3433
|
$self->{MAXPROC} = 2; |
173
|
14
|
|
|
|
|
28
|
$self->{TRIM} = 0.0; |
174
|
14
|
|
|
|
|
28
|
$self->{SIGNIFICANCE} = 0.05; |
175
|
14
|
|
|
|
|
33
|
$self->{DEBUG} = 0; |
176
|
14
|
|
|
|
|
106
|
$self->_init_tmp; |
177
|
|
|
|
|
|
|
|
178
|
14
|
|
|
|
|
33
|
return $self; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub _init_data { |
182
|
28
|
|
|
28
|
|
65
|
my $self = shift; |
183
|
28
|
|
|
|
|
140
|
$self->{STATE} = "setup"; |
184
|
28
|
|
|
|
|
93
|
$self->{DOSES} = []; |
185
|
28
|
|
|
|
|
104
|
$self->{RESPONSES} = {}; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub _init_tmp { |
189
|
14
|
|
|
14
|
|
19
|
my $self = shift; |
190
|
14
|
50
|
|
|
|
61
|
my $tmp = exists($ENV{TMPDIR}) ? $ENV{TMPDIR} : "/tmp"; |
191
|
14
|
|
|
|
|
28
|
$tmp .= "/sdrs"; |
192
|
14
|
50
|
|
|
|
61
|
if (exists($ENV{USER})) { |
193
|
0
|
|
|
|
|
0
|
$tmp .= "." . $ENV{USER}; |
194
|
|
|
|
|
|
|
} |
195
|
14
|
|
|
|
|
57
|
$tmp .= "." . $$; |
196
|
14
|
|
|
|
|
33
|
$self->{TMPDIR} = $tmp; |
197
|
14
|
|
|
|
|
37
|
$self->{TMP_CREATED} = 0; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub DESTROY { |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# There's an important multiprocessing issue to keep in mind here. |
203
|
|
|
|
|
|
|
# When this process forks, this method will be invoked for all the subprocesses. |
204
|
|
|
|
|
|
|
# Thus, we need to prevent the deletion of the temporary array until the |
205
|
|
|
|
|
|
|
# calculation is completed. That is why there is a check on the STATE. |
206
|
|
|
|
|
|
|
|
207
|
14
|
|
|
14
|
|
69208518
|
my $self = shift; |
208
|
14
|
100
|
33
|
|
|
4432
|
if (not $self->{DEBUG} and |
|
|
|
66
|
|
|
|
|
209
|
|
|
|
|
|
|
$self->{TMP_CREATED} and |
210
|
|
|
|
|
|
|
$self->{STATE} eq 'calculated') { |
211
|
6
|
|
|
|
|
88
|
&_system_with_check("rm -rf " . $self->{TMPDIR}, |
212
|
|
|
|
|
|
|
$self->{DEBUG}); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# documentation for autoloaded methods goes here. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=item C |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Retrieves the current setting for the C value, |
221
|
|
|
|
|
|
|
and optionally sets it. This value specifies the multiplicity factor |
222
|
|
|
|
|
|
|
for increasing the dose in the search. It must be greater than one. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item C |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Retrieves the current setting for the C value, |
227
|
|
|
|
|
|
|
and optionally sets it. This value specifies the lowest dose in the search. |
228
|
|
|
|
|
|
|
It must be greater than zero. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=item C |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Retrieves the current setting for the C value, |
233
|
|
|
|
|
|
|
and optionally sets it. This value specifies the highest |
234
|
|
|
|
|
|
|
dose in the search. It must be greater than the ldose value. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item C |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Retrieves the current setting for the C value, and optionally |
239
|
|
|
|
|
|
|
sets it. This value specifies the maximum change in doses in the |
240
|
|
|
|
|
|
|
search. In the search process, this module starts at the ldose |
241
|
|
|
|
|
|
|
value. It tries multiplying the current dose by the C value, |
242
|
|
|
|
|
|
|
but it will only increase the dose by no more than the C value |
243
|
|
|
|
|
|
|
specified here. It must be positive. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=item C |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Retrieves the current setting for the C value, |
248
|
|
|
|
|
|
|
and optionally sets it. This value specifies the maximum number of processes that |
249
|
|
|
|
|
|
|
can be used for the search. The upper limit is 64 and the lower limit is 1. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=item C |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Retrieves the current setting for the C value, and optionally |
254
|
|
|
|
|
|
|
sets it. This value specifies the trimming factor for the number of |
255
|
|
|
|
|
|
|
assays. If the C is 0, then all assays will be used, and if 1, |
256
|
|
|
|
|
|
|
no assays will be used. It must be between 0 and 1. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=item C |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Retrieves the current setting for the C value, and |
261
|
|
|
|
|
|
|
optionally sets it. This value specifies the minimum permitted |
262
|
|
|
|
|
|
|
significance value for the F score cutoff. It must be between zero and |
263
|
|
|
|
|
|
|
1. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=item C |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Retrieves the current setting for the C value, and optionally |
268
|
|
|
|
|
|
|
sets it. This value specifies the temporary directory where scans of |
269
|
|
|
|
|
|
|
the dose calculation are written. The default is |
270
|
|
|
|
|
|
|
/tmp/sdrs.C.C. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item C |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Retrieves the current setting for the C variable, and |
275
|
|
|
|
|
|
|
optionally sets it. This value specifies whether debugging information |
276
|
|
|
|
|
|
|
is printed and if the temporary directory (see above) is deleted after |
277
|
|
|
|
|
|
|
execution of this module. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub AUTOLOAD { |
282
|
112
|
|
|
112
|
|
9063
|
my $self = shift; |
283
|
112
|
|
|
|
|
190
|
my $type = ref($self); |
284
|
112
|
|
|
|
|
122
|
my $pkg = __PACKAGE__; |
285
|
112
|
|
|
|
|
84
|
my ($pos, $new_val); |
286
|
|
|
|
|
|
|
|
287
|
112
|
50
|
|
|
|
251
|
croak "$self is not an object\n" if not $type; |
288
|
112
|
50
|
|
|
|
868
|
croak "$pkg AUTOLOAD function fails on $type\n" |
289
|
|
|
|
|
|
|
if $type !~ m/^$pkg$/; |
290
|
112
|
|
|
|
|
239
|
my $name = uc($AUTOLOAD); |
291
|
112
|
|
|
|
|
1262
|
$name =~ s/^.*:://; |
292
|
112
|
50
|
|
|
|
617
|
if (not exists($component{$name})) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
293
|
0
|
|
|
|
|
0
|
croak "$name is not a valid method for $type.\n"; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
elsif (not exists($self->{$name})) { |
296
|
0
|
|
|
|
|
0
|
croak "$name is not a valid method for $type\n"; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
elsif (defined $_[0]) { |
299
|
112
|
|
|
|
|
151
|
$self->{$name} = shift; |
300
|
112
|
|
|
|
|
193
|
$self->{STATE} = "setup"; |
301
|
|
|
|
|
|
|
} |
302
|
112
|
50
|
66
|
|
|
366
|
if ($name eq "MULTIPLE" and |
303
|
|
|
|
|
|
|
$self->{MULTIPLE} <= 1.0) { |
304
|
0
|
|
|
|
|
0
|
croak "multiple parameter must be greater than 1.\n"; |
305
|
|
|
|
|
|
|
} |
306
|
112
|
50
|
66
|
|
|
346
|
if ($name eq "LDOSE" and |
307
|
|
|
|
|
|
|
$self->{LDOSE} <= 0.0) { |
308
|
0
|
|
|
|
|
0
|
croak "ldose parameter must be positive.\n"; |
309
|
|
|
|
|
|
|
} |
310
|
112
|
50
|
66
|
|
|
311
|
if ($name eq "HDOSE" and |
311
|
|
|
|
|
|
|
$self->{HDOSE} <= $self->{LDOSE}) { |
312
|
0
|
|
|
|
|
0
|
croak "hdose parameter must be greater than the ldose parameter.\n"; |
313
|
|
|
|
|
|
|
} |
314
|
112
|
50
|
66
|
|
|
370
|
if ($name eq "STEP" and |
315
|
|
|
|
|
|
|
$self->{STEP} <= 0.0) { |
316
|
0
|
|
|
|
|
0
|
croak "step parameter must be positive.\n"; |
317
|
|
|
|
|
|
|
} |
318
|
112
|
50
|
33
|
|
|
306
|
if ($name eq "MAXPROC" and |
|
|
|
66
|
|
|
|
|
319
|
|
|
|
|
|
|
($self->{MAXPROC} > 64 or $self->{MAXPROC} < 1)) { |
320
|
0
|
|
|
|
|
0
|
croak "maxproc parameter must be between 1 and 64.\n"; |
321
|
|
|
|
|
|
|
} |
322
|
112
|
50
|
33
|
|
|
483
|
if ($name eq "TRIM" and |
|
|
|
66
|
|
|
|
|
323
|
|
|
|
|
|
|
($self->{TRIM} < 0.0 or |
324
|
|
|
|
|
|
|
$self->{TRIM} > 1.0)) { |
325
|
0
|
|
|
|
|
0
|
croak "trim parameter must be between 0 and 1, inclusive\n"; |
326
|
|
|
|
|
|
|
} |
327
|
112
|
50
|
33
|
|
|
327
|
if ($name eq "SIGNIFICANCE" and |
|
|
|
66
|
|
|
|
|
328
|
|
|
|
|
|
|
($self->{SIGNIFICANCE} <= 0.0 or |
329
|
|
|
|
|
|
|
$self->{SIGNIFICANCE} >= 1.0)) { |
330
|
0
|
|
|
|
|
0
|
croak "trim parameter must be between 0 and 1, exclusive\n"; |
331
|
|
|
|
|
|
|
} |
332
|
112
|
|
|
|
|
684
|
return $self->{$name}; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item C |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Specify the list of compound doses used in the all the assays in the |
338
|
|
|
|
|
|
|
experiment. The doses must be in numerical order, from smallest to |
339
|
|
|
|
|
|
|
largest. |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub doses { |
344
|
14
|
|
|
14
|
1
|
37
|
my $self = shift; |
345
|
14
|
50
|
|
|
|
75
|
if (scalar(@_) == 0) { |
346
|
0
|
|
|
|
|
0
|
return @{$self->{DOSES}}; |
|
0
|
|
|
|
|
0
|
|
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
else { |
349
|
14
|
|
|
|
|
79
|
$self->_init_data; |
350
|
14
|
|
|
|
|
465
|
my $dose1 = shift; |
351
|
14
|
|
|
|
|
32
|
push(@{$self->{DOSES}}, $dose1); |
|
14
|
|
|
|
|
128
|
|
352
|
14
|
|
|
|
|
70
|
foreach my $dose (@_) { |
353
|
125
|
50
|
|
|
|
462
|
if ($dose < $dose1) { |
354
|
0
|
|
|
|
|
0
|
croak "Doses not in order. Current dose is $dose, and previous does is $dose1\n"; |
355
|
|
|
|
|
|
|
} |
356
|
125
|
|
|
|
|
102
|
push(@{$self->{DOSES}}, $dose); |
|
125
|
|
|
|
|
274
|
|
357
|
125
|
|
|
|
|
158
|
$dose1 = $dose; |
358
|
|
|
|
|
|
|
} |
359
|
14
|
|
|
|
|
71
|
$self->{STATE} = "setup"; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=item C |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Adds an assay to the list to be searched over. Each response in the |
366
|
|
|
|
|
|
|
list corresponds to the doses specified in the C method. The |
367
|
|
|
|
|
|
|
number must match, and they must be in numerical order, from smallest |
368
|
|
|
|
|
|
|
to largest. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=cut |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
sub set_assay { |
373
|
275
|
|
|
275
|
1
|
1589
|
my $self = shift; |
374
|
275
|
|
|
|
|
244
|
my $assay = shift; |
375
|
|
|
|
|
|
|
|
376
|
275
|
|
|
|
|
267
|
$self->{STATE} = "setup"; |
377
|
275
|
50
|
|
|
|
438
|
if (exists($self->{RESPONSES}->{$assay})) { |
378
|
0
|
|
|
|
|
0
|
croak "Duplicate assay ($assay) response specified.\n"; |
379
|
|
|
|
|
|
|
} |
380
|
275
|
|
|
|
|
979
|
$self->{RESPONSES}->{$assay} = [ @_ ]; |
381
|
275
|
50
|
|
|
|
199
|
if (scalar(@{$self->{DOSES}}) == 0) { |
|
275
|
|
|
|
|
429
|
|
382
|
0
|
|
|
|
|
0
|
croak "You must call the doses method before calling the set_assay method.\n"; |
383
|
|
|
|
|
|
|
} |
384
|
275
|
|
|
|
|
190
|
my $nresp = scalar(@{$self->{RESPONSES}->{$assay}}); |
|
275
|
|
|
|
|
307
|
|
385
|
275
|
|
|
|
|
185
|
my $ndoses = scalar(@{$self->{DOSES}}); |
|
275
|
|
|
|
|
235
|
|
386
|
275
|
50
|
|
|
|
905
|
if ($nresp != $ndoses) { |
387
|
0
|
|
|
|
|
0
|
croak "The number of assay responses ($nresp) does not equal the number of doses ($ndoses)\n"; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item C |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Return the list of assay names; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=cut |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
sub assays { |
398
|
6
|
|
|
6
|
1
|
813
|
my $self = shift; |
399
|
6
|
|
|
|
|
12
|
return keys %{$self->{RESPONSES}}; |
|
6
|
|
|
|
|
74
|
|
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=item C |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
Perform the SDRS calculation. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub calculate { |
409
|
14
|
|
|
14
|
1
|
312
|
my $self = shift; |
410
|
14
|
50
|
|
|
|
51
|
return if $self->{STATE} eq 'calculated'; |
411
|
14
|
|
|
|
|
66
|
$self->_make_tmp; |
412
|
14
|
|
|
|
|
89
|
$self->{STATE} = "calculating"; |
413
|
|
|
|
|
|
|
# |
414
|
|
|
|
|
|
|
# Calculate F Test degrees of freedom. |
415
|
|
|
|
|
|
|
# |
416
|
14
|
|
|
|
|
60
|
my $ndoses = scalar(@{$self->{DOSES}}); |
|
14
|
|
|
|
|
79
|
|
417
|
14
|
|
|
|
|
37
|
my $p = 4; #number of parameters |
418
|
14
|
|
|
|
|
93
|
$self->{FDISTR_N} = $p - 1; |
419
|
14
|
|
|
|
|
222
|
$self->{FDISTR_M} = $ndoses - $p; |
420
|
14
|
|
|
|
|
264
|
$self->{CUTOFF} = Statistics::Distributions::fdistr($self->{FDISTR_N}, |
421
|
|
|
|
|
|
|
$self->{FDISTR_M}, |
422
|
|
|
|
|
|
|
$self->{SIGNIFICANCE}); |
423
|
|
|
|
|
|
|
|
424
|
14
|
|
|
|
|
5813
|
my $count = scalar(keys %{$self->{RESPONSES}}); |
|
14
|
|
|
|
|
85
|
|
425
|
14
|
|
|
|
|
37
|
foreach my $assay (keys %{$self->{RESPONSES}}) { |
|
14
|
|
|
|
|
174
|
|
426
|
275
|
|
|
|
|
586
|
my @data = @{$self->{RESPONSES}->{$assay}}; |
|
275
|
|
|
|
|
2342
|
|
427
|
275
|
|
|
|
|
2156
|
my ($max, $min) = Math::NumberCruncher::Range(\@data); |
428
|
275
|
|
|
|
|
14121
|
$self->{MAX}->{$assay} = $max; |
429
|
275
|
|
|
|
|
835
|
$self->{MIN}->{$assay} = $min; |
430
|
275
|
|
|
|
|
1496
|
my $value = _compute_sst(\@data); #total sum of squares |
431
|
275
|
|
|
|
|
1416
|
$self->_define_ab_boundary($assay, \@data); |
432
|
275
|
|
|
|
|
1505
|
$self->{SST}->{$assay} = $value; |
433
|
275
|
|
|
|
|
1451
|
$self->{VALUES}->{$assay} = \@data; |
434
|
|
|
|
|
|
|
} |
435
|
14
|
|
|
|
|
110
|
my $assayNum = int($count * (1 - $self->{TRIM})); |
436
|
14
|
|
|
|
|
28
|
my %expressedAssays; |
437
|
14
|
|
|
|
|
23
|
$count = 0; |
438
|
14
|
|
|
|
|
37
|
foreach my $g (sort {$self->{MAX}->{$b} <=> $self->{MAX}->{$a}} (keys %{$self->{MAX}})) { |
|
1008
|
|
|
|
|
1071
|
|
|
14
|
|
|
|
|
264
|
|
439
|
275
|
50
|
|
|
|
348
|
last if ($count >= $assayNum); |
440
|
275
|
|
|
|
|
299
|
$expressedAssays{$g} = 1; |
441
|
275
|
|
|
|
|
226
|
$count++; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
14
|
|
|
|
|
46
|
my (%pvalues, %sortedprobes, $pid, @pids, $file, $iproc, $err); |
445
|
14
|
|
|
|
|
37
|
my $tmpdir = $self->{TMPDIR}; |
446
|
14
|
|
|
|
|
65
|
for ($iproc = 0; $iproc < $self->{MAXPROC}; $iproc++) { |
447
|
44
|
100
|
|
|
|
47934
|
if ($pid = fork) { |
|
|
50
|
|
|
|
|
|
448
|
36
|
50
|
|
|
|
478
|
print STDERR "Process $pid forked.\n" if $self->{DEBUG}; |
449
|
36
|
|
|
|
|
1058
|
push @pids, $pid; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
elsif (defined $pid) { |
452
|
8
|
|
|
|
|
12001256
|
sleep $iproc; |
453
|
8
|
|
|
|
|
591
|
$file = "$tmpdir/sdrs.out.$iproc"; |
454
|
8
|
50
|
|
|
|
2655
|
open (ECOUT, ">$file") || die "can not open EC50 output file: $!\n"; |
455
|
|
|
|
|
|
|
# |
456
|
|
|
|
|
|
|
# The following statement causes the system to write each output line |
457
|
|
|
|
|
|
|
# to disk immediately, rather than waiting for a buffer to fill. |
458
|
|
|
|
|
|
|
# |
459
|
9
|
|
|
9
|
|
5868
|
select((select(ECOUT), $| = 1)[$[]); # Change buffering to line by line. |
|
9
|
|
|
|
|
3645
|
|
|
9
|
|
|
|
|
24975
|
|
|
8
|
|
|
|
|
945
|
|
460
|
8
|
|
|
|
|
143
|
my $last_dose = $self->{LDOSE}; |
461
|
8
|
|
|
|
|
84
|
my $icount = 0; |
462
|
8
|
|
|
|
|
401
|
for (my $dose = $self->{LDOSE}; |
463
|
|
|
|
|
|
|
$dose < $self->{HDOSE} * $self->{MULTIPLE}; |
464
|
|
|
|
|
|
|
$dose *= $self->{MULTIPLE}) { |
465
|
10984
|
100
|
|
|
|
21057
|
if ($dose - $last_dose > $self->{STEP}) { |
466
|
9840
|
|
|
|
|
10146
|
$dose = $last_dose + $self->{STEP}; |
467
|
|
|
|
|
|
|
} |
468
|
10984
|
100
|
|
|
|
23596
|
if (($icount++ % $self->{MAXPROC}) == $iproc) { |
469
|
2746
|
|
|
|
|
13282
|
print ECOUT $self->_scan_dose_point($dose); |
470
|
|
|
|
|
|
|
} |
471
|
10984
|
|
|
|
|
36413
|
$last_dose = $dose; |
472
|
|
|
|
|
|
|
} |
473
|
8
|
|
|
|
|
401
|
close ECOUT; |
474
|
8
|
|
|
|
|
867
|
exit 0; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
else { |
477
|
0
|
|
|
|
|
0
|
die "Can't fork: $!\n"; |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
} |
480
|
6
|
|
|
|
|
166
|
$err = 0; |
481
|
6
|
|
|
|
|
72
|
foreach $pid (@pids) { |
482
|
24
|
|
|
|
|
286628847
|
waitpid($pid, 0); |
483
|
24
|
50
|
|
|
|
495
|
if ($? != 0) { |
484
|
0
|
|
|
|
|
0
|
print STDERR "Process $pid return status was $?\n"; |
485
|
0
|
|
|
|
|
0
|
$err = 1; |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# Now collect all the outputs together. |
490
|
6
|
|
|
|
|
34
|
%{$self->{GSCORE}} = (); |
|
6
|
|
|
|
|
69
|
|
491
|
6
|
|
|
|
|
17
|
%{$self->{GPARAM}} = (); |
|
6
|
|
|
|
|
34
|
|
492
|
6
|
|
|
|
|
6
|
%{$self->{FSCORES}} = (); |
|
6
|
|
|
|
|
25
|
|
493
|
6
|
|
|
|
|
36
|
my $multiple = $self->{MULTIPLE}; |
494
|
6
|
|
|
|
|
25
|
my $step = $self->{STEP}; |
495
|
6
|
|
|
|
|
12
|
@{$self->{SCANDATA}} = (); |
|
6
|
|
|
|
|
26
|
|
496
|
|
|
|
|
|
|
|
497
|
6
|
|
|
|
|
31
|
for ($iproc = 0; $iproc < $self->{MAXPROC}; $iproc++) { |
498
|
24
|
|
|
|
|
118
|
my $infile = "$tmpdir/sdrs.out.$iproc"; |
499
|
24
|
50
|
|
|
|
1493
|
open (IN, "<$infile") || die "can not open input file $infile: $!\n"; |
500
|
24
|
|
|
|
|
574
|
while () { |
501
|
207323
|
|
|
|
|
146337
|
push (@{$self->{SCANDATA}}, $_); |
|
207323
|
|
|
|
|
295781
|
|
502
|
207323
|
|
|
|
|
187921
|
chomp; |
503
|
207323
|
|
|
|
|
530684
|
my ($assay, $dose, $fscore, $a, $b, $d) = split (/\t/, $_); |
504
|
207323
|
|
|
|
|
420726
|
$self->{GSCORE}->{$assay}->{$dose} = $fscore; |
505
|
207323
|
|
|
|
|
433276
|
$self->{GPARAM}->{$assay}->{$dose} = "$a:$b:$d"; |
506
|
207323
|
50
|
|
|
|
326099
|
if (defined $expressedAssays{$assay}) { |
507
|
207323
|
|
|
|
|
593001
|
$self->{FSCORES}->{$dose}->{$assay} = $fscore; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
} |
510
|
24
|
|
|
|
|
817
|
close IN; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
#sort probesets based on F scores for every selected doses, output P values for FDR calculation |
514
|
6
|
|
|
|
|
12
|
%{$self->{SORTED_DATA}} = (); |
|
6
|
|
|
|
|
35
|
|
515
|
6
|
|
|
|
|
67
|
%{$self->{PVAL_DATA}} = (); |
|
6
|
|
|
|
|
13
|
|
516
|
6
|
|
|
|
|
11
|
foreach my $dose (keys %{$self->{FSCORES}}) { |
|
6
|
|
|
|
|
3358
|
|
517
|
8238
|
|
|
|
|
8268
|
my %fs = %{$self->{FSCORES}->{$dose}}; |
|
8238
|
|
|
|
|
221508
|
|
518
|
8238
|
50
|
|
|
|
67764
|
foreach my $assay (sort {$fs{$b} <=> $fs{$a} || $a cmp $b } (keys %fs)) { |
|
745540
|
|
|
|
|
1261965
|
|
519
|
207323
|
|
|
|
|
164415
|
push (@{$self->{SORTED_DATA}->{$dose}}, $assay); |
|
207323
|
|
|
|
|
406669
|
|
520
|
207323
|
|
|
|
|
188970
|
my $pvalue; |
521
|
207323
|
50
|
|
|
|
377916
|
if ($fs{$assay} < 0) { |
522
|
0
|
|
|
|
|
0
|
$pvalue = 1.0; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
else { |
525
|
207323
|
|
|
|
|
499208
|
$pvalue = Statistics::Distributions::fprob($self->{FDISTR_N}, |
526
|
|
|
|
|
|
|
$self->{FDISTR_M}, |
527
|
|
|
|
|
|
|
$fs{$assay}); |
528
|
|
|
|
|
|
|
} |
529
|
207323
|
|
|
|
|
9396046
|
push (@{$self->{PVAL_DATA}->{$dose}}, $pvalue); |
|
207323
|
|
|
|
|
745905
|
|
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
#calculates EC50 for every probeset |
534
|
6
|
|
|
|
|
1859
|
%{$self->{EC50DATA}} = (); |
|
6
|
|
|
|
|
31
|
|
535
|
6
|
|
|
|
|
17
|
foreach my $assay (keys %{$self->{GPARAM}}) { |
|
6
|
|
|
|
|
75
|
|
536
|
151
|
|
|
|
|
367
|
my (%fscore, %param); |
537
|
151
|
|
|
|
|
201
|
%fscore = %{$self->{GSCORE}->{$assay}}; |
|
151
|
|
|
|
|
221885
|
|
538
|
151
|
|
|
|
|
14542
|
%param = %{$self->{GPARAM}->{$assay}}; |
|
151
|
|
|
|
|
150879
|
|
539
|
|
|
|
|
|
|
|
540
|
151
|
|
|
|
|
29908
|
my @sorted_list = sort {$a<=>$b} (values %fscore); |
|
1884222
|
|
|
|
|
1515218
|
|
541
|
151
|
|
|
|
|
534
|
my $max = pop @sorted_list; |
542
|
151
|
|
|
|
|
402
|
my $min = shift @sorted_list; |
543
|
151
|
|
|
|
|
71745
|
@sorted_list = sort {$fscore{$a}<=>$fscore{$b}} (keys %fscore); |
|
1884222
|
|
|
|
|
1811816
|
|
544
|
151
|
|
|
|
|
20461
|
my $ec50 = pop @sorted_list; #highest f score |
545
|
151
|
|
|
|
|
292
|
my ($range, $high, $low, $peak, $ec50range, $fold); |
546
|
151
|
100
|
|
|
|
1139
|
if ($max >= $self->{CUTOFF}) { |
547
|
111
|
|
|
|
|
950
|
$range = $self->_find_ec50_range(\%fscore, $ec50); |
548
|
111
|
|
|
|
|
819
|
($high, $low, $peak) = split (/\-/, $range); |
549
|
111
|
|
|
|
|
832
|
$ec50range = $high - $low; |
550
|
|
|
|
|
|
|
} else { |
551
|
40
|
|
|
|
|
195
|
$high = $low = $peak = $ec50range = ''; |
552
|
|
|
|
|
|
|
} |
553
|
151
|
|
|
|
|
196
|
my $pvalue; |
554
|
151
|
50
|
|
|
|
493
|
if ($max <= 0.0) { |
555
|
0
|
|
|
|
|
0
|
$pvalue = 1.0; |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
else { |
558
|
151
|
|
|
|
|
1454
|
$pvalue = Statistics::Distributions::fprob($self->{FDISTR_N}, |
559
|
|
|
|
|
|
|
$self->{FDISTR_M}, |
560
|
|
|
|
|
|
|
$max); |
561
|
|
|
|
|
|
|
} |
562
|
151
|
|
|
|
|
18415
|
my ($a, $b, $d) = split (/\:/, $param{$ec50}); |
563
|
151
|
100
|
|
|
|
688
|
if ($d >= 0) { |
564
|
11
|
100
|
|
|
|
52
|
if ($a != 0) { |
565
|
10
|
|
|
|
|
30
|
$fold = -($b/$a); |
566
|
|
|
|
|
|
|
} else { |
567
|
1
|
|
|
|
|
9
|
$fold = -99999; |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
} else { |
570
|
140
|
50
|
|
|
|
450
|
if ($a != 0) { |
571
|
140
|
|
|
|
|
295
|
$fold = $b/$a; |
572
|
|
|
|
|
|
|
} else { |
573
|
0
|
|
|
|
|
0
|
$fold = 99999; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
} |
576
|
151
|
|
|
|
|
123762
|
$self->{EC50DATA}->{$assay} = { MAX => $max, |
577
|
|
|
|
|
|
|
MIN => $min, |
578
|
|
|
|
|
|
|
LOW => $low, |
579
|
|
|
|
|
|
|
HIGH => $high, |
580
|
|
|
|
|
|
|
EC50 => $ec50, |
581
|
|
|
|
|
|
|
PVALUE => $pvalue, |
582
|
|
|
|
|
|
|
EC50RANGE => $ec50range, |
583
|
|
|
|
|
|
|
PEAK => $peak, |
584
|
|
|
|
|
|
|
A => $a, |
585
|
|
|
|
|
|
|
B => $b, |
586
|
|
|
|
|
|
|
D => $d, |
587
|
|
|
|
|
|
|
FOLD => $fold }; |
588
|
|
|
|
|
|
|
} |
589
|
6
|
|
|
|
|
132
|
$self->{STATE} = "calculated"; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=item C |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Return the complete list of scan data used in the SDRS calculation. This is just an array of lines containing the values of the EC50 calculations. |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=cut |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
sub scandata { |
599
|
6
|
|
|
6
|
1
|
3783
|
my $self = shift; |
600
|
6
|
|
|
|
|
17
|
return @{$self->{SCANDATA}}; |
|
6
|
|
|
|
|
99035
|
|
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=item C |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
Return the list of doses used in the SDRS calculation that can be |
606
|
|
|
|
|
|
|
used as arguments for assays and pvalues. |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=cut |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
sub score_doses { |
611
|
5
|
|
|
5
|
1
|
27755
|
my $self = shift; |
612
|
5
|
|
|
|
|
25
|
return keys %{$self->{FSCORES}}; |
|
5
|
|
|
|
|
2865
|
|
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item C |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
Return a list of assays for each dose in $dose sorted by F-score. |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=cut |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
sub sorted_assays_by_dose { |
622
|
6865
|
|
|
6865
|
1
|
52065
|
my $self = shift; |
623
|
6865
|
|
|
|
|
6200
|
my $dose = shift; |
624
|
6865
|
50
|
|
|
|
12225
|
if ($self->{STATE} ne 'calculated') { |
625
|
0
|
|
|
|
|
0
|
$self->calculate; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
6865
|
50
|
|
|
|
8680
|
if (not defined $dose) { |
629
|
0
|
|
|
|
|
0
|
return %{$self->{SORTED_DATA}}; |
|
0
|
|
|
|
|
0
|
|
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
else { |
632
|
6865
|
50
|
|
|
|
10125
|
if (not exists($self->{SORTED_DATA}->{$dose})) { |
633
|
0
|
|
|
|
|
0
|
carp "Sorted assays by dose = $dose does not exist.\n"; |
634
|
0
|
|
|
|
|
0
|
return undef; |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
else { |
637
|
6865
|
|
|
|
|
5185
|
return @{$self->{SORTED_DATA}->{$dose}}; |
|
6865
|
|
|
|
|
66040
|
|
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=item C |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Return a list of P values for the assays returned by sorted_assays_by_dose |
646
|
|
|
|
|
|
|
for each dose in $dose sorted by F-score. |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub pvalues_by_dose { |
651
|
6865
|
|
|
6865
|
1
|
22765
|
my $self = shift; |
652
|
6865
|
|
|
|
|
6415
|
my $dose = shift; |
653
|
6865
|
50
|
|
|
|
11810
|
if ($self->{STATE} ne 'calculated') { |
654
|
0
|
|
|
|
|
0
|
$self->calculate; |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
|
657
|
6865
|
50
|
|
|
|
8645
|
if (not defined $dose) { |
658
|
0
|
|
|
|
|
0
|
return %{$self->{PVAL_DATA}}; |
|
0
|
|
|
|
|
0
|
|
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
else { |
661
|
6865
|
50
|
|
|
|
10370
|
if (not exists($self->{PVAL_DATA}->{$dose})) { |
662
|
0
|
|
|
|
|
0
|
carp "P values by dose = $dose does not exist.\n"; |
663
|
0
|
|
|
|
|
0
|
return undef; |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
else { |
666
|
6865
|
|
|
|
|
4915
|
return @{$self->{PVAL_DATA}->{$dose}}; |
|
6865
|
|
|
|
|
66585
|
|
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=item C |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
Returns EC50 data for the calculation. If no arguments are specified, |
674
|
|
|
|
|
|
|
then the internal hash for the EC50 calculation are returned. If just |
675
|
|
|
|
|
|
|
an C<$assay> is specified, then the internal hash for all the EC50 |
676
|
|
|
|
|
|
|
values associated with that C<$assay> is returned. If an C<$assay> and |
677
|
|
|
|
|
|
|
C<$property> are specified, then the property for that assay is |
678
|
|
|
|
|
|
|
returned. If the C<$precision> operand is specified, then it controls |
679
|
|
|
|
|
|
|
how many digits of precision are returned for the value. |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Here is the list of possible properties. |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
MAX Maximum F score |
684
|
|
|
|
|
|
|
MIN Minimum F score. If this property is negative, then an error |
685
|
|
|
|
|
|
|
was encountered in the calculation of F scores. This is likely due |
686
|
|
|
|
|
|
|
insufficient range in the responses. |
687
|
|
|
|
|
|
|
LOW Lower bound of 95% confidence interval for the estimated EC50. |
688
|
|
|
|
|
|
|
HIGH Upper bound of 95% confidence interval for the estimated EC50. |
689
|
|
|
|
|
|
|
EC50 Estimated EC50. |
690
|
|
|
|
|
|
|
PVALUE P value of the best fitting model |
691
|
|
|
|
|
|
|
EC50RANGE range of 95% confidence interval for the estimated EC50. |
692
|
|
|
|
|
|
|
PEAK Number of peaks in the F scores at search doses across experimental dose range. |
693
|
|
|
|
|
|
|
A Estimated value for A in the best model. |
694
|
|
|
|
|
|
|
B Estimated value for B in the best model. |
695
|
|
|
|
|
|
|
D Estimated value for D in the best model. |
696
|
|
|
|
|
|
|
FOLD Ratio of B/A or 99999.0. If A == 0. Positive if D < 0, negative otherwise. |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub ec50data { |
701
|
1812
|
|
|
1812
|
1
|
4502
|
my $self = shift; |
702
|
1812
|
|
|
|
|
1259
|
my $assay = shift; |
703
|
1812
|
|
|
|
|
1214
|
my $property = shift; |
704
|
1812
|
|
|
|
|
1108
|
my $precision = shift; |
705
|
|
|
|
|
|
|
|
706
|
1812
|
50
|
|
|
|
2620
|
$precision = 6 if not defined $precision; |
707
|
1812
|
50
|
|
|
|
2604
|
if ($self->{STATE} ne 'calculated') { |
708
|
0
|
|
|
|
|
0
|
$self->calculate; |
709
|
|
|
|
|
|
|
} |
710
|
1812
|
50
|
|
|
|
1929
|
if (not defined $assay) { |
711
|
0
|
|
|
|
|
0
|
return %{$self->{EC50DATA}}; |
|
0
|
|
|
|
|
0
|
|
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
else { |
714
|
1812
|
50
|
|
|
|
2528
|
if (not exists($self->{EC50DATA}->{$assay})) { |
715
|
0
|
|
|
|
|
0
|
carp "EC50 data for $assay does not exist.\n"; |
716
|
0
|
|
|
|
|
0
|
return undef; |
717
|
|
|
|
|
|
|
} |
718
|
1812
|
|
|
|
|
1455
|
my $assaydata = $self->{EC50DATA}->{$assay}; |
719
|
1812
|
50
|
|
|
|
1744
|
if (not defined $property) { |
720
|
0
|
|
|
|
|
0
|
return %{$self->{EC50DATA}->{$assay}}; |
|
0
|
|
|
|
|
0
|
|
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
else { |
723
|
1812
|
50
|
|
|
|
1977
|
if (not exists($assaydata->{$property})) { |
724
|
0
|
|
|
|
|
0
|
carp "EC50 data for property $property for assay $assay does not exist.\n"; |
725
|
0
|
|
|
|
|
0
|
return undef; |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
else { |
728
|
1812
|
|
|
|
|
1652
|
my $ret = $assaydata->{$property}; |
729
|
1812
|
100
|
100
|
|
|
5663
|
if ($ret ne "" and $ret != int($ret)) { |
730
|
1394
|
|
|
|
|
3794
|
$ret = sprintf("%.${precision}g", $ret) + 0.0; |
731
|
|
|
|
|
|
|
} |
732
|
1812
|
|
|
|
|
5371
|
return $ret; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
sub _make_tmp { |
739
|
14
|
|
|
14
|
|
14
|
my $self = shift; |
740
|
14
|
50
|
33
|
|
|
930
|
if (-d $self->{TMPDIR} and -w $self->{TMPDIR}) { |
741
|
0
|
|
|
|
|
0
|
return; |
742
|
|
|
|
|
|
|
} |
743
|
14
|
50
|
|
|
|
123
|
if (-d $self->{TMPDIR}) { |
744
|
0
|
|
|
|
|
0
|
croak "Unable to write " . $self->{TMPDIR} . "\n"; |
745
|
|
|
|
|
|
|
} |
746
|
14
|
50
|
|
|
|
131
|
if (-w $self->{TMPDIR}) { |
747
|
0
|
0
|
|
|
|
0
|
unlink($self->{TMPDIR}) || |
748
|
|
|
|
|
|
|
croak "Unable to delete " . $self->{TMPDIR} . "\n"; |
749
|
|
|
|
|
|
|
} |
750
|
14
|
|
|
|
|
113
|
&_system_with_check("mkdir -p " . $self->{TMPDIR}, |
751
|
|
|
|
|
|
|
$self->{DEBUG}); |
752
|
14
|
|
|
|
|
118
|
$self->{TMP_CREATED} = 1; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
sub _find_ec50_range { |
756
|
111
|
|
|
111
|
|
268
|
my $self = shift; |
757
|
111
|
|
|
|
|
232
|
my ($scores, $ec50) = @_; |
758
|
111
|
|
|
|
|
237
|
my @range = (); |
759
|
111
|
|
|
|
|
177
|
my $off_flag = 1; |
760
|
111
|
|
|
|
|
181
|
my $peak = 0; |
761
|
111
|
|
|
|
|
111
|
my $current; |
762
|
111
|
|
|
|
|
136
|
my $seen = 0; |
763
|
111
|
|
|
|
|
196
|
my $boundary; |
764
|
111
|
|
|
|
|
21081
|
foreach my $dose (sort {$a<=>$b} keys %$scores) { |
|
1402431
|
|
|
|
|
1060682
|
|
765
|
152403
|
|
|
|
|
216193
|
$current = $$scores{$dose}; |
766
|
152403
|
100
|
|
|
|
209265
|
$seen = 1 if ($dose == $ec50); |
767
|
152403
|
100
|
|
|
|
184691
|
if ($current >= $self->{CUTOFF}) { |
768
|
99063
|
|
|
|
|
93671
|
push @range, $dose; |
769
|
99063
|
100
|
|
|
|
161602
|
$off_flag = 0 if ($off_flag == 1); |
770
|
|
|
|
|
|
|
} else { |
771
|
53340
|
100
|
|
|
|
68950
|
if ($seen == 1) { |
772
|
55
|
|
|
|
|
350
|
$boundary = _find_boundary(\@range); |
773
|
55
|
|
|
|
|
105
|
$seen = 0; |
774
|
|
|
|
|
|
|
} |
775
|
53340
|
|
|
|
|
46980
|
@range = (); |
776
|
53340
|
100
|
|
|
|
81230
|
if ($off_flag == 0) { |
777
|
55
|
|
|
|
|
95
|
$off_flag = 1; |
778
|
55
|
|
|
|
|
130
|
$peak++; |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
} |
782
|
111
|
100
|
|
|
|
20760
|
$boundary = _find_boundary(\@range) if ($seen == 1); |
783
|
111
|
100
|
|
|
|
448
|
$peak++ if ($current >= $self->{CUTOFF}); |
784
|
|
|
|
|
|
|
|
785
|
111
|
|
|
|
|
5109
|
return "$boundary-$peak"; |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
sub _find_boundary { |
789
|
111
|
|
|
111
|
|
259
|
my $range = shift; |
790
|
111
|
|
|
|
|
156
|
my ($high, $low); |
791
|
|
|
|
|
|
|
|
792
|
111
|
50
|
|
|
|
323
|
if (@$range >= 2) { |
793
|
111
|
|
|
|
|
202
|
$low = shift @$range; |
794
|
111
|
|
|
|
|
273
|
$high = pop @$range; |
795
|
|
|
|
|
|
|
} else { |
796
|
0
|
|
|
|
|
0
|
$low = shift @$range; |
797
|
0
|
|
|
|
|
0
|
$high = $low; |
798
|
|
|
|
|
|
|
} |
799
|
111
|
|
|
|
|
508
|
return "$high-$low"; |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
sub _scan_dose_point { |
803
|
2746
|
|
|
2746
|
|
4492
|
my $self = shift; |
804
|
2746
|
|
|
|
|
3594
|
my $dose = shift; |
805
|
2746
|
|
|
|
|
6604
|
my $ecstring = ''; |
806
|
2746
|
|
|
|
|
4603
|
my $m = $self->{FDISTR_M}; |
807
|
2746
|
|
|
|
|
4236
|
my $n = $self->{FDISTR_N}; |
808
|
2746
|
|
|
|
|
3424
|
foreach my $assay (keys %{$self->{VALUES}}) { |
|
2746
|
|
|
|
|
17709
|
|
809
|
42563
|
|
|
|
|
283631
|
my ($al, $ah, $astep) = split (/\:/, $self->{ARANGE}->{$assay}); |
810
|
42563
|
|
|
|
|
170063
|
my ($bl, $bh, $bstep) = split (/\:/, $self->{BRANGE}->{$assay}); |
811
|
42563
|
|
|
|
|
211820795
|
my ($a, $b, $d, $best_sse) = _compute_best_sse($al, $ah, $astep, |
812
|
|
|
|
|
|
|
$bl, $bh, $bstep, |
813
|
|
|
|
|
|
|
$dose, |
814
|
|
|
|
|
|
|
$self->{VALUES}->{$assay}, |
815
|
|
|
|
|
|
|
$self->{DOSES}); |
816
|
42563
|
50
|
33
|
|
|
459636
|
if ($best_sse == 0 or $n == 0) { |
817
|
0
|
|
|
|
|
0
|
carp sprintf("best_sse($best_sse) = 0 or n($n) = 0 for $assay. arange = %s brange = %s\n", |
818
|
|
|
|
|
|
|
$self->{ARANGE}->{$assay}, |
819
|
|
|
|
|
|
|
$self->{BRANGE}->{$assay}); |
820
|
0
|
|
|
|
|
0
|
$ecstring .= "$assay\t" . |
821
|
|
|
|
|
|
|
sprintf("%.5f", $dose) . |
822
|
|
|
|
|
|
|
"\t-1\t-1\t-1\t-1\n"; |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
else { |
825
|
42563
|
|
|
|
|
221174
|
my $f_score = ($self->{SST}->{$assay} - $best_sse) * $m / ($best_sse * $n); |
826
|
42563
|
|
|
|
|
499043
|
$f_score = sprintf("%.3f", $f_score); |
827
|
42563
|
|
|
|
|
137600
|
$a = sprintf("%.3f", $a); |
828
|
42563
|
|
|
|
|
128574
|
$b = sprintf("%.3f", $b); |
829
|
42563
|
|
|
|
|
116794
|
$d = sprintf("%.3f", $d); |
830
|
42563
|
|
|
|
|
364682
|
$ecstring .= "$assay\t" . |
831
|
|
|
|
|
|
|
sprintf("%.5f", $dose) . |
832
|
|
|
|
|
|
|
"\t$f_score\t$a\t$b\t$d\n"; |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
} |
835
|
2746
|
|
|
|
|
865087
|
return $ecstring; |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
sub _define_ab_boundary { |
839
|
275
|
|
|
275
|
|
460
|
my $self = shift; |
840
|
275
|
|
|
|
|
509
|
my ($assay, $data) = @_; |
841
|
275
|
|
|
|
|
1832
|
my @sorted = sort {$a<=>$b} @$data; |
|
6950
|
|
|
|
|
6678
|
|
842
|
275
|
|
|
|
|
1160
|
my @sorted_copy = @sorted; |
843
|
275
|
|
|
|
|
1371
|
my @brange = splice (@sorted, $#sorted-5, 6); |
844
|
275
|
|
|
|
|
1374
|
$self->_find_step($assay, 'b', \@brange); |
845
|
275
|
|
|
|
|
1912
|
my @arange = splice (@sorted_copy, 0, 6); |
846
|
275
|
|
|
|
|
1442
|
$self->_find_step($assay, 'a', \@arange); |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
sub _find_step { |
850
|
|
|
|
|
|
|
|
851
|
550
|
|
|
550
|
|
1009
|
my $self = shift; |
852
|
550
|
|
|
|
|
1086
|
my ($assay, $pam, $data) = @_; |
853
|
550
|
|
|
|
|
2740
|
my $stdev = Math::NumberCruncher::StandardDeviation($data); |
854
|
550
|
|
|
|
|
3161617
|
my $mean = Math::NumberCruncher::Mean($data); |
855
|
550
|
|
|
|
|
11127
|
my $cutoff = $mean / 10; |
856
|
550
|
100
|
100
|
|
|
2712
|
$stdev = $cutoff if ($stdev eq 'NaN' || $stdev < $cutoff); |
857
|
550
|
|
|
|
|
285996
|
my ($l, $h, $step); |
858
|
550
|
|
|
|
|
2185
|
$step = $stdev / 2.5; |
859
|
550
|
|
|
|
|
432010
|
$step = sprintf("%.3f", $step); |
860
|
550
|
100
|
|
|
|
61433
|
if ($step == 0.0) { |
861
|
5
|
|
|
|
|
1265
|
carp "Data range too small for $assay -- step size raised to 0.001.\n"; |
862
|
5
|
|
|
|
|
400
|
$step = "0.001"; |
863
|
|
|
|
|
|
|
} |
864
|
550
|
100
|
|
|
|
2510
|
if ($pam eq 'a') { |
|
|
50
|
|
|
|
|
|
865
|
275
|
|
|
|
|
1090
|
$h = $mean + 2.3*$stdev; |
866
|
275
|
|
|
|
|
272314
|
$l = $mean - 2*$stdev; |
867
|
275
|
100
|
|
|
|
224417
|
$l = $self->{MIN}->{$assay} if ($l <= 0); |
868
|
275
|
|
|
|
|
44166
|
$h = sprintf("%.3f", $h); |
869
|
275
|
|
|
|
|
24036
|
$l = sprintf("%.3f", $l); |
870
|
275
|
|
|
|
|
18527
|
$self->{ARANGE}->{$assay} = "$l:$h:$step"; |
871
|
|
|
|
|
|
|
} elsif ($pam eq 'b') { |
872
|
275
|
|
|
|
|
1167
|
$h = $mean + 2*$stdev; |
873
|
275
|
|
|
|
|
263235
|
$l = $mean - 2.3*$stdev; |
874
|
275
|
100
|
|
|
|
311141
|
$l = $self->{MIN}->{$assay} if ($l <= 0); |
875
|
275
|
|
|
|
|
46294
|
$h = sprintf("%.3f", $h); |
876
|
275
|
|
|
|
|
27362
|
$l = sprintf("%.3f", $l); |
877
|
275
|
|
|
|
|
14486
|
$self->{BRANGE}->{$assay} = "$l:$h:$step"; |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
sub _compute_sst { |
882
|
275
|
|
|
275
|
|
622
|
my $data = shift; |
883
|
275
|
|
|
|
|
383
|
my $sst = 0; |
884
|
275
|
|
|
|
|
983
|
my $mean = Math::NumberCruncher::Mean($data); |
885
|
|
|
|
|
|
|
|
886
|
275
|
|
|
|
|
6152
|
foreach my $data (@$data) { |
887
|
3010
|
|
|
|
|
4509
|
$sst += ($data - $mean)**2; |
888
|
|
|
|
|
|
|
} |
889
|
275
|
|
|
|
|
658
|
return $sst; |
890
|
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub _system_with_check { |
893
|
|
|
|
|
|
|
|
894
|
20
|
|
|
20
|
|
46
|
my $command = shift; |
895
|
20
|
|
|
|
|
46
|
my $echo = shift; |
896
|
|
|
|
|
|
|
|
897
|
20
|
50
|
33
|
|
|
151
|
if (defined $echo and $echo) { |
898
|
0
|
|
|
|
|
0
|
&_dated_mesg($command); |
899
|
|
|
|
|
|
|
} |
900
|
20
|
|
|
|
|
113506
|
my $status = system("$command"); |
901
|
20
|
50
|
|
|
|
474625
|
if ($status != 0) { |
902
|
0
|
|
|
|
|
|
croak "Shell command: $command returned $status error code.\n"; |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
sub _dated_mesg { |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
# Print a dated message to STDERR |
909
|
0
|
|
|
0
|
|
|
my $mesg = shift; |
910
|
|
|
|
|
|
|
|
911
|
0
|
|
|
|
|
|
my $date = &_date; |
912
|
0
|
0
|
|
|
|
|
if (substr($mesg, -1, 1) ne "\n") { |
913
|
0
|
|
|
|
|
|
$mesg .= "\n"; |
914
|
|
|
|
|
|
|
} |
915
|
0
|
|
|
|
|
|
print STDERR "At $date: $mesg"; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
sub _date { |
919
|
0
|
|
|
0
|
|
|
return strftime("%a %b %d %T %Z %Y", localtime); |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
=back |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=head1 SEE ALSO |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
sdrs.pl |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=head1 AUTHORS |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Ruiru Ji |
931
|
|
|
|
|
|
|
Nathan O. Siemers |
932
|
|
|
|
|
|
|
Lei Ming |
933
|
|
|
|
|
|
|
Liang Schweizer |
934
|
|
|
|
|
|
|
Robert Bruccoleri |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=cut |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
|
939
|
9
|
|
|
9
|
|
8136
|
use Inline C => <<'END_OF_C_CODE', NAME => 'Bio::SDRS::FastSSE'; |
|
9
|
|
|
|
|
161550
|
|
|
9
|
|
|
|
|
54
|
|
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
void extract_double_array(SV *arrayp, |
942
|
|
|
|
|
|
|
double a[], |
943
|
|
|
|
|
|
|
I32 asize, |
944
|
|
|
|
|
|
|
I32 *limitp); |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
void _compute_best_sse(double al, |
947
|
|
|
|
|
|
|
double ah, |
948
|
|
|
|
|
|
|
double astep, |
949
|
|
|
|
|
|
|
double bl, |
950
|
|
|
|
|
|
|
double bh, |
951
|
|
|
|
|
|
|
double bstep, |
952
|
|
|
|
|
|
|
double dose, |
953
|
|
|
|
|
|
|
SV* valuesp, |
954
|
|
|
|
|
|
|
SV* dosesp) |
955
|
|
|
|
|
|
|
{ |
956
|
|
|
|
|
|
|
double a, b, d, diff, sse, tmp, y_hat, d2; |
957
|
|
|
|
|
|
|
double a_best, b_best, d_best, sse_best; |
958
|
|
|
|
|
|
|
SV* param_ret; |
959
|
|
|
|
|
|
|
SV* best_sse_ret; |
960
|
|
|
|
|
|
|
I32 count = 0; |
961
|
|
|
|
|
|
|
I32 i; |
962
|
|
|
|
|
|
|
I32 vlimit, dlimit; |
963
|
|
|
|
|
|
|
#define DOSE_SIZE 100 |
964
|
|
|
|
|
|
|
double doses[DOSE_SIZE]; |
965
|
|
|
|
|
|
|
double values[DOSE_SIZE]; |
966
|
|
|
|
|
|
|
Inline_Stack_Vars; |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
extract_double_array(valuesp, values, DOSE_SIZE, &vlimit); |
969
|
|
|
|
|
|
|
extract_double_array(dosesp, doses, DOSE_SIZE, &dlimit); |
970
|
|
|
|
|
|
|
if (vlimit != dlimit) { |
971
|
|
|
|
|
|
|
fprintf(stderr, "Error in compute_best_sse: limit mismatch. vlimit = %d dlimit = %s\n", |
972
|
|
|
|
|
|
|
vlimit, dlimit); |
973
|
|
|
|
|
|
|
exit(1); |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
for (a = al; a < ah; a += astep) { |
977
|
|
|
|
|
|
|
// fprintf(stderr, "a = %14.10g\n", a); |
978
|
|
|
|
|
|
|
for (b = bh; b > bl && a <= b; b -= bstep) { |
979
|
|
|
|
|
|
|
// fprintf(stderr, "b = %14.10g\n", b); |
980
|
|
|
|
|
|
|
diff = b - a; |
981
|
|
|
|
|
|
|
for (d = -6; d < 6.3; d += 0.3) { |
982
|
|
|
|
|
|
|
// fprintf(stderr, "d = %14.10g dlimit = %d\n", d, dlimit); |
983
|
|
|
|
|
|
|
sse = 0; |
984
|
|
|
|
|
|
|
for (i = 0; i <= dlimit; i++) { |
985
|
|
|
|
|
|
|
tmp = doses[i] / dose; |
986
|
|
|
|
|
|
|
tmp = pow(tmp, d); |
987
|
|
|
|
|
|
|
y_hat = a + diff / (1 + tmp); |
988
|
|
|
|
|
|
|
d2 = (y_hat - values[i]); |
989
|
|
|
|
|
|
|
sse += d2 * d2; |
990
|
|
|
|
|
|
|
} |
991
|
|
|
|
|
|
|
if (++count == 1 || sse < sse_best) { |
992
|
|
|
|
|
|
|
a_best = a; |
993
|
|
|
|
|
|
|
b_best = b; |
994
|
|
|
|
|
|
|
d_best = d; |
995
|
|
|
|
|
|
|
sse_best = sse; |
996
|
|
|
|
|
|
|
} |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
Inline_Stack_Reset; |
1001
|
|
|
|
|
|
|
Inline_Stack_Push(sv_2mortal(newSVnv(a_best))); |
1002
|
|
|
|
|
|
|
Inline_Stack_Push(sv_2mortal(newSVnv(b_best))); |
1003
|
|
|
|
|
|
|
Inline_Stack_Push(sv_2mortal(newSVnv(d_best))); |
1004
|
|
|
|
|
|
|
Inline_Stack_Push(sv_2mortal(newSVnv(sse_best))); |
1005
|
|
|
|
|
|
|
Inline_Stack_Done; |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
void extract_double_array(SV *arrayp, |
1009
|
|
|
|
|
|
|
double a[], |
1010
|
|
|
|
|
|
|
I32 asize, |
1011
|
|
|
|
|
|
|
I32 *limitp) |
1012
|
|
|
|
|
|
|
/* |
1013
|
|
|
|
|
|
|
* Extract the elements of the Perl array, pointed to by arrayp, |
1014
|
|
|
|
|
|
|
* into the C array a. The size of a is asize, and the code will check |
1015
|
|
|
|
|
|
|
* for overflow. The limiting subscript for arrayp will be returned in |
1016
|
|
|
|
|
|
|
* *limitp. */ |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
{ |
1019
|
|
|
|
|
|
|
AV* array; |
1020
|
|
|
|
|
|
|
I32 limit, i; |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
array = SvRV(arrayp); |
1023
|
|
|
|
|
|
|
limit = av_len(array); |
1024
|
|
|
|
|
|
|
if (limit+1 > asize) { |
1025
|
|
|
|
|
|
|
fprintf(stderr, |
1026
|
|
|
|
|
|
|
"extract_double_array: Perl array is too big. limit = %d asize = %d\n", |
1027
|
|
|
|
|
|
|
limit, asize); |
1028
|
|
|
|
|
|
|
exit(1); |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
for (i = 0; i <= limit; i++) { |
1031
|
|
|
|
|
|
|
SV *element, **elementp; |
1032
|
|
|
|
|
|
|
elementp = av_fetch(array, i, 0); |
1033
|
|
|
|
|
|
|
element = *elementp; |
1034
|
|
|
|
|
|
|
a[i] = SvNV(element); |
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
*limitp = limit; |
1037
|
|
|
|
|
|
|
} |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
END_OF_C_CODE |