line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::CurveFit::Simple; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABSTRACT: Convenience wrapper around Algorithm::CurveFit. |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '1.03'; # VERSION 1.03 |
6
|
|
|
|
|
|
|
|
7
|
5
|
|
|
5
|
|
562395
|
use strict; |
|
5
|
|
|
|
|
21
|
|
|
5
|
|
|
|
|
111
|
|
8
|
5
|
|
|
5
|
|
20
|
use warnings; |
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
120
|
|
9
|
5
|
|
|
5
|
|
1876
|
use Algorithm::CurveFit; |
|
5
|
|
|
|
|
541876
|
|
|
5
|
|
|
|
|
205
|
|
10
|
5
|
|
|
5
|
|
38
|
use Time::HiRes; |
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
42
|
|
11
|
5
|
|
|
5
|
|
406
|
use JSON::PP; |
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
423
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our %STATS_H; # side-products of fit() stored here for profiling purposes |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
BEGIN { |
16
|
5
|
|
|
5
|
|
24
|
require Exporter; |
17
|
5
|
|
|
|
|
9
|
our $VERSION = '1.03'; |
18
|
5
|
|
|
|
|
90
|
our @ISA = qw(Exporter); |
19
|
5
|
|
|
|
|
8866
|
our @EXPORT_OK = qw(fit %STATS_H); |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# fit() - only public function for this distribution |
23
|
|
|
|
|
|
|
# Given at least parameter "xy", generate a best-fit curve within a time limit. |
24
|
|
|
|
|
|
|
# Output: max deviation, avg deviation, implementation source string (perl or C, for now). |
25
|
|
|
|
|
|
|
# Optional parameters and their defaults: |
26
|
|
|
|
|
|
|
# terms => 3 # number of terms in formula, max is 10 |
27
|
|
|
|
|
|
|
# time_limit => 3 # number of seconds to try for better fit |
28
|
|
|
|
|
|
|
# inv => 1 # invert sense of curve-fit, from x->y to y->x |
29
|
|
|
|
|
|
|
# impl_lang => 'perl' # programming language used for output implementation: perl, c |
30
|
|
|
|
|
|
|
# impl_name => 'x2y' # name given to output implementation function |
31
|
|
|
|
|
|
|
sub fit { |
32
|
1
|
|
|
1
|
1
|
79
|
my %p = @_; |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
13
|
my $formula = _init_formula(%p); |
35
|
1
|
|
|
|
|
5
|
my ($xdata, $ydata) = _init_data(%p); |
36
|
1
|
|
|
|
|
4
|
my $parameters = _init_parameters($xdata, $ydata, %p); |
37
|
|
|
|
|
|
|
|
38
|
1
|
|
|
|
|
1
|
my $iter_mode = 'time'; |
39
|
1
|
|
|
|
|
2
|
my $time_limit = 3; # sane default? |
40
|
1
|
50
|
|
|
|
2
|
$time_limit = 0.01 if ($time_limit < 0.01); |
41
|
1
|
|
|
|
|
2
|
my $n_iter; |
42
|
1
|
50
|
|
|
|
3
|
if (defined($p{iterations})) { |
43
|
0
|
|
|
|
|
0
|
$iter_mode = 'iter'; |
44
|
0
|
|
0
|
|
|
0
|
$n_iter = $p{iterations} || 10000; |
45
|
|
|
|
|
|
|
} else { |
46
|
1
|
|
33
|
|
|
4
|
$time_limit = $p{time_limit} // $time_limit; |
47
|
1
|
|
|
|
|
2
|
$n_iter = 10000 * $time_limit; # will use this to figure out how long it -really- takes. |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
1
|
|
|
|
|
2
|
my ($n_sec, $params_ar_ar); |
51
|
1
|
50
|
|
|
|
3
|
if ($iter_mode eq 'time') { |
52
|
1
|
|
|
|
|
4
|
($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
53
|
1
|
|
|
|
|
4
|
$STATS_H{iter_mode} = $iter_mode; |
54
|
1
|
|
|
|
|
3
|
$STATS_H{fit_calib_iter} = $n_iter; |
55
|
1
|
|
|
|
|
2
|
$STATS_H{fit_calib_time} = $n_sec; |
56
|
1
|
|
|
|
|
2
|
$STATS_H{fit_calib_parar} = $params_ar_ar; |
57
|
1
|
|
|
|
|
3
|
$n_iter = int(($time_limit / $n_sec) * $n_iter + 1); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
|
|
5
|
($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
61
|
1
|
|
|
|
|
4
|
$STATS_H{fit_iter} = $n_iter; |
62
|
1
|
|
|
|
|
3
|
$STATS_H{fit_time} = $n_sec; |
63
|
1
|
|
|
|
|
2
|
$STATS_H{fit_parar} = $params_ar_ar; |
64
|
|
|
|
|
|
|
|
65
|
1
|
|
|
|
|
5
|
my $coderef = _implement_formula($params_ar_ar, "coderef", "", $xdata, \%p); |
66
|
1
|
|
|
|
|
4
|
my ($max_dev, $avg_dev) = _calculate_deviation($coderef, $xdata, $ydata); |
67
|
1
|
|
50
|
|
|
4
|
my $impl_lang = $p{impl_lang} // 'perl'; |
68
|
1
|
|
|
|
|
3
|
$impl_lang = lc($impl_lang); |
69
|
1
|
50
|
|
|
|
3
|
my $impl_name = $p{inv} ? "y2x" : "x2y"; |
70
|
1
|
|
33
|
|
|
6
|
$impl_name = $p{impl_name} // $impl_name; |
71
|
1
|
|
|
|
|
2
|
my $impl = $coderef; |
72
|
1
|
50
|
|
|
|
5
|
$impl = _implement_formula($params_ar_ar, $impl_lang, $impl_name, $xdata, \%p) unless($impl_lang eq 'coderef'); |
73
|
1
|
|
|
|
|
17
|
return ($max_dev, $avg_dev, $impl); |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# ($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
77
|
|
|
|
|
|
|
sub _try_fit { |
78
|
2
|
|
|
2
|
|
9
|
my ($formula, $parameters, $xdata, $ydata, $n_iter, $fitter_class) = @_; |
79
|
2
|
|
50
|
|
|
18
|
$fitter_class //= "Algorithm::CurveFit"; |
80
|
2
|
|
|
|
|
4
|
my $params_ar_ar = [map {[@$_]} @$parameters]; # making a copy because curve_fit() is destructive |
|
8
|
|
|
|
|
15
|
|
81
|
2
|
|
|
|
|
6
|
my $tm0 = Time::HiRes::time(); |
82
|
2
|
|
|
|
|
16
|
my $res = $fitter_class->curve_fit( |
83
|
|
|
|
|
|
|
formula => $formula, |
84
|
|
|
|
|
|
|
params => $params_ar_ar, |
85
|
|
|
|
|
|
|
variable => 'x', |
86
|
|
|
|
|
|
|
xdata => $xdata, |
87
|
|
|
|
|
|
|
ydata => $ydata, |
88
|
|
|
|
|
|
|
maximum_iterations => $n_iter |
89
|
|
|
|
|
|
|
); |
90
|
2
|
|
|
|
|
203406
|
my $tm_elapsed = Time::HiRes::time() - $tm0; |
91
|
2
|
|
|
|
|
9
|
return ($tm_elapsed, $params_ar_ar); |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub _init_formula { |
95
|
11
|
|
|
11
|
|
10092
|
my %p = @_; |
96
|
11
|
|
|
|
|
17
|
my $formula = 'k + a*x + b*x^2 + c*x^3'; # sane'ish default |
97
|
11
|
|
100
|
|
|
41
|
my $terms = $p{terms} // 3; |
98
|
11
|
50
|
|
|
|
20
|
die "maximum of 10 terms\n" if ($terms > 10); |
99
|
11
|
100
|
|
|
|
20
|
if ($terms != 3) { |
100
|
8
|
|
|
|
|
10
|
$formula = 'k'; |
101
|
8
|
|
|
|
|
14
|
for (my $i = 1; $i <= $terms; $i++) { |
102
|
42
|
|
|
|
|
44
|
my $fact = chr(ord('a') + $i - 1); |
103
|
42
|
|
|
|
|
79
|
$formula .= " + $fact * x^$i"; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
} |
106
|
11
|
|
|
|
|
48
|
return $formula; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# ($xdata, $ydata) = _init_data(%p); |
110
|
|
|
|
|
|
|
sub _init_data { |
111
|
15
|
|
|
15
|
|
10628
|
my %p = @_; |
112
|
15
|
|
|
|
|
24
|
my ($xdata, $ydata); |
113
|
15
|
100
|
100
|
|
|
66
|
if (defined($p{xydata})) { |
|
|
100
|
|
|
|
|
|
114
|
7
|
|
|
|
|
9
|
my $xy = $p{xydata}; |
115
|
7
|
50
|
33
|
|
|
54
|
unless ( |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
116
|
|
|
|
|
|
|
ref($xy) eq 'ARRAY' |
117
|
|
|
|
|
|
|
&& @$xy >= 2 |
118
|
|
|
|
|
|
|
&& ref($xy->[0]) eq 'ARRAY' |
119
|
|
|
|
|
|
|
&& ref($xy->[1]) eq 'ARRAY' |
120
|
|
|
|
|
|
|
) { |
121
|
0
|
|
|
|
|
0
|
die "xydata must be either an arrayref of [x, y] data point arrayrefs or an arrayref [[x0, x1, ... xN], [y0, y1, ... yN]]\n"; |
122
|
|
|
|
|
|
|
} |
123
|
7
|
100
|
100
|
|
|
17
|
if (@$xy == 2 && @{$xy->[0]} > 2) { |
|
3
|
|
|
|
|
7
|
|
124
|
|
|
|
|
|
|
# user has provided [[x, ..], [y, ..]] |
125
|
2
|
|
|
|
|
2
|
$xdata = $xy->[0]; |
126
|
2
|
|
|
|
|
3
|
$ydata = $xy->[1]; |
127
|
|
|
|
|
|
|
} else { |
128
|
|
|
|
|
|
|
# user has provided [[x, y], [x, y], ..] |
129
|
5
|
100
|
|
|
|
6
|
die "pairwise xydata must have two data points per element\n" unless(@{$xy->[0]} == 2); |
|
5
|
|
|
|
|
16
|
|
130
|
4
|
|
|
|
|
6
|
$xdata = [map {$_->[0]} @{$xy}]; |
|
21
|
|
|
|
|
26
|
|
|
4
|
|
|
|
|
7
|
|
131
|
4
|
|
|
|
|
6
|
$ydata = [map {$_->[1]} @{$xy}]; |
|
21
|
|
|
|
|
21
|
|
|
4
|
|
|
|
|
5
|
|
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
elsif (defined($p{xdata}) && defined($p{ydata})) { |
135
|
5
|
|
|
|
|
8
|
$xdata = $p{xdata}; |
136
|
5
|
|
|
|
|
4
|
$ydata = $p{ydata}; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
else { |
139
|
3
|
|
|
|
|
14
|
die "must provide at least xydata or both xdata and ydata\n"; |
140
|
|
|
|
|
|
|
} |
141
|
11
|
50
|
33
|
|
|
40
|
die "xdata and ydata must both be arrayref\n" unless (ref($xdata) eq "ARRAY" && ref($ydata) eq "ARRAY"); |
142
|
11
|
100
|
|
|
|
31
|
die "xdata and ydata must have the same number of elements\n" unless (@$xdata == @$ydata); |
143
|
9
|
100
|
|
|
|
17
|
die "must have more than one data-point\n" unless (@$xdata > 1); |
144
|
8
|
100
|
|
|
|
15
|
if ($p{inv}) { |
145
|
3
|
|
|
|
|
5
|
$STATS_H{xdata} = $ydata; |
146
|
3
|
|
|
|
|
4
|
$STATS_H{ydata} = $xdata; |
147
|
3
|
|
|
|
|
18
|
return ($ydata, $xdata); |
148
|
|
|
|
|
|
|
} |
149
|
5
|
|
|
|
|
8
|
$STATS_H{xdata} = $xdata; |
150
|
5
|
|
|
|
|
7
|
$STATS_H{ydata} = $ydata; |
151
|
5
|
|
|
|
|
16
|
return ($xdata, $ydata); |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# $parameters = _init_parameters($xdata, $ydata, %p); |
155
|
|
|
|
|
|
|
sub _init_parameters { |
156
|
3
|
|
|
3
|
|
1207
|
my ($xdata, $ydata, %p) = @_; |
157
|
3
|
|
|
|
|
5
|
my $k = 0; |
158
|
3
|
|
|
|
|
4
|
my $n_points = @$xdata; |
159
|
3
|
|
|
|
|
6
|
foreach my $v (@$ydata) { $k += $v; } |
|
18
|
|
|
|
|
18
|
|
160
|
3
|
|
|
|
|
5
|
$k /= $n_points; |
161
|
|
|
|
|
|
|
# zzapp -- implement any precision hints here. |
162
|
3
|
|
|
|
|
9
|
my @params = (['k', $k, 0.0000001]); |
163
|
3
|
|
100
|
|
|
11
|
my $terms = $p{terms} // 3; |
164
|
3
|
|
|
|
|
8
|
push @params, map {[chr(ord('a')+$_-1), 0.5, 0.0000001]} (1..$terms); |
|
8
|
|
|
|
|
18
|
|
165
|
3
|
|
|
|
|
10
|
return \@params; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# $impl = _implement_formula($params_ar_ar, $impl_lang, $impl_name, $xdata, \%p) unless($impl_lang eq 'coderef'); |
169
|
|
|
|
|
|
|
sub _implement_formula { |
170
|
10
|
|
|
10
|
|
4609
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
171
|
10
|
100
|
|
|
|
28
|
return _implement_formula_as_coderef(@_) if ($impl_lang eq 'coderef'); |
172
|
|
|
|
|
|
|
# return _implement_formula_as_python(@_) if ($impl_lang eq 'python'); # zzapp |
173
|
8
|
100
|
|
|
|
18
|
return _implement_formula_as_C(@_) if ($impl_lang eq 'c'); |
174
|
|
|
|
|
|
|
# return _implement_formula_as_R(@_) if ($impl_lang eq 'r'); # zzapp |
175
|
|
|
|
|
|
|
# return _implement_formula_as_MATLAB(@_) if ($impl_lang eq 'matlab'); # zzapp |
176
|
5
|
|
|
|
|
14
|
return _implement_formula_as_perl(@_); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub _implement_formula_as_coderef { |
180
|
2
|
|
|
2
|
|
6
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
181
|
2
|
|
|
|
|
4
|
my $k_ar = $params_ar_ar->[0]; |
182
|
2
|
|
|
|
|
15
|
my $formula = sprintf("%f", $k_ar->[1]); |
183
|
2
|
|
|
|
|
6
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
184
|
6
|
|
|
|
|
9
|
my $fact = $params_ar_ar->[$i]->[1]; |
185
|
6
|
100
|
|
|
|
14
|
my $pow = ($i == 1) ? "" : "**$i"; |
186
|
6
|
|
|
|
|
26
|
$formula .= sprintf(' + %f * $x%s', $fact, $pow); |
187
|
|
|
|
|
|
|
} |
188
|
2
|
|
|
|
|
4
|
$STATS_H{impl_formula} = $formula; |
189
|
2
|
|
|
|
|
5
|
my $bounder = ''; |
190
|
2
|
50
|
|
|
|
8
|
if ($opt_hr->{bounds_check}) { |
191
|
0
|
|
|
|
|
0
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
192
|
0
|
|
|
|
|
0
|
foreach my $x (@$xdata) { |
193
|
0
|
0
|
|
|
|
0
|
$high_x = $x if ($high_x < $x); |
194
|
0
|
0
|
|
|
|
0
|
$low_x = $x if ($low_x > $x); |
195
|
|
|
|
|
|
|
} |
196
|
0
|
|
|
|
|
0
|
$bounder = 'die "x out of bounds (high)" if ($x > '.$high_x.'); die "x out of bounds (low)" if ($x < '.$low_x.');'; |
197
|
|
|
|
|
|
|
} |
198
|
2
|
|
|
|
|
3
|
my $rounder = ''; |
199
|
2
|
50
|
|
|
|
6
|
$rounder = '$y = int($y + 0.5);' if ($opt_hr->{round_result}); |
200
|
2
|
|
|
|
|
8
|
my $src = 'sub { my($x) = @_; '.$bounder.' my $y = '.$formula.'; '.$rounder.' return $y; }'; |
201
|
2
|
|
|
|
|
4
|
$STATS_H{impl_source} = $src; |
202
|
2
|
|
|
|
|
3
|
$STATS_H{impl_exception} = ''; |
203
|
2
|
|
|
|
|
195
|
my $coderef = eval($src); |
204
|
2
|
50
|
|
|
|
8
|
$STATS_H{impl_exception} = $@ unless(defined($coderef)); |
205
|
2
|
|
|
|
|
7
|
return $coderef; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _implement_formula_as_perl { |
209
|
5
|
|
|
5
|
|
8
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
210
|
5
|
|
|
|
|
9
|
my $k_ar = $params_ar_ar->[0]; |
211
|
5
|
|
|
|
|
31
|
my $formula = sprintf("%.11f", $k_ar->[1]); |
212
|
5
|
|
|
|
|
14
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
213
|
15
|
|
|
|
|
17
|
my $fact = $params_ar_ar->[$i]->[1]; |
214
|
15
|
100
|
|
|
|
31
|
my $pow = ($i == 1) ? "" : "**$i"; |
215
|
15
|
|
|
|
|
54
|
$formula .= sprintf(' + %.11f * $x%s', $fact, $pow); |
216
|
|
|
|
|
|
|
} |
217
|
5
|
|
|
|
|
18
|
$STATS_H{impl_formula} = $formula; |
218
|
5
|
|
|
|
|
6
|
my $bounder = ''; |
219
|
5
|
100
|
|
|
|
12
|
if ($opt_hr->{bounds_check}) { |
220
|
1
|
|
|
|
|
3
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
221
|
1
|
|
|
|
|
2
|
foreach my $x (@$xdata) { |
222
|
7
|
100
|
|
|
|
9
|
$high_x = $x if ($high_x < $x); |
223
|
7
|
100
|
|
|
|
10
|
$low_x = $x if ($low_x > $x); |
224
|
|
|
|
|
|
|
} |
225
|
1
|
|
|
|
|
7
|
$bounder = sprintf(' die "x out of bounds (high)" if ($x > %.11f);'."\n", $high_x) . |
226
|
|
|
|
|
|
|
sprintf(' die "x out of bounds (low)" if ($x < %.11f);'."\n", $low_x); |
227
|
|
|
|
|
|
|
} |
228
|
5
|
|
|
|
|
8
|
my $rounder = ''; |
229
|
5
|
100
|
|
|
|
14
|
$rounder = ' $y = int($y + 0.5);'."\n" if ($opt_hr->{round_result}); |
230
|
5
|
|
|
|
|
21
|
my $src = join("\n",( |
231
|
|
|
|
|
|
|
"sub $impl_name {", |
232
|
|
|
|
|
|
|
' my($x) = @_;', |
233
|
|
|
|
|
|
|
$bounder, |
234
|
|
|
|
|
|
|
' my $y = '.$formula.';', |
235
|
|
|
|
|
|
|
$rounder, |
236
|
|
|
|
|
|
|
' return $y;', |
237
|
|
|
|
|
|
|
'}' |
238
|
|
|
|
|
|
|
)); |
239
|
5
|
|
|
|
|
8
|
$STATS_H{impl_source} = $src; |
240
|
5
|
|
|
|
|
7
|
$STATS_H{impl_exception} = ''; |
241
|
5
|
|
|
|
|
14
|
return $src; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub _implement_formula_as_C { |
245
|
3
|
|
|
3
|
|
18
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
246
|
3
|
|
|
|
|
5
|
my $k_ar = $params_ar_ar->[0]; |
247
|
3
|
|
|
|
|
3
|
my $src = ""; |
248
|
3
|
100
|
66
|
|
|
10
|
$src .= "#include \n" if ($opt_hr->{round_result} && !$opt_hr->{suppress_includes}); |
249
|
3
|
|
|
|
|
6
|
$src .= "double $impl_name(double x) {\n"; |
250
|
3
|
|
|
|
|
16
|
$src .= sprintf(" double y = %.11f;\n", $k_ar->[1]); |
251
|
3
|
|
|
|
|
5
|
$src .= " double xx = x;\n"; # eliminating pow() calls, which gcc doesn't seem willing to optimize completely away |
252
|
|
|
|
|
|
|
|
253
|
3
|
100
|
|
|
|
7
|
if ($opt_hr->{bounds_check}) { |
254
|
1
|
|
|
|
|
2
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
255
|
1
|
|
|
|
|
3
|
foreach my $x (@$xdata) { |
256
|
7
|
100
|
|
|
|
11
|
$high_x = $x if ($high_x < $x); |
257
|
7
|
100
|
|
|
|
8
|
$low_x = $x if ($low_x > $x); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
# zzapp -- this is kludgy. better way to signal bounds violation? |
260
|
1
|
|
|
|
|
8
|
$src .= sprintf(" if (x > %.11f) return -1.0;\n", $high_x) . |
261
|
|
|
|
|
|
|
sprintf(" if (x < %.11f) return -1.0;\n", $low_x); |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
3
|
|
|
|
|
4
|
my $formula = ""; |
265
|
3
|
|
|
|
|
8
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
266
|
9
|
|
|
|
|
10
|
my $fact = $params_ar_ar->[$i]->[1]; |
267
|
9
|
|
|
|
|
21
|
$formula .= sprintf(" y += %.11f * xx;\n", $fact); |
268
|
9
|
100
|
|
|
|
23
|
$formula .= " xx *= x;\n" if(defined($params_ar_ar->[$i+1])); |
269
|
|
|
|
|
|
|
} |
270
|
3
|
|
|
|
|
4
|
$STATS_H{impl_formula} = $formula; # zzapp -- not clean! |
271
|
|
|
|
|
|
|
|
272
|
3
|
|
|
|
|
5
|
$src .= $formula; |
273
|
3
|
100
|
|
|
|
7
|
$src .= " y = round(y);\n" if ($opt_hr->{round_result}); |
274
|
3
|
|
|
|
|
4
|
$src .= " return y;\n}\n"; |
275
|
3
|
|
|
|
|
3
|
$STATS_H{impl_source} = $src; |
276
|
3
|
|
|
|
|
4
|
$STATS_H{impl_exception} = ''; |
277
|
3
|
|
|
|
|
8
|
return $src; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# ($max_dev, $avg_dev) = _calculate_deviation($coderef, $xdata, $ydata); |
281
|
|
|
|
|
|
|
sub _calculate_deviation { |
282
|
1
|
|
|
1
|
|
2
|
my ($coderef, $xdata, $ydata) = @_; |
283
|
1
|
|
|
|
|
2
|
my $max_off = 1.0; |
284
|
1
|
|
|
|
|
2
|
my $max_off_datum = 0.0; |
285
|
1
|
|
|
|
|
2
|
my $tot_off = 0.0; |
286
|
1
|
|
|
|
|
8
|
for (my $i = 0; defined($xdata->[$i]); $i++) { |
287
|
10
|
|
|
|
|
9
|
my $x = $xdata->[$i]; |
288
|
10
|
|
|
|
|
8
|
my $y = eval { $coderef->($x); }; |
|
10
|
|
|
|
|
142
|
|
289
|
10
|
50
|
|
|
|
39
|
unless(defined($y)) { |
290
|
0
|
|
|
|
|
0
|
$STATS_H{deviation_exception} = $@; |
291
|
0
|
|
|
|
|
0
|
$STATS_H{deviation_exception_datum} = $x; |
292
|
0
|
|
|
|
|
0
|
die "caught exception calculating deviations"; |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
10
|
|
|
|
|
14
|
my $observed_y = $ydata->[$i]; |
296
|
10
|
50
|
33
|
|
|
24
|
if ($observed_y && $y) { |
297
|
10
|
100
|
|
|
|
17
|
my $deviation = $y > $observed_y ? $y / $observed_y : $observed_y / $y; |
298
|
10
|
|
|
|
|
11
|
my $dev_mag = abs($deviation - 1.0); |
299
|
10
|
|
|
|
|
11
|
my $max_mag = abs($max_off - 1.0); |
300
|
|
|
|
|
|
|
# print "x=$x\ty=$y\toy=$observed_y\tdev_mag=$dev_mag\tmax_mag=$max_mag\n"; |
301
|
10
|
100
|
|
|
|
15
|
($max_off, $max_off_datum) = ($deviation, $x) if ($dev_mag > $max_mag); |
302
|
10
|
|
|
|
|
19
|
$tot_off += $deviation; |
303
|
|
|
|
|
|
|
} else { |
304
|
0
|
|
|
|
|
0
|
$tot_off += 1.0; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} |
307
|
1
|
|
|
|
|
3
|
$STATS_H{deviation_max_offset_datum} = $max_off_datum; |
308
|
1
|
|
|
|
|
4
|
return ($max_off, $tot_off / @$xdata); |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
1; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head1 NAME |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Algorithm::CurveFit::Simple - Convenience wrapper around Algorithm::CurveFit |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 SYNOPSIS |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
use Algorithm::CurveFit::Simple qw(fit); |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xdata => \@xdata, ydata => \@ydata, ..options..); |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Alternatively pass xdata and ydata together: |
325
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xydata => [\@xdata, \@ydata], ..options..); |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Alternatively pass data as array of [x,y] pairs: |
328
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xydata => [[1, 2], [2, 5], [3, 10]], ..options..); |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head1 DESCRIPTION |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
This is a convenience wrapper around L. Given a body of (x, y) data points, it will generate a polynomial formula f(x) = y which fits that data. |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Its main differences from L are: |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=over 4 |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=item * It synthesizes the initial formula for you, |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item * It allows for a time limit on the curve-fit instead of an iteration count, |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=item * It implements the formula as source code (or as a perl coderef, if you want to use the formula immediately in your program). |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=back |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Additionally it returns a maximum deviation and average deviation of the formula vs the xydata, which is more useful (to me, at least) than L's square residual output. Closer to 1.0 indicates a better fit. Play with C #> until these deviations are as close to 1.0 as possible, and beware overfitting. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=head1 SUBROUTINES |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
There is only one public subroutine, C. It B be given either C or C and C parameters. All other paramters are optional. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
It returns three values: A maximum deviation, the average deviation and the formula implementation. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 Options |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=over 4 |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=item C \@xdata, ydata =E \@ydata)> |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
The data points the formula will fit. Same as L parameters of the same name. |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=item C [[1, 2, 3, 4], [10, 17, 26, 37]])> |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=item C [[1, 10], [2, 17], [3, 26], [4, 37]])> |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
A more convenient way to provide data points. C will try to detect how the data points are organized -- list of x and list of y, or list of [x,y]. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item C 3)> |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Sets the order of the polynomial, which will be of the form C. The default is 3 and the limit is 10. |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
There is no need to specify initial C. It will be calculated from C. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item C 3)> |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
If a time limit is given (in seconds), C will spend no more than that long trying to fit the data. It may return in much less time. The default is 3. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=item C 10000)> |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
If an iteration count is given, C will ignore any time limit and iterate up to C times trying to fit the curve. Same as L parameter of the same name. |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item C 1)> |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Setting C inverts the sense of the fit. Instead of C the formula will fit C. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=item C "perl")> |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Sets the programming language in which the formula will be implemented. Currently supported languages are C<"C">, C<"coderef"> and the default, C<"perl">. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
When C "coderef"> is specified, a code reference is returned instead which may be used immediately by your perl script: |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $x2y) = fit(xydata => \@xy, impl_lang => "coderef"); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
my $y = $x2y->(42); |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
More implementation languages will be supported in the future. |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=item C "x2y")> |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Sets the name of the function implementing the formula. The default is C<"x2y">. Has no effect when used with C "coderef")>. |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, impl_name => "converto"); |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
print "$src\n"; |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub converto { |
407
|
|
|
|
|
|
|
my($x) = @_; |
408
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
409
|
|
|
|
|
|
|
return $y; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=item C 1)> |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
When set, the implementation will include logic for checking whether the input is out-of-bounds, per the highest and lowest x points in the data used to fit the formula. For implementation languages which support exceptions, an exception will be thrown. For others (like C), C<-1.0> will be returned to indicate the error. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
For instance, if the highest x in C<$xydata> is 83.0 and the lowest x is 60.0: |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, bounds_check => 1); |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
print "$src\n"; |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub x2y { |
423
|
|
|
|
|
|
|
my($x) = @_; |
424
|
|
|
|
|
|
|
die "x out of bounds (high)" if ($x > 83.80000000000); |
425
|
|
|
|
|
|
|
die "x out of bounds (low)" if ($x < 60.80000000000); |
426
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
427
|
|
|
|
|
|
|
return $y; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=item C 1)> |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
When set, the implementation will round the output to the nearest whole number. When the implementation language is C<"C"> this adds an C<#include Emath.hE> directive to the source code, which will have to be compiled against libm -- see C. |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, round_result => 1); |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
print "$src\n"; |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub x2y { |
439
|
|
|
|
|
|
|
my($x) = @_; |
440
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
441
|
|
|
|
|
|
|
$y = int($y + 0.5); |
442
|
|
|
|
|
|
|
return $y; |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=item C 1)> |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
When set and C "C">, any C<#include> directives which the implementation might need will be suppressed. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=back |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 VARIABLES |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
The class variable C<%STATS_H> contains various intermediate values which might be helpful. For instance, C<$STATS_H{deviation_max_offset_datum}> contains the x data point which corresponds to the maximum deviation returned. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
The contents of C<%STATS_H> is subject to change and might not be fully documented in future versions. The current fields are: |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=over 4 |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item C: The x data point corresponding with returned maximum deviation. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item C: Arrayref of formula parameters as returned by L after a short fitting attempt used for timing calibration. |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=item C: The number of seconds L spent in the calibration run. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item C: The iterations parameter passed to L. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=item C: Arrayref of formula parameters as returned by L. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item C: The number of seconds L actually spent fitting the formula. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=item C: The exception thrown when the implementation was used to calculate the deviations, or the empty string if none. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item C: The formula part of the implementation. |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item C: The implementation source string. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item C: One of C<"time"> or C<"iter">, indicating whether a time limit was used or an iteration count. |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=item C: Arrayref of x data points as passed to L. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=item C: Arrayref of y data points as passed to L. |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=back |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head1 CAVEATS |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=over 4 |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=item * Only simple polynomial functions are supported. Sometimes you need something else. Use L for such cases. |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=item * If C is very large, iterating over it to calculate deviances can take more time than permitted by C. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=item * The dangers of overfitting are real! L |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=item * Using too many terms can dramatically reduce the accuracy of the fitted formula. |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item * Sometimes calling L with a ten-term polynomial causes it to hang. |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=back |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head1 TO DO |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=over 4 |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item * Support more programming languages for formula implementation: R, MATLAB, python |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=item * Calculate the actual term sigfigs and set precision appropriately in the formula implementation instead of just "%.11f". |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=item * Support trying a range of terms and returning whatever gives the best fit. |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=item * Support piecewise output formulas. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=item * Work around L's occasional hang problem when using ten-term polynomials. |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=back |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head1 SEE ALSO |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
L |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
L |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=cut |