File Coverage

levmar-2.5/lmbc_core.c
Criterion Covered Total %
statement 261 328 79.5
branch 312 520 60.0
condition n/a
subroutine n/a
pod n/a
total 573 848 67.5


line stmt bran cond sub pod time code
1             /////////////////////////////////////////////////////////////////////////////////
2             //
3             // Levenberg - Marquardt non-linear minimization algorithm
4             // Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)
5             // Institute of Computer Science, Foundation for Research & Technology - Hellas
6             // Heraklion, Crete, Greece.
7             //
8             // This program is free software; you can redistribute it and/or modify
9             // it under the terms of the GNU General Public License as published by
10             // the Free Software Foundation; either version 2 of the License, or
11             // (at your option) any later version.
12             //
13             // This program is distributed in the hope that it will be useful,
14             // but WITHOUT ANY WARRANTY; without even the implied warranty of
15             // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16             // GNU General Public License for more details.
17             //
18             /////////////////////////////////////////////////////////////////////////////////
19              
20             #ifndef LM_REAL // not included by lmbc.c
21             #error This file should not be compiled directly!
22             #endif
23              
24              
25             /* precision-specific definitions */
26             #define FUNC_STATE LM_ADD_PREFIX(func_state)
27             #define LNSRCH LM_ADD_PREFIX(lnsrch)
28             #define BOXPROJECT LM_ADD_PREFIX(boxProject)
29             #define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)
30             #define LEVMAR_BC_DER LM_ADD_PREFIX(levmar_bc_der)
31             #define LEVMAR_BC_DIF LM_ADD_PREFIX(levmar_bc_dif)
32             #define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)
33             #define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)
34             #define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
35             #define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
36             #define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
37             #define LMBC_DIF_DATA LM_ADD_PREFIX(lmbc_dif_data)
38             #define LMBC_DIF_FUNC LM_ADD_PREFIX(lmbc_dif_func)
39             #define LMBC_DIF_JACF LM_ADD_PREFIX(lmbc_dif_jacf)
40              
41             #ifdef HAVE_LAPACK
42             #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
43             #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
44             #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
45             #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
46             #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
47             #define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
48             #else
49             #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
50             #endif /* HAVE_LAPACK */
51              
52             /* find the median of 3 numbers */
53             #define __MEDIAN3(a, b, c) ( ((a) >= (b))?\
54             ( ((c) >= (a))? (a) : ( ((c) <= (b))? (b) : (c) ) ) : \
55             ( ((c) >= (b))? (b) : ( ((c) <= (a))? (a) : (c) ) ) )
56              
57             #define _POW_ LM_CNST(2.1)
58              
59             #define __LSITMAX 150 // max #iterations for line search
60              
61             struct FUNC_STATE{
62             int n, *nfev;
63             LM_REAL *hx, *x;
64             void *adata;
65             };
66              
67             static void
68 2757           LNSRCH(int m, LM_REAL *x, LM_REAL f, LM_REAL *g, LM_REAL *p, LM_REAL alpha, LM_REAL *xpls,
69             LM_REAL *ffpls, void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), struct FUNC_STATE state,
70             int *mxtake, int *iretcd, LM_REAL stepmx, LM_REAL steptl, LM_REAL *sx)
71             {
72             /* Find a next newton iterate by backtracking line search.
73             * Specifically, finds a \lambda such that for a fixed alpha<0.5 (usually 1e-4),
74             * f(x + \lambda*p) <= f(x) + alpha * \lambda * g^T*p
75             *
76             * Translated (with minor changes) from Schnabel, Koontz & Weiss uncmin.f, v1.3
77              
78             * PARAMETERS :
79              
80             * m --> dimension of problem (i.e. number of variables)
81             * x(m) --> old iterate: x[k-1]
82             * f --> function value at old iterate, f(x)
83             * g(m) --> gradient at old iterate, g(x), or approximate
84             * p(m) --> non-zero newton step
85             * alpha --> fixed constant < 0.5 for line search (see above)
86             * xpls(m) <-- new iterate x[k]
87             * ffpls <-- function value at new iterate, f(xpls)
88             * func --> name of subroutine to evaluate function
89             * state <--> information other than x and m that func requires.
90             * state is not modified in xlnsrch (but can be modified by func).
91             * iretcd <-- return code
92             * mxtake <-- boolean flag indicating step of maximum length used
93             * stepmx --> maximum allowable step size
94             * steptl --> relative step size at which successive iterates
95             * considered close enough to terminate algorithm
96             * sx(m) --> diagonal scaling matrix for x, can be NULL
97              
98             * internal variables
99              
100             * sln newton length
101             * rln relative length of newton step
102             */
103              
104             register int i, j;
105 2757           int firstback = 1;
106             LM_REAL disc;
107             LM_REAL a3, b;
108             LM_REAL t1, t2, t3, lambda, tlmbda, rmnlmb;
109             LM_REAL scl, rln, sln, slp;
110             LM_REAL tmp1, tmp2;
111 2757           LM_REAL fpls, pfpls = 0., plmbda = 0.; /* -Wall */
112              
113 2757           f*=LM_CNST(0.5);
114 2757           *mxtake = 0;
115 2757           *iretcd = 2;
116 2757           tmp1 = 0.;
117 2757 50         if(!sx) /* no scaling */
    50          
118 13785 100         for (i = 0; i < m; ++i)
    100          
119 11028           tmp1 += p[i] * p[i];
120             else
121 0 0         for (i = 0; i < m; ++i)
    0          
122 0           tmp1 += sx[i] * sx[i] * p[i] * p[i];
123 2757           sln = (LM_REAL)sqrt(tmp1);
124 2757 50         if (sln > stepmx) {
    50          
125             /* newton step longer than maximum allowed */
126 0           scl = stepmx / sln;
127 0 0         for(i=0; i
    0          
128 0           p[i]*=scl;
129 0           sln = stepmx;
130             }
131 13785 100         for(i=0, slp=0.; i
    100          
132 11028           slp+=g[i]*p[i];
133 2757           rln = 0.;
134 2757 50         if(!sx) /* no scaling */
    50          
135 13785 100         for (i = 0; i < m; ++i) {
    100          
136 11028 50         tmp1 = (FABS(x[i])>=LM_CNST(1.))? FABS(x[i]) : LM_CNST(1.);
    50          
    0          
    50          
    50          
    0          
137 11028 100         tmp2 = FABS(p[i])/tmp1;
    100          
138 11028 100         if(rln < tmp2) rln = tmp2;
    100          
139             }
140             else
141 0 0         for (i = 0; i < m; ++i) {
    0          
142 0 0         tmp1 = (FABS(x[i])>=LM_CNST(1.)/sx[i])? FABS(x[i]) : LM_CNST(1.)/sx[i];
    0          
    0          
    0          
    0          
    0          
143 0 0         tmp2 = FABS(p[i])/tmp1;
    0          
144 0 0         if(rln < tmp2) rln = tmp2;
    0          
145             }
146 2757           rmnlmb = steptl / rln;
147 2757           lambda = LM_CNST(1.0);
148              
149             /* check if new iterate satisfactory. generate new lambda if necessary. */
150              
151 13336 50         for(j=__LSITMAX; j>=0; --j) {
    50          
152 66680 100         for (i = 0; i < m; ++i)
    100          
153 53344           xpls[i] = x[i] + lambda * p[i];
154              
155             /* evaluate function at new point */
156 13336           (*func)(xpls, state.hx, m, state.n, state.adata); ++(*(state.nfev));
157             /* ### state.hx=state.x-state.hx, tmp1=||state.hx|| */
158             #if 1
159 13336           tmp1=LEVMAR_L2NRMXMY(state.hx, state.x, state.hx, state.n);
160             #else
161             for(i=0, tmp1=0.0; i
162             state.hx[i]=tmp2=state.x[i]-state.hx[i];
163             tmp1+=tmp2*tmp2;
164             }
165             #endif
166 13336           fpls=LM_CNST(0.5)*tmp1; *ffpls=tmp1;
167              
168 13336           if (fpls <= f + slp * alpha * lambda) { /* solution found */
169 2139           *iretcd = 0;
170 2139 50         if (lambda == LM_CNST(1.) && sln > stepmx * LM_CNST(.99)) *mxtake = 1;
    0          
    50          
    0          
171 2139           return;
172             }
173              
174             /* else : solution not (yet) found */
175              
176             /* First find a point with a finite value */
177              
178 11197 100         if (lambda < rmnlmb) {
    100          
179             /* no satisfactory xpls found sufficiently distinct from x */
180              
181 618           *iretcd = 1;
182 618           return;
183             }
184             else { /* calculate new lambda */
185              
186             /* modifications to cover non-finite values */
187 10579 50         if (!LM_FINITE(fpls)) {
    50          
188 0           lambda *= LM_CNST(0.1);
189 0           firstback = 1;
190             }
191             else {
192 10579 100         if (firstback) { /* first backtrack: quadratic fit */
    50          
193 2732           tlmbda = -lambda * slp / ((fpls - f - slp) * LM_CNST(2.));
194 2732           firstback = 0;
195             }
196             else { /* all subsequent backtracks: cubic fit */
197 7847           t1 = fpls - f - lambda * slp;
198 7847           t2 = pfpls - f - plmbda * slp;
199 7847           t3 = LM_CNST(1.) / (lambda - plmbda);
200 15694           a3 = LM_CNST(3.) * t3 * (t1 / (lambda * lambda)
201 7847           - t2 / (plmbda * plmbda));
202 15694           b = t3 * (t2 * lambda / (plmbda * plmbda)
203 7847           - t1 * plmbda / (lambda * lambda));
204 7847           disc = b * b - a3 * slp;
205 7847 100         if (disc > b * b)
    0          
206             /* only one positive critical point, must be minimum */
207 8 50         tlmbda = (-b + ((a3 < 0)? -(LM_REAL)sqrt(disc): (LM_REAL)sqrt(disc))) /a3;
    0          
208             else
209             /* both critical points positive, first is minimum */
210 7839 50         tlmbda = (-b + ((a3 < 0)? (LM_REAL)sqrt(disc): -(LM_REAL)sqrt(disc))) /a3;
    0          
211              
212 7847 50         if (tlmbda > lambda * LM_CNST(.5))
    0          
213 0           tlmbda = lambda * LM_CNST(.5);
214             }
215 10579           plmbda = lambda;
216 10579           pfpls = fpls;
217 10579 100         if (tlmbda < lambda * LM_CNST(.1))
    100          
218 9011           lambda *= LM_CNST(.1);
219             else
220 1568           lambda = tlmbda;
221             }
222             }
223             }
224             /* this point is reached when the iterations limit is exceeded */
225 0           *iretcd = 1; /* failed */
226 0           return;
227             } /* LNSRCH */
228              
229             /* Projections to feasible set \Omega: P_{\Omega}(y) := arg min { ||x - y|| : x \in \Omega}, y \in R^m */
230              
231             /* project vector p to a box shaped feasible set. p is a mx1 vector.
232             * Either lb, ub can be NULL. If not NULL, they are mx1 vectors
233             */
234 177472           static void BOXPROJECT(LM_REAL *p, LM_REAL *lb, LM_REAL *ub, int m)
235             {
236             register int i;
237              
238 177472 50         if(!lb){ /* no lower bounds */
    50          
239 0 0         if(!ub) /* no upper bounds */
    0          
240 0           return;
241             else{ /* upper bounds only */
242 0 0         for(i=0; i
    0          
243 0 0         if(p[i]>ub[i]) p[i]=ub[i];
    0          
244             }
245             }
246             else
247 177472 50         if(!ub){ /* lower bounds only */
    50          
248 0 0         for(i=0; i
    0          
249 0 0         if(p[i]
    0          
250             }
251             else /* box bounds */
252 887350 100         for(i=0; i
    100          
253 709878 50         p[i]=__MEDIAN3(lb[i], p[i], ub[i]);
    0          
    0          
    100          
    50          
    50          
    0          
    0          
    100          
    50          
254             }
255              
256             /*
257             * This function seeks the parameter vector p that best describes the measurements
258             * vector x under box constraints.
259             * More precisely, given a vector function func : R^m --> R^n with n>=m,
260             * it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of
261             * e=x-func(p) is minimized under the constraints lb[i]<=p[i]<=ub[i].
262             * If no lower bound constraint applies for p[i], use -DBL_MAX/-FLT_MAX for lb[i];
263             * If no upper bound constraint applies for p[i], use DBL_MAX/FLT_MAX for ub[i].
264             *
265             * This function requires an analytic Jacobian. In case the latter is unavailable,
266             * use LEVMAR_BC_DIF() bellow
267             *
268             * Returns the number of iterations (>=0) if successful, LM_ERROR if failed
269             *
270             * For details, see C. Kanzow, N. Yamashita and M. Fukushima: "Levenberg-Marquardt
271             * methods for constrained nonlinear equations with strong local convergence properties",
272             * Journal of Computational and Applied Mathematics 172, 2004, pp. 375-397.
273             * Also, see K. Madsen, H.B. Nielsen and O. Tingleff's lecture notes on
274             * unconstrained Levenberg-Marquardt at http://www.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf
275             */
276              
277 16           int LEVMAR_BC_DER(
278             void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
279             void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), /* function to evaluate the Jacobian \part x / \part p */
280             LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */
281             LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */
282             int m, /* I: parameter vector dimension (i.e. #unknowns) */
283             int n, /* I: measurement vector dimension */
284             LM_REAL *lb, /* I: vector of lower bounds. If NULL, no lower bounds apply */
285             LM_REAL *ub, /* I: vector of upper bounds. If NULL, no upper bounds apply */
286             int itmax, /* I: maximum number of iterations */
287             LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
288             * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used.
289             * Note that ||J^T e||_inf is computed on free (not equal to lb[i] or ub[i]) variables only.
290             */
291             LM_REAL info[LM_INFO_SZ],
292             /* O: information regarding the minimization. Set to NULL if don't care
293             * info[0]= ||e||_2 at initial p.
294             * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
295             * info[5]= # iterations,
296             * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
297             * 2 - stopped by small Dp
298             * 3 - stopped by itmax
299             * 4 - singular matrix. Restart from current p with increased mu
300             * 5 - no further error reduction is possible. Restart with increased mu
301             * 6 - stopped by small ||e||_2
302             * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
303             * info[7]= # function evaluations
304             * info[8]= # Jacobian evaluations
305             * info[9]= # linear systems solved, i.e. # attempts for reducing error
306             */
307             LM_REAL *work, /* working memory at least LM_BC_DER_WORKSZ() reals large, allocated if NULL */
308             LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
309             void *adata) /* pointer to possibly additional data, passed uninterpreted to func & jacf.
310             * Set to NULL if not needed
311             */
312             {
313             register int i, j, k, l;
314 16           int worksz, freework=0, issolved;
315             /* temp work arrays */
316             LM_REAL *e, /* nx1 */
317             *hx, /* \hat{x}_i, nx1 */
318             *jacTe, /* J^T e_i mx1 */
319             *jac, /* nxm */
320             *jacTjac, /* mxm */
321             *Dp, /* mx1 */
322             *diag_jacTjac, /* diagonal of J^T J, mx1 */
323             *pDp; /* p + Dp, mx1 */
324              
325             register LM_REAL mu, /* damping constant */
326             tmp; /* mainly used in matrix & vector multiplications */
327             LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
328 16           LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
329             LM_REAL tau, eps1, eps2, eps2_sq, eps3;
330             LM_REAL init_p_eL2;
331 16           int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
332 16           const int nm=n*m;
333              
334             /* variables for constrained LM */
335             struct FUNC_STATE fstate;
336 16           LM_REAL alpha=LM_CNST(1e-4), beta=LM_CNST(0.9), gamma=LM_CNST(0.99995), gamma_sq=gamma*gamma, rho=LM_CNST(1e-8);
337             LM_REAL t, t0;
338 16           LM_REAL steptl=LM_CNST(1e3)*(LM_REAL)sqrt(LM_REAL_EPSILON), jacTeDp;
339 16           LM_REAL tmin=LM_CNST(1e-12), tming=LM_CNST(1e-18); /* minimum step length for LS and PG steps */
340 16           const LM_REAL tini=LM_CNST(1.0); /* initial step length for LS and PG steps */
341 16           int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0;
342             int numactive;
343 16           int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL;
344              
345 16           mu=jacTe_inf=t=0.0; tmin=tmin; /* -Wall */
346              
347 16 50         if(n
    50          
348 0           fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
349 0           return LM_ERROR;
350             }
351              
352 16 50         if(!jacf){
    50          
353 0           fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_BC_DER)
354             RCAT("().\nIf no such function is available, use ", LEVMAR_BC_DIF) RCAT("() rather than ", LEVMAR_BC_DER) "()\n");
355 0           return LM_ERROR;
356             }
357              
358 16 50         if(!LEVMAR_BOX_CHECK(lb, ub, m)){
    50          
359 0           fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): at least one lower bound exceeds the upper one\n"));
360 0           return LM_ERROR;
361             }
362              
363 16 50         if(opts){
    50          
364 16           tau=opts[0];
365 16           eps1=opts[1];
366 16           eps2=opts[2];
367 16           eps2_sq=opts[2]*opts[2];
368 16           eps3=opts[3];
369             }
370             else{ // use default values
371 0           tau=LM_CNST(LM_INIT_MU);
372 0           eps1=LM_CNST(LM_STOP_THRESH);
373 0           eps2=LM_CNST(LM_STOP_THRESH);
374 0           eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
375 0           eps3=LM_CNST(LM_STOP_THRESH);
376             }
377              
378 16 50         if(!work){
    50          
379 0           worksz=LM_BC_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
380 0           work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
381 0 0         if(!work){
    0          
382 0           fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
383 0           return LM_ERROR;
384             }
385 0           freework=1;
386             }
387              
388             /* set up work arrays */
389 16           e=work;
390 16           hx=e + n;
391 16           jacTe=hx + n;
392 16           jac=jacTe + m;
393 16           jacTjac=jac + nm;
394 16           Dp=jacTjac + m*m;
395 16           diag_jacTjac=Dp + m;
396 16           pDp=diag_jacTjac + m;
397              
398 16           fstate.n=n;
399 16           fstate.hx=hx;
400 16           fstate.x=x;
401 16           fstate.adata=adata;
402 16           fstate.nfev=&nfev;
403            
404             /* see if starting point is within the feasile set */
405 78 100         for(i=0; i
    100          
406 62           pDp[i]=p[i];
407 16           BOXPROJECT(p, lb, ub, m); /* project to feasible set */
408             // for(i=0; i
409             // if(pDp[i]!=p[i])
410             // fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_BC_DER) "()! [%g projected to %g]\n",
411             // i, pDp[i], p[i]);
412              
413             /* compute e=x - f(p) and its L2 norm */
414 16           (*func)(p, hx, m, n, adata); nfev=1;
415             /* ### e=x-hx, p_eL2=||e|| */
416             #if 1
417 16           p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);
418             #else
419             for(i=0, p_eL2=0.0; i
420             e[i]=tmp=x[i]-hx[i];
421             p_eL2+=tmp*tmp;
422             }
423             #endif
424 16           init_p_eL2=p_eL2;
425 16 50         if(!LM_FINITE(p_eL2)) stop=7;
    50          
426              
427 60496 100         for(k=0; k
    50          
    100          
    100          
428             /* Note that p and e have been updated at a previous iteration */
429              
430 60482 100         if(p_eL2<=eps3){ /* error is small */
    50          
431 2           stop=6;
432 2           break;
433             }
434              
435             /* Compute the Jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2.
436             * Since J^T J is symmetric, its computation can be sped up by computing
437             * only its upper triangular part and copying it to the lower part
438             */
439              
440 60480           (*jacf)(p, jac, m, n, adata); ++njev;
441              
442             /* J^T J, J^T e */
443 60480           if(nm<__BLOCKSZ__SQ){ // this is a small problem
444             /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj.
445             * Thus, the product J^T J can be computed using an outer loop for
446             * l that adds J_li*J_lj to each element ij of the result. Note that
447             * with this scheme, the accesses to J and JtJ are always along rows,
448             * therefore induces less cache misses compared to the straightforward
449             * algorithm for computing the product (i.e., l loop is innermost one).
450             * A similar scheme applies to the computation of J^T e.
451             * However, for large minimization problems (i.e., involving a large number
452             * of unknowns and measurements) for which J/J^T J rows are too large to
453             * fit in the L1 cache, even this scheme incures many cache misses. In
454             * such cases, a cache-efficient blocking scheme is preferable.
455             *
456             * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
457             * performance problem.
458             *
459             * Note that the non-blocking algorithm is faster on small
460             * problems since in this case it avoids the overheads of blocking.
461             */
462             register int l, im;
463             register LM_REAL alpha, *jaclm;
464              
465             /* looping downwards saves a few computations */
466 1028024 100         for(i=m*m; i-->0; )
    100          
467 967552           jacTjac[i]=0.0;
468 302360 100         for(i=m; i-->0; )
    100          
469 241888           jacTe[i]=0.0;
470              
471 302360 100         for(l=n; l-->0; ){
    100          
472 241888           jaclm=jac+l*m;
473 1209440 100         for(i=m; i-->0; ){
    100          
474 967552           im=i*m;
475 967552           alpha=jaclm[i]; //jac[l*m+i];
476 3386432 100         for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */
    100          
477 2418880           jacTjac[im+j]+=jaclm[j]*alpha; //jac[l*m+j]
478              
479             /* J^T e */
480 967552           jacTe[i]+=alpha*e[l];
481             }
482             }
483              
484 302360 100         for(i=m; i-->0; ) /* copy to upper part */
    100          
485 604720 100         for(j=i+1; j
    100          
486 362832           jacTjac[i*m+j]=jacTjac[j*m+i];
487             }
488             else{ // this is a large problem
489             /* Cache efficient computation of J^T J based on blocking
490             */
491 8           LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);
492              
493             /* cache efficient computation of J^T e */
494 32 100         for(i=0; i
    0          
495 24           jacTe[i]=0.0;
496              
497 8008 100         for(i=0; i
    0          
498             register LM_REAL *jacrow;
499              
500 32000 100         for(l=0, jacrow=jac+i*m, tmp=e[i]; l
    0          
501 24000           jacTe[l]+=jacrow[l]*tmp;
502             }
503             }
504              
505             /* Compute ||J^T e||_inf and ||p||^2. Note that ||J^T e||_inf
506             * is computed for free (i.e. inactive) variables only.
507             * At a local minimum, if p[i]==ub[i] then g[i]>0;
508             * if p[i]==lb[i] g[i]<0; otherwise g[i]=0
509             */
510 302392 100         for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i
    100          
511 241912 50         if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; }
    100          
    100          
    50          
    100          
    50          
512 181466 50         else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; }
    50          
    0          
    50          
    50          
    0          
513 181466 100         else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
    100          
    100          
    100          
514              
515 241912           diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
516 241912           p_L2+=p[i]*p[i];
517             }
518             //p_L2=sqrt(p_L2);
519              
520             #if 0
521             if(!(k%100)){
522             printf("Current estimate: ");
523             for(i=0; i
524             printf("%.9g ", p[i]);
525             printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j);
526             }
527             #endif
528              
529             /* check for convergence */
530 60480 100         if(j==numactive && (jacTe_inf <= eps1)){
    50          
    50          
    50          
531 0           Dp_L2=0.0; /* no increment for p in this case */
532 0           stop=1;
533 0           break;
534             }
535              
536             /* compute initial damping factor */
537 60480 100         if(k==0){
    100          
538 16 50         if(!lb && !ub){ /* no bounds */
    0          
    50          
    0          
539 0 0         for(i=0, tmp=LM_REAL_MIN; i
    0          
540 0 0         if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
    0          
541 0           mu=tau*tmp;
542             }
543             else
544 16           mu=LM_CNST(0.5)*tau*p_eL2; /* use Kanzow's starting mu */
545             }
546              
547             /* determine increment using a combination of adaptive damping, line search and projected gradient search */
548             while(1){
549             /* augment normal equations */
550 302392 100         for(i=0; i
    100          
551 241912           jacTjac[i*m+i]+=mu;
552              
553             /* solve augmented equations */
554             #ifdef HAVE_LAPACK
555             /* 6 alternatives are available: LU, Cholesky, 2 variants of QR decomposition, SVD and LDLt.
556             * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate;
557             * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed
558             */
559              
560             issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
561             //issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
562             //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL;
563             //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR;
564             //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int (*)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m))AX_EQ_B_QRLS;
565             //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD;
566              
567             #else
568             /* use the LU included with levmar */
569 60480           issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
570             #endif /* HAVE_LAPACK */
571              
572 60480           if(issolved){
573 302392 100         for(i=0; i
    100          
574 241912           pDp[i]=p[i] + Dp[i];
575              
576             /* compute p's new estimate and ||Dp||^2 */
577 60480           BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
578 302392 100         for(i=0, Dp_L2=0.0; i
    100          
579 241912           Dp[i]=tmp=pDp[i]-p[i];
580 241912           Dp_L2+=tmp*tmp;
581             }
582             //Dp_L2=sqrt(Dp_L2);
583              
584 60480 50         if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
    50          
585 0           stop=2;
586 0           break;
587             }
588              
589 60480 50         if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */
    50          
590 0           stop=4;
591 0           break;
592             }
593              
594 60480           (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
595             /* ### hx=x-hx, pDp_eL2=||hx|| */
596             #if 1
597 60480           pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
598             #else
599             for(i=0, pDp_eL2=0.0; i
600             hx[i]=tmp=x[i]-hx[i];
601             pDp_eL2+=tmp*tmp;
602             }
603             #endif
604 60480           if(!LM_FINITE(pDp_eL2)){
605 0           stop=7;
606 0           break;
607             }
608              
609 60480 100         if(pDp_eL2<=gamma_sq*p_eL2){
    100          
610 172 100         for(i=0, dL=0.0; i
    100          
611 136           dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
612              
613             #if 1
614 36 50         if(dL>0.0){
    50          
615 36           dF=p_eL2-pDp_eL2;
616 36           tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
617 36           tmp=LM_CNST(1.0)-tmp*tmp*tmp;
618 36 100         mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
    50          
619             }
620             else
621 0 0         mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */
    0          
622             #else
623              
624             mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */
625             #endif
626              
627 36           nu=2;
628              
629 172 100         for(i=0 ; i
    100          
630 136           p[i]=pDp[i];
631              
632 8148 100         for(i=0; i
    100          
633 8112           e[i]=hx[i];
634 36           p_eL2=pDp_eL2;
635 36           ++nLMsteps;
636 36           gprevtaken=0;
637 36           break;
638             }
639             }
640             else{
641              
642             /* the augmented linear system could not be solved, increase mu */
643              
644 0           mu*=nu;
645 0           nu2=nu<<1; // 2*nu;
646 0 0         if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
    0          
647 0           stop=5;
648 0           break;
649             }
650 0           nu=nu2;
651              
652 0 0         for(i=0; i
    0          
653 0           jacTjac[i*m+i]=diag_jacTjac[i];
654              
655 0           continue; /* solve again with increased nu */
656             }
657              
658             /* if this point is reached, the LM step did not reduce the error;
659             * see if it is a descent direction
660             */
661              
662             /* negate jacTe (i.e. g) & compute g^T * Dp */
663 302220 100         for(i=0, jacTeDp=0.0; i
    100          
664 241776           jacTe[i]=-jacTe[i];
665 241776           jacTeDp+=jacTe[i]*Dp[i];
666             }
667              
668 60444 100         if(jacTeDp<=-rho*pow(Dp_L2, _POW_/LM_CNST(2.0))){
    100          
669             /* Dp is a descent direction; do a line search along it */
670             int mxtake, iretcd;
671             LM_REAL stepmx;
672              
673 2757 50         tmp=(LM_REAL)sqrt(p_L2); stepmx=LM_CNST(1e3)*( (tmp>=LM_CNST(1.0))? tmp : LM_CNST(1.0) );
    50          
674              
675             #if 1
676             /* use Schnabel's backtracking line search; it requires fewer "func" evaluations */
677 2757           LNSRCH(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, fstate,
678             &mxtake, &iretcd, stepmx, steptl, NULL); /* NOTE: LNSRCH() updates hx */
679 2757 100         if(iretcd!=0) goto gradproj; /* rather inelegant but effective way to handle LNSRCH() failures... */
    100          
680             #else
681             /* use the simpler (but slower!) line search described by Kanzow et al */
682             for(t=tini; t>tmin; t*=beta){
683             for(i=0; i
684             pDp[i]=p[i] + t*Dp[i];
685             //pDp[i]=__MEDIAN3(lb[i], pDp[i], ub[i]); /* project to feasible set */
686             }
687              
688             (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
689             for(i=0, pDp_eL2=0.0; i
690             hx[i]=tmp=x[i]-hx[i];
691             pDp_eL2+=tmp*tmp;
692             }
693             if(!LM_FINITE(pDp_eL2)) goto gradproj; /* treat as line search failure */
694              
695             //if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + t*alpha*jacTeDp) break;
696             if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*t*alpha*jacTeDp) break;
697             }
698             #endif
699 2139           ++nLSsteps;
700 2139           gprevtaken=0;
701              
702             /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2.
703             * These values are used below to update their corresponding variables
704             */
705             }
706             else{
707             gradproj: /* Note that this point can also be reached via a goto when LNSRCH() fails */
708              
709             /* jacTe is a descent direction; make a projected gradient step */
710              
711             /* if the previous step was along the gradient descent, try to use the t employed in that step */
712             /* compute ||g|| */
713 291525 100         for(i=0, tmp=0.0; i
    100          
714 233220           tmp+=jacTe[i]*jacTe[i];
715 58305           tmp=(LM_REAL)sqrt(tmp);
716 58305           tmp=LM_CNST(100.0)/(LM_CNST(1.0)+tmp);
717 58305 50         t0=(tmp<=tini)? tmp : tini; /* guard against poor scaling & large steps; see (3.50) in C.T. Kelley's book */
    50          
718              
719 116976 100         for(t=(gprevtaken)? t : t0; t>tming; t*=beta){
    50          
    100          
    50          
720 584880 100         for(i=0; i
    100          
721 467904           pDp[i]=p[i] - t*jacTe[i];
722 116976           BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
723 584880 100         for(i=0; i
    100          
724 467904           Dp[i]=pDp[i]-p[i];
725              
726 116976           (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
727             /* compute ||e(pDp)||_2 */
728             /* ### hx=x-hx, pDp_eL2=||hx|| */
729             #if 1
730 116976           pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
731             #else
732             for(i=0, pDp_eL2=0.0; i
733             hx[i]=tmp=x[i]-hx[i];
734             pDp_eL2+=tmp*tmp;
735             }
736             #endif
737 116976           if(!LM_FINITE(pDp_eL2)){
738 0           stop=7;
739 0           goto breaknested;
740             }
741              
742 584880 100         for(i=0, tmp=0.0; i
    100          
743 467904           tmp+=jacTe[i]*Dp[i];
744              
745 116976 100         if(gprevtaken && pDp_eL2<=p_eL2 + LM_CNST(2.0)*LM_CNST(0.99999)*tmp){ /* starting t too small */
    100          
    100          
    100          
746 52369           t=t0;
747 52369           gprevtaken=0;
748 52369           continue;
749             }
750             //if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + alpha*tmp) break;
751 64607 100         if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*alpha*tmp) break;
    100          
752             }
753              
754 58305           ++nPGsteps;
755 58305           gprevtaken=1;
756             /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2 */
757             }
758              
759             /* update using computed values */
760              
761 302220 100         for(i=0, Dp_L2=0.0; i
    100          
762 241776           tmp=pDp[i]-p[i];
763 241776           Dp_L2+=tmp*tmp;
764             }
765             //Dp_L2=sqrt(Dp_L2);
766              
767 60444 50         if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
    100          
768 2           stop=2;
769 2           break;
770             }
771              
772 302210 100         for(i=0 ; i
    100          
773 241768           p[i]=pDp[i];
774              
775 302210 100         for(i=0; i
    100          
776 241768           e[i]=hx[i];
777 60442           p_eL2=pDp_eL2;
778 60442           break;
779 0           } /* inner loop */
780             }
781              
782             breaknested: /* NOTE: this point is also reached via an explicit goto! */
783              
784 16 100         if(k>=itmax) stop=3;
    100          
785              
786 78 100         for(i=0; i
    100          
787 62           jacTjac[i*m+i]=diag_jacTjac[i];
788              
789 16 50         if(info){
    50          
790 16           info[0]=init_p_eL2;
791 16           info[1]=p_eL2;
792 16           info[2]=jacTe_inf;
793 16           info[3]=Dp_L2;
794 78 100         for(i=0, tmp=LM_REAL_MIN; i
    100          
795 62 100         if(tmp
    100          
796 16           info[4]=mu/tmp;
797 16           info[5]=(LM_REAL)k;
798 16           info[6]=(LM_REAL)stop;
799 16           info[7]=(LM_REAL)nfev;
800 16           info[8]=(LM_REAL)njev;
801 16           info[9]=(LM_REAL)nlss;
802             }
803              
804             /* covariance matrix */
805 16 50         if(covar){
    50          
806 16           LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
807             }
808            
809 16 50         if(freework) free(work);
    50          
810              
811             #ifdef LINSOLVERS_RETAIN_MEMORY
812 16 50         if(linsolver) (*linsolver)(NULL, NULL, NULL, 0);
    50          
813             #endif
814              
815             #if 0
816             printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps);
817             #endif
818              
819 16 50         return (stop!=4 && stop!=7)? k : LM_ERROR;
    50          
    50          
    50          
820             }
821              
822             /* following struct & LMBC_DIF_XXX functions won't be necessary if a true secant
823             * version of LEVMAR_BC_DIF() is implemented...
824             */
825             struct LMBC_DIF_DATA{
826             int ffdif; // nonzero if forward differencing is used
827             void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata);
828             LM_REAL *hx, *hxx;
829             void *adata;
830             LM_REAL delta;
831             };
832              
833 49939           static void LMBC_DIF_FUNC(LM_REAL *p, LM_REAL *hx, int m, int n, void *data)
834             {
835 49939           struct LMBC_DIF_DATA *dta=(struct LMBC_DIF_DATA *)data;
836              
837             /* call user-supplied function passing it the user-supplied data */
838 49939           (*(dta->func))(p, hx, m, n, dta->adata);
839 49939           }
840              
841 15254           static void LMBC_DIF_JACF(LM_REAL *p, LM_REAL *jac, int m, int n, void *data)
842             {
843 15254           struct LMBC_DIF_DATA *dta=(struct LMBC_DIF_DATA *)data;
844              
845 15254 50         if(dta->ffdif){
    50          
846             /* evaluate user-supplied function at p */
847 15254           (*(dta->func))(p, dta->hx, m, n, dta->adata);
848 15254           LEVMAR_FDIF_FORW_JAC_APPROX(dta->func, p, dta->hx, dta->hxx, dta->delta, jac, m, n, dta->adata);
849             }
850             else
851 0           LEVMAR_FDIF_CENT_JAC_APPROX(dta->func, p, dta->hx, dta->hxx, dta->delta, jac, m, n, dta->adata);
852 15254           }
853              
854              
855             /* No Jacobian version of the LEVMAR_BC_DER() function above: the Jacobian is approximated with
856             * the aid of finite differences (forward or central, see the comment for the opts argument)
857             * Ideally, this function should be implemented with a secant approach. Currently, it just calls
858             * LEVMAR_BC_DER()
859             */
860 5           int LEVMAR_BC_DIF(
861             void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
862             LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */
863             LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */
864             int m, /* I: parameter vector dimension (i.e. #unknowns) */
865             int n, /* I: measurement vector dimension */
866             LM_REAL *lb, /* I: vector of lower bounds. If NULL, no lower bounds apply */
867             LM_REAL *ub, /* I: vector of upper bounds. If NULL, no upper bounds apply */
868             int itmax, /* I: maximum number of iterations */
869             LM_REAL opts[5], /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the
870             * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and
871             * the step used in difference approximation to the Jacobian. Set to NULL for defaults to be used.
872             * If \delta<0, the Jacobian is approximated with central differences which are more accurate
873             * (but slower!) compared to the forward differences employed by default.
874             */
875             LM_REAL info[LM_INFO_SZ],
876             /* O: information regarding the minimization. Set to NULL if don't care
877             * info[0]= ||e||_2 at initial p.
878             * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
879             * info[5]= # iterations,
880             * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
881             * 2 - stopped by small Dp
882             * 3 - stopped by itmax
883             * 4 - singular matrix. Restart from current p with increased mu
884             * 5 - no further error reduction is possible. Restart with increased mu
885             * 6 - stopped by small ||e||_2
886             * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
887             * info[7]= # function evaluations
888             * info[8]= # Jacobian evaluations
889             * info[9]= # linear systems solved, i.e. # attempts for reducing error
890             */
891             LM_REAL *work, /* working memory at least LM_BC_DIF_WORKSZ() reals large, allocated if NULL */
892             LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
893             void *adata) /* pointer to possibly additional data, passed uninterpreted to func.
894             * Set to NULL if not needed
895             */
896             {
897             struct LMBC_DIF_DATA data;
898             int ret;
899              
900             //fprintf(stderr, RCAT("\nWarning: current implementation of ", LEVMAR_BC_DIF) "() does not use a secant approach!\n\n");
901              
902 5 50         data.ffdif=!opts || opts[4]>=0.0;
    50          
    50          
    50          
903              
904 5           data.func=func;
905 5           data.hx=(LM_REAL *)malloc(2*n*sizeof(LM_REAL)); /* allocate a big chunk in one step */
906 5 50         if(!data.hx){
    50          
907 0           fprintf(stderr, LCAT(LEVMAR_BC_DIF, "(): memory allocation request failed\n"));
908 0           return LM_ERROR;
909             }
910 5           data.hxx=data.hx+n;
911 5           data.adata=adata;
912 5 50         data.delta=(opts)? FABS(opts[4]) : (LM_REAL)LM_DIFF_DELTA;
    50          
    50          
    50          
913              
914 5           ret=LEVMAR_BC_DER(LMBC_DIF_FUNC, LMBC_DIF_JACF, p, x, m, n, lb, ub, itmax, opts, info, work, covar, (void *)&data);
915              
916 5           if(info){ /* correct the number of function calls */
917 5 50         if(data.ffdif)
    50          
918 5           info[7]+=info[8]*(m+1); /* each Jacobian evaluation costs m+1 function calls */
919             else
920 0           info[7]+=info[8]*(2*m); /* each Jacobian evaluation costs 2*m function calls */
921             }
922              
923 5           free(data.hx);
924              
925 5           return ret;
926             }
927              
928             /* undefine everything. THIS MUST REMAIN AT THE END OF THE FILE */
929             #undef FUNC_STATE
930             #undef LNSRCH
931             #undef BOXPROJECT
932             #undef LEVMAR_BOX_CHECK
933             #undef LEVMAR_BC_DER
934             #undef LMBC_DIF_DATA
935             #undef LMBC_DIF_FUNC
936             #undef LMBC_DIF_JACF
937             #undef LEVMAR_BC_DIF
938             #undef LEVMAR_FDIF_FORW_JAC_APPROX
939             #undef LEVMAR_FDIF_CENT_JAC_APPROX
940             #undef LEVMAR_COVAR
941             #undef LEVMAR_TRANS_MAT_MAT_MULT
942             #undef LEVMAR_L2NRMXMY
943             #undef AX_EQ_B_LU
944             #undef AX_EQ_B_CHOL
945             #undef AX_EQ_B_QR
946             #undef AX_EQ_B_QRLS
947             #undef AX_EQ_B_SVD
948             #undef AX_EQ_B_BK