File Coverage

levmar-2.6/Axb_core.c
Criterion Covered Total %
statement 71 76 93.4
branch n/a
condition n/a
subroutine n/a
pod n/a
total 71 76 93.4


line stmt bran cond sub pod time code
1             /////////////////////////////////////////////////////////////////////////////////
2             //
3             // Solution of linear systems involved in the Levenberg - Marquardt
4             // minimization algorithm
5             // Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr)
6             // Institute of Computer Science, Foundation for Research & Technology - Hellas
7             // Heraklion, Crete, Greece.
8             //
9             // This program is free software; you can redistribute it and/or modify
10             // it under the terms of the GNU General Public License as published by
11             // the Free Software Foundation; either version 2 of the License, or
12             // (at your option) any later version.
13             //
14             // This program is distributed in the hope that it will be useful,
15             // but WITHOUT ANY WARRANTY; without even the implied warranty of
16             // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17             // GNU General Public License for more details.
18             //
19             /////////////////////////////////////////////////////////////////////////////////
20              
21              
22             /* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */
23              
24              
25             #ifndef LM_REAL // not included by Axb.c
26             #error This file should not be compiled directly!
27             #endif
28              
29              
30             #ifdef LINSOLVERS_RETAIN_MEMORY
31             #define __STATIC__ static
32             #else
33             #define __STATIC__ // empty
34             #endif /* LINSOLVERS_RETAIN_MEMORY */
35              
36             #ifdef HAVE_LAPACK
37              
38             /* prototypes of LAPACK routines */
39              
40             #define GEQRF LM_MK_LAPACK_NAME(geqrf)
41             #define ORGQR LM_MK_LAPACK_NAME(orgqr)
42             #define TRTRS LM_MK_LAPACK_NAME(trtrs)
43             #define POTF2 LM_MK_LAPACK_NAME(potf2)
44             #define POTRF LM_MK_LAPACK_NAME(potrf)
45             #define POTRS LM_MK_LAPACK_NAME(potrs)
46             #define GETRF LM_MK_LAPACK_NAME(getrf)
47             #define GETRS LM_MK_LAPACK_NAME(getrs)
48             #define GESVD LM_MK_LAPACK_NAME(gesvd)
49             #define GESDD LM_MK_LAPACK_NAME(gesdd)
50             #define SYTRF LM_MK_LAPACK_NAME(sytrf)
51             #define SYTRS LM_MK_LAPACK_NAME(sytrs)
52             #define PLASMA_POSV LM_CAT_(PLASMA_, LM_ADD_PREFIX(posv))
53              
54             #ifdef __cplusplus
55             extern "C" {
56             #endif
57             /* QR decomposition */
58             extern int GEQRF(int *m, int *n, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
59             extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
60              
61             /* solution of triangular systems */
62             extern int TRTRS(char *uplo, char *trans, char *diag, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
63              
64             /* Cholesky decomposition and systems solution */
65             extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);
66             extern int POTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *info); /* block version of dpotf2 */
67             extern int POTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
68              
69             /* LU decomposition and systems solution */
70             extern int GETRF(int *m, int *n, LM_REAL *a, int *lda, int *ipiv, int *info);
71             extern int GETRS(char *trans, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
72              
73             /* Singular Value Decomposition (SVD) */
74             extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu,
75             LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);
76              
77             /* lapack 3.0 new SVD routine, faster than xgesvd().
78             * In case that your version of LAPACK does not include them, use the above two older routines
79             */
80             extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt,
81             LM_REAL *work, int *lwork, int *iwork, int *info);
82              
83             /* LDLt/UDUt factorization and systems solution */
84             extern int SYTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *ipiv, LM_REAL *work, int *lwork, int *info);
85             extern int SYTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
86             #ifdef __cplusplus
87             }
88             #endif
89              
90             /* precision-specific definitions */
91             #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
92             #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
93             #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
94             #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
95             #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
96             #define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
97             #define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol)
98              
99             /*
100             * This function returns the solution of Ax = b
101             *
102             * The function is based on QR decomposition with explicit computation of Q:
103             * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
104             * Q R x = b or R x = Q^T b.
105             * The last equation can be solved directly.
106             *
107             * A is mxm, b is mx1
108             *
109             * The function returns 0 in case of error, 1 if successful
110             *
111             * This function is often called repetitively to solve problems of identical
112             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
113             * retained between calls and free'd-malloc'ed when not of the appropriate size.
114             * A call with NULL as the first argument forces this memory to be released.
115             */
116             int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
117             {
118             __STATIC__ LM_REAL *buf=NULL;
119             __STATIC__ int buf_sz=0;
120              
121             static int nb=0; /* no __STATIC__ decl. here! */
122              
123             LM_REAL *a, *tau, *r, *work;
124             int a_sz, tau_sz, r_sz, tot_sz;
125             register int i, j;
126             int info, worksz, nrhs=1;
127             register LM_REAL sum;
128              
129             if(!A)
130             #ifdef LINSOLVERS_RETAIN_MEMORY
131             {
132             if(buf) free(buf);
133             buf=NULL;
134             buf_sz=0;
135              
136             return 1;
137             }
138             #else
139             return 1; /* NOP */
140             #endif /* LINSOLVERS_RETAIN_MEMORY */
141            
142             /* calculate required memory size */
143             a_sz=m*m;
144             tau_sz=m;
145             r_sz=m*m; /* only the upper triangular part really needed */
146             if(!nb){
147             LM_REAL tmp;
148              
149             worksz=-1; // workspace query; optimal size is returned in tmp
150             GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info);
151             nb=((int)tmp)/m; // optimal worksize is m*nb
152             }
153             worksz=nb*m;
154             tot_sz=a_sz + tau_sz + r_sz + worksz;
155              
156             #ifdef LINSOLVERS_RETAIN_MEMORY
157             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
158             if(buf) free(buf); /* free previously allocated memory */
159              
160             buf_sz=tot_sz;
161             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
162             if(!buf){
163             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
164             exit(1);
165             }
166             }
167             #else
168             buf_sz=tot_sz;
169             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
170             if(!buf){
171             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
172             exit(1);
173             }
174             #endif /* LINSOLVERS_RETAIN_MEMORY */
175              
176             a=buf;
177             tau=a+a_sz;
178             r=tau+tau_sz;
179             work=r+r_sz;
180              
181             /* store A (column major!) into a */
182             for(i=0; i
183             for(j=0; j
184             a[i+j*m]=A[i*m+j];
185              
186             /* QR decomposition of A */
187             GEQRF((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
188             /* error treatment */
189             if(info!=0){
190             if(info<0){
191             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", -info);
192             exit(1);
193             }
194             else{
195             fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", info);
196             #ifndef LINSOLVERS_RETAIN_MEMORY
197             free(buf);
198             #endif
199              
200             return 0;
201             }
202             }
203              
204             /* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */
205             memcpy(r, a, r_sz*sizeof(LM_REAL));
206              
207             /* compute Q using the elementary reflectors computed by the above decomposition */
208             ORGQR((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
209             if(info!=0){
210             if(info<0){
211             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", -info);
212             exit(1);
213             }
214             else{
215             fprintf(stderr, RCAT("Unknown LAPACK error (%d) in ", AX_EQ_B_QR) "()\n", info);
216             #ifndef LINSOLVERS_RETAIN_MEMORY
217             free(buf);
218             #endif
219              
220             return 0;
221             }
222             }
223              
224             /* Q is now in a; compute Q^T b in x */
225             for(i=0; i
226             for(j=0, sum=0.0; j
227             sum+=a[i*m+j]*B[j];
228             x[i]=sum;
229             }
230              
231             /* solve the linear system R x = Q^t b */
232             TRTRS("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info);
233             /* error treatment */
234             if(info!=0){
235             if(info<0){
236             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", -info);
237             exit(1);
238             }
239             else{
240             fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", info);
241             #ifndef LINSOLVERS_RETAIN_MEMORY
242             free(buf);
243             #endif
244              
245             return 0;
246             }
247             }
248              
249             #ifndef LINSOLVERS_RETAIN_MEMORY
250             free(buf);
251             #endif
252              
253             return 1;
254             }
255              
256             /*
257             * This function returns the solution of min_x ||Ax - b||
258             *
259             * || . || is the second order (i.e. L2) norm. This is a least squares technique that
260             * is based on QR decomposition:
261             * If A=Q R with Q orthogonal and R upper triangular, the normal equations become
262             * (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
263             * This amounts to solving R^T y = A^T b for y and then R x = y for x
264             * Note that Q does not need to be explicitly computed
265             *
266             * A is mxn, b is mx1
267             *
268             * The function returns 0 in case of error, 1 if successful
269             *
270             * This function is often called repetitively to solve problems of identical
271             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
272             * retained between calls and free'd-malloc'ed when not of the appropriate size.
273             * A call with NULL as the first argument forces this memory to be released.
274             */
275             int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n)
276             {
277             __STATIC__ LM_REAL *buf=NULL;
278             __STATIC__ int buf_sz=0;
279              
280             static int nb=0; /* no __STATIC__ decl. here! */
281              
282             LM_REAL *a, *tau, *r, *work;
283             int a_sz, tau_sz, r_sz, tot_sz;
284             register int i, j;
285             int info, worksz, nrhs=1;
286             register LM_REAL sum;
287            
288             if(!A)
289             #ifdef LINSOLVERS_RETAIN_MEMORY
290             {
291             if(buf) free(buf);
292             buf=NULL;
293             buf_sz=0;
294              
295             return 1;
296             }
297             #else
298             return 1; /* NOP */
299             #endif /* LINSOLVERS_RETAIN_MEMORY */
300            
301             if(m
302             fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n);
303             exit(1);
304             }
305            
306             /* calculate required memory size */
307             a_sz=m*n;
308             tau_sz=n;
309             r_sz=n*n;
310             if(!nb){
311             LM_REAL tmp;
312              
313             worksz=-1; // workspace query; optimal size is returned in tmp
314             GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info);
315             nb=((int)tmp)/m; // optimal worksize is m*nb
316             }
317             worksz=nb*m;
318             tot_sz=a_sz + tau_sz + r_sz + worksz;
319              
320             #ifdef LINSOLVERS_RETAIN_MEMORY
321             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
322             if(buf) free(buf); /* free previously allocated memory */
323              
324             buf_sz=tot_sz;
325             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
326             if(!buf){
327             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
328             exit(1);
329             }
330             }
331             #else
332             buf_sz=tot_sz;
333             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
334             if(!buf){
335             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
336             exit(1);
337             }
338             #endif /* LINSOLVERS_RETAIN_MEMORY */
339              
340             a=buf;
341             tau=a+a_sz;
342             r=tau+tau_sz;
343             work=r+r_sz;
344              
345             /* store A (column major!) into a */
346             for(i=0; i
347             for(j=0; j
348             a[i+j*m]=A[i*n+j];
349              
350             /* compute A^T b in x */
351             for(i=0; i
352             for(j=0, sum=0.0; j
353             sum+=A[j*n+i]*B[j];
354             x[i]=sum;
355             }
356              
357             /* QR decomposition of A */
358             GEQRF((int *)&m, (int *)&n, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
359             /* error treatment */
360             if(info!=0){
361             if(info<0){
362             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", -info);
363             exit(1);
364             }
365             else{
366             fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", info);
367             #ifndef LINSOLVERS_RETAIN_MEMORY
368             free(buf);
369             #endif
370              
371             return 0;
372             }
373             }
374              
375             /* R is stored in the upper triangular part of a. Note that a is mxn while r nxn */
376             for(j=0; j
377             for(i=0; i<=j; i++)
378             r[i+j*n]=a[i+j*m];
379              
380             /* lower part is zero */
381             for(i=j+1; i
382             r[i+j*n]=0.0;
383             }
384              
385             /* solve the linear system R^T y = A^t b */
386             TRTRS("U", "T", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
387             /* error treatment */
388             if(info!=0){
389             if(info<0){
390             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
391             exit(1);
392             }
393             else{
394             fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
395             #ifndef LINSOLVERS_RETAIN_MEMORY
396             free(buf);
397             #endif
398              
399             return 0;
400             }
401             }
402              
403             /* solve the linear system R x = y */
404             TRTRS("U", "N", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
405             /* error treatment */
406             if(info!=0){
407             if(info<0){
408             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
409             exit(1);
410             }
411             else{
412             fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
413             #ifndef LINSOLVERS_RETAIN_MEMORY
414             free(buf);
415             #endif
416              
417             return 0;
418             }
419             }
420              
421             #ifndef LINSOLVERS_RETAIN_MEMORY
422             free(buf);
423             #endif
424              
425             return 1;
426             }
427              
428             /*
429             * This function returns the solution of Ax=b
430             *
431             * The function assumes that A is symmetric & postive definite and employs
432             * the Cholesky decomposition:
433             * If A=L L^T with L lower triangular, the system to be solved becomes
434             * (L L^T) x = b
435             * This amounts to solving L y = b for y and then L^T x = y for x
436             *
437             * A is mxm, b is mx1
438             *
439             * The function returns 0 in case of error, 1 if successful
440             *
441             * This function is often called repetitively to solve problems of identical
442             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
443             * retained between calls and free'd-malloc'ed when not of the appropriate size.
444             * A call with NULL as the first argument forces this memory to be released.
445             */
446             int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
447             {
448             __STATIC__ LM_REAL *buf=NULL;
449             __STATIC__ int buf_sz=0;
450              
451             LM_REAL *a;
452             int a_sz, tot_sz;
453             int info, nrhs=1;
454            
455             if(!A)
456             #ifdef LINSOLVERS_RETAIN_MEMORY
457             {
458             if(buf) free(buf);
459             buf=NULL;
460             buf_sz=0;
461              
462             return 1;
463             }
464             #else
465             return 1; /* NOP */
466             #endif /* LINSOLVERS_RETAIN_MEMORY */
467            
468             /* calculate required memory size */
469             a_sz=m*m;
470             tot_sz=a_sz;
471              
472             #ifdef LINSOLVERS_RETAIN_MEMORY
473             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
474             if(buf) free(buf); /* free previously allocated memory */
475              
476             buf_sz=tot_sz;
477             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
478             if(!buf){
479             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
480             exit(1);
481             }
482             }
483             #else
484             buf_sz=tot_sz;
485             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
486             if(!buf){
487             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
488             exit(1);
489             }
490             #endif /* LINSOLVERS_RETAIN_MEMORY */
491              
492             a=buf;
493              
494             /* store A into a and B into x. A is assumed symmetric,
495             * hence no transposition is needed
496             */
497             memcpy(a, A, a_sz*sizeof(LM_REAL));
498             memcpy(x, B, m*sizeof(LM_REAL));
499              
500             /* Cholesky decomposition of A */
501             //POTF2("L", (int *)&m, a, (int *)&m, (int *)&info);
502             POTRF("L", (int *)&m, a, (int *)&m, (int *)&info);
503             /* error treatment */
504             if(info!=0){
505             if(info<0){
506             fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ",
507             AX_EQ_B_CHOL) "()\n", -info);
508             exit(1);
509             }
510             else{
511             fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info);
512             #ifndef LINSOLVERS_RETAIN_MEMORY
513             free(buf);
514             #endif
515              
516             return 0;
517             }
518             }
519              
520             /* solve using the computed Cholesky in one lapack call */
521             POTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
522             if(info<0){
523             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
524             exit(1);
525             }
526              
527             #if 0
528             /* alternative: solve the linear system L y = b ... */
529             TRTRS("L", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
530             /* error treatment */
531             if(info!=0){
532             if(info<0){
533             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
534             exit(1);
535             }
536             else{
537             fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
538             #ifndef LINSOLVERS_RETAIN_MEMORY
539             free(buf);
540             #endif
541              
542             return 0;
543             }
544             }
545              
546             /* ... solve the linear system L^T x = y */
547             TRTRS("L", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
548             /* error treatment */
549             if(info!=0){
550             if(info<0){
551             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info);
552             exit(1);
553             }
554             else{
555             fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
556             #ifndef LINSOLVERS_RETAIN_MEMORY
557             free(buf);
558             #endif
559              
560             return 0;
561             }
562             }
563             #endif /* 0 */
564              
565             #ifndef LINSOLVERS_RETAIN_MEMORY
566             free(buf);
567             #endif
568              
569             return 1;
570             }
571              
572             #ifdef HAVE_PLASMA
573              
574             /* Linear algebra using PLASMA parallel library for multicore CPUs.
575             * http://icl.cs.utk.edu/plasma/
576             *
577             * WARNING: BLAS multithreading should be disabled, e.g. setenv MKL_NUM_THREADS 1
578             */
579              
580             #ifndef _LM_PLASMA_MISC_
581             /* avoid multiple inclusion of helper code */
582             #define _LM_PLASMA_MISC_
583              
584             #include
585             #include
586             #include
587             #include
588             #include
589              
590             /* programmatically determine the number of cores on the current machine */
591             #ifdef _WIN32
592             #include
593             #elif __linux
594             #include
595             #endif
596             static int getnbcores()
597             {
598             #ifdef _WIN32
599             SYSTEM_INFO sysinfo;
600             GetSystemInfo(&sysinfo);
601             return sysinfo.dwNumberOfProcessors;
602             #elif __linux
603             return sysconf(_SC_NPROCESSORS_ONLN);
604             #else // unknown system
605             return 2<<1; // will be halved by right shift below
606             #endif
607             }
608              
609             static int PLASMA_ncores=-(getnbcores()>>1); // >0 if PLASMA initialized, <0 otherwise
610              
611             /* user-specified number of cores */
612             void levmar_PLASMA_setnbcores(int cores)
613             {
614             PLASMA_ncores=(cores>0)? -cores : ((cores)? cores : -2);
615             }
616             #endif /* _LM_PLASMA_MISC_ */
617              
618             /*
619             * This function returns the solution of Ax=b
620             *
621             * The function assumes that A is symmetric & positive definite and employs the
622             * Cholesky decomposition implemented by PLASMA for homogeneous multicore processors.
623             *
624             * A is mxm, b is mx1
625             *
626             * The function returns 0 in case of error, 1 if successfull
627             *
628             * This function is often called repetitively to solve problems of identical
629             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
630             * retained between calls and free'd-malloc'ed when not of the appropriate size.
631             * A call with NULL as the first argument forces this memory to be released.
632             */
633             int AX_EQ_B_PLASMA_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
634             {
635             __STATIC__ LM_REAL *buf=NULL;
636             __STATIC__ int buf_sz=0;
637              
638             LM_REAL *a;
639             int a_sz, tot_sz;
640             int info, nrhs=1;
641              
642             if(A==NULL){
643             #ifdef LINSOLVERS_RETAIN_MEMORY
644             if(buf) free(buf);
645             buf=NULL;
646             buf_sz=0;
647             #endif /* LINSOLVERS_RETAIN_MEMORY */
648              
649             PLASMA_Finalize();
650             PLASMA_ncores=-PLASMA_ncores;
651              
652             return 1;
653             }
654              
655             /* calculate required memory size */
656             a_sz=m*m;
657             tot_sz=a_sz;
658              
659             #ifdef LINSOLVERS_RETAIN_MEMORY
660             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
661             if(buf) free(buf); /* free previously allocated memory */
662              
663             buf_sz=tot_sz;
664             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
665             if(!buf){
666             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
667             exit(1);
668             }
669             }
670             #else
671             buf_sz=tot_sz;
672             buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
673             if(!buf){
674             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
675             exit(1);
676             }
677             #endif /* LINSOLVERS_RETAIN_MEMORY */
678              
679             a=buf;
680              
681             /* store A into a and B into x; A is assumed to be symmetric,
682             * hence no transposition is needed
683             */
684             memcpy(a, A, a_sz*sizeof(LM_REAL));
685             memcpy(x, B, m*sizeof(LM_REAL));
686              
687             /* initialize PLASMA */
688             if(PLASMA_ncores<0){
689             PLASMA_ncores=-PLASMA_ncores;
690             PLASMA_Init(PLASMA_ncores);
691             fprintf(stderr, RCAT("\n", AX_EQ_B_PLASMA_CHOL) "(): PLASMA is running on %d cores.\n\n", PLASMA_ncores);
692             }
693            
694             /* Solve the linear system */
695             info=PLASMA_POSV(PlasmaLower, m, 1, a, m, x, m);
696             /* error treatment */
697             if(info!=0){
698             if(info<0){
699             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", PLASMA_POSV) " in ",
700             AX_EQ_B_PLASMA_CHOL) "()\n", -info);
701             exit(1);
702             }
703             else{
704             fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\n"
705             "the factorization could not be completed for ", PLASMA_POSV) " in ", AX_EQ_B_CHOL) "()\n", info);
706             #ifndef LINSOLVERS_RETAIN_MEMORY
707             free(buf);
708             #endif
709             return 0;
710             }
711             }
712              
713             #ifndef LINSOLVERS_RETAIN_MEMORY
714             free(buf);
715             #endif
716              
717             return 1;
718             }
719             #endif /* HAVE_PLASMA */
720              
721             /*
722             * This function returns the solution of Ax = b
723             *
724             * The function employs LU decomposition:
725             * If A=L U with L lower and U upper triangular, then the original system
726             * amounts to solving
727             * L y = b, U x = y
728             *
729             * A is mxm, b is mx1
730             *
731             * The function returns 0 in case of error, 1 if successful
732             *
733             * This function is often called repetitively to solve problems of identical
734             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
735             * retained between calls and free'd-malloc'ed when not of the appropriate size.
736             * A call with NULL as the first argument forces this memory to be released.
737             */
738             int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
739             {
740             __STATIC__ LM_REAL *buf=NULL;
741             __STATIC__ int buf_sz=0;
742              
743             int a_sz, ipiv_sz, tot_sz;
744             register int i, j;
745             int info, *ipiv, nrhs=1;
746             LM_REAL *a;
747            
748             if(!A)
749             #ifdef LINSOLVERS_RETAIN_MEMORY
750             {
751             if(buf) free(buf);
752             buf=NULL;
753             buf_sz=0;
754              
755             return 1;
756             }
757             #else
758             return 1; /* NOP */
759             #endif /* LINSOLVERS_RETAIN_MEMORY */
760            
761             /* calculate required memory size */
762             ipiv_sz=m;
763             a_sz=m*m;
764             tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
765              
766             #ifdef LINSOLVERS_RETAIN_MEMORY
767             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
768             if(buf) free(buf); /* free previously allocated memory */
769              
770             buf_sz=tot_sz;
771             buf=(LM_REAL *)malloc(buf_sz);
772             if(!buf){
773             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
774             exit(1);
775             }
776             }
777             #else
778             buf_sz=tot_sz;
779             buf=(LM_REAL *)malloc(buf_sz);
780             if(!buf){
781             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
782             exit(1);
783             }
784             #endif /* LINSOLVERS_RETAIN_MEMORY */
785              
786             a=buf;
787             ipiv=(int *)(a+a_sz);
788              
789             /* store A (column major!) into a and B into x */
790             for(i=0; i
791             for(j=0; j
792             a[i+j*m]=A[i*m+j];
793              
794             x[i]=B[i];
795             }
796              
797             /* LU decomposition for A */
798             GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);
799             if(info!=0){
800             if(info<0){
801             fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
802             exit(1);
803             }
804             else{
805             fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
806             #ifndef LINSOLVERS_RETAIN_MEMORY
807             free(buf);
808             #endif
809              
810             return 0;
811             }
812             }
813              
814             /* solve the system with the computed LU */
815             GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
816             if(info!=0){
817             if(info<0){
818             fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
819             exit(1);
820             }
821             else{
822             fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
823             #ifndef LINSOLVERS_RETAIN_MEMORY
824             free(buf);
825             #endif
826              
827             return 0;
828             }
829             }
830              
831             #ifndef LINSOLVERS_RETAIN_MEMORY
832             free(buf);
833             #endif
834              
835             return 1;
836             }
837              
838             /*
839             * This function returns the solution of Ax = b
840             *
841             * The function is based on SVD decomposition:
842             * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
843             * (U D V^T) x = b or x=V D^{-1} U^T b
844             * Note that V D^{-1} U^T is the pseudoinverse A^+
845             *
846             * A is mxm, b is mx1.
847             *
848             * The function returns 0 in case of error, 1 if successful
849             *
850             * This function is often called repetitively to solve problems of identical
851             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
852             * retained between calls and free'd-malloc'ed when not of the appropriate size.
853             * A call with NULL as the first argument forces this memory to be released.
854             */
855             int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
856             {
857             __STATIC__ LM_REAL *buf=NULL;
858             __STATIC__ int buf_sz=0;
859             static LM_REAL eps=LM_CNST(-1.0);
860              
861             register int i, j;
862             LM_REAL *a, *u, *s, *vt, *work;
863             int a_sz, u_sz, s_sz, vt_sz, tot_sz;
864             LM_REAL thresh, one_over_denom;
865             register LM_REAL sum;
866             int info, rank, worksz, *iwork, iworksz;
867            
868             if(!A)
869             #ifdef LINSOLVERS_RETAIN_MEMORY
870             {
871             if(buf) free(buf);
872             buf=NULL;
873             buf_sz=0;
874              
875             return 1;
876             }
877             #else
878             return 1; /* NOP */
879             #endif /* LINSOLVERS_RETAIN_MEMORY */
880            
881             /* calculate required memory size */
882             #if 1 /* use optimal size */
883             worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD
884             /* note that optimal work size is returned in thresh */
885             GESVD("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, &info);
886             //GESDD("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, NULL, &info);
887             worksz=(int)thresh;
888             #else /* use minimum size */
889             worksz=5*m; // min worksize for GESVD
890             //worksz=m*(7*m+4); // min worksize for GESDD
891             #endif
892             iworksz=8*m;
893             a_sz=m*m;
894             u_sz=m*m; s_sz=m; vt_sz=m*m;
895              
896             tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
897              
898             #ifdef LINSOLVERS_RETAIN_MEMORY
899             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
900             if(buf) free(buf); /* free previously allocated memory */
901              
902             buf_sz=tot_sz;
903             buf=(LM_REAL *)malloc(buf_sz);
904             if(!buf){
905             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
906             exit(1);
907             }
908             }
909             #else
910             buf_sz=tot_sz;
911             buf=(LM_REAL *)malloc(buf_sz);
912             if(!buf){
913             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
914             exit(1);
915             }
916             #endif /* LINSOLVERS_RETAIN_MEMORY */
917              
918             a=buf;
919             u=a+a_sz;
920             s=u+u_sz;
921             vt=s+s_sz;
922             work=vt+vt_sz;
923             iwork=(int *)(work+worksz);
924              
925             /* store A (column major!) into a */
926             for(i=0; i
927             for(j=0; j
928             a[i+j*m]=A[i*m+j];
929              
930             /* SVD decomposition of A */
931             GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
932             //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
933              
934             /* error treatment */
935             if(info!=0){
936             if(info<0){
937             fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
938             exit(1);
939             }
940             else{
941             fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
942             #ifndef LINSOLVERS_RETAIN_MEMORY
943             free(buf);
944             #endif
945              
946             return 0;
947             }
948             }
949              
950             if(eps<0.0){
951             LM_REAL aux;
952              
953             /* compute machine epsilon */
954             for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
955             ;
956             eps*=LM_CNST(2.0);
957             }
958              
959             /* compute the pseudoinverse in a */
960             for(i=0; i
961             for(rank=0, thresh=eps*s[0]; rankthresh; rank++){
962             one_over_denom=LM_CNST(1.0)/s[rank];
963              
964             for(j=0; j
965             for(i=0; i
966             a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
967             }
968              
969             /* compute A^+ b in x */
970             for(i=0; i
971             for(j=0, sum=0.0; j
972             sum+=a[i*m+j]*B[j];
973             x[i]=sum;
974             }
975              
976             #ifndef LINSOLVERS_RETAIN_MEMORY
977             free(buf);
978             #endif
979              
980             return 1;
981             }
982              
983             /*
984             * This function returns the solution of Ax = b for a real symmetric matrix A
985             *
986             * The function is based on LDLT factorization with the pivoting
987             * strategy of Bunch and Kaufman:
988             * A is factored as L*D*L^T where L is lower triangular and
989             * D symmetric and block diagonal (aka spectral decomposition,
990             * Banachiewicz factorization, modified Cholesky factorization)
991             *
992             * A is mxm, b is mx1.
993             *
994             * The function returns 0 in case of error, 1 if successfull
995             *
996             * This function is often called repetitively to solve problems of identical
997             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
998             * retained between calls and free'd-malloc'ed when not of the appropriate size.
999             * A call with NULL as the first argument forces this memory to be released.
1000             */
1001             int AX_EQ_B_BK(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
1002             {
1003             __STATIC__ LM_REAL *buf=NULL;
1004             __STATIC__ int buf_sz=0, nb=0;
1005              
1006             LM_REAL *a, *work;
1007             int a_sz, ipiv_sz, work_sz, tot_sz;
1008             int info, *ipiv, nrhs=1;
1009            
1010             if(!A)
1011             #ifdef LINSOLVERS_RETAIN_MEMORY
1012             {
1013             if(buf) free(buf);
1014             buf=NULL;
1015             buf_sz=0;
1016              
1017             return 1;
1018             }
1019             #else
1020             return 1; /* NOP */
1021             #endif /* LINSOLVERS_RETAIN_MEMORY */
1022              
1023             /* calculate required memory size */
1024             ipiv_sz=m;
1025             a_sz=m*m;
1026             if(!nb){
1027             LM_REAL tmp;
1028              
1029             work_sz=-1; // workspace query; optimal size is returned in tmp
1030             SYTRF("L", (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&work_sz, (int *)&info);
1031             nb=((int)tmp)/m; // optimal worksize is m*nb
1032             }
1033             work_sz=(nb!=-1)? nb*m : 1;
1034             tot_sz=(a_sz + work_sz)*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
1035              
1036             #ifdef LINSOLVERS_RETAIN_MEMORY
1037             if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1038             if(buf) free(buf); /* free previously allocated memory */
1039              
1040             buf_sz=tot_sz;
1041             buf=(LM_REAL *)malloc(buf_sz);
1042             if(!buf){
1043             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1044             exit(1);
1045             }
1046             }
1047             #else
1048             buf_sz=tot_sz;
1049             buf=(LM_REAL *)malloc(buf_sz);
1050             if(!buf){
1051             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1052             exit(1);
1053             }
1054             #endif /* LINSOLVERS_RETAIN_MEMORY */
1055              
1056             a=buf;
1057             work=a+a_sz;
1058             ipiv=(int *)(work+work_sz);
1059              
1060             /* store A into a and B into x; A is assumed to be symmetric, hence
1061             * the column and row major order representations are the same
1062             */
1063             memcpy(a, A, a_sz*sizeof(LM_REAL));
1064             memcpy(x, B, m*sizeof(LM_REAL));
1065              
1066             /* LDLt factorization for A */
1067             SYTRF("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
1068             if(info!=0){
1069             if(info<0){
1070             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRF) " in ", AX_EQ_B_BK) "()\n", -info);
1071             exit(1);
1072             }
1073             else{
1074             fprintf(stderr, RCAT(RCAT("LAPACK error: singular block diagonal matrix D for", SYTRF) " in ", AX_EQ_B_BK)"() [D(%d, %d) is zero]\n", info, info);
1075             #ifndef LINSOLVERS_RETAIN_MEMORY
1076             free(buf);
1077             #endif
1078              
1079             return 0;
1080             }
1081             }
1082              
1083             /* solve the system with the computed factorization */
1084             SYTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
1085             if(info<0){
1086             fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRS) " in ", AX_EQ_B_BK) "()\n", -info);
1087             exit(1);
1088             }
1089              
1090             #ifndef LINSOLVERS_RETAIN_MEMORY
1091             free(buf);
1092             #endif
1093              
1094             return 1;
1095             }
1096              
1097             /* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1098             #undef AX_EQ_B_QR
1099             #undef AX_EQ_B_QRLS
1100             #undef AX_EQ_B_CHOL
1101             #undef AX_EQ_B_LU
1102             #undef AX_EQ_B_SVD
1103             #undef AX_EQ_B_BK
1104             #undef AX_EQ_B_PLASMA_CHOL
1105              
1106             #undef GEQRF
1107             #undef ORGQR
1108             #undef TRTRS
1109             #undef POTF2
1110             #undef POTRF
1111             #undef POTRS
1112             #undef GETRF
1113             #undef GETRS
1114             #undef GESVD
1115             #undef GESDD
1116             #undef SYTRF
1117             #undef SYTRS
1118             #undef PLASMA_POSV
1119              
1120             #else // no LAPACK
1121              
1122             /* precision-specific definitions */
1123             #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
1124              
1125             /*
1126             * This function returns the solution of Ax = b
1127             *
1128             * The function employs LU decomposition followed by forward/back substitution (see
1129             * also the LAPACK-based LU solver above)
1130             *
1131             * A is mxm, b is mx1
1132             *
1133             * The function returns 0 in case of error, 1 if successful
1134             *
1135             * This function is often called repetitively to solve problems of identical
1136             * dimensions. To avoid repetitive malloc's and free's, allocated memory is
1137             * retained between calls and free'd-malloc'ed when not of the appropriate size.
1138             * A call with NULL as the first argument forces this memory to be released.
1139             */
1140 102463           int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
1141             {
1142             __STATIC__ void *buf=NULL;
1143             __STATIC__ int buf_sz=0;
1144              
1145             register int i, j, k;
1146 102463           int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz;
1147             LM_REAL *a, *work, max, sum, tmp;
1148              
1149 102463           if(!A)
1150             #ifdef LINSOLVERS_RETAIN_MEMORY
1151             {
1152 160           if(buf) free(buf);
1153 160           buf=NULL;
1154 160           buf_sz=0;
1155              
1156 160           return 1;
1157             }
1158             #else
1159             return 1; /* NOP */
1160             #endif /* LINSOLVERS_RETAIN_MEMORY */
1161            
1162             /* calculate required memory size */
1163 102303           idx_sz=m;
1164 102303           a_sz=m*m;
1165 102303           work_sz=m;
1166 102303           tot_sz=(a_sz+work_sz)*sizeof(LM_REAL) + idx_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
1167              
1168             #ifdef LINSOLVERS_RETAIN_MEMORY
1169 102303           if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1170 160           if(buf) free(buf); /* free previously allocated memory */
1171              
1172 160           buf_sz=tot_sz;
1173 160           buf=(void *)malloc(tot_sz);
1174 160           if(!buf){
1175 0           fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1176 0           exit(1);
1177             }
1178             }
1179             #else
1180             buf_sz=tot_sz;
1181             buf=(void *)malloc(tot_sz);
1182             if(!buf){
1183             fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1184             exit(1);
1185             }
1186             #endif /* LINSOLVERS_RETAIN_MEMORY */
1187              
1188 102303           a=buf;
1189 102303           work=a+a_sz;
1190 102303           idx=(int *)(work+work_sz);
1191              
1192             /* avoid destroying A, B by copying them to a, x resp. */
1193 102303           memcpy(a, A, a_sz*sizeof(LM_REAL));
1194 102303           memcpy(x, B, m*sizeof(LM_REAL));
1195              
1196             /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
1197 427974           for(i=0; i
1198 325671           max=0.0;
1199 1461152           for(j=0; j
1200 1135481           if((tmp=FABS(a[i*m+j]))>max)
1201 487520           max=tmp;
1202 325671           if(max==0.0){
1203 0           fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n");
1204             #ifndef LINSOLVERS_RETAIN_MEMORY
1205             free(buf);
1206             #endif
1207              
1208 0           return 0;
1209             }
1210 325671           work[i]=LM_CNST(1.0)/max;
1211             }
1212              
1213 427974           for(j=0; j
1214 730576           for(i=0; i
1215 404905           sum=a[i*m+j];
1216 646914           for(k=0; k
1217 242009           sum-=a[i*m+k]*a[k*m+j];
1218 404905           a[i*m+j]=sum;
1219             }
1220 325671           max=0.0;
1221 1056247           for(i=j; i
1222 730576           sum=a[i*m+j];
1223 1377490           for(k=0; k
1224 646914           sum-=a[i*m+k]*a[k*m+j];
1225 730576           a[i*m+j]=sum;
1226 730576           if((tmp=work[i]*FABS(sum))>=max){
1227 387658           max=tmp;
1228 387658           maxi=i;
1229             }
1230             }
1231 325671           if(j!=maxi){
1232 306902           for(k=0; k
1233 244930           tmp=a[maxi*m+k];
1234 244930           a[maxi*m+k]=a[j*m+k];
1235 244930           a[j*m+k]=tmp;
1236             }
1237 61972           work[maxi]=work[j];
1238             }
1239 325671           idx[j]=maxi;
1240 325671           if(a[j*m+j]==0.0)
1241 0           a[j*m+j]=LM_REAL_EPSILON;
1242 325671           if(j!=m-1){
1243 223368           tmp=LM_CNST(1.0)/(a[j*m+j]);
1244 628273           for(i=j+1; i
1245 404905           a[i*m+j]*=tmp;
1246             }
1247             }
1248              
1249             /* The decomposition has now replaced a. Solve the linear system using
1250             * forward and back substitution
1251             */
1252 427974           for(i=k=0; i
1253 325671           j=idx[i];
1254 325671           sum=x[j];
1255 325671           x[j]=x[i];
1256 325671           if(k!=0)
1257 628273           for(j=k-1; j
1258 404905           sum-=a[i*m+j]*x[j];
1259             else
1260 102303           if(sum!=0.0)
1261 102303           k=i+1;
1262 325671           x[i]=sum;
1263             }
1264              
1265 427974           for(i=m-1; i>=0; --i){
1266 325671           sum=x[i];
1267 730576           for(j=i+1; j
1268 404905           sum-=a[i*m+j]*x[j];
1269 325671           x[i]=sum/a[i*m+i];
1270             }
1271              
1272             #ifndef LINSOLVERS_RETAIN_MEMORY
1273             free(buf);
1274             #endif
1275              
1276 102303           return 1;
1277             }
1278              
1279             /* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1280             #undef AX_EQ_B_LU
1281              
1282             #endif /* HAVE_LAPACK */