2.50: svn merge https://svn.blender.org/svnroot/bf-blender/trunk/blender -r17853...
[blender.git] / intern / opennl / intern / opennl.c
1 /*
2  *  $Id$
3  *
4  *  OpenNL: Numerical Library
5  *  Copyright (C) 2004 Bruno Levy
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  *  If you modify this software, you should include a notice giving the
22  *  name of the person performing the modification, the date of modification,
23  *  and the reason for such modification.
24  *
25  *  Contact: Bruno Levy
26  *
27  *       levy@loria.fr
28  *
29  *       ISA Project
30  *       LORIA, INRIA Lorraine, 
31  *       Campus Scientifique, BP 239
32  *       54506 VANDOEUVRE LES NANCY CEDEX 
33  *       FRANCE
34  *
35  *  Note that the GNU General Public License does not permit incorporating
36  *  the Software into proprietary programs. 
37  */
38
39 #include "ONL_opennl.h"
40
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <math.h>
45
46 #ifdef NL_PARANOID
47 #ifndef NL_DEBUG
48 #define NL_DEBUG
49 #endif
50 #endif
51
52 /* SuperLU includes */
53 #include <ssp_defs.h>
54 #include <util.h>
55
56 /************************************************************************************/
57 /* Assertions */
58
59
60 static void __nl_assertion_failed(char* cond, char* file, int line) {
61         fprintf(
62                 stderr, 
63                 "OpenNL assertion failed: %s, file:%s, line:%d\n",
64                 cond,file,line
65         );
66         abort();
67 }
68
69 static void __nl_range_assertion_failed(
70         float x, float min_val, float max_val, char* file, int line
71 ) {
72         fprintf(
73                 stderr, 
74                 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
75                 x, min_val, max_val, file,line
76         );
77         abort();
78 }
79
80 static void __nl_should_not_have_reached(char* file, int line) {
81         fprintf(
82                 stderr, 
83                 "OpenNL should not have reached this point: file:%s, line:%d\n",
84                 file,line
85         );
86         abort();
87 }
88
89
90 #define __nl_assert(x) {                                                                                \
91         if(!(x)) {                                                                                                \
92                 __nl_assertion_failed(#x,__FILE__, __LINE__);             \
93         }                                                                                                                  \
94
95
96 #define __nl_range_assert(x,min_val,max_val) {                            \
97         if(((x) < (min_val)) || ((x) > (max_val))) {                            \
98                 __nl_range_assertion_failed(x, min_val, max_val,                \
99                         __FILE__, __LINE__                                                                \
100                 );                                                                                                       \
101         }                                                                                                                  \
102 }
103
104 #define __nl_assert_not_reached {                                                          \
105         __nl_should_not_have_reached(__FILE__, __LINE__);                 \
106 }
107
108 #ifdef NL_DEBUG
109 #define __nl_debug_assert(x) __nl_assert(x)
110 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
111 #else
112 #define __nl_debug_assert(x) 
113 #define __nl_debug_range_assert(x,min_val,max_val) 
114 #endif
115
116 #ifdef NL_PARANOID
117 #define __nl_parano_assert(x) __nl_assert(x)
118 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
119 #else
120 #define __nl_parano_assert(x) 
121 #define __nl_parano_range_assert(x,min_val,max_val) 
122 #endif
123
124 /************************************************************************************/
125 /* classic macros */
126
127 #ifndef MIN
128 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 
129 #endif
130
131 #ifndef MAX
132 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 
133 #endif
134
135 /************************************************************************************/
136 /* memory management */
137
138 #define __NL_NEW(T)                             (T*)(calloc(1, sizeof(T))) 
139 #define __NL_NEW_ARRAY(T,NB)       (T*)(calloc((NB),sizeof(T))) 
140 #define __NL_RENEW_ARRAY(T,x,NB)   (T*)(realloc(x,(NB)*sizeof(T))) 
141 #define __NL_DELETE(x)                   free(x); x = NULL 
142 #define __NL_DELETE_ARRAY(x)       free(x); x = NULL
143
144 #define __NL_CLEAR(T, x)                   memset(x, 0, sizeof(T)) 
145 #define __NL_CLEAR_ARRAY(T,x,NB)   memset(x, 0, (NB)*sizeof(T)) 
146
147 /************************************************************************************/
148 /* Dynamic arrays for sparse row/columns */
149
150 typedef struct {
151         NLuint   index;
152         NLfloat value;
153 } __NLCoeff;
154
155 typedef struct {
156         NLuint size;
157         NLuint capacity;
158         __NLCoeff* coeff;
159 } __NLRowColumn;
160
161 static void __nlRowColumnConstruct(__NLRowColumn* c) {
162         c->size  = 0;
163         c->capacity = 0;
164         c->coeff        = NULL;
165 }
166
167 static void __nlRowColumnDestroy(__NLRowColumn* c) {
168         __NL_DELETE_ARRAY(c->coeff);
169 #ifdef NL_PARANOID
170         __NL_CLEAR(__NLRowColumn, c); 
171 #endif
172 }
173
174 static void __nlRowColumnGrow(__NLRowColumn* c) {
175         if(c->capacity != 0) {
176                 c->capacity = 2 * c->capacity;
177                 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
178         } else {
179                 c->capacity = 4;
180                 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
181         }
182 }
183
184 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
185         NLuint i;
186         for(i=0; i<c->size; i++) {
187                 if(c->coeff[i].index == (NLuint)index) {
188                         c->coeff[i].value += value;
189                         return;
190                 }
191         }
192         if(c->size == c->capacity) {
193                 __nlRowColumnGrow(c);
194         }
195         c->coeff[c->size].index = index;
196         c->coeff[c->size].value = value;
197         c->size++;
198 }
199
200 /* Does not check whether the index already exists */
201 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
202         if(c->size == c->capacity) {
203                 __nlRowColumnGrow(c);
204         }
205         c->coeff[c->size].index = index;
206         c->coeff[c->size].value = value;
207         c->size++;
208 }
209
210 static void __nlRowColumnClear(__NLRowColumn* c) {
211         c->size  = 0;
212         c->capacity = 0;
213         __NL_DELETE_ARRAY(c->coeff);
214 }
215
216 /************************************************************************************/
217 /* SparseMatrix data structure */
218
219 #define __NL_ROWS         1
220 #define __NL_COLUMNS   2
221 #define __NL_SYMMETRIC 4
222
223 typedef struct {
224         NLuint m;
225         NLuint n;
226         NLuint diag_size;
227         NLenum storage;
228         __NLRowColumn* row;
229         __NLRowColumn* column;
230         NLfloat*          diag;
231 } __NLSparseMatrix;
232
233
234 static void __nlSparseMatrixConstruct(
235         __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
236 ) {
237         NLuint i;
238         M->m = m;
239         M->n = n;
240         M->storage = storage;
241         if(storage & __NL_ROWS) {
242                 M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
243                 for(i=0; i<m; i++) {
244                         __nlRowColumnConstruct(&(M->row[i]));
245                 }
246         } else {
247                 M->row = NULL;
248         }
249
250         if(storage & __NL_COLUMNS) {
251                 M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
252                 for(i=0; i<n; i++) {
253                         __nlRowColumnConstruct(&(M->column[i]));
254                 }
255         } else {
256                 M->column = NULL;
257         }
258
259         M->diag_size = MIN(m,n);
260         M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
261 }
262
263 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
264         NLuint i;
265         __NL_DELETE_ARRAY(M->diag);
266         if(M->storage & __NL_ROWS) {
267                 for(i=0; i<M->m; i++) {
268                         __nlRowColumnDestroy(&(M->row[i]));
269                 }
270                 __NL_DELETE_ARRAY(M->row);
271         }
272         if(M->storage & __NL_COLUMNS) {
273                 for(i=0; i<M->n; i++) {
274                         __nlRowColumnDestroy(&(M->column[i]));
275                 }
276                 __NL_DELETE_ARRAY(M->column);
277         }
278 #ifdef NL_PARANOID
279         __NL_CLEAR(__NLSparseMatrix,M);
280 #endif
281 }
282
283 static void __nlSparseMatrixAdd(
284         __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
285 ) {
286         __nl_parano_range_assert(i, 0, M->m - 1);
287         __nl_parano_range_assert(j, 0, M->n - 1);
288         if((M->storage & __NL_SYMMETRIC) && (j > i)) {
289                 return;
290         }
291         if(i == j) {
292                 M->diag[i] += value;
293         }
294         if(M->storage & __NL_ROWS) {
295                 __nlRowColumnAdd(&(M->row[i]), j, value);
296         }
297         if(M->storage & __NL_COLUMNS) {
298                 __nlRowColumnAdd(&(M->column[j]), i, value);
299         }
300 }
301
302 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
303         NLuint i;
304         if(M->storage & __NL_ROWS) {
305                 for(i=0; i<M->m; i++) {
306                         __nlRowColumnClear(&(M->row[i]));
307                 }
308         }
309         if(M->storage & __NL_COLUMNS) {
310                 for(i=0; i<M->n; i++) {
311                         __nlRowColumnClear(&(M->column[i]));
312                 }
313         }
314         __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);       
315 }
316
317 /* Returns the number of non-zero coefficients */
318 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
319         NLuint nnz = 0;
320         NLuint i;
321         if(M->storage & __NL_ROWS) {
322                 for(i = 0; i<M->m; i++) {
323                         nnz += M->row[i].size;
324                 }
325         } else if (M->storage & __NL_COLUMNS) {
326                 for(i = 0; i<M->n; i++) {
327                         nnz += M->column[i].size;
328                 }
329         } else {
330                 __nl_assert_not_reached;
331         }
332         return nnz;
333 }
334
335 /************************************************************************************/
336 /* SparseMatrix x Vector routines, internal helper routines */
337
338 static void __nlSparseMatrix_mult_rows_symmetric(
339         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
340 ) {
341         NLuint m = A->m;
342         NLuint i,ij;
343         __NLRowColumn* Ri = NULL;
344         __NLCoeff* c = NULL;
345         for(i=0; i<m; i++) {
346                 y[i] = 0;
347                 Ri = &(A->row[i]);
348                 for(ij=0; ij<Ri->size; ij++) {
349                         c = &(Ri->coeff[ij]);
350                         y[i] += c->value * x[c->index];
351                         if(i != c->index) {
352                                 y[c->index] += c->value * x[i];
353                         }
354                 }
355         }
356 }
357
358 static void __nlSparseMatrix_mult_rows(
359         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
360 ) {
361         NLuint m = A->m;
362         NLuint i,ij;
363         __NLRowColumn* Ri = NULL;
364         __NLCoeff* c = NULL;
365         for(i=0; i<m; i++) {
366                 y[i] = 0;
367                 Ri = &(A->row[i]);
368                 for(ij=0; ij<Ri->size; ij++) {
369                         c = &(Ri->coeff[ij]);
370                         y[i] += c->value * x[c->index];
371                 }
372         }
373 }
374
375 static void __nlSparseMatrix_mult_cols_symmetric(
376         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
377 ) {
378         NLuint n = A->n;
379         NLuint j,ii;
380         __NLRowColumn* Cj = NULL;
381         __NLCoeff* c = NULL;
382         for(j=0; j<n; j++) {
383                 y[j] = 0;
384                 Cj = &(A->column[j]);
385                 for(ii=0; ii<Cj->size; ii++) {
386                         c = &(Cj->coeff[ii]);
387                         y[c->index] += c->value * x[j];
388                         if(j != c->index) {
389                                 y[j] += c->value * x[c->index];
390                         }
391                 }
392         }
393 }
394
395 static void __nlSparseMatrix_mult_cols(
396         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
397 ) {
398         NLuint n = A->n;
399         NLuint j,ii; 
400         __NLRowColumn* Cj = NULL;
401         __NLCoeff* c = NULL;
402         __NL_CLEAR_ARRAY(NLfloat, y, A->m);
403         for(j=0; j<n; j++) {
404                 Cj = &(A->column[j]);
405                 for(ii=0; ii<Cj->size; ii++) {
406                         c = &(Cj->coeff[ii]);
407                         y[c->index] += c->value * x[j];
408                 }
409         }
410 }
411
412 /************************************************************************************/
413 /* SparseMatrix x Vector routines, main driver routine */
414
415 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
416         if(A->storage & __NL_ROWS) {
417                 if(A->storage & __NL_SYMMETRIC) {
418                         __nlSparseMatrix_mult_rows_symmetric(A, x, y);
419                 } else {
420                         __nlSparseMatrix_mult_rows(A, x, y);
421                 }
422         } else {
423                 if(A->storage & __NL_SYMMETRIC) {
424                         __nlSparseMatrix_mult_cols_symmetric(A, x, y);
425                 } else {
426                         __nlSparseMatrix_mult_cols(A, x, y);
427                 }
428         }
429 }
430
431 /* ****************** Routines for least squares ******************* */
432
433 static void __nlSparseMatrix_square(
434         __NLSparseMatrix* AtA, __NLSparseMatrix *A
435 ) {
436         NLuint m = A->m;
437         NLuint n = A->n;
438         NLuint i, j0, j1;
439         __NLRowColumn *Ri = NULL;
440         __NLCoeff *c0 = NULL, *c1 = NULL;
441         float value;
442
443         __nlSparseMatrixConstruct(AtA, n, n, A->storage);
444
445         for(i=0; i<m; i++) {
446                 Ri = &(A->row[i]);
447
448                 for(j0=0; j0<Ri->size; j0++) {
449                         c0 = &(Ri->coeff[j0]);
450                         for(j1=0; j1<Ri->size; j1++) {
451                                 c1 = &(Ri->coeff[j1]);
452
453                                 value = c0->value*c1->value;
454                                 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value);
455                         }
456                 }
457         }
458 }
459
460 static void __nlSparseMatrix_transpose_mult_rows(
461         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
462 ) {
463         NLuint m = A->m;
464         NLuint n = A->n;
465         NLuint i,ij;
466         __NLRowColumn* Ri = NULL;
467         __NLCoeff* c = NULL;
468
469         __NL_CLEAR_ARRAY(NLfloat, y, n);
470
471         for(i=0; i<m; i++) {
472                 Ri = &(A->row[i]);
473                 for(ij=0; ij<Ri->size; ij++) {
474                         c = &(Ri->coeff[ij]);
475                         y[c->index] += c->value * x[i];
476                 }
477         }
478 }
479
480 /************************************************************************************/
481 /* NLContext data structure */
482
483 typedef void(*__NLMatrixFunc)(float* x, float* y);
484
485 typedef struct {
486         NLfloat  value[4];
487         NLboolean locked;
488         NLuint  index;
489         __NLRowColumn *a;
490 } __NLVariable;
491
492 #define __NL_STATE_INITIAL                              0
493 #define __NL_STATE_SYSTEM                               1
494 #define __NL_STATE_MATRIX                               2
495 #define __NL_STATE_MATRIX_CONSTRUCTED   3
496 #define __NL_STATE_SYSTEM_CONSTRUCTED   4
497 #define __NL_STATE_SYSTEM_SOLVED                5
498
499 typedef struct {
500         NLenum                  state;
501         NLuint                  n;
502         NLuint                  m;
503         __NLVariable*   variable;
504         NLfloat*                b;
505         NLfloat*                Mtb;
506         __NLSparseMatrix M;
507         __NLSparseMatrix MtM;
508         NLfloat*                x;
509         NLuint                  nb_variables;
510         NLuint                  nb_rows;
511         NLboolean               least_squares;
512         NLboolean               symmetric;
513         NLuint                  nb_rhs;
514         NLboolean               solve_again;
515         NLboolean               alloc_M;
516         NLboolean               alloc_MtM;
517         NLboolean               alloc_variable;
518         NLboolean               alloc_x;
519         NLboolean               alloc_b;
520         NLboolean               alloc_Mtb;
521         NLfloat                 error;
522         __NLMatrixFunc  matrix_vector_prod;
523
524         struct __NLSuperLUContext {
525                 NLboolean alloc_slu;
526                 SuperMatrix L, U;
527                 NLint *perm_c, *perm_r;
528                 SuperLUStat_t stat;
529         } slu;
530 } __NLContext;
531
532 static __NLContext* __nlCurrentContext = NULL;
533
534 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
535         __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
536 }
537
538
539 NLContext nlNewContext(void) {
540         __NLContext* result       = __NL_NEW(__NLContext);
541         result->state                   = __NL_STATE_INITIAL;
542         result->matrix_vector_prod = __nlMatrixVectorProd_default;
543         result->nb_rhs = 1;
544         nlMakeCurrent(result);
545         return result;
546 }
547
548 static void __nlFree_SUPERLU(__NLContext *context);
549
550 void nlDeleteContext(NLContext context_in) {
551         __NLContext* context = (__NLContext*)(context_in);
552         int i;
553
554         if(__nlCurrentContext == context) {
555                 __nlCurrentContext = NULL;
556         }
557         if(context->alloc_M) {
558                 __nlSparseMatrixDestroy(&context->M);
559         }
560         if(context->alloc_MtM) {
561                 __nlSparseMatrixDestroy(&context->MtM);
562         }
563         if(context->alloc_variable) {
564                 for(i=0; i<context->nb_variables; i++) {
565                         if(context->variable[i].a) {
566                                 __nlRowColumnDestroy(context->variable[i].a);
567                                 __NL_DELETE(context->variable[i].a);
568                         }
569                 }
570         }
571         if(context->alloc_b) {
572                 __NL_DELETE_ARRAY(context->b);
573         }
574         if(context->alloc_Mtb) {
575                 __NL_DELETE_ARRAY(context->Mtb);
576         }
577         if(context->alloc_x) {
578                 __NL_DELETE_ARRAY(context->x);
579         }
580         if (context->slu.alloc_slu) {
581                 __nlFree_SUPERLU(context);
582         }
583
584 #ifdef NL_PARANOID
585         __NL_CLEAR(__NLContext, context);
586 #endif
587         __NL_DELETE(context);
588 }
589
590 void nlMakeCurrent(NLContext context) {
591         __nlCurrentContext = (__NLContext*)(context);
592 }
593
594 NLContext nlGetCurrent(void) {
595         return __nlCurrentContext;
596 }
597
598 static void __nlCheckState(NLenum state) {
599         __nl_assert(__nlCurrentContext->state == state);
600 }
601
602 static void __nlTransition(NLenum from_state, NLenum to_state) {
603         __nlCheckState(from_state);
604         __nlCurrentContext->state = to_state;
605 }
606
607 /************************************************************************************/
608 /* Get/Set parameters */
609
610 void nlSolverParameterf(NLenum pname, NLfloat param) {
611         __nlCheckState(__NL_STATE_INITIAL);
612         switch(pname) {
613         case NL_NB_VARIABLES: {
614                 __nl_assert(param > 0);
615                 __nlCurrentContext->nb_variables = (NLuint)param;
616         } break;
617         case NL_NB_ROWS: {
618                 __nl_assert(param > 0);
619                 __nlCurrentContext->nb_rows = (NLuint)param;
620         } break;
621         case NL_LEAST_SQUARES: {
622                 __nlCurrentContext->least_squares = (NLboolean)param;
623         } break;
624         case NL_SYMMETRIC: {
625                 __nlCurrentContext->symmetric = (NLboolean)param;               
626         } break;
627         case NL_NB_RIGHT_HAND_SIDES: {
628                 __nlCurrentContext->nb_rhs = (NLuint)param;
629         } break;
630         default: {
631                 __nl_assert_not_reached;
632         } break;
633         }
634 }
635
636 void nlSolverParameteri(NLenum pname, NLint param) {
637         __nlCheckState(__NL_STATE_INITIAL);
638         switch(pname) {
639         case NL_NB_VARIABLES: {
640                 __nl_assert(param > 0);
641                 __nlCurrentContext->nb_variables = (NLuint)param;
642         } break;
643         case NL_NB_ROWS: {
644                 __nl_assert(param > 0);
645                 __nlCurrentContext->nb_rows = (NLuint)param;
646         } break;
647         case NL_LEAST_SQUARES: {
648                 __nlCurrentContext->least_squares = (NLboolean)param;
649         } break;
650         case NL_SYMMETRIC: {
651                 __nlCurrentContext->symmetric = (NLboolean)param;               
652         } break;
653         case NL_NB_RIGHT_HAND_SIDES: {
654                 __nlCurrentContext->nb_rhs = (NLuint)param;
655         } break;
656         default: {
657                 __nl_assert_not_reached;
658         } break;
659         }
660 }
661
662 void nlGetBooleanv(NLenum pname, NLboolean* params) {
663         switch(pname) {
664         case NL_LEAST_SQUARES: {
665                 *params = __nlCurrentContext->least_squares;
666         } break;
667         case NL_SYMMETRIC: {
668                 *params = __nlCurrentContext->symmetric;
669         } break;
670         default: {
671                 __nl_assert_not_reached;
672         } break;
673         }
674 }
675
676 void nlGetFloatv(NLenum pname, NLfloat* params) {
677         switch(pname) {
678         case NL_NB_VARIABLES: {
679                 *params = (NLfloat)(__nlCurrentContext->nb_variables);
680         } break;
681         case NL_NB_ROWS: {
682                 *params = (NLfloat)(__nlCurrentContext->nb_rows);
683         } break;
684         case NL_LEAST_SQUARES: {
685                 *params = (NLfloat)(__nlCurrentContext->least_squares);
686         } break;
687         case NL_SYMMETRIC: {
688                 *params = (NLfloat)(__nlCurrentContext->symmetric);
689         } break;
690         case NL_ERROR: {
691                 *params = (NLfloat)(__nlCurrentContext->error);
692         } break;
693         default: {
694                 __nl_assert_not_reached;
695         } break;
696         }
697 }
698
699 void nlGetIntergerv(NLenum pname, NLint* params) {
700         switch(pname) {
701         case NL_NB_VARIABLES: {
702                 *params = (NLint)(__nlCurrentContext->nb_variables);
703         } break;
704         case NL_NB_ROWS: {
705                 *params = (NLint)(__nlCurrentContext->nb_rows);
706         } break;
707         case NL_LEAST_SQUARES: {
708                 *params = (NLint)(__nlCurrentContext->least_squares);
709         } break;
710         case NL_SYMMETRIC: {
711                 *params = (NLint)(__nlCurrentContext->symmetric);
712         } break;
713         default: {
714                 __nl_assert_not_reached;
715         } break;
716         }
717 }
718
719 /************************************************************************************/
720 /* Enable / Disable */
721
722 void nlEnable(NLenum pname) {
723         switch(pname) {
724         default: {
725                 __nl_assert_not_reached;
726         }
727         }
728 }
729
730 void nlDisable(NLenum pname) {
731         switch(pname) {
732         default: {
733                 __nl_assert_not_reached;
734         }
735         }
736 }
737
738 NLboolean nlIsEnabled(NLenum pname) {
739         switch(pname) {
740         default: {
741                 __nl_assert_not_reached;
742         }
743         }
744         return NL_FALSE;
745 }
746
747 /************************************************************************************/
748 /* Get/Set Lock/Unlock variables */
749
750 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
751         __nlCheckState(__NL_STATE_SYSTEM);
752         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
753         __nlCurrentContext->variable[index].value[rhsindex] = value;    
754 }
755
756 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
757         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
758         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
759         return __nlCurrentContext->variable[index].value[rhsindex];
760 }
761
762 void nlLockVariable(NLuint index) {
763         __nlCheckState(__NL_STATE_SYSTEM);
764         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
765         __nlCurrentContext->variable[index].locked = NL_TRUE;
766 }
767
768 void nlUnlockVariable(NLuint index) {
769         __nlCheckState(__NL_STATE_SYSTEM);
770         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
771         __nlCurrentContext->variable[index].locked = NL_FALSE;
772 }
773
774 NLboolean nlVariableIsLocked(NLuint index) {
775         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
776         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
777         return __nlCurrentContext->variable[index].locked;
778 }
779
780 /************************************************************************************/
781 /* System construction */
782
783 static void __nlVariablesToVector() {
784         __NLContext *context = __nlCurrentContext;
785         NLuint i, j, nb_rhs;
786
787         __nl_assert(context->alloc_x);
788         __nl_assert(context->alloc_variable);
789
790         nb_rhs= context->nb_rhs;
791
792         for(i=0; i<context->nb_variables; i++) {
793                 __NLVariable* v = &(context->variable[i]);
794                 if(!v->locked) {
795                         __nl_assert(v->index < context->n);
796
797                         for(j=0; j<nb_rhs; j++)
798                                 context->x[context->n*j + v->index] = v->value[j];
799                 }
800         }
801 }
802
803 static void __nlVectorToVariables() {
804         __NLContext *context = __nlCurrentContext;
805         NLuint i, j, nb_rhs;
806
807         __nl_assert(context->alloc_x);
808         __nl_assert(context->alloc_variable);
809
810         nb_rhs= context->nb_rhs;
811
812         for(i=0; i<context->nb_variables; i++) {
813                 __NLVariable* v = &(context->variable[i]);
814                 if(!v->locked) {
815                         __nl_assert(v->index < context->n);
816
817                         for(j=0; j<nb_rhs; j++)
818                                 v->value[j] = context->x[context->n*j + v->index];
819                 }
820         }
821 }
822
823 static void __nlBeginSystem() {
824         __nl_assert(__nlCurrentContext->nb_variables > 0);
825
826         if (__nlCurrentContext->solve_again)
827                 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
828         else {
829                 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
830
831                 __nlCurrentContext->variable = __NL_NEW_ARRAY(
832                         __NLVariable, __nlCurrentContext->nb_variables);
833                 
834                 __nlCurrentContext->alloc_variable = NL_TRUE;
835         }
836 }
837
838 static void __nlEndSystem() {
839         __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);   
840 }
841
842 static void __nlBeginMatrix() {
843         NLuint i;
844         NLuint m = 0, n = 0;
845         NLenum storage = __NL_ROWS;
846         __NLContext *context = __nlCurrentContext;
847
848         __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
849
850         if (!context->solve_again) {
851                 for(i=0; i<context->nb_variables; i++) {
852                         if(context->variable[i].locked) {
853                                 context->variable[i].index = ~0;
854                                 context->variable[i].a = __NL_NEW(__NLRowColumn);
855                                 __nlRowColumnConstruct(context->variable[i].a);
856                         }
857                         else
858                                 context->variable[i].index = n++;
859                 }
860
861                 m = (context->nb_rows == 0)? n: context->nb_rows;
862
863                 context->m = m;
864                 context->n = n;
865
866                 __nlSparseMatrixConstruct(&context->M, m, n, storage);
867                 context->alloc_M = NL_TRUE;
868
869                 context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
870                 context->alloc_b = NL_TRUE;
871
872                 context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
873                 context->alloc_x = NL_TRUE;
874         }
875         else {
876                 /* need to recompute b only, A is not constructed anymore */
877                 __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
878         }
879
880         __nlVariablesToVector();
881 }
882
883 static void __nlEndMatrixRHS(NLuint rhs) {
884         __NLContext *context = __nlCurrentContext;
885         __NLVariable *variable;
886         __NLRowColumn *a;
887         NLfloat *b, *Mtb;
888         NLuint i, j;
889
890         b = context->b + context->m*rhs;
891         Mtb = context->Mtb + context->n*rhs;
892
893         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
894                 variable = &(context->variable[i]);
895
896                 if(variable->locked) {
897                         a = variable->a;
898
899                         for(j=0; j<a->size; j++) {
900                                 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs];
901                         }
902                 }
903         }
904
905         if(context->least_squares)
906                 __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb);
907 }
908
909 static void __nlEndMatrix() {
910         __NLContext *context = __nlCurrentContext;
911         NLuint i;
912
913         __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);       
914         
915         if(context->least_squares) {
916                 if(!__nlCurrentContext->solve_again) {
917                         __nlSparseMatrix_square(&context->MtM, &context->M);
918                         context->alloc_MtM = NL_TRUE;
919
920                         context->Mtb =
921                                 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
922                         context->alloc_Mtb = NL_TRUE;
923                 }
924         }
925
926         for(i=0; i<context->nb_rhs; i++)
927                 __nlEndMatrixRHS(i);
928 }
929
930 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
931 {
932         __NLContext *context = __nlCurrentContext;
933
934         __nlCheckState(__NL_STATE_MATRIX);
935
936         if(context->solve_again)
937                 return;
938
939         if (!context->least_squares && context->variable[row].locked);
940         else if (context->variable[col].locked) {
941                 if(!context->least_squares)
942                         row = context->variable[row].index;
943                 __nlRowColumnAppend(context->variable[col].a, row, value);
944         }
945         else {
946                 __NLSparseMatrix* M  = &context->M;
947                 
948                 if(!context->least_squares)
949                         row = context->variable[row].index;
950                 col = context->variable[col].index;
951                 
952                 __nl_range_assert(row, 0, context->m - 1);
953                 __nl_range_assert(col, 0, context->n - 1);
954
955                 __nlSparseMatrixAdd(M, row, col, value);
956         }
957 }
958
959 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
960 {
961         __NLContext *context = __nlCurrentContext;
962         NLfloat* b = context->b;
963
964         __nlCheckState(__NL_STATE_MATRIX);
965
966         if(context->least_squares) {
967                 __nl_range_assert(index, 0, context->m - 1);
968                 b[rhsindex*context->m + index] += value;
969         }
970         else {
971                 if(!context->variable[index].locked) {
972                         index = context->variable[index].index;
973                         __nl_range_assert(index, 0, context->m - 1);
974
975                         b[rhsindex*context->m + index] += value;
976                 }
977         }
978 }
979
980 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
981 {
982         __NLContext *context = __nlCurrentContext;
983         NLfloat* b = context->b;
984
985         __nlCheckState(__NL_STATE_MATRIX);
986
987         if(context->least_squares) {
988                 __nl_range_assert(index, 0, context->m - 1);
989                 b[rhsindex*context->m + index] = value;
990         }
991         else {
992                 if(!context->variable[index].locked) {
993                         index = context->variable[index].index;
994                         __nl_range_assert(index, 0, context->m - 1);
995
996                         b[rhsindex*context->m + index] = value;
997                 }
998         }
999 }
1000
1001 void nlBegin(NLenum prim) {
1002         switch(prim) {
1003         case NL_SYSTEM: {
1004                 __nlBeginSystem();
1005         } break;
1006         case NL_MATRIX: {
1007                 __nlBeginMatrix();
1008         } break;
1009         default: {
1010                 __nl_assert_not_reached;
1011         }
1012         }
1013 }
1014
1015 void nlEnd(NLenum prim) {
1016         switch(prim) {
1017         case NL_SYSTEM: {
1018                 __nlEndSystem();
1019         } break;
1020         case NL_MATRIX: {
1021                 __nlEndMatrix();
1022         } break;
1023         default: {
1024                 __nl_assert_not_reached;
1025         }
1026         }
1027 }
1028
1029 /************************************************************************/
1030 /* SuperLU wrapper */
1031
1032 /* Note: SuperLU is difficult to call, but it is worth it.      */
1033 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
1034 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
1035
1036         /* OpenNL Context */
1037         __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M;
1038         NLuint n = context->n;
1039         NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
1040
1041         /* Compressed Row Storage matrix representation */
1042         NLint   *xa             = __NL_NEW_ARRAY(NLint, n+1);
1043         NLfloat *rhs    = __NL_NEW_ARRAY(NLfloat, n);
1044         NLfloat *a              = __NL_NEW_ARRAY(NLfloat, nnz);
1045         NLint   *asub   = __NL_NEW_ARRAY(NLint, nnz);
1046         NLint   *etree  = __NL_NEW_ARRAY(NLint, n);
1047
1048         /* SuperLU variables */
1049         SuperMatrix At, AtP;
1050         NLint info, panel_size, relax;
1051         superlu_options_t options;
1052
1053         /* Temporary variables */
1054         NLuint i, jj, count;
1055         
1056         __nl_assert(!(M->storage & __NL_SYMMETRIC));
1057         __nl_assert(M->storage & __NL_ROWS);
1058         __nl_assert(M->m == M->n);
1059         
1060         /* Convert M to compressed column format */
1061         for(i=0, count=0; i<n; i++) {
1062                 __NLRowColumn *Ri = M->row + i;
1063                 xa[i] = count;
1064
1065                 for(jj=0; jj<Ri->size; jj++, count++) {
1066                         a[count] = Ri->coeff[jj].value;
1067                         asub[count] = Ri->coeff[jj].index;
1068                 }
1069         }
1070         xa[n] = nnz;
1071
1072         /* Free M, don't need it anymore at this point */
1073         __nlSparseMatrixClear(M);
1074
1075         /* Create superlu A matrix transposed */
1076         sCreate_CompCol_Matrix(
1077                 &At, n, n, nnz, a, asub, xa, 
1078                 SLU_NC,         /* Colum wise, no supernode */
1079                 SLU_S,          /* floats */ 
1080                 SLU_GE          /* general storage */
1081         );
1082
1083         /* Set superlu options */
1084         set_default_options(&options);
1085         options.ColPerm = MY_PERMC;
1086         options.Fact = DOFACT;
1087
1088         StatInit(&(context->slu.stat));
1089
1090         panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
1091         relax = sp_ienv(2);
1092
1093         /* Compute permutation and permuted matrix */
1094         context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
1095         context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
1096
1097         if ((permutation == NULL) || (*permutation == -1)) {
1098                 get_perm_c(3, &At, context->slu.perm_c);
1099
1100                 if (permutation)
1101                         memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
1102         }
1103         else
1104                 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
1105
1106         sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
1107
1108         /* Decompose into L and U */
1109         sgstrf(&options, &AtP, relax, panel_size,
1110                 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
1111                 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
1112
1113         /* Cleanup */
1114
1115         Destroy_SuperMatrix_Store(&At);
1116         Destroy_CompCol_Permuted(&AtP);
1117
1118         __NL_DELETE_ARRAY(etree);
1119         __NL_DELETE_ARRAY(xa);
1120         __NL_DELETE_ARRAY(rhs);
1121         __NL_DELETE_ARRAY(a);
1122         __NL_DELETE_ARRAY(asub);
1123
1124         context->slu.alloc_slu = NL_TRUE;
1125
1126         return (info == 0);
1127 }
1128
1129 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
1130
1131         /* OpenNL Context */
1132         NLfloat* b = (context->least_squares)? context->Mtb: context->b;
1133         NLfloat* x = context->x;
1134         NLuint n = context->n, j;
1135
1136         /* SuperLU variables */
1137         SuperMatrix B;
1138         NLint info;
1139
1140         for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) {
1141                 /* Create superlu array for B */
1142                 sCreate_Dense_Matrix(
1143                         &B, n, 1, b, n, 
1144                         SLU_DN, /* Fortran-type column-wise storage */
1145                         SLU_S,  /* floats                                                 */
1146                         SLU_GE  /* general                                                */
1147                 );
1148
1149                 /* Forward/Back substitution to compute x */
1150                 sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
1151                         context->slu.perm_c, context->slu.perm_r, &B,
1152                         &(context->slu.stat), &info);
1153
1154                 if(info == 0)
1155                         memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
1156
1157                 Destroy_SuperMatrix_Store(&B);
1158         }
1159
1160         return (info == 0);
1161 }
1162
1163 static void __nlFree_SUPERLU(__NLContext *context) {
1164
1165         Destroy_SuperNode_Matrix(&(context->slu.L));
1166         Destroy_CompCol_Matrix(&(context->slu.U));
1167
1168         StatFree(&(context->slu.stat));
1169
1170         __NL_DELETE_ARRAY(context->slu.perm_r);
1171         __NL_DELETE_ARRAY(context->slu.perm_c);
1172
1173         context->slu.alloc_slu = NL_FALSE;
1174 }
1175
1176 void nlPrintMatrix(void) {
1177         __NLContext *context = __nlCurrentContext;
1178         __NLSparseMatrix* M  = &(context->M);
1179         __NLSparseMatrix* MtM  = &(context->MtM);
1180         float *b = context->b;
1181         NLuint i, jj, k;
1182         NLuint m = context->m;
1183         NLuint n = context->n;
1184         __NLRowColumn* Ri = NULL;
1185         float *value = malloc(sizeof(*value)*(n+m));
1186
1187         printf("A:\n");
1188         for(i=0; i<m; i++) {
1189                 Ri = &(M->row[i]);
1190
1191                 memset(value, 0.0, sizeof(*value)*n);
1192                 for(jj=0; jj<Ri->size; jj++)
1193                         value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1194
1195                 for (k = 0; k<n; k++)
1196                         printf("%.3f ", value[k]);
1197                 printf("\n");
1198         }
1199
1200         for(k=0; k<context->nb_rhs; k++) {
1201                 printf("b (%d):\n", k);
1202                 for(i=0; i<n; i++)
1203                         printf("%f ", b[context->n*k + i]);
1204                 printf("\n");
1205         }
1206
1207         if(context->alloc_MtM) {
1208                 printf("AtA:\n");
1209                 for(i=0; i<n; i++) {
1210                         Ri = &(MtM->row[i]);
1211
1212                         memset(value, 0.0, sizeof(*value)*m);
1213                         for(jj=0; jj<Ri->size; jj++)
1214                                 value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1215
1216                         for (k = 0; k<n; k++)
1217                                 printf("%.3f ", value[k]);
1218                         printf("\n");
1219                 }
1220
1221                 for(k=0; k<context->nb_rhs; k++) {
1222                         printf("Mtb (%d):\n", k);
1223                         for(i=0; i<n; i++)
1224                                 printf("%f ", context->Mtb[context->n*k + i]);
1225                         printf("\n");
1226                 }
1227                 printf("\n");
1228         }
1229
1230         free(value);
1231 }
1232
1233 /************************************************************************/
1234 /* nlSolve() driver routine */
1235
1236 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
1237         NLboolean result = NL_TRUE;
1238
1239         __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
1240
1241         if (!__nlCurrentContext->solve_again)
1242                 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
1243
1244         if (result) {
1245                 result = __nlInvert_SUPERLU(__nlCurrentContext);
1246
1247                 if (result) {
1248                         __nlVectorToVariables();
1249
1250                         if (solveAgain)
1251                                 __nlCurrentContext->solve_again = NL_TRUE;
1252
1253                         __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
1254                 }
1255         }
1256
1257         return result;
1258 }
1259
1260 NLboolean nlSolve() {
1261         return nlSolveAdvanced(NULL, NL_FALSE);
1262 }
1263