Remove the svn:keywords property.
[blender.git] / intern / opennl / intern / opennl.c
1 /** \file opennl/intern/opennl.c
2  *  \ingroup opennlintern
3  */
4 /*
5  *  $Id: opennl.c 35142 2011-02-25 10:24:29Z jesterking $
6  *
7  *  OpenNL: Numerical Library
8  *  Copyright (C) 2004 Bruno Levy
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License as published by
12  *  the Free Software Foundation; either version 2 of the License, or
13  *  (at your option) any later version.
14  *
15  *  This program is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU General Public License for more details.
19  *
20  *  You should have received a copy of the GNU General Public License
21  *  along with this program; if not, write to the Free Software
22  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  *  If you modify this software, you should include a notice giving the
25  *  name of the person performing the modification, the date of modification,
26  *  and the reason for such modification.
27  *
28  *  Contact: Bruno Levy
29  *
30  *       levy@loria.fr
31  *
32  *       ISA Project
33  *       LORIA, INRIA Lorraine, 
34  *       Campus Scientifique, BP 239
35  *       54506 VANDOEUVRE LES NANCY CEDEX 
36  *       FRANCE
37  *
38  *  Note that the GNU General Public License does not permit incorporating
39  *  the Software into proprietary programs. 
40  */
41
42 #include "ONL_opennl.h"
43
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <string.h>
47 #include <math.h>
48
49 #ifdef NL_PARANOID
50 #ifndef NL_DEBUG
51 #define NL_DEBUG
52 #endif
53 #endif
54
55 /* SuperLU includes */
56 #include <ssp_defs.h>
57 #include <util.h>
58
59 /************************************************************************************/
60 /* Assertions */
61
62
63 static void __nl_assertion_failed(char* cond, char* file, int line) {
64         fprintf(
65                 stderr, 
66                 "OpenNL assertion failed: %s, file:%s, line:%d\n",
67                 cond,file,line
68         );
69         abort();
70 }
71
72 static void __nl_range_assertion_failed(
73         float x, float min_val, float max_val, char* file, int line
74 ) {
75         fprintf(
76                 stderr, 
77                 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
78                 x, min_val, max_val, file,line
79         );
80         abort();
81 }
82
83 static void __nl_should_not_have_reached(char* file, int line) {
84         fprintf(
85                 stderr, 
86                 "OpenNL should not have reached this point: file:%s, line:%d\n",
87                 file,line
88         );
89         abort();
90 }
91
92
93 #define __nl_assert(x) {                                                                                \
94         if(!(x)) {                                                                                                \
95                 __nl_assertion_failed(#x,__FILE__, __LINE__);             \
96         }                                                                                                                  \
97
98
99 #define __nl_range_assert(x,min_val,max_val) {                            \
100         if(((x) < (min_val)) || ((x) > (max_val))) {                            \
101                 __nl_range_assertion_failed(x, min_val, max_val,                \
102                         __FILE__, __LINE__                                                                \
103                 );                                                                                                       \
104         }                                                                                                                  \
105 }
106
107 #define __nl_assert_not_reached {                                                          \
108         __nl_should_not_have_reached(__FILE__, __LINE__);                 \
109 }
110
111 #ifdef NL_DEBUG
112 #define __nl_debug_assert(x) __nl_assert(x)
113 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
114 #else
115 #define __nl_debug_assert(x) 
116 #define __nl_debug_range_assert(x,min_val,max_val) 
117 #endif
118
119 #ifdef NL_PARANOID
120 #define __nl_parano_assert(x) __nl_assert(x)
121 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
122 #else
123 #define __nl_parano_assert(x) 
124 #define __nl_parano_range_assert(x,min_val,max_val) 
125 #endif
126
127 /************************************************************************************/
128 /* classic macros */
129
130 #ifndef MIN
131 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 
132 #endif
133
134 #ifndef MAX
135 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 
136 #endif
137
138 /************************************************************************************/
139 /* memory management */
140
141 #define __NL_NEW(T)                             (T*)(calloc(1, sizeof(T))) 
142 #define __NL_NEW_ARRAY(T,NB)       (T*)(calloc((NB),sizeof(T))) 
143 #define __NL_RENEW_ARRAY(T,x,NB)   (T*)(realloc(x,(NB)*sizeof(T))) 
144 #define __NL_DELETE(x)                   free(x); x = NULL 
145 #define __NL_DELETE_ARRAY(x)       free(x); x = NULL
146
147 #define __NL_CLEAR(T, x)                   memset(x, 0, sizeof(T)) 
148 #define __NL_CLEAR_ARRAY(T,x,NB)   memset(x, 0, (NB)*sizeof(T)) 
149
150 /************************************************************************************/
151 /* Dynamic arrays for sparse row/columns */
152
153 typedef struct {
154         NLuint   index;
155         NLfloat value;
156 } __NLCoeff;
157
158 typedef struct {
159         NLuint size;
160         NLuint capacity;
161         __NLCoeff* coeff;
162 } __NLRowColumn;
163
164 static void __nlRowColumnConstruct(__NLRowColumn* c) {
165         c->size  = 0;
166         c->capacity = 0;
167         c->coeff        = NULL;
168 }
169
170 static void __nlRowColumnDestroy(__NLRowColumn* c) {
171         __NL_DELETE_ARRAY(c->coeff);
172 #ifdef NL_PARANOID
173         __NL_CLEAR(__NLRowColumn, c); 
174 #endif
175 }
176
177 static void __nlRowColumnGrow(__NLRowColumn* c) {
178         if(c->capacity != 0) {
179                 c->capacity = 2 * c->capacity;
180                 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
181         } else {
182                 c->capacity = 4;
183                 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
184         }
185 }
186
187 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
188         NLuint i;
189         for(i=0; i<c->size; i++) {
190                 if(c->coeff[i].index == (NLuint)index) {
191                         c->coeff[i].value += value;
192                         return;
193                 }
194         }
195         if(c->size == c->capacity) {
196                 __nlRowColumnGrow(c);
197         }
198         c->coeff[c->size].index = index;
199         c->coeff[c->size].value = value;
200         c->size++;
201 }
202
203 /* Does not check whether the index already exists */
204 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
205         if(c->size == c->capacity) {
206                 __nlRowColumnGrow(c);
207         }
208         c->coeff[c->size].index = index;
209         c->coeff[c->size].value = value;
210         c->size++;
211 }
212
213 static void __nlRowColumnClear(__NLRowColumn* c) {
214         c->size  = 0;
215         c->capacity = 0;
216         __NL_DELETE_ARRAY(c->coeff);
217 }
218
219 /************************************************************************************/
220 /* SparseMatrix data structure */
221
222 #define __NL_ROWS         1
223 #define __NL_COLUMNS   2
224 #define __NL_SYMMETRIC 4
225
226 typedef struct {
227         NLuint m;
228         NLuint n;
229         NLuint diag_size;
230         NLenum storage;
231         __NLRowColumn* row;
232         __NLRowColumn* column;
233         NLfloat*          diag;
234 } __NLSparseMatrix;
235
236
237 static void __nlSparseMatrixConstruct(
238         __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
239 ) {
240         NLuint i;
241         M->m = m;
242         M->n = n;
243         M->storage = storage;
244         if(storage & __NL_ROWS) {
245                 M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
246                 for(i=0; i<m; i++) {
247                         __nlRowColumnConstruct(&(M->row[i]));
248                 }
249         } else {
250                 M->row = NULL;
251         }
252
253         if(storage & __NL_COLUMNS) {
254                 M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
255                 for(i=0; i<n; i++) {
256                         __nlRowColumnConstruct(&(M->column[i]));
257                 }
258         } else {
259                 M->column = NULL;
260         }
261
262         M->diag_size = MIN(m,n);
263         M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
264 }
265
266 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
267         NLuint i;
268         __NL_DELETE_ARRAY(M->diag);
269         if(M->storage & __NL_ROWS) {
270                 for(i=0; i<M->m; i++) {
271                         __nlRowColumnDestroy(&(M->row[i]));
272                 }
273                 __NL_DELETE_ARRAY(M->row);
274         }
275         if(M->storage & __NL_COLUMNS) {
276                 for(i=0; i<M->n; i++) {
277                         __nlRowColumnDestroy(&(M->column[i]));
278                 }
279                 __NL_DELETE_ARRAY(M->column);
280         }
281 #ifdef NL_PARANOID
282         __NL_CLEAR(__NLSparseMatrix,M);
283 #endif
284 }
285
286 static void __nlSparseMatrixAdd(
287         __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
288 ) {
289         __nl_parano_range_assert(i, 0, M->m - 1);
290         __nl_parano_range_assert(j, 0, M->n - 1);
291         if((M->storage & __NL_SYMMETRIC) && (j > i)) {
292                 return;
293         }
294         if(i == j) {
295                 M->diag[i] += value;
296         }
297         if(M->storage & __NL_ROWS) {
298                 __nlRowColumnAdd(&(M->row[i]), j, value);
299         }
300         if(M->storage & __NL_COLUMNS) {
301                 __nlRowColumnAdd(&(M->column[j]), i, value);
302         }
303 }
304
305 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
306         NLuint i;
307         if(M->storage & __NL_ROWS) {
308                 for(i=0; i<M->m; i++) {
309                         __nlRowColumnClear(&(M->row[i]));
310                 }
311         }
312         if(M->storage & __NL_COLUMNS) {
313                 for(i=0; i<M->n; i++) {
314                         __nlRowColumnClear(&(M->column[i]));
315                 }
316         }
317         __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);       
318 }
319
320 /* Returns the number of non-zero coefficients */
321 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
322         NLuint nnz = 0;
323         NLuint i;
324         if(M->storage & __NL_ROWS) {
325                 for(i = 0; i<M->m; i++) {
326                         nnz += M->row[i].size;
327                 }
328         } else if (M->storage & __NL_COLUMNS) {
329                 for(i = 0; i<M->n; i++) {
330                         nnz += M->column[i].size;
331                 }
332         } else {
333                 __nl_assert_not_reached;
334         }
335         return nnz;
336 }
337
338 /************************************************************************************/
339 /* SparseMatrix x Vector routines, internal helper routines */
340
341 static void __nlSparseMatrix_mult_rows_symmetric(
342         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
343 ) {
344         NLuint m = A->m;
345         NLuint i,ij;
346         __NLRowColumn* Ri = NULL;
347         __NLCoeff* c = NULL;
348         for(i=0; i<m; i++) {
349                 y[i] = 0;
350                 Ri = &(A->row[i]);
351                 for(ij=0; ij<Ri->size; ij++) {
352                         c = &(Ri->coeff[ij]);
353                         y[i] += c->value * x[c->index];
354                         if(i != c->index) {
355                                 y[c->index] += c->value * x[i];
356                         }
357                 }
358         }
359 }
360
361 static void __nlSparseMatrix_mult_rows(
362         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
363 ) {
364         NLuint m = A->m;
365         NLuint i,ij;
366         __NLRowColumn* Ri = NULL;
367         __NLCoeff* c = NULL;
368         for(i=0; i<m; i++) {
369                 y[i] = 0;
370                 Ri = &(A->row[i]);
371                 for(ij=0; ij<Ri->size; ij++) {
372                         c = &(Ri->coeff[ij]);
373                         y[i] += c->value * x[c->index];
374                 }
375         }
376 }
377
378 static void __nlSparseMatrix_mult_cols_symmetric(
379         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
380 ) {
381         NLuint n = A->n;
382         NLuint j,ii;
383         __NLRowColumn* Cj = NULL;
384         __NLCoeff* c = NULL;
385         for(j=0; j<n; j++) {
386                 y[j] = 0;
387                 Cj = &(A->column[j]);
388                 for(ii=0; ii<Cj->size; ii++) {
389                         c = &(Cj->coeff[ii]);
390                         y[c->index] += c->value * x[j];
391                         if(j != c->index) {
392                                 y[j] += c->value * x[c->index];
393                         }
394                 }
395         }
396 }
397
398 static void __nlSparseMatrix_mult_cols(
399         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
400 ) {
401         NLuint n = A->n;
402         NLuint j,ii; 
403         __NLRowColumn* Cj = NULL;
404         __NLCoeff* c = NULL;
405         __NL_CLEAR_ARRAY(NLfloat, y, A->m);
406         for(j=0; j<n; j++) {
407                 Cj = &(A->column[j]);
408                 for(ii=0; ii<Cj->size; ii++) {
409                         c = &(Cj->coeff[ii]);
410                         y[c->index] += c->value * x[j];
411                 }
412         }
413 }
414
415 /************************************************************************************/
416 /* SparseMatrix x Vector routines, main driver routine */
417
418 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
419         if(A->storage & __NL_ROWS) {
420                 if(A->storage & __NL_SYMMETRIC) {
421                         __nlSparseMatrix_mult_rows_symmetric(A, x, y);
422                 } else {
423                         __nlSparseMatrix_mult_rows(A, x, y);
424                 }
425         } else {
426                 if(A->storage & __NL_SYMMETRIC) {
427                         __nlSparseMatrix_mult_cols_symmetric(A, x, y);
428                 } else {
429                         __nlSparseMatrix_mult_cols(A, x, y);
430                 }
431         }
432 }
433
434 /* ****************** Routines for least squares ******************* */
435
436 static void __nlSparseMatrix_square(
437         __NLSparseMatrix* AtA, __NLSparseMatrix *A
438 ) {
439         NLuint m = A->m;
440         NLuint n = A->n;
441         NLuint i, j0, j1;
442         __NLRowColumn *Ri = NULL;
443         __NLCoeff *c0 = NULL, *c1 = NULL;
444         float value;
445
446         __nlSparseMatrixConstruct(AtA, n, n, A->storage);
447
448         for(i=0; i<m; i++) {
449                 Ri = &(A->row[i]);
450
451                 for(j0=0; j0<Ri->size; j0++) {
452                         c0 = &(Ri->coeff[j0]);
453                         for(j1=0; j1<Ri->size; j1++) {
454                                 c1 = &(Ri->coeff[j1]);
455
456                                 value = c0->value*c1->value;
457                                 __nlSparseMatrixAdd(AtA, c0->index, c1->index, value);
458                         }
459                 }
460         }
461 }
462
463 static void __nlSparseMatrix_transpose_mult_rows(
464         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
465 ) {
466         NLuint m = A->m;
467         NLuint n = A->n;
468         NLuint i,ij;
469         __NLRowColumn* Ri = NULL;
470         __NLCoeff* c = NULL;
471
472         __NL_CLEAR_ARRAY(NLfloat, y, n);
473
474         for(i=0; i<m; i++) {
475                 Ri = &(A->row[i]);
476                 for(ij=0; ij<Ri->size; ij++) {
477                         c = &(Ri->coeff[ij]);
478                         y[c->index] += c->value * x[i];
479                 }
480         }
481 }
482
483 /************************************************************************************/
484 /* NLContext data structure */
485
486 typedef void(*__NLMatrixFunc)(float* x, float* y);
487
488 typedef struct {
489         NLfloat  value[4];
490         NLboolean locked;
491         NLuint  index;
492         __NLRowColumn *a;
493 } __NLVariable;
494
495 #define __NL_STATE_INITIAL                              0
496 #define __NL_STATE_SYSTEM                               1
497 #define __NL_STATE_MATRIX                               2
498 #define __NL_STATE_MATRIX_CONSTRUCTED   3
499 #define __NL_STATE_SYSTEM_CONSTRUCTED   4
500 #define __NL_STATE_SYSTEM_SOLVED                5
501
502 typedef struct {
503         NLenum                  state;
504         NLuint                  n;
505         NLuint                  m;
506         __NLVariable*   variable;
507         NLfloat*                b;
508         NLfloat*                Mtb;
509         __NLSparseMatrix M;
510         __NLSparseMatrix MtM;
511         NLfloat*                x;
512         NLuint                  nb_variables;
513         NLuint                  nb_rows;
514         NLboolean               least_squares;
515         NLboolean               symmetric;
516         NLuint                  nb_rhs;
517         NLboolean               solve_again;
518         NLboolean               alloc_M;
519         NLboolean               alloc_MtM;
520         NLboolean               alloc_variable;
521         NLboolean               alloc_x;
522         NLboolean               alloc_b;
523         NLboolean               alloc_Mtb;
524         NLfloat                 error;
525         __NLMatrixFunc  matrix_vector_prod;
526
527         struct __NLSuperLUContext {
528                 NLboolean alloc_slu;
529                 SuperMatrix L, U;
530                 NLint *perm_c, *perm_r;
531                 SuperLUStat_t stat;
532         } slu;
533 } __NLContext;
534
535 static __NLContext* __nlCurrentContext = NULL;
536
537 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
538         __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
539 }
540
541
542 NLContext nlNewContext(void) {
543         __NLContext* result       = __NL_NEW(__NLContext);
544         result->state                   = __NL_STATE_INITIAL;
545         result->matrix_vector_prod = __nlMatrixVectorProd_default;
546         result->nb_rhs = 1;
547         nlMakeCurrent(result);
548         return result;
549 }
550
551 static void __nlFree_SUPERLU(__NLContext *context);
552
553 void nlDeleteContext(NLContext context_in) {
554         __NLContext* context = (__NLContext*)(context_in);
555         int i;
556
557         if(__nlCurrentContext == context) {
558                 __nlCurrentContext = NULL;
559         }
560         if(context->alloc_M) {
561                 __nlSparseMatrixDestroy(&context->M);
562         }
563         if(context->alloc_MtM) {
564                 __nlSparseMatrixDestroy(&context->MtM);
565         }
566         if(context->alloc_variable) {
567                 for(i=0; i<context->nb_variables; i++) {
568                         if(context->variable[i].a) {
569                                 __nlRowColumnDestroy(context->variable[i].a);
570                                 __NL_DELETE(context->variable[i].a);
571                         }
572                 }
573         }
574         if(context->alloc_b) {
575                 __NL_DELETE_ARRAY(context->b);
576         }
577         if(context->alloc_Mtb) {
578                 __NL_DELETE_ARRAY(context->Mtb);
579         }
580         if(context->alloc_x) {
581                 __NL_DELETE_ARRAY(context->x);
582         }
583         if (context->slu.alloc_slu) {
584                 __nlFree_SUPERLU(context);
585         }
586
587 #ifdef NL_PARANOID
588         __NL_CLEAR(__NLContext, context);
589 #endif
590         __NL_DELETE(context);
591 }
592
593 void nlMakeCurrent(NLContext context) {
594         __nlCurrentContext = (__NLContext*)(context);
595 }
596
597 NLContext nlGetCurrent(void) {
598         return __nlCurrentContext;
599 }
600
601 static void __nlCheckState(NLenum state) {
602         __nl_assert(__nlCurrentContext->state == state);
603 }
604
605 static void __nlTransition(NLenum from_state, NLenum to_state) {
606         __nlCheckState(from_state);
607         __nlCurrentContext->state = to_state;
608 }
609
610 /************************************************************************************/
611 /* Get/Set parameters */
612
613 void nlSolverParameterf(NLenum pname, NLfloat param) {
614         __nlCheckState(__NL_STATE_INITIAL);
615         switch(pname) {
616         case NL_NB_VARIABLES: {
617                 __nl_assert(param > 0);
618                 __nlCurrentContext->nb_variables = (NLuint)param;
619         } break;
620         case NL_NB_ROWS: {
621                 __nl_assert(param > 0);
622                 __nlCurrentContext->nb_rows = (NLuint)param;
623         } break;
624         case NL_LEAST_SQUARES: {
625                 __nlCurrentContext->least_squares = (NLboolean)param;
626         } break;
627         case NL_SYMMETRIC: {
628                 __nlCurrentContext->symmetric = (NLboolean)param;               
629         } break;
630         case NL_NB_RIGHT_HAND_SIDES: {
631                 __nlCurrentContext->nb_rhs = (NLuint)param;
632         } break;
633         default: {
634                 __nl_assert_not_reached;
635         } break;
636         }
637 }
638
639 void nlSolverParameteri(NLenum pname, NLint param) {
640         __nlCheckState(__NL_STATE_INITIAL);
641         switch(pname) {
642         case NL_NB_VARIABLES: {
643                 __nl_assert(param > 0);
644                 __nlCurrentContext->nb_variables = (NLuint)param;
645         } break;
646         case NL_NB_ROWS: {
647                 __nl_assert(param > 0);
648                 __nlCurrentContext->nb_rows = (NLuint)param;
649         } break;
650         case NL_LEAST_SQUARES: {
651                 __nlCurrentContext->least_squares = (NLboolean)param;
652         } break;
653         case NL_SYMMETRIC: {
654                 __nlCurrentContext->symmetric = (NLboolean)param;               
655         } break;
656         case NL_NB_RIGHT_HAND_SIDES: {
657                 __nlCurrentContext->nb_rhs = (NLuint)param;
658         } break;
659         default: {
660                 __nl_assert_not_reached;
661         } break;
662         }
663 }
664
665 void nlGetBooleanv(NLenum pname, NLboolean* params) {
666         switch(pname) {
667         case NL_LEAST_SQUARES: {
668                 *params = __nlCurrentContext->least_squares;
669         } break;
670         case NL_SYMMETRIC: {
671                 *params = __nlCurrentContext->symmetric;
672         } break;
673         default: {
674                 __nl_assert_not_reached;
675         } break;
676         }
677 }
678
679 void nlGetFloatv(NLenum pname, NLfloat* params) {
680         switch(pname) {
681         case NL_NB_VARIABLES: {
682                 *params = (NLfloat)(__nlCurrentContext->nb_variables);
683         } break;
684         case NL_NB_ROWS: {
685                 *params = (NLfloat)(__nlCurrentContext->nb_rows);
686         } break;
687         case NL_LEAST_SQUARES: {
688                 *params = (NLfloat)(__nlCurrentContext->least_squares);
689         } break;
690         case NL_SYMMETRIC: {
691                 *params = (NLfloat)(__nlCurrentContext->symmetric);
692         } break;
693         case NL_ERROR: {
694                 *params = (NLfloat)(__nlCurrentContext->error);
695         } break;
696         default: {
697                 __nl_assert_not_reached;
698         } break;
699         }
700 }
701
702 void nlGetIntergerv(NLenum pname, NLint* params) {
703         switch(pname) {
704         case NL_NB_VARIABLES: {
705                 *params = (NLint)(__nlCurrentContext->nb_variables);
706         } break;
707         case NL_NB_ROWS: {
708                 *params = (NLint)(__nlCurrentContext->nb_rows);
709         } break;
710         case NL_LEAST_SQUARES: {
711                 *params = (NLint)(__nlCurrentContext->least_squares);
712         } break;
713         case NL_SYMMETRIC: {
714                 *params = (NLint)(__nlCurrentContext->symmetric);
715         } break;
716         default: {
717                 __nl_assert_not_reached;
718         } break;
719         }
720 }
721
722 /************************************************************************************/
723 /* Enable / Disable */
724
725 void nlEnable(NLenum pname) {
726         switch(pname) {
727         default: {
728                 __nl_assert_not_reached;
729         }
730         }
731 }
732
733 void nlDisable(NLenum pname) {
734         switch(pname) {
735         default: {
736                 __nl_assert_not_reached;
737         }
738         }
739 }
740
741 NLboolean nlIsEnabled(NLenum pname) {
742         switch(pname) {
743         default: {
744                 __nl_assert_not_reached;
745         }
746         }
747         return NL_FALSE;
748 }
749
750 /************************************************************************************/
751 /* Get/Set Lock/Unlock variables */
752
753 void nlSetVariable(NLuint rhsindex, NLuint index, NLfloat value) {
754         __nlCheckState(__NL_STATE_SYSTEM);
755         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
756         __nlCurrentContext->variable[index].value[rhsindex] = value;    
757 }
758
759 NLfloat nlGetVariable(NLuint rhsindex, NLuint index) {
760         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
761         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
762         return __nlCurrentContext->variable[index].value[rhsindex];
763 }
764
765 void nlLockVariable(NLuint index) {
766         __nlCheckState(__NL_STATE_SYSTEM);
767         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
768         __nlCurrentContext->variable[index].locked = NL_TRUE;
769 }
770
771 void nlUnlockVariable(NLuint index) {
772         __nlCheckState(__NL_STATE_SYSTEM);
773         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
774         __nlCurrentContext->variable[index].locked = NL_FALSE;
775 }
776
777 NLboolean nlVariableIsLocked(NLuint index) {
778         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
779         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
780         return __nlCurrentContext->variable[index].locked;
781 }
782
783 /************************************************************************************/
784 /* System construction */
785
786 static void __nlVariablesToVector() {
787         __NLContext *context = __nlCurrentContext;
788         NLuint i, j, nb_rhs;
789
790         __nl_assert(context->alloc_x);
791         __nl_assert(context->alloc_variable);
792
793         nb_rhs= context->nb_rhs;
794
795         for(i=0; i<context->nb_variables; i++) {
796                 __NLVariable* v = &(context->variable[i]);
797                 if(!v->locked) {
798                         __nl_assert(v->index < context->n);
799
800                         for(j=0; j<nb_rhs; j++)
801                                 context->x[context->n*j + v->index] = v->value[j];
802                 }
803         }
804 }
805
806 static void __nlVectorToVariables() {
807         __NLContext *context = __nlCurrentContext;
808         NLuint i, j, nb_rhs;
809
810         __nl_assert(context->alloc_x);
811         __nl_assert(context->alloc_variable);
812
813         nb_rhs= context->nb_rhs;
814
815         for(i=0; i<context->nb_variables; i++) {
816                 __NLVariable* v = &(context->variable[i]);
817                 if(!v->locked) {
818                         __nl_assert(v->index < context->n);
819
820                         for(j=0; j<nb_rhs; j++)
821                                 v->value[j] = context->x[context->n*j + v->index];
822                 }
823         }
824 }
825
826 static void __nlBeginSystem() {
827         __nl_assert(__nlCurrentContext->nb_variables > 0);
828
829         if (__nlCurrentContext->solve_again)
830                 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
831         else {
832                 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
833
834                 __nlCurrentContext->variable = __NL_NEW_ARRAY(
835                         __NLVariable, __nlCurrentContext->nb_variables);
836                 
837                 __nlCurrentContext->alloc_variable = NL_TRUE;
838         }
839 }
840
841 static void __nlEndSystem() {
842         __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);   
843 }
844
845 static void __nlBeginMatrix() {
846         NLuint i;
847         NLuint m = 0, n = 0;
848         NLenum storage = __NL_ROWS;
849         __NLContext *context = __nlCurrentContext;
850
851         __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
852
853         if (!context->solve_again) {
854                 for(i=0; i<context->nb_variables; i++) {
855                         if(context->variable[i].locked) {
856                                 context->variable[i].index = ~0;
857                                 context->variable[i].a = __NL_NEW(__NLRowColumn);
858                                 __nlRowColumnConstruct(context->variable[i].a);
859                         }
860                         else
861                                 context->variable[i].index = n++;
862                 }
863
864                 m = (context->nb_rows == 0)? n: context->nb_rows;
865
866                 context->m = m;
867                 context->n = n;
868
869                 __nlSparseMatrixConstruct(&context->M, m, n, storage);
870                 context->alloc_M = NL_TRUE;
871
872                 context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs);
873                 context->alloc_b = NL_TRUE;
874
875                 context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs);
876                 context->alloc_x = NL_TRUE;
877         }
878         else {
879                 /* need to recompute b only, A is not constructed anymore */
880                 __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs);
881         }
882
883         __nlVariablesToVector();
884 }
885
886 static void __nlEndMatrixRHS(NLuint rhs) {
887         __NLContext *context = __nlCurrentContext;
888         __NLVariable *variable;
889         __NLRowColumn *a;
890         NLfloat *b, *Mtb;
891         NLuint i, j;
892
893         b = context->b + context->m*rhs;
894         Mtb = context->Mtb + context->n*rhs;
895
896         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
897                 variable = &(context->variable[i]);
898
899                 if(variable->locked) {
900                         a = variable->a;
901
902                         for(j=0; j<a->size; j++) {
903                                 b[a->coeff[j].index] -= a->coeff[j].value*variable->value[rhs];
904                         }
905                 }
906         }
907
908         if(context->least_squares)
909                 __nlSparseMatrix_transpose_mult_rows(&context->M, b, Mtb);
910 }
911
912 static void __nlEndMatrix() {
913         __NLContext *context = __nlCurrentContext;
914         NLuint i;
915
916         __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);       
917         
918         if(context->least_squares) {
919                 if(!__nlCurrentContext->solve_again) {
920                         __nlSparseMatrix_square(&context->MtM, &context->M);
921                         context->alloc_MtM = NL_TRUE;
922
923                         context->Mtb =
924                                 __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs);
925                         context->alloc_Mtb = NL_TRUE;
926                 }
927         }
928
929         for(i=0; i<context->nb_rhs; i++)
930                 __nlEndMatrixRHS(i);
931 }
932
933 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
934 {
935         __NLContext *context = __nlCurrentContext;
936
937         __nlCheckState(__NL_STATE_MATRIX);
938
939         if(context->solve_again)
940                 return;
941
942         if (!context->least_squares && context->variable[row].locked);
943         else if (context->variable[col].locked) {
944                 if(!context->least_squares)
945                         row = context->variable[row].index;
946                 __nlRowColumnAppend(context->variable[col].a, row, value);
947         }
948         else {
949                 __NLSparseMatrix* M  = &context->M;
950                 
951                 if(!context->least_squares)
952                         row = context->variable[row].index;
953                 col = context->variable[col].index;
954                 
955                 __nl_range_assert(row, 0, context->m - 1);
956                 __nl_range_assert(col, 0, context->n - 1);
957
958                 __nlSparseMatrixAdd(M, row, col, value);
959         }
960 }
961
962 void nlRightHandSideAdd(NLuint rhsindex, NLuint index, NLfloat value)
963 {
964         __NLContext *context = __nlCurrentContext;
965         NLfloat* b = context->b;
966
967         __nlCheckState(__NL_STATE_MATRIX);
968
969         if(context->least_squares) {
970                 __nl_range_assert(index, 0, context->m - 1);
971                 b[rhsindex*context->m + index] += value;
972         }
973         else {
974                 if(!context->variable[index].locked) {
975                         index = context->variable[index].index;
976                         __nl_range_assert(index, 0, context->m - 1);
977
978                         b[rhsindex*context->m + index] += value;
979                 }
980         }
981 }
982
983 void nlRightHandSideSet(NLuint rhsindex, NLuint index, NLfloat value)
984 {
985         __NLContext *context = __nlCurrentContext;
986         NLfloat* b = context->b;
987
988         __nlCheckState(__NL_STATE_MATRIX);
989
990         if(context->least_squares) {
991                 __nl_range_assert(index, 0, context->m - 1);
992                 b[rhsindex*context->m + index] = value;
993         }
994         else {
995                 if(!context->variable[index].locked) {
996                         index = context->variable[index].index;
997                         __nl_range_assert(index, 0, context->m - 1);
998
999                         b[rhsindex*context->m + index] = value;
1000                 }
1001         }
1002 }
1003
1004 void nlBegin(NLenum prim) {
1005         switch(prim) {
1006         case NL_SYSTEM: {
1007                 __nlBeginSystem();
1008         } break;
1009         case NL_MATRIX: {
1010                 __nlBeginMatrix();
1011         } break;
1012         default: {
1013                 __nl_assert_not_reached;
1014         }
1015         }
1016 }
1017
1018 void nlEnd(NLenum prim) {
1019         switch(prim) {
1020         case NL_SYSTEM: {
1021                 __nlEndSystem();
1022         } break;
1023         case NL_MATRIX: {
1024                 __nlEndMatrix();
1025         } break;
1026         default: {
1027                 __nl_assert_not_reached;
1028         }
1029         }
1030 }
1031
1032 /************************************************************************/
1033 /* SuperLU wrapper */
1034
1035 /* Note: SuperLU is difficult to call, but it is worth it.      */
1036 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
1037 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
1038
1039         /* OpenNL Context */
1040         __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M;
1041         NLuint n = context->n;
1042         NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
1043
1044         /* Compressed Row Storage matrix representation */
1045         NLint   *xa             = __NL_NEW_ARRAY(NLint, n+1);
1046         NLfloat *rhs    = __NL_NEW_ARRAY(NLfloat, n);
1047         NLfloat *a              = __NL_NEW_ARRAY(NLfloat, nnz);
1048         NLint   *asub   = __NL_NEW_ARRAY(NLint, nnz);
1049         NLint   *etree  = __NL_NEW_ARRAY(NLint, n);
1050
1051         /* SuperLU variables */
1052         SuperMatrix At, AtP;
1053         NLint info, panel_size, relax;
1054         superlu_options_t options;
1055
1056         /* Temporary variables */
1057         NLuint i, jj, count;
1058         
1059         __nl_assert(!(M->storage & __NL_SYMMETRIC));
1060         __nl_assert(M->storage & __NL_ROWS);
1061         __nl_assert(M->m == M->n);
1062         
1063         /* Convert M to compressed column format */
1064         for(i=0, count=0; i<n; i++) {
1065                 __NLRowColumn *Ri = M->row + i;
1066                 xa[i] = count;
1067
1068                 for(jj=0; jj<Ri->size; jj++, count++) {
1069                         a[count] = Ri->coeff[jj].value;
1070                         asub[count] = Ri->coeff[jj].index;
1071                 }
1072         }
1073         xa[n] = nnz;
1074
1075         /* Free M, don't need it anymore at this point */
1076         __nlSparseMatrixClear(M);
1077
1078         /* Create superlu A matrix transposed */
1079         sCreate_CompCol_Matrix(
1080                 &At, n, n, nnz, a, asub, xa, 
1081                 SLU_NC,         /* Colum wise, no supernode */
1082                 SLU_S,          /* floats */ 
1083                 SLU_GE          /* general storage */
1084         );
1085
1086         /* Set superlu options */
1087         set_default_options(&options);
1088         options.ColPerm = MY_PERMC;
1089         options.Fact = DOFACT;
1090
1091         StatInit(&(context->slu.stat));
1092
1093         panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
1094         relax = sp_ienv(2);
1095
1096         /* Compute permutation and permuted matrix */
1097         context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
1098         context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
1099
1100         if ((permutation == NULL) || (*permutation == -1)) {
1101                 get_perm_c(3, &At, context->slu.perm_c);
1102
1103                 if (permutation)
1104                         memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
1105         }
1106         else
1107                 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
1108
1109         sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
1110
1111         /* Decompose into L and U */
1112         sgstrf(&options, &AtP, relax, panel_size,
1113                 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
1114                 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
1115
1116         /* Cleanup */
1117
1118         Destroy_SuperMatrix_Store(&At);
1119         Destroy_CompCol_Permuted(&AtP);
1120
1121         __NL_DELETE_ARRAY(etree);
1122         __NL_DELETE_ARRAY(xa);
1123         __NL_DELETE_ARRAY(rhs);
1124         __NL_DELETE_ARRAY(a);
1125         __NL_DELETE_ARRAY(asub);
1126
1127         context->slu.alloc_slu = NL_TRUE;
1128
1129         return (info == 0);
1130 }
1131
1132 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
1133
1134         /* OpenNL Context */
1135         NLfloat* b = (context->least_squares)? context->Mtb: context->b;
1136         NLfloat* x = context->x;
1137         NLuint n = context->n, j;
1138
1139         /* SuperLU variables */
1140         SuperMatrix B;
1141         NLint info;
1142
1143         for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) {
1144                 /* Create superlu array for B */
1145                 sCreate_Dense_Matrix(
1146                         &B, n, 1, b, n, 
1147                         SLU_DN, /* Fortran-type column-wise storage */
1148                         SLU_S,  /* floats                                                 */
1149                         SLU_GE  /* general                                                */
1150                 );
1151
1152                 /* Forward/Back substitution to compute x */
1153                 sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
1154                         context->slu.perm_c, context->slu.perm_r, &B,
1155                         &(context->slu.stat), &info);
1156
1157                 if(info == 0)
1158                         memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
1159
1160                 Destroy_SuperMatrix_Store(&B);
1161         }
1162
1163         return (info == 0);
1164 }
1165
1166 static void __nlFree_SUPERLU(__NLContext *context) {
1167
1168         Destroy_SuperNode_Matrix(&(context->slu.L));
1169         Destroy_CompCol_Matrix(&(context->slu.U));
1170
1171         StatFree(&(context->slu.stat));
1172
1173         __NL_DELETE_ARRAY(context->slu.perm_r);
1174         __NL_DELETE_ARRAY(context->slu.perm_c);
1175
1176         context->slu.alloc_slu = NL_FALSE;
1177 }
1178
1179 void nlPrintMatrix(void) {
1180         __NLContext *context = __nlCurrentContext;
1181         __NLSparseMatrix* M  = &(context->M);
1182         __NLSparseMatrix* MtM  = &(context->MtM);
1183         float *b = context->b;
1184         NLuint i, jj, k;
1185         NLuint m = context->m;
1186         NLuint n = context->n;
1187         __NLRowColumn* Ri = NULL;
1188         float *value = malloc(sizeof(*value)*(n+m));
1189
1190         printf("A:\n");
1191         for(i=0; i<m; i++) {
1192                 Ri = &(M->row[i]);
1193
1194                 memset(value, 0.0, sizeof(*value)*n);
1195                 for(jj=0; jj<Ri->size; jj++)
1196                         value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1197
1198                 for (k = 0; k<n; k++)
1199                         printf("%.3f ", value[k]);
1200                 printf("\n");
1201         }
1202
1203         for(k=0; k<context->nb_rhs; k++) {
1204                 printf("b (%d):\n", k);
1205                 for(i=0; i<n; i++)
1206                         printf("%f ", b[context->n*k + i]);
1207                 printf("\n");
1208         }
1209
1210         if(context->alloc_MtM) {
1211                 printf("AtA:\n");
1212                 for(i=0; i<n; i++) {
1213                         Ri = &(MtM->row[i]);
1214
1215                         memset(value, 0.0, sizeof(*value)*m);
1216                         for(jj=0; jj<Ri->size; jj++)
1217                                 value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1218
1219                         for (k = 0; k<n; k++)
1220                                 printf("%.3f ", value[k]);
1221                         printf("\n");
1222                 }
1223
1224                 for(k=0; k<context->nb_rhs; k++) {
1225                         printf("Mtb (%d):\n", k);
1226                         for(i=0; i<n; i++)
1227                                 printf("%f ", context->Mtb[context->n*k + i]);
1228                         printf("\n");
1229                 }
1230                 printf("\n");
1231         }
1232
1233         free(value);
1234 }
1235
1236 /************************************************************************/
1237 /* nlSolve() driver routine */
1238
1239 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
1240         NLboolean result = NL_TRUE;
1241
1242         __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
1243
1244         if (!__nlCurrentContext->solve_again)
1245                 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
1246
1247         if (result) {
1248                 result = __nlInvert_SUPERLU(__nlCurrentContext);
1249
1250                 if (result) {
1251                         __nlVectorToVariables();
1252
1253                         if (solveAgain)
1254                                 __nlCurrentContext->solve_again = NL_TRUE;
1255
1256                         __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
1257                 }
1258         }
1259
1260         return result;
1261 }
1262
1263 NLboolean nlSolve() {
1264         return nlSolveAdvanced(NULL, NL_FALSE);
1265 }
1266