Updates to opennl for mesh laplacian matrix building, to make matrix
[blender-staging.git] / intern / opennl / intern / opennl.c
1 /*
2  *  $Id$
3  *
4  *  OpenNL: Numerical Library
5  *  Copyright (C) 2004 Bruno Levy
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  *  If you modify this software, you should include a notice giving the
22  *  name of the person performing the modification, the date of modification,
23  *  and the reason for such modification.
24  *
25  *  Contact: Bruno Levy
26  *
27  *       levy@loria.fr
28  *
29  *       ISA Project
30  *       LORIA, INRIA Lorraine, 
31  *       Campus Scientifique, BP 239
32  *       54506 VANDOEUVRE LES NANCY CEDEX 
33  *       FRANCE
34  *
35  *  Note that the GNU General Public License does not permit incorporating
36  *  the Software into proprietary programs. 
37  */
38
39 #include "ONL_opennl.h"
40
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <math.h>
45
46 #ifdef NL_PARANOID
47 #ifndef NL_DEBUG
48 #define NL_DEBUG
49 #endif
50 #endif
51
52 /* SuperLU includes */
53 #include <ssp_defs.h>
54 #include <util.h>
55
56 /************************************************************************************/
57 /* Assertions */
58
59
60 static void __nl_assertion_failed(char* cond, char* file, int line) {
61         fprintf(
62                 stderr, 
63                 "OpenNL assertion failed: %s, file:%s, line:%d\n",
64                 cond,file,line
65         );
66         abort();
67 }
68
69 static void __nl_range_assertion_failed(
70         float x, float min_val, float max_val, char* file, int line
71 ) {
72         fprintf(
73                 stderr, 
74                 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
75                 x, min_val, max_val, file,line
76         );
77         abort();
78 }
79
80 static void __nl_should_not_have_reached(char* file, int line) {
81         fprintf(
82                 stderr, 
83                 "OpenNL should not have reached this point: file:%s, line:%d\n",
84                 file,line
85         );
86         abort();
87 }
88
89
90 #define __nl_assert(x) {                                                                                \
91         if(!(x)) {                                                                                                \
92                 __nl_assertion_failed(#x,__FILE__, __LINE__);             \
93         }                                                                                                                  \
94
95
96 #define __nl_range_assert(x,min_val,max_val) {                            \
97         if(((x) < (min_val)) || ((x) > (max_val))) {                            \
98                 __nl_range_assertion_failed(x, min_val, max_val,                \
99                         __FILE__, __LINE__                                                                \
100                 );                                                                                                       \
101         }                                                                                                                  \
102 }
103
104 #define __nl_assert_not_reached {                                                          \
105         __nl_should_not_have_reached(__FILE__, __LINE__);                 \
106 }
107
108 #ifdef NL_DEBUG
109 #define __nl_debug_assert(x) __nl_assert(x)
110 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
111 #else
112 #define __nl_debug_assert(x) 
113 #define __nl_debug_range_assert(x,min_val,max_val) 
114 #endif
115
116 #ifdef NL_PARANOID
117 #define __nl_parano_assert(x) __nl_assert(x)
118 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
119 #else
120 #define __nl_parano_assert(x) 
121 #define __nl_parano_range_assert(x,min_val,max_val) 
122 #endif
123
124 /************************************************************************************/
125 /* classic macros */
126
127 #ifndef MIN
128 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 
129 #endif
130
131 #ifndef MAX
132 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 
133 #endif
134
135 /************************************************************************************/
136 /* memory management */
137
138 #define __NL_NEW(T)                             (T*)(calloc(1, sizeof(T))) 
139 #define __NL_NEW_ARRAY(T,NB)       (T*)(calloc((NB),sizeof(T))) 
140 #define __NL_RENEW_ARRAY(T,x,NB)   (T*)(realloc(x,(NB)*sizeof(T))) 
141 #define __NL_DELETE(x)                   free(x); x = NULL 
142 #define __NL_DELETE_ARRAY(x)       free(x); x = NULL
143
144 #define __NL_CLEAR(T, x)                   memset(x, 0, sizeof(T)) 
145 #define __NL_CLEAR_ARRAY(T,x,NB)   memset(x, 0, (NB)*sizeof(T)) 
146
147 /************************************************************************************/
148 /* Dynamic arrays for sparse row/columns */
149
150 typedef struct {
151         NLuint   index;
152         NLfloat value;
153 } __NLCoeff;
154
155 typedef struct {
156         NLuint size;
157         NLuint capacity;
158         __NLCoeff* coeff;
159 } __NLRowColumn;
160
161 static void __nlRowColumnConstruct(__NLRowColumn* c) {
162         c->size  = 0;
163         c->capacity = 0;
164         c->coeff        = NULL;
165 }
166
167 static void __nlRowColumnDestroy(__NLRowColumn* c) {
168         __NL_DELETE_ARRAY(c->coeff);
169 #ifdef NL_PARANOID
170         __NL_CLEAR(__NLRowColumn, c); 
171 #endif
172 }
173
174 static void __nlRowColumnGrow(__NLRowColumn* c) {
175         if(c->capacity != 0) {
176                 c->capacity = 2 * c->capacity;
177                 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
178         } else {
179                 c->capacity = 4;
180                 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
181         }
182 }
183
184 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
185         NLuint i;
186         for(i=0; i<c->size; i++) {
187                 if(c->coeff[i].index == (NLuint)index) {
188                         c->coeff[i].value += value;
189                         return;
190                 }
191         }
192         if(c->size == c->capacity) {
193                 __nlRowColumnGrow(c);
194         }
195         c->coeff[c->size].index = index;
196         c->coeff[c->size].value = value;
197         c->size++;
198 }
199
200 /* Does not check whether the index already exists */
201 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
202         if(c->size == c->capacity) {
203                 __nlRowColumnGrow(c);
204         }
205         c->coeff[c->size].index = index;
206         c->coeff[c->size].value = value;
207         c->size++;
208 }
209
210 static void __nlRowColumnZero(__NLRowColumn* c) {
211         c->size = 0;
212 }
213
214 static void __nlRowColumnClear(__NLRowColumn* c) {
215         c->size  = 0;
216         c->capacity = 0;
217         __NL_DELETE_ARRAY(c->coeff);
218 }
219
220 /************************************************************************************/
221 /* SparseMatrix data structure */
222
223 #define __NL_ROWS         1
224 #define __NL_COLUMNS   2
225 #define __NL_SYMMETRIC 4
226
227 typedef struct {
228         NLuint m;
229         NLuint n;
230         NLuint diag_size;
231         NLenum storage;
232         __NLRowColumn* row;
233         __NLRowColumn* column;
234         NLfloat*          diag;
235 } __NLSparseMatrix;
236
237
238 static void __nlSparseMatrixConstruct(
239         __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
240 ) {
241         NLuint i;
242         M->m = m;
243         M->n = n;
244         M->storage = storage;
245         if(storage & __NL_ROWS) {
246                 M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
247                 for(i=0; i<n; i++) {
248                         __nlRowColumnConstruct(&(M->row[i]));
249                 }
250         } else {
251                 M->row = NULL;
252         }
253
254         if(storage & __NL_COLUMNS) {
255                 M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
256                 for(i=0; i<n; i++) {
257                         __nlRowColumnConstruct(&(M->column[i]));
258                 }
259         } else {
260                 M->column = NULL;
261         }
262
263         M->diag_size = MIN(m,n);
264         M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
265 }
266
267 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
268         NLuint i;
269         __NL_DELETE_ARRAY(M->diag);
270         if(M->storage & __NL_ROWS) {
271                 for(i=0; i<M->m; i++) {
272                         __nlRowColumnDestroy(&(M->row[i]));
273                 }
274                 __NL_DELETE_ARRAY(M->row);
275         }
276         if(M->storage & __NL_COLUMNS) {
277                 for(i=0; i<M->n; i++) {
278                         __nlRowColumnDestroy(&(M->column[i]));
279                 }
280                 __NL_DELETE_ARRAY(M->column);
281         }
282 #ifdef NL_PARANOID
283         __NL_CLEAR(__NLSparseMatrix,M);
284 #endif
285 }
286
287 static void __nlSparseMatrixAdd(
288         __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
289 ) {
290         __nl_parano_range_assert(i, 0, M->m - 1);
291         __nl_parano_range_assert(j, 0, M->n - 1);
292         if((M->storage & __NL_SYMMETRIC) && (j > i)) {
293                 return;
294         }
295         if(i == j) {
296                 M->diag[i] += value;
297         }
298         if(M->storage & __NL_ROWS) {
299                 __nlRowColumnAdd(&(M->row[i]), j, value);
300         }
301         if(M->storage & __NL_COLUMNS) {
302                 __nlRowColumnAdd(&(M->column[j]), i, value);
303         }
304 }
305
306 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
307         NLuint i;
308         if(M->storage & __NL_ROWS) {
309                 for(i=0; i<M->m; i++) {
310                         __nlRowColumnClear(&(M->row[i]));
311                 }
312         }
313         if(M->storage & __NL_COLUMNS) {
314                 for(i=0; i<M->n; i++) {
315                         __nlRowColumnClear(&(M->column[i]));
316                 }
317         }
318         __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);       
319 }
320
321 /* Returns the number of non-zero coefficients */
322 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
323         NLuint nnz = 0;
324         NLuint i;
325         if(M->storage & __NL_ROWS) {
326                 for(i = 0; i<M->m; i++) {
327                         nnz += M->row[i].size;
328                 }
329         } else if (M->storage & __NL_COLUMNS) {
330                 for(i = 0; i<M->n; i++) {
331                         nnz += M->column[i].size;
332                 }
333         } else {
334                 __nl_assert_not_reached;
335         }
336         return nnz;
337 }
338
339 /************************************************************************************/
340 /* SparseMatrix x Vector routines, internal helper routines */
341
342 static void __nlSparseMatrix_mult_rows_symmetric(
343         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
344 ) {
345         NLuint m = A->m;
346         NLuint i,ij;
347         __NLRowColumn* Ri = NULL;
348         __NLCoeff* c = NULL;
349         for(i=0; i<m; i++) {
350                 y[i] = 0;
351                 Ri = &(A->row[i]);
352                 for(ij=0; ij<Ri->size; ij++) {
353                         c = &(Ri->coeff[ij]);
354                         y[i] += c->value * x[c->index];
355                         if(i != c->index) {
356                                 y[c->index] += c->value * x[i];
357                         }
358                 }
359         }
360 }
361
362 static void __nlSparseMatrix_mult_rows(
363         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
364 ) {
365         NLuint m = A->m;
366         NLuint i,ij;
367         __NLRowColumn* Ri = NULL;
368         __NLCoeff* c = NULL;
369         for(i=0; i<m; i++) {
370                 y[i] = 0;
371                 Ri = &(A->row[i]);
372                 for(ij=0; ij<Ri->size; ij++) {
373                         c = &(Ri->coeff[ij]);
374                         y[i] += c->value * x[c->index];
375                 }
376         }
377 }
378
379 static void __nlSparseMatrix_mult_cols_symmetric(
380         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
381 ) {
382         NLuint n = A->n;
383         NLuint j,ii;
384         __NLRowColumn* Cj = NULL;
385         __NLCoeff* c = NULL;
386         for(j=0; j<n; j++) {
387                 y[j] = 0;
388                 Cj = &(A->column[j]);
389                 for(ii=0; ii<Cj->size; ii++) {
390                         c = &(Cj->coeff[ii]);
391                         y[c->index] += c->value * x[j];
392                         if(j != c->index) {
393                                 y[j] += c->value * x[c->index];
394                         }
395                 }
396         }
397 }
398
399 static void __nlSparseMatrix_mult_cols(
400         __NLSparseMatrix* A, NLfloat* x, NLfloat* y
401 ) {
402         NLuint n = A->n;
403         NLuint j,ii; 
404         __NLRowColumn* Cj = NULL;
405         __NLCoeff* c = NULL;
406         __NL_CLEAR_ARRAY(NLfloat, y, A->m);
407         for(j=0; j<n; j++) {
408                 Cj = &(A->column[j]);
409                 for(ii=0; ii<Cj->size; ii++) {
410                         c = &(Cj->coeff[ii]);
411                         y[c->index] += c->value * x[j];
412                 }
413         }
414 }
415
416 /************************************************************************************/
417 /* SparseMatrix x Vector routines, main driver routine */
418
419 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
420         if(A->storage & __NL_ROWS) {
421                 if(A->storage & __NL_SYMMETRIC) {
422                         __nlSparseMatrix_mult_rows_symmetric(A, x, y);
423                 } else {
424                         __nlSparseMatrix_mult_rows(A, x, y);
425                 }
426         } else {
427                 if(A->storage & __NL_SYMMETRIC) {
428                         __nlSparseMatrix_mult_cols_symmetric(A, x, y);
429                 } else {
430                         __nlSparseMatrix_mult_cols(A, x, y);
431                 }
432         }
433 }
434
435 /************************************************************************************/
436 /* NLContext data structure */
437
438 typedef void(*__NLMatrixFunc)(float* x, float* y);
439
440 typedef struct {
441         NLfloat  value;
442         NLboolean locked;
443         NLuint  index;
444         __NLRowColumn *a;
445 } __NLVariable;
446
447 #define __NL_STATE_INITIAL                              0
448 #define __NL_STATE_SYSTEM                               1
449 #define __NL_STATE_MATRIX                               2
450 #define __NL_STATE_ROW                                  3
451 #define __NL_STATE_MATRIX_CONSTRUCTED   4
452 #define __NL_STATE_SYSTEM_CONSTRUCTED   5
453 #define __NL_STATE_SYSTEM_SOLVED                7
454
455 typedef struct {
456         NLenum             state;
457         NLuint             n;
458         __NLVariable*   variable;
459         NLfloat*                b;
460         __NLSparseMatrix M;
461         __NLRowColumn   af;
462         __NLRowColumn   al;
463         NLfloat*                x;
464         NLfloat          right_hand_side;
465         NLuint             nb_variables;
466         NLuint             current_row;
467         NLboolean               least_squares;
468         NLboolean               symmetric;
469         NLboolean               solve_again;
470         NLboolean               alloc_M;
471         NLboolean               alloc_af;
472         NLboolean               alloc_al;
473         NLboolean               alloc_variable;
474         NLboolean               alloc_x;
475         NLboolean               alloc_b;
476         NLfloat                 error;
477         __NLMatrixFunc  matrix_vector_prod;
478
479         struct __NLSuperLUContext {
480                 NLboolean alloc_slu;
481                 SuperMatrix L, U;
482                 NLint *perm_c, *perm_r;
483                 SuperLUStat_t stat;
484         } slu;
485 } __NLContext;
486
487 static __NLContext* __nlCurrentContext = NULL;
488
489 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
490         __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
491 }
492
493
494 NLContext nlNewContext(void) {
495         __NLContext* result       = __NL_NEW(__NLContext);
496         result->state                   = __NL_STATE_INITIAL;
497         result->right_hand_side  = 0.0;
498         result->matrix_vector_prod = __nlMatrixVectorProd_default;
499         nlMakeCurrent(result);
500         return result;
501 }
502
503 static void __nlFree_SUPERLU(__NLContext *context);
504
505 void nlDeleteContext(NLContext context_in) {
506         __NLContext* context = (__NLContext*)(context_in);
507         int i;
508
509         if(__nlCurrentContext == context) {
510                 __nlCurrentContext = NULL;
511         }
512         if(context->alloc_M) {
513                 __nlSparseMatrixDestroy(&context->M);
514         }
515         if(context->alloc_af) {
516                 __nlRowColumnDestroy(&context->af);
517         }
518         if(context->alloc_al) {
519                 __nlRowColumnDestroy(&context->al);
520         }
521         if(context->alloc_variable) {
522                 for(i=0; i<context->nb_variables; i++) {
523                         if(context->variable[i].a) {
524                                 __nlRowColumnDestroy(context->variable[i].a);
525                                 __NL_DELETE(context->variable[i].a);
526                         }
527                 }
528         }
529         if(context->alloc_b) {
530                 __NL_DELETE_ARRAY(context->b);
531         }
532         if(context->alloc_x) {
533                 __NL_DELETE_ARRAY(context->x);
534         }
535         if (context->slu.alloc_slu) {
536                 __nlFree_SUPERLU(context);
537         }
538
539 #ifdef NL_PARANOID
540         __NL_CLEAR(__NLContext, context);
541 #endif
542         __NL_DELETE(context);
543 }
544
545 void nlMakeCurrent(NLContext context) {
546         __nlCurrentContext = (__NLContext*)(context);
547 }
548
549 NLContext nlGetCurrent(void) {
550         return __nlCurrentContext;
551 }
552
553 static void __nlCheckState(NLenum state) {
554         __nl_assert(__nlCurrentContext->state == state);
555 }
556
557 static void __nlTransition(NLenum from_state, NLenum to_state) {
558         __nlCheckState(from_state);
559         __nlCurrentContext->state = to_state;
560 }
561
562 /************************************************************************************/
563 /* Get/Set parameters */
564
565 void nlSolverParameterf(NLenum pname, NLfloat param) {
566         __nlCheckState(__NL_STATE_INITIAL);
567         switch(pname) {
568         case NL_NB_VARIABLES: {
569                 __nl_assert(param > 0);
570                 __nlCurrentContext->nb_variables = (NLuint)param;
571         } break;
572         case NL_LEAST_SQUARES: {
573                 __nlCurrentContext->least_squares = (NLboolean)param;
574         } break;
575         case NL_SYMMETRIC: {
576                 __nlCurrentContext->symmetric = (NLboolean)param;               
577         }
578         default: {
579                 __nl_assert_not_reached;
580         } break;
581         }
582 }
583
584 void nlSolverParameteri(NLenum pname, NLint param) {
585         __nlCheckState(__NL_STATE_INITIAL);
586         switch(pname) {
587         case NL_NB_VARIABLES: {
588                 __nl_assert(param > 0);
589                 __nlCurrentContext->nb_variables = (NLuint)param;
590         } break;
591         case NL_LEAST_SQUARES: {
592                 __nlCurrentContext->least_squares = (NLboolean)param;
593         } break;
594         case NL_SYMMETRIC: {
595                 __nlCurrentContext->symmetric = (NLboolean)param;               
596         }
597         default: {
598                 __nl_assert_not_reached;
599         } break;
600         }
601 }
602
603 void nlRowParameterf(NLenum pname, NLfloat param) {
604         __nlCheckState(__NL_STATE_MATRIX);
605         switch(pname) {
606         case NL_RIGHT_HAND_SIDE: {
607                 __nlCurrentContext->right_hand_side = param;
608         } break;
609         }
610 }
611
612 void nlRowParameteri(NLenum pname, NLint param) {
613         __nlCheckState(__NL_STATE_MATRIX);
614         switch(pname) {
615         case NL_RIGHT_HAND_SIDE: {
616                 __nlCurrentContext->right_hand_side = (NLfloat)param;
617         } break;
618         }
619 }
620
621 void nlGetBooleanv(NLenum pname, NLboolean* params) {
622         switch(pname) {
623         case NL_LEAST_SQUARES: {
624                 *params = __nlCurrentContext->least_squares;
625         } break;
626         case NL_SYMMETRIC: {
627                 *params = __nlCurrentContext->symmetric;
628         } break;
629         default: {
630                 __nl_assert_not_reached;
631         } break;
632         }
633 }
634
635 void nlGetFloatv(NLenum pname, NLfloat* params) {
636         switch(pname) {
637         case NL_NB_VARIABLES: {
638                 *params = (NLfloat)(__nlCurrentContext->nb_variables);
639         } break;
640         case NL_LEAST_SQUARES: {
641                 *params = (NLfloat)(__nlCurrentContext->least_squares);
642         } break;
643         case NL_SYMMETRIC: {
644                 *params = (NLfloat)(__nlCurrentContext->symmetric);
645         } break;
646         case NL_ERROR: {
647                 *params = (NLfloat)(__nlCurrentContext->error);
648         } break;
649         default: {
650                 __nl_assert_not_reached;
651         } break;
652         }
653 }
654
655 void nlGetIntergerv(NLenum pname, NLint* params) {
656         switch(pname) {
657         case NL_NB_VARIABLES: {
658                 *params = (NLint)(__nlCurrentContext->nb_variables);
659         } break;
660         case NL_LEAST_SQUARES: {
661                 *params = (NLint)(__nlCurrentContext->least_squares);
662         } break;
663         case NL_SYMMETRIC: {
664                 *params = (NLint)(__nlCurrentContext->symmetric);
665         } break;
666         default: {
667                 __nl_assert_not_reached;
668         } break;
669         }
670 }
671
672 /************************************************************************************/
673 /* Enable / Disable */
674
675 void nlEnable(NLenum pname) {
676         switch(pname) {
677         default: {
678                 __nl_assert_not_reached;
679         }
680         }
681 }
682
683 void nlDisable(NLenum pname) {
684         switch(pname) {
685         default: {
686                 __nl_assert_not_reached;
687         }
688         }
689 }
690
691 NLboolean nlIsEnabled(NLenum pname) {
692         switch(pname) {
693         default: {
694                 __nl_assert_not_reached;
695         }
696         }
697         return NL_FALSE;
698 }
699
700 /************************************************************************************/
701 /* Get/Set Lock/Unlock variables */
702
703 void nlSetVariable(NLuint index, NLfloat value) {
704         __nlCheckState(__NL_STATE_SYSTEM);
705         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
706         __nlCurrentContext->variable[index].value = value;      
707 }
708
709 NLfloat nlGetVariable(NLuint index) {
710         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
711         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
712         return __nlCurrentContext->variable[index].value;
713 }
714
715 void nlLockVariable(NLuint index) {
716         __nlCheckState(__NL_STATE_SYSTEM);
717         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
718         __nlCurrentContext->variable[index].locked = NL_TRUE;
719 }
720
721 void nlUnlockVariable(NLuint index) {
722         __nlCheckState(__NL_STATE_SYSTEM);
723         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
724         __nlCurrentContext->variable[index].locked = NL_FALSE;
725 }
726
727 NLboolean nlVariableIsLocked(NLuint index) {
728         __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
729         __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
730         return __nlCurrentContext->variable[index].locked;
731 }
732
733 /************************************************************************************/
734 /* System construction */
735
736 static void __nlVariablesToVector() {
737         NLuint i;
738
739         __nl_assert(__nlCurrentContext->alloc_x);
740         __nl_assert(__nlCurrentContext->alloc_variable);
741
742         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
743                 __NLVariable* v = &(__nlCurrentContext->variable[i]);
744                 if(!v->locked) {
745                         __nl_assert(v->index < __nlCurrentContext->n);
746                         __nlCurrentContext->x[v->index] = v->value;
747                 }
748         }
749 }
750
751 static void __nlVectorToVariables() {
752         NLuint i;
753
754         __nl_assert(__nlCurrentContext->alloc_x);
755         __nl_assert(__nlCurrentContext->alloc_variable);
756
757         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
758                 __NLVariable* v = &(__nlCurrentContext->variable[i]);
759                 if(!v->locked) {
760                         __nl_assert(v->index < __nlCurrentContext->n);
761                         v->value = __nlCurrentContext->x[v->index];
762                 }
763         }
764 }
765
766 static void __nlBeginSystem() {
767         __nl_assert(__nlCurrentContext->nb_variables > 0);
768
769         if (__nlCurrentContext->solve_again)
770                 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
771         else {
772                 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
773
774                 __nlCurrentContext->variable = __NL_NEW_ARRAY(
775                         __NLVariable, __nlCurrentContext->nb_variables);
776                 
777                 __nlCurrentContext->alloc_variable = NL_TRUE;
778         }
779 }
780
781 static void __nlEndSystem() {
782         __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);   
783 }
784
785 static void __nlBeginMatrix() {
786         NLuint i, j;
787         NLuint n = 0;
788         NLenum storage = __NL_ROWS;
789         __NLContext *context = __nlCurrentContext;
790
791         __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
792
793         if (!context->solve_again) {
794                 for(i=0; i<context->nb_variables; i++) {
795                         if(context->variable[i].locked) {
796                                 context->variable[i].index = ~0;
797                                 context->variable[i].a = __NL_NEW(__NLRowColumn);
798                                 __nlRowColumnConstruct(context->variable[i].a);
799                         }
800                         else
801                                 context->variable[i].index = n++;
802                 }
803
804                 context->n = n;
805
806                 /* a least squares problem results in a symmetric matrix */
807                 if(context->least_squares)
808                         context->symmetric = NL_TRUE;
809
810                 if(context->symmetric)
811                         storage = (storage | __NL_SYMMETRIC);
812
813                 /* SuperLU storage does not support symmetric storage */
814                 storage = (storage & ~__NL_SYMMETRIC);
815
816                 __nlSparseMatrixConstruct(&context->M, n, n, storage);
817                 context->alloc_M = NL_TRUE;
818
819                 context->b = __NL_NEW_ARRAY(NLfloat, n);
820                 context->alloc_b = NL_TRUE;
821
822                 context->x = __NL_NEW_ARRAY(NLfloat, n);
823                 context->alloc_x = NL_TRUE;
824         }
825         else {
826                 /* need to recompute b only, A is not constructed anymore */
827                 __NL_CLEAR_ARRAY(NLfloat, context->b, context->n);
828         }
829
830         __nlVariablesToVector();
831
832         __nlRowColumnConstruct(&context->af);
833         context->alloc_af = NL_TRUE;
834         __nlRowColumnConstruct(&context->al);
835         context->alloc_al = NL_TRUE;
836
837         context->current_row = 0;
838 }
839
840 static void __nlEndMatrix() {
841         __NLContext *context = __nlCurrentContext;
842         __NLVariable *variable;
843         __NLRowColumn *a;
844         NLfloat *b;
845         NLuint i, j;
846
847         __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);       
848         
849         __nlRowColumnDestroy(&context->af);
850         context->alloc_af = NL_FALSE;
851         __nlRowColumnDestroy(&context->al);
852         context->alloc_al = NL_FALSE;
853         
854         b = context->b;
855
856         for(i=0; i<__nlCurrentContext->nb_variables; i++) {
857                 variable = &(context->variable[i]);
858
859                 if(variable->locked) {
860                         a = variable->a;
861
862                         for(j=0; j<a->size; j++) {
863                                 b[a->coeff[j].index] -= a->coeff[j].value*variable->value;
864                         }
865                 }
866         }
867
868 #if 0
869         if(!context->least_squares) {
870                 __nl_assert(
871                         context->current_row == 
872                         context->n
873                 );
874         }
875 #endif
876 }
877
878 static void __nlBeginRow() {
879         __nlTransition(__NL_STATE_MATRIX, __NL_STATE_ROW);
880         __nlRowColumnZero(&__nlCurrentContext->af);
881         __nlRowColumnZero(&__nlCurrentContext->al);
882 }
883
884 static void __nlEndRow() {
885         __NLRowColumn*  af = &__nlCurrentContext->af;
886         __NLRowColumn*  al = &__nlCurrentContext->al;
887         __NLSparseMatrix* M  = &__nlCurrentContext->M;
888         NLfloat* b              = __nlCurrentContext->b;
889         NLuint nf                 = af->size;
890         NLuint nl                 = al->size;
891         NLuint current_row = __nlCurrentContext->current_row;
892         NLuint i;
893         NLuint j;
894         NLfloat S;
895         __nlTransition(__NL_STATE_ROW, __NL_STATE_MATRIX);
896
897         if(__nlCurrentContext->least_squares) {
898                 if (!__nlCurrentContext->solve_again) {
899                         for(i=0; i<nf; i++) {
900                                 for(j=0; j<nf; j++) {
901                                         __nlSparseMatrixAdd(
902                                                 M, af->coeff[i].index, af->coeff[j].index,
903                                                 af->coeff[i].value * af->coeff[j].value
904                                         );
905                                 }
906                         }
907                 }
908
909                 S = -__nlCurrentContext->right_hand_side;
910                 for(j=0; j<nl; j++)
911                         S += al->coeff[j].value;
912
913                 for(i=0; i<nf; i++)
914                         b[ af->coeff[i].index ] -= af->coeff[i].value * S;
915         } else {
916                 if (!__nlCurrentContext->solve_again) {
917                         for(i=0; i<nf; i++) {
918                                 __nlSparseMatrixAdd(
919                                         M, current_row, af->coeff[i].index, af->coeff[i].value
920                                 );
921                         }
922                 }
923                 b[current_row] = -__nlCurrentContext->right_hand_side;
924                 for(i=0; i<nl; i++) {
925                         b[current_row] -= al->coeff[i].value;
926                 }
927         }
928         __nlCurrentContext->current_row++;
929         __nlCurrentContext->right_hand_side = 0.0;      
930 }
931
932 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
933 {
934         __NLContext *context = __nlCurrentContext;
935
936         __nlCheckState(__NL_STATE_MATRIX);
937         __nl_assert(!context->least_squares);
938
939         if (context->variable[row].locked);
940         else if (context->variable[col].locked) {
941                 row = context->variable[row].index;
942                 __nlRowColumnAppend(context->variable[col].a, row, value);
943         }
944         else {
945                 __NLSparseMatrix* M  = &context->M;
946
947                 row = context->variable[row].index;
948                 col = context->variable[col].index;
949                 
950                 __nl_range_assert(row, 0, context->n - 1);
951                 __nl_range_assert(col, 0, context->n - 1);
952
953                 __nlSparseMatrixAdd(M, row, col, value);
954         }
955 }
956
957 void nlRightHandSideAdd(NLuint index, NLfloat value)
958 {
959         __NLContext *context = __nlCurrentContext;
960         NLfloat* b = context->b;
961
962         __nlCheckState(__NL_STATE_MATRIX);
963         __nl_assert(!context->least_squares);
964
965         if(!context->variable[index].locked) {
966                 index = context->variable[index].index;
967                 __nl_range_assert(index, 0, context->n - 1);
968
969                 b[index] += value;
970         }
971 }
972
973 void nlRightHandSideSet(NLuint index, NLfloat value)
974 {
975         __NLContext *context = __nlCurrentContext;
976         NLfloat* b = context->b;
977
978         __nlCheckState(__NL_STATE_MATRIX);
979         __nl_assert(!context->least_squares);
980
981         if(!context->variable[index].locked) {
982                 index = context->variable[index].index;
983                 __nl_range_assert(index, 0, context->n - 1);
984
985                 b[index] = value;
986         }
987 }
988
989 void nlCoefficient(NLuint index, NLfloat value) {
990         __NLVariable* v;
991         unsigned int zero= 0;
992         __nlCheckState(__NL_STATE_ROW);
993         __nl_range_assert(index, zero, __nlCurrentContext->nb_variables - 1);
994         v = &(__nlCurrentContext->variable[index]);
995         if(v->locked)
996                 __nlRowColumnAppend(&(__nlCurrentContext->al), 0, value*v->value);
997         else
998                 __nlRowColumnAppend(&(__nlCurrentContext->af), v->index, value);
999 }
1000
1001 void nlBegin(NLenum prim) {
1002         switch(prim) {
1003         case NL_SYSTEM: {
1004                 __nlBeginSystem();
1005         } break;
1006         case NL_MATRIX: {
1007                 __nlBeginMatrix();
1008         } break;
1009         case NL_ROW: {
1010                 __nlBeginRow();
1011         } break;
1012         default: {
1013                 __nl_assert_not_reached;
1014         }
1015         }
1016 }
1017
1018 void nlEnd(NLenum prim) {
1019         switch(prim) {
1020         case NL_SYSTEM: {
1021                 __nlEndSystem();
1022         } break;
1023         case NL_MATRIX: {
1024                 __nlEndMatrix();
1025         } break;
1026         case NL_ROW: {
1027                 __nlEndRow();
1028         } break;
1029         default: {
1030                 __nl_assert_not_reached;
1031         }
1032         }
1033 }
1034
1035 /************************************************************************/
1036 /* SuperLU wrapper */
1037
1038 /* Note: SuperLU is difficult to call, but it is worth it.      */
1039 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
1040 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
1041
1042         /* OpenNL Context */
1043         __NLSparseMatrix* M = &(context->M);
1044         NLuint n = context->n;
1045         NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
1046
1047         /* Compressed Row Storage matrix representation */
1048         NLint   *xa             = __NL_NEW_ARRAY(NLint, n+1);
1049         NLfloat *rhs    = __NL_NEW_ARRAY(NLfloat, n);
1050         NLfloat *a              = __NL_NEW_ARRAY(NLfloat, nnz);
1051         NLint   *asub   = __NL_NEW_ARRAY(NLint, nnz);
1052         NLint   *etree  = __NL_NEW_ARRAY(NLint, n);
1053
1054         /* SuperLU variables */
1055         SuperMatrix At, AtP;
1056         NLint info, panel_size, relax;
1057         superlu_options_t options;
1058
1059         /* Temporary variables */
1060         __NLRowColumn* Ri = NULL;
1061         NLuint i, jj, count;
1062         
1063         __nl_assert(!(M->storage & __NL_SYMMETRIC));
1064         __nl_assert(M->storage & __NL_ROWS);
1065         __nl_assert(M->m == M->n);
1066         
1067         /* Convert M to compressed column format */
1068         for(i=0, count=0; i<n; i++) {
1069                 __NLRowColumn *Ri = M->row + i;
1070                 xa[i] = count;
1071
1072                 for(jj=0; jj<Ri->size; jj++, count++) {
1073                         a[count] = Ri->coeff[jj].value;
1074                         asub[count] = Ri->coeff[jj].index;
1075                 }
1076         }
1077         xa[n] = nnz;
1078
1079         /* Free M, don't need it anymore at this point */
1080         __nlSparseMatrixClear(M);
1081
1082         /* Create superlu A matrix transposed */
1083         sCreate_CompCol_Matrix(
1084                 &At, n, n, nnz, a, asub, xa, 
1085                 SLU_NC,         /* Colum wise, no supernode */
1086                 SLU_S,          /* floats */ 
1087                 SLU_GE          /* general storage */
1088         );
1089
1090         /* Set superlu options */
1091         set_default_options(&options);
1092         options.ColPerm = MY_PERMC;
1093         options.Fact = DOFACT;
1094
1095         StatInit(&(context->slu.stat));
1096
1097         panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
1098         relax = sp_ienv(2);
1099
1100         /* Compute permutation and permuted matrix */
1101         context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
1102         context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
1103
1104         if ((permutation == NULL) || (*permutation == -1)) {
1105                 get_perm_c(3, &At, context->slu.perm_c);
1106
1107                 if (permutation)
1108                         memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
1109         }
1110         else
1111                 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
1112
1113         sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
1114
1115         /* Decompose into L and U */
1116         sgstrf(&options, &AtP, relax, panel_size,
1117                 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
1118                 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
1119
1120         /* Cleanup */
1121
1122         Destroy_SuperMatrix_Store(&At);
1123         Destroy_CompCol_Permuted(&AtP);
1124
1125         __NL_DELETE_ARRAY(etree);
1126         __NL_DELETE_ARRAY(xa);
1127         __NL_DELETE_ARRAY(rhs);
1128         __NL_DELETE_ARRAY(a);
1129         __NL_DELETE_ARRAY(asub);
1130
1131         context->slu.alloc_slu = NL_TRUE;
1132
1133         return (info == 0);
1134 }
1135
1136 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
1137
1138         /* OpenNL Context */
1139         NLfloat* b = context->b;
1140         NLfloat* x = context->x;
1141         NLuint n = context->n;
1142
1143         /* SuperLU variables */
1144         SuperMatrix B;
1145         NLint info;
1146
1147         /* Create superlu array for B */
1148         sCreate_Dense_Matrix(
1149                 &B, n, 1, b, n, 
1150                 SLU_DN, /* Fortran-type column-wise storage */
1151                 SLU_S,  /* floats                                                 */
1152                 SLU_GE  /* general                                                */
1153         );
1154
1155         /* Forward/Back substitution to compute x */
1156         sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
1157                 context->slu.perm_c, context->slu.perm_r, &B,
1158                 &(context->slu.stat), &info);
1159
1160         if(info == 0)
1161                 memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
1162
1163         Destroy_SuperMatrix_Store(&B);
1164
1165         return (info == 0);
1166 }
1167
1168 static void __nlFree_SUPERLU(__NLContext *context) {
1169
1170         Destroy_SuperNode_Matrix(&(context->slu.L));
1171         Destroy_CompCol_Matrix(&(context->slu.U));
1172
1173         StatFree(&(context->slu.stat));
1174
1175         __NL_DELETE_ARRAY(context->slu.perm_r);
1176         __NL_DELETE_ARRAY(context->slu.perm_c);
1177
1178         context->slu.alloc_slu = NL_FALSE;
1179 }
1180
1181 void nlPrintMatrix(void) {
1182         __NLSparseMatrix* M  = &(__nlCurrentContext->M);
1183         float *b = __nlCurrentContext->b;
1184         NLuint i, jj, k;
1185         NLuint n = __nlCurrentContext->n;
1186         __NLRowColumn* Ri = NULL;
1187         float *value = malloc(sizeof(*value)*n);
1188
1189         printf("A:\n");
1190         for(i=0; i<n; i++) {
1191                 Ri = &(M->row[i]);
1192
1193                 memset(value, 0.0, sizeof(*value)*n);
1194                 for(jj=0; jj<Ri->size; jj++)
1195                         value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1196
1197                 for (k = 0; k<n; k++)
1198                         printf("%.3f ", value[k]);
1199                 printf("\n");
1200         }
1201
1202         printf("b:\n");
1203         for(i=0; i<n; i++)
1204                 printf("%f ", b[i]);
1205         printf("\n");
1206
1207         free(value);
1208 }
1209
1210 /************************************************************************/
1211 /* nlSolve() driver routine */
1212
1213 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
1214         NLboolean result = NL_TRUE;
1215
1216         __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
1217
1218         if (!__nlCurrentContext->solve_again)
1219                 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
1220
1221         if (result) {
1222                 result = __nlInvert_SUPERLU(__nlCurrentContext);
1223
1224                 if (result) {
1225                         __nlVectorToVariables();
1226
1227                         if (solveAgain)
1228                                 __nlCurrentContext->solve_again = NL_TRUE;
1229
1230                         __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
1231                 }
1232         }
1233
1234         return result;
1235 }
1236
1237 NLboolean nlSolve() {
1238         return nlSolveAdvanced(NULL, NL_FALSE);
1239 }
1240