4 * OpenNL: Numerical Library
5 * Copyright (C) 2004 Bruno Levy
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * If you modify this software, you should include a notice giving the
22 * name of the person performing the modification, the date of modification,
23 * and the reason for such modification.
30 * LORIA, INRIA Lorraine,
31 * Campus Scientifique, BP 239
32 * 54506 VANDOEUVRE LES NANCY CEDEX
35 * Note that the GNU General Public License does not permit incorporating
36 * the Software into proprietary programs.
39 #include "ONL_opennl.h"
52 /* SuperLU includes */
56 /************************************************************************************/
60 static void __nl_assertion_failed(char* cond, char* file, int line) {
63 "OpenNL assertion failed: %s, file:%s, line:%d\n",
69 static void __nl_range_assertion_failed(
70 float x, float min_val, float max_val, char* file, int line
74 "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
75 x, min_val, max_val, file,line
80 static void __nl_should_not_have_reached(char* file, int line) {
83 "OpenNL should not have reached this point: file:%s, line:%d\n",
90 #define __nl_assert(x) { \
92 __nl_assertion_failed(#x,__FILE__, __LINE__); \
96 #define __nl_range_assert(x,min_val,max_val) { \
97 if(((x) < (min_val)) || ((x) > (max_val))) { \
98 __nl_range_assertion_failed(x, min_val, max_val, \
104 #define __nl_assert_not_reached { \
105 __nl_should_not_have_reached(__FILE__, __LINE__); \
109 #define __nl_debug_assert(x) __nl_assert(x)
110 #define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
112 #define __nl_debug_assert(x)
113 #define __nl_debug_range_assert(x,min_val,max_val)
117 #define __nl_parano_assert(x) __nl_assert(x)
118 #define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
120 #define __nl_parano_assert(x)
121 #define __nl_parano_range_assert(x,min_val,max_val)
124 /************************************************************************************/
128 #define MIN(x,y) (((x) < (y)) ? (x) : (y))
132 #define MAX(x,y) (((x) > (y)) ? (x) : (y))
135 /************************************************************************************/
136 /* memory management */
138 #define __NL_NEW(T) (T*)(calloc(1, sizeof(T)))
139 #define __NL_NEW_ARRAY(T,NB) (T*)(calloc((NB),sizeof(T)))
140 #define __NL_RENEW_ARRAY(T,x,NB) (T*)(realloc(x,(NB)*sizeof(T)))
141 #define __NL_DELETE(x) free(x); x = NULL
142 #define __NL_DELETE_ARRAY(x) free(x); x = NULL
144 #define __NL_CLEAR(T, x) memset(x, 0, sizeof(T))
145 #define __NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (NB)*sizeof(T))
147 /************************************************************************************/
148 /* Dynamic arrays for sparse row/columns */
161 static void __nlRowColumnConstruct(__NLRowColumn* c) {
167 static void __nlRowColumnDestroy(__NLRowColumn* c) {
168 __NL_DELETE_ARRAY(c->coeff);
170 __NL_CLEAR(__NLRowColumn, c);
174 static void __nlRowColumnGrow(__NLRowColumn* c) {
175 if(c->capacity != 0) {
176 c->capacity = 2 * c->capacity;
177 c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
180 c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
184 static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
186 for(i=0; i<c->size; i++) {
187 if(c->coeff[i].index == (NLuint)index) {
188 c->coeff[i].value += value;
192 if(c->size == c->capacity) {
193 __nlRowColumnGrow(c);
195 c->coeff[c->size].index = index;
196 c->coeff[c->size].value = value;
200 /* Does not check whether the index already exists */
201 static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
202 if(c->size == c->capacity) {
203 __nlRowColumnGrow(c);
205 c->coeff[c->size].index = index;
206 c->coeff[c->size].value = value;
210 static void __nlRowColumnZero(__NLRowColumn* c) {
214 static void __nlRowColumnClear(__NLRowColumn* c) {
217 __NL_DELETE_ARRAY(c->coeff);
220 /************************************************************************************/
221 /* SparseMatrix data structure */
224 #define __NL_COLUMNS 2
225 #define __NL_SYMMETRIC 4
233 __NLRowColumn* column;
238 static void __nlSparseMatrixConstruct(
239 __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
244 M->storage = storage;
245 if(storage & __NL_ROWS) {
246 M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
248 __nlRowColumnConstruct(&(M->row[i]));
254 if(storage & __NL_COLUMNS) {
255 M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
257 __nlRowColumnConstruct(&(M->column[i]));
263 M->diag_size = MIN(m,n);
264 M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
267 static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
269 __NL_DELETE_ARRAY(M->diag);
270 if(M->storage & __NL_ROWS) {
271 for(i=0; i<M->m; i++) {
272 __nlRowColumnDestroy(&(M->row[i]));
274 __NL_DELETE_ARRAY(M->row);
276 if(M->storage & __NL_COLUMNS) {
277 for(i=0; i<M->n; i++) {
278 __nlRowColumnDestroy(&(M->column[i]));
280 __NL_DELETE_ARRAY(M->column);
283 __NL_CLEAR(__NLSparseMatrix,M);
287 static void __nlSparseMatrixAdd(
288 __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
290 __nl_parano_range_assert(i, 0, M->m - 1);
291 __nl_parano_range_assert(j, 0, M->n - 1);
292 if((M->storage & __NL_SYMMETRIC) && (j > i)) {
298 if(M->storage & __NL_ROWS) {
299 __nlRowColumnAdd(&(M->row[i]), j, value);
301 if(M->storage & __NL_COLUMNS) {
302 __nlRowColumnAdd(&(M->column[j]), i, value);
306 static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
308 if(M->storage & __NL_ROWS) {
309 for(i=0; i<M->m; i++) {
310 __nlRowColumnClear(&(M->row[i]));
313 if(M->storage & __NL_COLUMNS) {
314 for(i=0; i<M->n; i++) {
315 __nlRowColumnClear(&(M->column[i]));
318 __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size);
321 /* Returns the number of non-zero coefficients */
322 static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
325 if(M->storage & __NL_ROWS) {
326 for(i = 0; i<M->m; i++) {
327 nnz += M->row[i].size;
329 } else if (M->storage & __NL_COLUMNS) {
330 for(i = 0; i<M->n; i++) {
331 nnz += M->column[i].size;
334 __nl_assert_not_reached;
339 /************************************************************************************/
340 /* SparseMatrix x Vector routines, internal helper routines */
342 static void __nlSparseMatrix_mult_rows_symmetric(
343 __NLSparseMatrix* A, NLfloat* x, NLfloat* y
347 __NLRowColumn* Ri = NULL;
352 for(ij=0; ij<Ri->size; ij++) {
353 c = &(Ri->coeff[ij]);
354 y[i] += c->value * x[c->index];
356 y[c->index] += c->value * x[i];
362 static void __nlSparseMatrix_mult_rows(
363 __NLSparseMatrix* A, NLfloat* x, NLfloat* y
367 __NLRowColumn* Ri = NULL;
372 for(ij=0; ij<Ri->size; ij++) {
373 c = &(Ri->coeff[ij]);
374 y[i] += c->value * x[c->index];
379 static void __nlSparseMatrix_mult_cols_symmetric(
380 __NLSparseMatrix* A, NLfloat* x, NLfloat* y
384 __NLRowColumn* Cj = NULL;
388 Cj = &(A->column[j]);
389 for(ii=0; ii<Cj->size; ii++) {
390 c = &(Cj->coeff[ii]);
391 y[c->index] += c->value * x[j];
393 y[j] += c->value * x[c->index];
399 static void __nlSparseMatrix_mult_cols(
400 __NLSparseMatrix* A, NLfloat* x, NLfloat* y
404 __NLRowColumn* Cj = NULL;
406 __NL_CLEAR_ARRAY(NLfloat, y, A->m);
408 Cj = &(A->column[j]);
409 for(ii=0; ii<Cj->size; ii++) {
410 c = &(Cj->coeff[ii]);
411 y[c->index] += c->value * x[j];
416 /************************************************************************************/
417 /* SparseMatrix x Vector routines, main driver routine */
419 static void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
420 if(A->storage & __NL_ROWS) {
421 if(A->storage & __NL_SYMMETRIC) {
422 __nlSparseMatrix_mult_rows_symmetric(A, x, y);
424 __nlSparseMatrix_mult_rows(A, x, y);
427 if(A->storage & __NL_SYMMETRIC) {
428 __nlSparseMatrix_mult_cols_symmetric(A, x, y);
430 __nlSparseMatrix_mult_cols(A, x, y);
435 /************************************************************************************/
436 /* NLContext data structure */
438 typedef void(*__NLMatrixFunc)(float* x, float* y);
446 #define __NL_STATE_INITIAL 0
447 #define __NL_STATE_SYSTEM 1
448 #define __NL_STATE_MATRIX 2
449 #define __NL_STATE_ROW 3
450 #define __NL_STATE_MATRIX_CONSTRUCTED 4
451 #define __NL_STATE_SYSTEM_CONSTRUCTED 5
452 #define __NL_STATE_SYSTEM_SOLVED 7
456 __NLVariable* variable;
463 NLfloat right_hand_side;
466 NLboolean least_squares;
468 NLboolean solve_again;
472 NLboolean alloc_variable;
476 __NLMatrixFunc matrix_vector_prod;
478 struct __NLSuperLUContext {
481 NLint *perm_c, *perm_r;
486 static __NLContext* __nlCurrentContext = NULL;
488 static void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
489 __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y);
493 NLContext nlNewContext(void) {
494 __NLContext* result = __NL_NEW(__NLContext);
495 result->state = __NL_STATE_INITIAL;
496 result->right_hand_side = 0.0;
497 result->matrix_vector_prod = __nlMatrixVectorProd_default;
498 nlMakeCurrent(result);
502 static void __nlFree_SUPERLU(__NLContext *context);
504 void nlDeleteContext(NLContext context_in) {
505 __NLContext* context = (__NLContext*)(context_in);
506 if(__nlCurrentContext == context) {
507 __nlCurrentContext = NULL;
509 if(context->alloc_M) {
510 __nlSparseMatrixDestroy(&context->M);
512 if(context->alloc_af) {
513 __nlRowColumnDestroy(&context->af);
515 if(context->alloc_al) {
516 __nlRowColumnDestroy(&context->al);
518 if(context->alloc_variable) {
519 __NL_DELETE_ARRAY(context->variable);
521 if(context->alloc_x) {
522 __NL_DELETE_ARRAY(context->x);
524 if(context->alloc_b) {
525 __NL_DELETE_ARRAY(context->b);
527 if (context->slu.alloc_slu) {
528 __nlFree_SUPERLU(context);
532 __NL_CLEAR(__NLContext, context);
534 __NL_DELETE(context);
537 void nlMakeCurrent(NLContext context) {
538 __nlCurrentContext = (__NLContext*)(context);
541 NLContext nlGetCurrent(void) {
542 return __nlCurrentContext;
545 static void __nlCheckState(NLenum state) {
546 __nl_assert(__nlCurrentContext->state == state);
549 static void __nlTransition(NLenum from_state, NLenum to_state) {
550 __nlCheckState(from_state);
551 __nlCurrentContext->state = to_state;
554 /************************************************************************************/
555 /* Get/Set parameters */
557 void nlSolverParameterf(NLenum pname, NLfloat param) {
558 __nlCheckState(__NL_STATE_INITIAL);
560 case NL_NB_VARIABLES: {
561 __nl_assert(param > 0);
562 __nlCurrentContext->nb_variables = (NLuint)param;
564 case NL_LEAST_SQUARES: {
565 __nlCurrentContext->least_squares = (NLboolean)param;
568 __nlCurrentContext->symmetric = (NLboolean)param;
571 __nl_assert_not_reached;
576 void nlSolverParameteri(NLenum pname, NLint param) {
577 __nlCheckState(__NL_STATE_INITIAL);
579 case NL_NB_VARIABLES: {
580 __nl_assert(param > 0);
581 __nlCurrentContext->nb_variables = (NLuint)param;
583 case NL_LEAST_SQUARES: {
584 __nlCurrentContext->least_squares = (NLboolean)param;
587 __nlCurrentContext->symmetric = (NLboolean)param;
590 __nl_assert_not_reached;
595 void nlRowParameterf(NLenum pname, NLfloat param) {
596 __nlCheckState(__NL_STATE_MATRIX);
598 case NL_RIGHT_HAND_SIDE: {
599 __nlCurrentContext->right_hand_side = param;
604 void nlRowParameteri(NLenum pname, NLint param) {
605 __nlCheckState(__NL_STATE_MATRIX);
607 case NL_RIGHT_HAND_SIDE: {
608 __nlCurrentContext->right_hand_side = (NLfloat)param;
613 void nlGetBooleanv(NLenum pname, NLboolean* params) {
615 case NL_LEAST_SQUARES: {
616 *params = __nlCurrentContext->least_squares;
619 *params = __nlCurrentContext->symmetric;
622 __nl_assert_not_reached;
627 void nlGetFloatv(NLenum pname, NLfloat* params) {
629 case NL_NB_VARIABLES: {
630 *params = (NLfloat)(__nlCurrentContext->nb_variables);
632 case NL_LEAST_SQUARES: {
633 *params = (NLfloat)(__nlCurrentContext->least_squares);
636 *params = (NLfloat)(__nlCurrentContext->symmetric);
639 *params = (NLfloat)(__nlCurrentContext->error);
642 __nl_assert_not_reached;
647 void nlGetIntergerv(NLenum pname, NLint* params) {
649 case NL_NB_VARIABLES: {
650 *params = (NLint)(__nlCurrentContext->nb_variables);
652 case NL_LEAST_SQUARES: {
653 *params = (NLint)(__nlCurrentContext->least_squares);
656 *params = (NLint)(__nlCurrentContext->symmetric);
659 __nl_assert_not_reached;
664 /************************************************************************************/
665 /* Enable / Disable */
667 void nlEnable(NLenum pname) {
670 __nl_assert_not_reached;
675 void nlDisable(NLenum pname) {
678 __nl_assert_not_reached;
683 NLboolean nlIsEnabled(NLenum pname) {
686 __nl_assert_not_reached;
692 /************************************************************************************/
693 /* Get/Set Lock/Unlock variables */
695 void nlSetVariable(NLuint index, NLfloat value) {
696 __nlCheckState(__NL_STATE_SYSTEM);
697 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
698 __nlCurrentContext->variable[index].value = value;
701 NLfloat nlGetVariable(NLuint index) {
702 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
703 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
704 return __nlCurrentContext->variable[index].value;
707 void nlLockVariable(NLuint index) {
708 __nlCheckState(__NL_STATE_SYSTEM);
709 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
710 __nlCurrentContext->variable[index].locked = NL_TRUE;
713 void nlUnlockVariable(NLuint index) {
714 __nlCheckState(__NL_STATE_SYSTEM);
715 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
716 __nlCurrentContext->variable[index].locked = NL_FALSE;
719 NLboolean nlVariableIsLocked(NLuint index) {
720 __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL);
721 __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1);
722 return __nlCurrentContext->variable[index].locked;
725 /************************************************************************************/
726 /* System construction */
728 static void __nlVariablesToVector() {
730 __nl_assert(__nlCurrentContext->alloc_x);
731 __nl_assert(__nlCurrentContext->alloc_variable);
732 for(i=0; i<__nlCurrentContext->nb_variables; i++) {
733 __NLVariable* v = &(__nlCurrentContext->variable[i]);
735 __nl_assert(v->index < __nlCurrentContext->n);
736 __nlCurrentContext->x[v->index] = v->value;
741 static void __nlVectorToVariables() {
743 __nl_assert(__nlCurrentContext->alloc_x);
744 __nl_assert(__nlCurrentContext->alloc_variable);
745 for(i=0; i<__nlCurrentContext->nb_variables; i++) {
746 __NLVariable* v = &(__nlCurrentContext->variable[i]);
748 __nl_assert(v->index < __nlCurrentContext->n);
749 v->value = __nlCurrentContext->x[v->index];
754 static void __nlBeginSystem() {
755 __nl_assert(__nlCurrentContext->nb_variables > 0);
757 if (__nlCurrentContext->solve_again)
758 __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
760 __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);
762 __nlCurrentContext->variable = __NL_NEW_ARRAY(
763 __NLVariable, __nlCurrentContext->nb_variables
765 __nlCurrentContext->alloc_variable = NL_TRUE;
769 static void __nlEndSystem() {
770 __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED);
773 static void __nlBeginMatrix() {
776 NLenum storage = __NL_ROWS;
778 __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX);
780 if (!__nlCurrentContext->solve_again) {
781 for(i=0; i<__nlCurrentContext->nb_variables; i++) {
782 if(!__nlCurrentContext->variable[i].locked)
783 __nlCurrentContext->variable[i].index = n++;
785 __nlCurrentContext->variable[i].index = ~0;
788 __nlCurrentContext->n = n;
790 /* a least squares problem results in a symmetric matrix */
791 if(__nlCurrentContext->least_squares)
792 __nlCurrentContext->symmetric = NL_TRUE;
794 if(__nlCurrentContext->symmetric)
795 storage = (storage | __NL_SYMMETRIC);
797 /* SuperLU storage does not support symmetric storage */
798 storage = (storage & ~__NL_SYMMETRIC);
800 __nlSparseMatrixConstruct(&__nlCurrentContext->M, n, n, storage);
801 __nlCurrentContext->alloc_M = NL_TRUE;
803 __nlCurrentContext->x = __NL_NEW_ARRAY(NLfloat, n);
804 __nlCurrentContext->alloc_x = NL_TRUE;
806 __nlCurrentContext->b = __NL_NEW_ARRAY(NLfloat, n);
807 __nlCurrentContext->alloc_b = NL_TRUE;
810 /* need to recompute b only, A is not constructed anymore */
811 __NL_CLEAR_ARRAY(NLfloat, __nlCurrentContext->b, __nlCurrentContext->n);
814 __nlVariablesToVector();
816 __nlRowColumnConstruct(&__nlCurrentContext->af);
817 __nlCurrentContext->alloc_af = NL_TRUE;
818 __nlRowColumnConstruct(&__nlCurrentContext->al);
819 __nlCurrentContext->alloc_al = NL_TRUE;
821 __nlCurrentContext->current_row = 0;
824 static void __nlEndMatrix() {
825 __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED);
827 __nlRowColumnDestroy(&__nlCurrentContext->af);
828 __nlCurrentContext->alloc_af = NL_FALSE;
829 __nlRowColumnDestroy(&__nlCurrentContext->al);
830 __nlCurrentContext->alloc_al = NL_FALSE;
833 if(!__nlCurrentContext->least_squares) {
835 __nlCurrentContext->current_row ==
836 __nlCurrentContext->n
842 static void __nlBeginRow() {
843 __nlTransition(__NL_STATE_MATRIX, __NL_STATE_ROW);
844 __nlRowColumnZero(&__nlCurrentContext->af);
845 __nlRowColumnZero(&__nlCurrentContext->al);
848 static void __nlEndRow() {
849 __NLRowColumn* af = &__nlCurrentContext->af;
850 __NLRowColumn* al = &__nlCurrentContext->al;
851 __NLSparseMatrix* M = &__nlCurrentContext->M;
852 NLfloat* b = __nlCurrentContext->b;
853 NLuint nf = af->size;
854 NLuint nl = al->size;
855 NLuint current_row = __nlCurrentContext->current_row;
859 __nlTransition(__NL_STATE_ROW, __NL_STATE_MATRIX);
861 if(__nlCurrentContext->least_squares) {
862 if (!__nlCurrentContext->solve_again) {
863 for(i=0; i<nf; i++) {
864 for(j=0; j<nf; j++) {
866 M, af->coeff[i].index, af->coeff[j].index,
867 af->coeff[i].value * af->coeff[j].value
873 S = -__nlCurrentContext->right_hand_side;
875 S += al->coeff[j].value;
878 b[ af->coeff[i].index ] -= af->coeff[i].value * S;
880 if (!__nlCurrentContext->solve_again) {
881 for(i=0; i<nf; i++) {
883 M, current_row, af->coeff[i].index, af->coeff[i].value
887 b[current_row] = -__nlCurrentContext->right_hand_side;
888 for(i=0; i<nl; i++) {
889 b[current_row] -= al->coeff[i].value;
892 __nlCurrentContext->current_row++;
893 __nlCurrentContext->right_hand_side = 0.0;
896 void nlMatrixAdd(NLuint row, NLuint col, NLfloat value)
898 __NLSparseMatrix* M = &__nlCurrentContext->M;
899 __nlCheckState(__NL_STATE_MATRIX);
900 __nl_range_assert(row, 0, __nlCurrentContext->n - 1);
901 __nl_range_assert(col, 0, __nlCurrentContext->nb_variables - 1);
902 __nl_assert(!__nlCurrentContext->least_squares);
904 __nlSparseMatrixAdd(M, row, col, value);
907 void nlRightHandSideAdd(NLuint index, NLfloat value)
909 NLfloat* b = __nlCurrentContext->b;
911 __nlCheckState(__NL_STATE_MATRIX);
912 __nl_range_assert(index, 0, __nlCurrentContext->n - 1);
913 __nl_assert(!__nlCurrentContext->least_squares);
918 void nlCoefficient(NLuint index, NLfloat value) {
920 unsigned int zero= 0;
921 __nlCheckState(__NL_STATE_ROW);
922 __nl_range_assert(index, zero, __nlCurrentContext->nb_variables - 1);
923 v = &(__nlCurrentContext->variable[index]);
925 __nlRowColumnAppend(&(__nlCurrentContext->al), 0, value*v->value);
927 __nlRowColumnAppend(&(__nlCurrentContext->af), v->index, value);
930 void nlBegin(NLenum prim) {
942 __nl_assert_not_reached;
947 void nlEnd(NLenum prim) {
959 __nl_assert_not_reached;
964 /************************************************************************/
965 /* SuperLU wrapper */
967 /* Note: SuperLU is difficult to call, but it is worth it. */
968 /* Here is a driver inspired by A. Sheffer's "cow flattener". */
969 static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) {
972 __NLSparseMatrix* M = &(context->M);
973 NLuint n = context->n;
974 NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */
976 /* Compressed Row Storage matrix representation */
977 NLint *xa = __NL_NEW_ARRAY(NLint, n+1);
978 NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n);
979 NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz);
980 NLint *asub = __NL_NEW_ARRAY(NLint, nnz);
981 NLint *etree = __NL_NEW_ARRAY(NLint, n);
983 /* SuperLU variables */
985 NLint info, panel_size, relax;
986 superlu_options_t options;
988 /* Temporary variables */
989 __NLRowColumn* Ri = NULL;
992 __nl_assert(!(M->storage & __NL_SYMMETRIC));
993 __nl_assert(M->storage & __NL_ROWS);
994 __nl_assert(M->m == M->n);
996 /* Convert M to compressed column format */
997 for(i=0, count=0; i<n; i++) {
998 __NLRowColumn *Ri = M->row + i;
1001 for(jj=0; jj<Ri->size; jj++, count++) {
1002 a[count] = Ri->coeff[jj].value;
1003 asub[count] = Ri->coeff[jj].index;
1008 /* Free M, don't need it anymore at this point */
1009 __nlSparseMatrixClear(M);
1011 /* Create superlu A matrix transposed */
1012 sCreate_CompCol_Matrix(
1013 &At, n, n, nnz, a, asub, xa,
1014 SLU_NC, /* Colum wise, no supernode */
1016 SLU_GE /* general storage */
1019 /* Set superlu options */
1020 set_default_options(&options);
1021 options.ColPerm = MY_PERMC;
1022 options.Fact = DOFACT;
1024 StatInit(&(context->slu.stat));
1026 panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
1029 /* Compute permutation and permuted matrix */
1030 context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
1031 context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);
1033 if ((permutation == NULL) || (*permutation == -1)) {
1034 get_perm_c(3, &At, context->slu.perm_c);
1037 memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
1040 memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
1042 sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);
1044 /* Decompose into L and U */
1045 sgstrf(&options, &AtP, relax, panel_size,
1046 etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
1047 &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);
1051 Destroy_SuperMatrix_Store(&At);
1052 Destroy_SuperMatrix_Store(&AtP);
1054 __NL_DELETE_ARRAY(etree);
1055 __NL_DELETE_ARRAY(xa);
1056 __NL_DELETE_ARRAY(rhs);
1057 __NL_DELETE_ARRAY(a);
1058 __NL_DELETE_ARRAY(asub);
1060 context->slu.alloc_slu = NL_TRUE;
1065 static NLboolean __nlInvert_SUPERLU(__NLContext *context) {
1067 /* OpenNL Context */
1068 NLfloat* b = context->b;
1069 NLfloat* x = context->x;
1070 NLuint n = context->n;
1072 /* SuperLU variables */
1076 /* Create superlu array for B */
1077 sCreate_Dense_Matrix(
1079 SLU_DN, /* Fortran-type column-wise storage */
1081 SLU_GE /* general */
1084 /* Forward/Back substitution to compute x */
1085 sgstrs(TRANS, &(context->slu.L), &(context->slu.U),
1086 context->slu.perm_c, context->slu.perm_r, &B,
1087 &(context->slu.stat), &info);
1090 memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n);
1092 Destroy_SuperMatrix_Store(&B);
1097 static void __nlFree_SUPERLU(__NLContext *context) {
1099 Destroy_SuperNode_Matrix(&(context->slu.L));
1100 Destroy_CompCol_Matrix(&(context->slu.U));
1102 StatFree(&(context->slu.stat));
1104 __NL_DELETE_ARRAY(context->slu.perm_r);
1105 __NL_DELETE_ARRAY(context->slu.perm_c);
1107 context->slu.alloc_slu = NL_FALSE;
1110 void nlPrintMatrix(void) {
1111 __NLSparseMatrix* M = &(__nlCurrentContext->M);
1112 float *b = __nlCurrentContext->b;
1114 NLuint n = __nlCurrentContext->n;
1115 __NLRowColumn* Ri = NULL;
1116 float *value = malloc(sizeof(*value)*n);
1119 for(i=0; i<n; i++) {
1122 memset(value, 0.0, sizeof(*value)*n);
1123 for(jj=0; jj<Ri->size; jj++)
1124 value[Ri->coeff[jj].index] = Ri->coeff[jj].value;
1126 for (k = 0; k<n; k++)
1127 printf("%.3f ", value[k]);
1133 printf("%f ", b[i]);
1139 /************************************************************************/
1140 /* nlSolve() driver routine */
1142 NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) {
1143 NLboolean result = NL_TRUE;
1145 __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED);
1147 if (!__nlCurrentContext->solve_again)
1148 result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation);
1151 result = __nlInvert_SUPERLU(__nlCurrentContext);
1154 __nlVectorToVariables();
1157 __nlCurrentContext->solve_again = NL_TRUE;
1159 __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SYSTEM_SOLVED);
1166 NLboolean nlSolve() {
1167 return nlSolveAdvanced(NULL, NL_FALSE);