Libmv: Update Ceres to the latest upstream version
author Sergey Sharybin <sergey.vfx@gmail.com>
Mon, 29 Sep 2014 18:39:45 +0000 (00:39 +0600)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Mon, 29 Sep 2014 18:39:45 +0000 (00:39 +0600)
Mainly to let ITERATIVE_SCHUR use an explicit Schur Complement matrix.

17 files changed:
extern/libmv/third_party/ceres/ChangeLog
extern/libmv/third_party/ceres/include/ceres/solver.h
extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.cc
extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.h
extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.cc
extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.h
extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.cc
extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.h
extern/libmv/third_party/ceres/internal/ceres/callbacks.cc
extern/libmv/third_party/ceres/internal/ceres/linear_solver.cc
extern/libmv/third_party/ceres/internal/ceres/linear_solver.h
extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.h
extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.cc
extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.h
extern/libmv/third_party/ceres/internal/ceres/solver.cc
extern/libmv/third_party/ceres/internal/ceres/trust_region_preprocessor.cc

index cd168a44b356b480051fdfa9abe64422cf082bdd..ee67da5afb95191771076e56980c23616568e679 100644 (file)
@@ -1,3 +1,106 @@
+commit b44cfdef25f6bf0917a23b3fd65cce38aa6a3362
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date:   Mon Sep 29 07:53:54 2014 -0700
+
+    Let ITERATIVE_SCHUR use an explicit Schur Complement matrix.
+    
+    Up till now ITERATIVE_SCHUR evaluates matrix-vector products
+    between the Schur complement and a vector implicitly by exploiting
+    the algebraic expression for the Schur complement.
+    
+    The cost of this evaluation scales with the number of non-zeros
+    in the Jacobian.
+    
+    For small to medium sized problems there is a sweet spot where
+    computing the Schur complement is cheap enough that it is much
+    more efficient to explicitly compute it and use it for evaluating
+    the matrix-vector products.
+    
+    This change implements support for an explicit Schur complement
+    in ITERATIVE_SCHUR in combination with the SCHUR_JACOBI preconditioner.
+    
+    API wise a new bool Solver::Options::use_explicit_schur_complement
+    has been added.
+    
+    The implementation extends the SparseSchurComplementSolver to use
+    Conjugate Gradients.
+    
+    Example speedup:
+    
+    use_explicit_schur_complement = false
+    
+    Time (in seconds):
+    Preprocessor                            0.585
+    
+      Residual evaluation                   0.319
+      Jacobian evaluation                   1.590
+      Linear solver                        25.685
+    Minimizer                              27.990
+    
+    Postprocessor                           0.010
+    Total                                  28.585
+    
+    use_explicit_schur_complement = true
+    
+    Time (in seconds):
+    Preprocessor                            0.638
+    
+      Residual evaluation                   0.318
+      Jacobian evaluation                   1.507
+      Linear solver                         5.930
+    Minimizer                               8.144
+    
+    Postprocessor                           0.010
+    Total                                   8.791
+    
+    Which indicates an end-to-end speedup of more than 3x, with the linear
+    solver being sped up by > 4x.
+    
+    The idea to explore this optimization was inspired by the recent paper:
+    
+    Mining structure fragments for smart bundle adjustment
+    L. Carlone, P. Alcantarilla, H. Chiu, K. Zsolt, F. Dellaert
+    British Machine Vision Conference, 2014
+    
+    which uses a more complicated algorithm to compute parts of the
+    Schur complement to speed up the matrix-vector product.
+    
+    Change-Id: I95324af0ab351faa1600f5204039a1d2a64ae61d
+
+commit 4ad91490827f2ebebcc70d17e63ef653bf06fd0d
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date:   Wed Sep 24 23:54:18 2014 -0700
+
+    Simplify the Block Jacobi and Schur Jacobi preconditioners.
+    
+    1. Extend the implementation of BlockRandomAccessDiagonalMatrix
+    by adding Invert and RightMultiply methods.
+    
+    2. Simplify the implementation of the Schur Jacobi preconditioner
+    using these new methods.
+    
+    3. Replace the custom storage used inside Block Jacobi preconditioner
+    with BlockRandomAccessDiagonalMatrix and simplify its implementation
+    too.
+    
+    Change-Id: I9d4888b35f0f228c08244abbdda5298b3ce9c466
+
+commit 8f7be1036b853addc33224d97b92412b5a1281b6
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date:   Mon Sep 29 08:13:35 2014 -0700
+
+    Fix a formatting error in TrustRegionMinimizer logging.
+    
+    Change-Id: Iad1873c51eece46c3fdee1356d154367cfd7925e
+
+commit c99872d48e322662ea19efb9010a62b7432687ae
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date:   Wed Sep 24 21:30:02 2014 -0700
+
+    Add BlockRandomAccessSparseMatrix::SymmetricRightMultiply.
+    
+    Change-Id: Ib06a22a209b4c985ba218162dfb6bf46bd93169e
+
 commit d3ecd18625ba260e0d00912a305a448b566acc59
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Sep 23 10:12:42 2014 -0700
@@ -560,93 +663,3 @@ Date:   Mon Aug 4 22:45:53 2014 -0700
     Small changes from Jim Roseborough.
     
     Change-Id: Ic8b19ea5c5f4f8fd782eb4420b30514153087d18
-
-commit a521fc3afc11425b46992388a83ef07017d02ac9
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date:   Fri Aug 1 08:27:35 2014 -0700
-
-    Simplify, cleanup and instrument SchurComplementSolver.
-    
-    The instrumentation revealed that EIGEN_SPARSE can be upto
-    an order of magnitude slower than CX_SPARSE on some bundle
-    adjustment problems.
-    
-    The problem comes down to the quality of AMD ordering that
-    CXSparse/Eigen implements. It does particularly badly
-    on the Schur complement. In the CXSparse implementation
-    we got around this by considering the block sparsity structure
-    and computing the AMD ordering on it and lifting it to the
-    full matrix.
-    
-    This is currently not possible with the release version of
-    Eigen, as the support for using preordered/natural orderings
-    is in the master branch but has not been released yet.
-    
-    Change-Id: I25588d3e723e50606f327db5759f174f58439e29
-
-commit b43e73a03485f0fd0fe514e356ad8925731d3a81
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date:   Fri Aug 1 12:09:09 2014 -0700
-
-    Simplify the Eigen code in SparseNormalCholeskySolver.
-    
-    Simplifying some of the template handling, and remove the use
-    of SelfAdjointView as it is not needed. The solver itself takes
-    an argument for where the data is actually stored.
-    
-    The performance of SparseNormalCholesky with EIGEN_SPARSE
-    seems to be on par with CX_SPARSE.
-    
-    Change-Id: I69e22a144b447c052b6cbe59ef1aa33eae2dd9e3
-
-commit 031598295c6b2f061c171b9b2338919f41b7eb0b
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date:   Thu Jul 17 14:35:18 2014 -0700
-
-    Enable Eigen as sparse linear algebra library.
-    
-    SPARSE_NORMAL_CHOLESKY and SPARSE_SCHUR can now be used
-    with EIGEN_SPARSE as the backend.
-    
-    The performance is not as good as CXSparse. This needs to be
-    investigated. Is it because the quality of AMD ordering that
-    we are computing is not as good as the one for CXSparse? This
-    could be because we are working with the scalar matrix instead
-    of the block matrix.
-    
-    Also, the upper/lower triangular story is not completely clear.
-    Both of these issues will be benchmarked and tackled in the
-    near future.
-    
-    Also included in this change is a bunch of cleanup to the
-    SparseNormalCholeskySolver and SparseSchurComplementSolver
-    classes around the use of the of defines used to conditionally
-    compile out parts of the code.
-    
-    The system_test has been updated to test EIGEN_SPARSE also.
-    
-    Change-Id: I46a57e9c4c97782696879e0b15cfc7a93fe5496a
-
-commit 1b17145adf6aa0072db2989ad799e90313970ab3
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date:   Wed Jul 30 10:14:15 2014 -0700
-
-    Make canned loss functions more robust.
-    
-    The loss functions that ship with ceres can sometimes
-    generate a zero first derivative if the residual is too
-    large.
-    
-    In such cases Corrector fails with an ugly undebuggable
-    crash. This CL is the first in a series of fixes to
-    take care of this.
-    
-    We clamp the values of rho' from below by
-    numeric_limits<double>::min().
-    
-    Also included here is some minor cleanup where the constants
-    are treated as doubles rather than integers.
-    
-    Thanks to Pierre Moulon for reporting this problem.
-    
-    Change-Id: I3aaf375303ecc2659bbf6fb56a812e7dc3a41106
index 0af34cacef284b506aaf47d8ee12657ab4002b11..a5efa2a39151924e96162d991c77d59c45883d66 100644 (file)
@@ -118,6 +118,7 @@ class CERES_EXPORT Solver {
 #endif
 
       num_linear_solver_threads = 1;
+      use_explicit_schur_complement = false;
       use_postordering = false;
       dynamic_sparsity = false;
       min_linear_solver_iterations = 0;
@@ -496,6 +497,29 @@ class CERES_EXPORT Solver {
     // smaller than the group containing cameras.
     shared_ptr<ParameterBlockOrdering> linear_solver_ordering;
 
+    // Use an explicitly computed Schur complement matrix with
+    // ITERATIVE_SCHUR.
+    //
+    // By default this option is disabled and ITERATIVE_SCHUR
+    // evaluates matrix-vector products between the Schur
+    // complement and a vector implicitly by exploiting the algebraic
+    // expression for the Schur complement.
+    //
+    // The cost of this evaluation scales with the number of non-zeros
+    // in the Jacobian.
+    //
+    // For small to medium sized problems there is a sweet spot where
+    // computing the Schur complement is cheap enough that it is much
+    // more efficient to explicitly compute it and use it for evaluating
+    // the matrix-vector products.
+    //
+    // Enabling this option tells ITERATIVE_SCHUR to use an explicitly
+    // computed Schur complement.
+    //
+    // NOTE: This option can only be used with the SCHUR_JACOBI
+    // preconditioner.
+    bool use_explicit_schur_complement;
+
     // Sparse Cholesky factorization algorithms use a fill-reducing
     // ordering to permute the columns of the Jacobian matrix. There
     // are two ways of doing this.
index 19b749bfc39b9e5802318c2718b46fd8a5e2a66a..ea49f077e37b706367520446dd287ed0b2fe54d5 100644 (file)
@@ -30,9 +30,9 @@
 
 #include "ceres/block_jacobi_preconditioner.h"
 
-#include "Eigen/Cholesky"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
+#include "ceres/block_random_access_diagonal_matrix.h"
 #include "ceres/casts.h"
 #include "ceres/integral_types.h"
 #include "ceres/internal/eigen.h"
@@ -41,27 +41,14 @@ namespace ceres {
 namespace internal {
 
 BlockJacobiPreconditioner::BlockJacobiPreconditioner(
-    const BlockSparseMatrix& A)
-    : num_rows_(A.num_rows()),
-      block_structure_(*A.block_structure()) {
-  // Calculate the amount of storage needed.
-  int storage_needed = 0;
-  for (int c = 0; c < block_structure_.cols.size(); ++c) {
-    int size = block_structure_.cols[c].size;
-    storage_needed += size * size;
+    const BlockSparseMatrix& A) {
+  const CompressedRowBlockStructure* bs = A.block_structure();
+  vector<int> blocks(bs->cols.size());
+  for (int i = 0; i < blocks.size(); ++i) {
+    blocks[i] = bs->cols[i].size;
   }
 
-  // Size the offsets and storage.
-  blocks_.resize(block_structure_.cols.size());
-  block_storage_.resize(storage_needed);
-
-  // Put pointers to the storage in the offsets.
-  double* block_cursor = &block_storage_[0];
-  for (int c = 0; c < block_structure_.cols.size(); ++c) {
-    int size = block_structure_.cols[c].size;
-    blocks_[c] = block_cursor;
-    block_cursor += size * size;
-  }
+  m_.reset(new BlockRandomAccessDiagonalMatrix(blocks));
 }
 
 BlockJacobiPreconditioner::~BlockJacobiPreconditioner() {}
@@ -69,70 +56,54 @@ BlockJacobiPreconditioner::~BlockJacobiPreconditioner() {}
 bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
                                            const double* D) {
   const CompressedRowBlockStructure* bs = A.block_structure();
-
-  // Compute the diagonal blocks by block inner products.
-  std::fill(block_storage_.begin(), block_storage_.end(), 0.0);
   const double* values = A.values();
-  for (int r = 0; r < bs->rows.size(); ++r) {
-    const int row_block_size = bs->rows[r].block.size;
-    const vector<Cell>& cells = bs->rows[r].cells;
-    for (int c = 0; c < cells.size(); ++c) {
-      const int col_block_size = bs->cols[cells[c].block_id].size;
-      ConstMatrixRef m(values + cells[c].position,
+  m_->SetZero();
+  for (int i = 0; i < bs->rows.size(); ++i) {
+    const int row_block_size = bs->rows[i].block.size;
+    const vector<Cell>& cells = bs->rows[i].cells;
+    for (int j = 0; j < cells.size(); ++j) {
+      const int block_id = cells[j].block_id;
+      const int col_block_size = bs->cols[block_id].size;
+
+      int r, c, row_stride, col_stride;
+      CellInfo* cell_info = m_->GetCell(block_id, block_id,
+                                        &r, &c,
+                                        &row_stride, &col_stride);
+      MatrixRef m(cell_info->values, row_stride, col_stride);
+      ConstMatrixRef b(values + cells[j].position,
                        row_block_size,
                        col_block_size);
-
-      MatrixRef(blocks_[cells[c].block_id],
-                col_block_size,
-                col_block_size).noalias() += m.transpose() * m;
-
-      // TODO(keir): Figure out when the below expression is actually faster
-      // than doing the full rank update. The issue is that for smaller sizes,
-      // the rankUpdate() function is slower than the full product done above.
-      //
-      // On the typical bundling problems, the above product is ~5% faster.
-      //
-      //   MatrixRef(blocks_[cells[c].block_id],
-      //             col_block_size,
-      //             col_block_size)
-      //      .selfadjointView<Eigen::Upper>()
-      //      .rankUpdate(m);
-      //
+      m.block(r, c, col_block_size, col_block_size) += b.transpose() * b;
     }
   }
 
-  // Add the diagonal and invert each block.
-  for (int c = 0; c < bs->cols.size(); ++c) {
-    const int size = block_structure_.cols[c].size;
-    const int position = block_structure_.cols[c].position;
-    MatrixRef block(blocks_[c], size, size);
-
-    if (D != NULL) {
-      block.diagonal() +=
-          ConstVectorRef(D + position, size).array().square().matrix();
+  if (D != NULL) {
+    // Add the diagonal.
+    int position = 0;
+    for (int i = 0; i < bs->cols.size(); ++i) {
+      const int block_size = bs->cols[i].size;
+      int r, c, row_stride, col_stride;
+      CellInfo* cell_info = m_->GetCell(i, i,
+                                        &r, &c,
+                                        &row_stride, &col_stride);
+      MatrixRef m(cell_info->values, row_stride, col_stride);
+      m.block(r, c, block_size, block_size).diagonal() +=
+          ConstVectorRef(D + position, block_size).array().square().matrix();
+      position += block_size;
     }
-
-    block = block.selfadjointView<Eigen::Upper>()
-                 .llt()
-                 .solve(Matrix::Identity(size, size));
   }
+
+  m_->Invert();
   return true;
 }
 
 void BlockJacobiPreconditioner::RightMultiply(const double* x,
                                               double* y) const {
-  for (int c = 0; c < block_structure_.cols.size(); ++c) {
-    const int size = block_structure_.cols[c].size;
-    const int position = block_structure_.cols[c].position;
-    ConstMatrixRef D(blocks_[c], size, size);
-    ConstVectorRef x_block(x + position, size);
-    VectorRef y_block(y + position, size);
-    y_block += D * x_block;
-  }
+  m_->RightMultiply(x, y);
 }
 
 void BlockJacobiPreconditioner::LeftMultiply(const double* x, double* y) const {
-  RightMultiply(x, y);
+  m_->RightMultiply(x, y);
 }
 
 }  // namespace internal
index 3505a01248b431048b204fa66d5503c804946d4f..857929709250c447503fd78b57115a2a74d43717 100644 (file)
@@ -1,5 +1,5 @@
 // Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2012 Google Inc. All rights reserved.
+// Copyright 2014 Google Inc. All rights reserved.
 // http://code.google.com/p/ceres-solver/
 //
 // Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,8 @@
 #define CERES_INTERNAL_BLOCK_JACOBI_PRECONDITIONER_H_
 
 #include <vector>
+#include "ceres/block_random_access_diagonal_matrix.h"
+#include "ceres/internal/scoped_ptr.h"
 #include "ceres/preconditioner.h"
 
 namespace ceres {
@@ -39,7 +41,6 @@ namespace internal {
 
 class BlockSparseMatrix;
 struct CompressedRowBlockStructure;
-class LinearOperator;
 
 // A block Jacobi preconditioner. This is intended for use with
 // conjugate gradients, or other iterative symmetric solvers. To use
@@ -60,18 +61,14 @@ class BlockJacobiPreconditioner : public BlockSparseMatrixPreconditioner {
   // Preconditioner interface
   virtual void RightMultiply(const double* x, double* y) const;
   virtual void LeftMultiply(const double* x, double* y) const;
-  virtual int num_rows() const { return num_rows_; }
-  virtual int num_cols() const { return num_rows_; }
+  virtual int num_rows() const { return m_->num_rows(); }
+  virtual int num_cols() const { return m_->num_rows(); }
 
+  const BlockRandomAccessDiagonalMatrix& matrix() const { return *m_; }
  private:
   virtual bool UpdateImpl(const BlockSparseMatrix& A, const double* D);
 
-  std::vector<double*> blocks_;
-  std::vector<double> block_storage_;
-  int num_rows_;
-
-  // The block structure of the matrix this preconditioner is for (e.g. J).
-  const CompressedRowBlockStructure& block_structure_;
+  scoped_ptr<BlockRandomAccessDiagonalMatrix> m_;
 };
 
 }  // namespace internal
index d8bf4ef0cb51b4df9cfba74bca8100e462b994bd..b7ff33184cb61213b6c5acabfa990ffe9dd57221 100644 (file)
 #include <set>
 #include <utility>
 #include <vector>
+#include "Eigen/Dense"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/stl_util.h"
 #include "ceres/triplet_sparse_matrix.h"
 #include "ceres/types.h"
-#include "ceres/stl_util.h"
 #include "glog/logging.h"
 
 namespace ceres {
 namespace internal {
 
+// TODO(sameeragarwal): Drop the dependence on TripletSparseMatrix.
+
 BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix(
     const vector<int>& blocks)
     : blocks_(blocks) {
@@ -51,9 +54,9 @@ BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix(
   // rows/columns.
   int num_cols = 0;
   int num_nonzeros = 0;
-  vector<int> col_layout;
+  vector<int> block_positions;
   for (int i = 0; i < blocks_.size(); ++i) {
-    col_layout.push_back(num_cols);
+    block_positions.push_back(num_cols);
     num_cols += blocks_[i];
     num_nonzeros += blocks_[i] * blocks_[i];
   }
@@ -72,7 +75,7 @@ BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix(
   for (int i = 0; i < blocks_.size(); ++i) {
     const int block_size = blocks_[i];
     layout_.push_back(new CellInfo(values + pos));
-    const int block_begin = col_layout[i];
+    const int block_begin = block_positions[i];
     for (int r = 0; r < block_size; ++r) {
       for (int c = 0; c < block_size; ++c, ++pos) {
         rows[pos] = block_begin + r;
@@ -116,5 +119,34 @@ void BlockRandomAccessDiagonalMatrix::SetZero() {
   }
 }
 
+void BlockRandomAccessDiagonalMatrix::Invert() {
+  double* values = tsm_->mutable_values();
+  for (int i = 0; i < blocks_.size(); ++i) {
+    const int block_size = blocks_[i];
+    MatrixRef block(values, block_size, block_size);
+    block =
+        block
+        .selfadjointView<Eigen::Upper>()
+        .llt()
+        .solve(Matrix::Identity(block_size, block_size));
+    values += block_size * block_size;
+  }
+}
+
+void BlockRandomAccessDiagonalMatrix::RightMultiply(const double* x,
+                                                    double* y) const {
+  CHECK_NOTNULL(x);
+  CHECK_NOTNULL(y);
+  const double* values = tsm_->values();
+  for (int i = 0; i < blocks_.size(); ++i) {
+    const int block_size = blocks_[i];
+    ConstMatrixRef block(values, block_size, block_size);
+    VectorRef(y, block_size).noalias() += block * ConstVectorRef(x, block_size);
+    x += block_size;
+    y += block_size;
+    values += block_size * block_size;
+  }
+}
+
 }  // namespace internal
 }  // namespace ceres
index 6b3cff2338f26e6a56fb80282167431af006830f..ea9967817db0e89735a1e9f426779759201abfc7 100644 (file)
@@ -52,7 +52,7 @@ namespace internal {
 class BlockRandomAccessDiagonalMatrix : public BlockRandomAccessMatrix {
  public:
   // blocks is an array of block sizes.
-  BlockRandomAccessDiagonalMatrix(const vector<int>& blocks);
+  explicit BlockRandomAccessDiagonalMatrix(const vector<int>& blocks);
 
   // The destructor is not thread safe. It assumes that no one is
   // modifying any cells when the matrix is being destroyed.
@@ -70,11 +70,16 @@ class BlockRandomAccessDiagonalMatrix : public BlockRandomAccessMatrix {
   // locked.
   virtual void SetZero();
 
+  // Invert the matrix assuming that each block is positive definite.
+  void Invert();
+
+  // y += S * x
+  void RightMultiply(const double* x, double* y) const;
+
   // Since the matrix is square, num_rows() == num_cols().
   virtual int num_rows() const { return tsm_->num_rows(); }
   virtual int num_cols() const { return tsm_->num_cols(); }
 
-  // Access to the underlying matrix object.
   const TripletSparseMatrix* matrix() const { return tsm_.get(); }
   TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
 
index f789436364a4d2c1ee362a3e396c3b656b76a323..9da16a469d5608967e4f0098e52536abdb1ae75e 100644 (file)
@@ -54,9 +54,9 @@ BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix(
   // Build the row/column layout vector and count the number of scalar
   // rows/columns.
   int num_cols = 0;
-  vector<int> col_layout;
+  block_positions_.reserve(blocks_.size());
   for (int i = 0; i < blocks_.size(); ++i) {
-    col_layout.push_back(num_cols);
+    block_positions_.push_back(num_cols);
     num_cols += blocks_[i];
   }
 
@@ -105,8 +105,8 @@ BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix(
         layout_[IntPairToLong(row_block_id, col_block_id)]->values - values;
     for (int r = 0; r < row_block_size; ++r) {
       for (int c = 0; c < col_block_size; ++c, ++pos) {
-          rows[pos] = col_layout[row_block_id] + r;
-          cols[pos] = col_layout[col_block_id] + c;
+          rows[pos] = block_positions_[row_block_id] + r;
+          cols[pos] = block_positions_[col_block_id] + c;
           values[pos] = 1.0;
           DCHECK_LT(rows[pos], tsm_->num_rows());
           DCHECK_LT(cols[pos], tsm_->num_rows());
@@ -154,5 +154,37 @@ void BlockRandomAccessSparseMatrix::SetZero() {
   }
 }
 
+void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
+                                                           double* y) const {
+  for (LayoutType::const_iterator it = layout_.begin();
+       it != layout_.end();
+       ++it) {
+    int row;
+    int col;
+    LongToIntPair(it->first, &row, &col);
+
+    const int row_block_size = blocks_[row];
+    const int row_block_pos = block_positions_[row];
+    const int col_block_size = blocks_[col];
+    const int col_block_pos = block_positions_[col];
+
+    MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+        it->second->values, row_block_size, col_block_size,
+        x + col_block_pos,
+        y + row_block_pos);
+
+    // Since the matrix is symmetric, but only the upper triangular
+    // part is stored, if the block being accessed is not a diagonal
+    // block, then use the same block to do the corresponding lower
+    // triangular multiply also.
+    if (row != col) {
+      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+          it->second->values, row_block_size, col_block_size,
+          x + row_block_pos,
+          y + col_block_pos);
+    }
+  }
+}
+
 }  // namespace internal
 }  // namespace ceres
index 27b10296d6c8b9fbed5fa4e68e64e2a6b60ad53e..a4f89d8130f54ceec585fdf8b52999ac248a9a0d 100644 (file)
@@ -43,6 +43,7 @@
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
+#include "ceres/small_blas.h"
 
 namespace ceres {
 namespace internal {
@@ -75,6 +76,12 @@ class BlockRandomAccessSparseMatrix : public BlockRandomAccessMatrix {
   // locked.
   virtual void SetZero();
 
+  // Assume that the matrix is symmetric and only one half of the
+  // matrix is stored.
+  //
+  // y += S * x
+  void SymmetricRightMultiply(const double* x, double* y) const;
+
   // Since the matrix is square, num_rows() == num_cols().
   virtual int num_rows() const { return tsm_->num_rows(); }
   virtual int num_cols() const { return tsm_->num_cols(); }
@@ -84,13 +91,20 @@ class BlockRandomAccessSparseMatrix : public BlockRandomAccessMatrix {
   TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
 
  private:
-  int64 IntPairToLong(int a, int b) {
-    return a * kMaxRowBlocks + b;
+  int64 IntPairToLong(int row, int col) const {
+    return row * kMaxRowBlocks + col;
+  }
+
+  void LongToIntPair(int64 index, int* row, int* col) const {
+    *row = index / kMaxRowBlocks;
+    *col = index % kMaxRowBlocks;
   }
 
   const int64 kMaxRowBlocks;
+
   // row/column block sizes.
   const vector<int> blocks_;
+  vector<int> block_positions_;
 
   // A mapping from <row_block_id, col_block_id> to the position in
   // the values array of tsm_ where the block is stored.
index d2236335c53476608d0005ed526c92b1f2be476a..7d5ce2548e4069d1e18cb40dccf607d3717ab50b 100644 (file)
@@ -81,7 +81,7 @@ CallbackReturnType LoggingCallback::operator()(
       output = "iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time\n";
     }
     const char* kReportRowFormat =
-        "% 4d % 8e   % 3.2e   % 3.2e  % 3.2e  % 3.2e % 3.2e     % 3d   % 3.2e   % 3.2e";
+        "% 4d % 8e   % 3.2e   % 3.2e  % 3.2e  % 3.2e % 3.2e     % 4d   % 3.2e   % 3.2e";
     output += StringPrintf(kReportRowFormat,
                           summary.iteration,
                           summary.cost,
index d905ec2e1fdd362210aba9dd2de2a4beb6dcca03..e479b751363de7cf1c0a755d084f49983dcb219b 100644 (file)
@@ -96,7 +96,11 @@ LinearSolver* LinearSolver::Create(const LinearSolver::Options& options) {
       return new DenseSchurComplementSolver(options);
 
     case ITERATIVE_SCHUR:
-      return new IterativeSchurComplementSolver(options);
+      if (options.use_explicit_schur_complement) {
+        return new SparseSchurComplementSolver(options);
+      } else {
+        return new IterativeSchurComplementSolver(options);
+      }
 
     case DENSE_QR:
       return new DenseQRSolver(options);
index 58b9044b8f99f642066406f7086e5622a1f96eb2..5f59765f074478675049f2428e388affaad631f6 100644 (file)
@@ -99,6 +99,7 @@ class LinearSolver {
           sparse_linear_algebra_library_type(SUITE_SPARSE),
           use_postordering(false),
           dynamic_sparsity(false),
+          use_explicit_schur_complement(false),
           min_num_iterations(1),
           max_num_iterations(1),
           num_threads(1),
@@ -114,9 +115,10 @@ class LinearSolver {
     DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
     SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
 
-    // See solver.h for information about this flag.
+    // See solver.h for information about these flags.
     bool use_postordering;
     bool dynamic_sparsity;
+    bool use_explicit_schur_complement;
 
     // Number of internal iterations that the solver uses. This
     // parameter only makes sense for iterative solvers like CG.
index d2aa168c807063ffe684122a72f84d63b8c4cf47..33f812b8c964c4af4b40d70ffddea408230c9e56 100644 (file)
@@ -40,6 +40,7 @@
 #include "ceres/block_random_access_sparse_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
+#include "ceres/conjugate_gradients_solver.h"
 #include "ceres/cxsparse.h"
 #include "ceres/detect_structure.h"
 #include "ceres/internal/eigen.h"
 
 namespace ceres {
 namespace internal {
+namespace {
+
+class BlockRandomAccessSparseMatrixAdapter : public LinearOperator {
+  public:
+  explicit BlockRandomAccessSparseMatrixAdapter(
+      const BlockRandomAccessSparseMatrix& m)
+      : m_(m) {
+  }
+
+  virtual ~BlockRandomAccessSparseMatrixAdapter() {}
+
+  // y = y + Ax;
+  virtual void RightMultiply(const double* x, double* y) const {
+    m_.SymmetricRightMultiply(x, y);
+  }
+
+  // y = y + A'x;
+  virtual void LeftMultiply(const double* x, double* y) const {
+    m_.SymmetricRightMultiply(x, y);
+  }
+
+  virtual int num_rows() const { return m_.num_rows(); }
+  virtual int num_cols() const { return m_.num_rows(); }
+
+ private:
+  const BlockRandomAccessSparseMatrix& m_;
+};
+
+class BlockRandomAccessDiagonalMatrixAdapter : public LinearOperator {
+  public:
+  explicit BlockRandomAccessDiagonalMatrixAdapter(
+      const BlockRandomAccessDiagonalMatrix& m)
+      : m_(m) {
+  }
+
+  virtual ~BlockRandomAccessDiagonalMatrixAdapter() {}
+
+  // y = y + Ax;
+  virtual void RightMultiply(const double* x, double* y) const {
+    m_.RightMultiply(x, y);
+  }
+
+  // y = y + A'x;
+  virtual void LeftMultiply(const double* x, double* y) const {
+    m_.RightMultiply(x, y);
+  }
+
+  virtual int num_rows() const { return m_.num_rows(); }
+  virtual int num_cols() const { return m_.num_rows(); }
+
+ private:
+  const BlockRandomAccessDiagonalMatrix& m_;
+};
+
+} // namespace
 
 LinearSolver::Summary SchurComplementSolver::SolveImpl(
     BlockSparseMatrix* A,
@@ -82,7 +138,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
 
   double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
   const LinearSolver::Summary summary =
-      SolveReducedLinearSystem(reduced_solution);
+      SolveReducedLinearSystem(per_solve_options, reduced_solution);
   event_logger.AddEvent("ReducedSolve");
 
   if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
@@ -115,7 +171,9 @@ void DenseSchurComplementSolver::InitStorage(
 // BlockRandomAccessDenseMatrix. The linear system is solved using
 // Eigen's Cholesky factorization.
 LinearSolver::Summary
-DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
+DenseSchurComplementSolver::SolveReducedLinearSystem(
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* solution) {
   LinearSolver::Summary summary;
   summary.num_iterations = 0;
   summary.termination_type = LINEAR_SOLVER_SUCCESS;
@@ -249,14 +307,25 @@ void SparseSchurComplementSolver::InitStorage(
 }
 
 LinearSolver::Summary
-SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
+SparseSchurComplementSolver::SolveReducedLinearSystem(
+          const LinearSolver::PerSolveOptions& per_solve_options,
+          double* solution) {
+  if (options().type == ITERATIVE_SCHUR) {
+    CHECK(options().use_explicit_schur_complement);
+    return SolveReducedLinearSystemUsingConjugateGradients(per_solve_options,
+                                                           solution);
+  }
+
   switch (options().sparse_linear_algebra_library_type) {
     case SUITE_SPARSE:
-      return SolveReducedLinearSystemUsingSuiteSparse(solution);
+      return SolveReducedLinearSystemUsingSuiteSparse(per_solve_options,
+                                                      solution);
     case CX_SPARSE:
-      return SolveReducedLinearSystemUsingCXSparse(solution);
+      return SolveReducedLinearSystemUsingCXSparse(per_solve_options,
+                                                   solution);
     case EIGEN_SPARSE:
-      return SolveReducedLinearSystemUsingEigen(solution);
+      return SolveReducedLinearSystemUsingEigen(per_solve_options,
+                                                solution);
     default:
       LOG(FATAL) << "Unknown sparse linear algebra library : "
                  << options().sparse_linear_algebra_library_type;
@@ -270,6 +339,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
 // CHOLMOD's sparse cholesky factorization routines.
 LinearSolver::Summary
 SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
+    const LinearSolver::PerSolveOptions& per_solve_options,
     double* solution) {
 #ifdef CERES_NO_SUITESPARSE
 
@@ -377,6 +447,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
 // CXSparse's sparse cholesky factorization routines.
 LinearSolver::Summary
 SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
+    const LinearSolver::PerSolveOptions& per_solve_options,
     double* solution) {
 #ifdef CERES_NO_CXSPARSE
 
@@ -433,6 +504,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse(
 // Eigen's sparse cholesky factorization routines.
 LinearSolver::Summary
 SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen(
+    const LinearSolver::PerSolveOptions& per_solve_options,
     double* solution) {
 #ifndef CERES_USE_EIGEN_SPARSE
 
@@ -514,5 +586,79 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen(
 #endif  // CERES_USE_EIGEN_SPARSE
 }
 
+LinearSolver::Summary
+SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* solution) {
+  const int num_rows = lhs()->num_rows();
+  // The case where there are no f blocks, and the system is block
+  // diagonal.
+  if (num_rows == 0) {
+    LinearSolver::Summary summary;
+    summary.num_iterations = 0;
+    summary.termination_type = LINEAR_SOLVER_SUCCESS;
+    summary.message = "Success.";
+    return summary;
+  }
+
+  // Only SCHUR_JACOBI is supported over here right now.
+  CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI);
+
+  if (preconditioner_.get() == NULL) {
+    preconditioner_.reset(new BlockRandomAccessDiagonalMatrix(blocks_));
+  }
+
+  BlockRandomAccessSparseMatrix* sc =
+      down_cast<BlockRandomAccessSparseMatrix*>(
+          const_cast<BlockRandomAccessMatrix*>(lhs()));
+
+  // Extract block diagonal from the Schur complement to construct the
+  // schur_jacobi preconditioner.
+  for (int i = 0; i < blocks_.size(); ++i) {
+    const int block_size = blocks_[i];
+
+    int sc_r, sc_c, sc_row_stride, sc_col_stride;
+    CellInfo* sc_cell_info =
+        CHECK_NOTNULL(sc->GetCell(i, i,
+                                  &sc_r, &sc_c,
+                                  &sc_row_stride, &sc_col_stride));
+    MatrixRef sc_m(sc_cell_info->values, sc_row_stride, sc_col_stride);
+
+    int pre_r, pre_c, pre_row_stride, pre_col_stride;
+    CellInfo* pre_cell_info = CHECK_NOTNULL(
+        preconditioner_->GetCell(i, i,
+                                 &pre_r, &pre_c,
+                                 &pre_row_stride, &pre_col_stride));
+    MatrixRef pre_m(pre_cell_info->values, pre_row_stride, pre_col_stride);
+
+    pre_m.block(pre_r, pre_c, block_size, block_size) =
+        sc_m.block(sc_r, sc_c, block_size, block_size);
+  }
+  preconditioner_->Invert();
+
+  VectorRef(solution, num_rows).setZero();
+
+  scoped_ptr<LinearOperator> lhs_adapter(
+      new BlockRandomAccessSparseMatrixAdapter(*sc));
+  scoped_ptr<LinearOperator> preconditioner_adapter(
+      new BlockRandomAccessDiagonalMatrixAdapter(*preconditioner_));
+
+
+  LinearSolver::Options cg_options;
+  cg_options.min_num_iterations = options().min_num_iterations;
+  cg_options.max_num_iterations = options().max_num_iterations;
+  ConjugateGradientsSolver cg_solver(cg_options);
+
+  LinearSolver::PerSolveOptions cg_per_solve_options;
+  cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
+  cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
+  cg_per_solve_options.preconditioner = preconditioner_adapter.get();
+
+  return cg_solver.Solve(lhs_adapter.get(),
+                         rhs(),
+                         cg_per_solve_options,
+                         solution);
+}
+
 }  // namespace internal
 }  // namespace ceres
index 1b431dc534036fdfa6343822208d09d78a1b75f0..93d23e3d0121446a2392f414ed3e8766cc62736b 100644 (file)
@@ -46,6 +46,7 @@
 #include "ceres/suitesparse.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
+#include "ceres/block_random_access_diagonal_matrix.h"
 
 #ifdef CERES_USE_EIGEN_SPARSE
 #include "Eigen/SparseCholesky"
@@ -134,6 +135,7 @@ class SchurComplementSolver : public BlockSparseMatrixSolver {
  private:
   virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0;
   virtual LinearSolver::Summary SolveReducedLinearSystem(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution) = 0;
 
   LinearSolver::Options options_;
@@ -155,6 +157,7 @@ class DenseSchurComplementSolver : public SchurComplementSolver {
  private:
   virtual void InitStorage(const CompressedRowBlockStructure* bs);
   virtual LinearSolver::Summary SolveReducedLinearSystem(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution);
 
   CERES_DISALLOW_COPY_AND_ASSIGN(DenseSchurComplementSolver);
@@ -169,12 +172,19 @@ class SparseSchurComplementSolver : public SchurComplementSolver {
  private:
   virtual void InitStorage(const CompressedRowBlockStructure* bs);
   virtual LinearSolver::Summary SolveReducedLinearSystem(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution);
   LinearSolver::Summary SolveReducedLinearSystemUsingSuiteSparse(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution);
   LinearSolver::Summary SolveReducedLinearSystemUsingCXSparse(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution);
   LinearSolver::Summary SolveReducedLinearSystemUsingEigen(
+      const LinearSolver::PerSolveOptions& per_solve_options,
+      double* solution);
+  LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients(
+      const LinearSolver::PerSolveOptions& per_solve_options,
       double* solution);
 
   // Size of the blocks in the Schur complement.
@@ -206,6 +216,7 @@ class SparseSchurComplementSolver : public SchurComplementSolver {
   scoped_ptr<SimplicialLDLT> simplicial_ldlt_;
 #endif
 
+  scoped_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_;
   CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver);
 };
 
index 6dc9e89d3ccd91d010e0742b8a10c30c6d626b05..cbdb7086102560c084f8b21c4f47bfe95d7f37c3 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <utility>
 #include <vector>
-#include "Eigen/Dense"
 #include "ceres/block_random_access_diagonal_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/collections_port.h"
@@ -55,12 +54,12 @@ SchurJacobiPreconditioner::SchurJacobiPreconditioner(
       << "Jacobian should have atleast 1 f_block for "
       << "SCHUR_JACOBI preconditioner.";
 
-  block_size_.resize(num_blocks);
+  vector<int> blocks(num_blocks);
   for (int i = 0; i < num_blocks; ++i) {
-    block_size_[i] = bs.cols[i + options_.elimination_groups[0]].size;
+    blocks[i] = bs.cols[i + options_.elimination_groups[0]].size;
   }
 
-  m_.reset(new BlockRandomAccessDiagonalMatrix(block_size_));
+  m_.reset(new BlockRandomAccessDiagonalMatrix(blocks));
   InitEliminator(bs);
 }
 
@@ -99,32 +98,13 @@ bool SchurJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
 
   // Compute a subset of the entries of the Schur complement.
   eliminator_->Eliminate(&A, b.data(), D, m_.get(), rhs.data());
+  m_->Invert();
   return true;
 }
 
 void SchurJacobiPreconditioner::RightMultiply(const double* x,
                                               double* y) const {
-  CHECK_NOTNULL(x);
-  CHECK_NOTNULL(y);
-
-  const double* lhs_values =
-      down_cast<BlockRandomAccessDiagonalMatrix*>(m_.get())->matrix()->values();
-
-  // This loop can be easily multi-threaded with OpenMP if need be.
-  for (int i = 0; i < block_size_.size(); ++i) {
-    const int block_size = block_size_[i];
-    ConstMatrixRef block(lhs_values, block_size, block_size);
-
-    VectorRef(y, block_size) =
-        block
-        .selfadjointView<Eigen::Upper>()
-        .llt()
-        .solve(ConstVectorRef(x, block_size));
-
-    x += block_size;
-    y += block_size;
-    lhs_values += block_size * block_size;
-  }
+  m_->RightMultiply(x, y);
 }
 
 int SchurJacobiPreconditioner::num_rows() const {
index aecb015108370874926de22906b49c2d8efdfa06..8b528e250752b9e2b0359f614a9826cf07114498 100644 (file)
@@ -94,11 +94,7 @@ class SchurJacobiPreconditioner : public BlockSparseMatrixPreconditioner {
   virtual bool UpdateImpl(const BlockSparseMatrix& A, const double* D);
 
   Preconditioner::Options options_;
-
-  // Sizes of the blocks in the schur complement.
-  vector<int> block_size_;
   scoped_ptr<SchurEliminatorBase> eliminator_;
-
   // Preconditioner matrix.
   scoped_ptr<BlockRandomAccessDiagonalMatrix> m_;
   CERES_DISALLOW_COPY_AND_ASSIGN(SchurJacobiPreconditioner);
index f90045baac9f522ef19fe08c11d3822b81eeda26..e1c5ee30ba5d42e7711a330be4cc7947ed1b1c71 100644 (file)
@@ -120,6 +120,14 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
     OPTION_GT(max_consecutive_nonmonotonic_steps, 0);
   }
 
+  if (options.linear_solver_type == ITERATIVE_SCHUR &&
+      options.use_explicit_schur_complement &&
+      options.preconditioner_type != SCHUR_JACOBI) {
+    *error =  "use_explicit_schur_complement only supports "
+        "SCHUR_JACOBI as the preconditioner.";
+    return false;
+  }
+
   if (options.preconditioner_type == CLUSTER_JACOBI &&
       options.sparse_linear_algebra_library_type != SUITE_SPARSE) {
     *error =  "CLUSTER_JACOBI requires "
index ba3baa12c1f3acf0807b69c44367dccb84f3b11e..22ea1ec8c805dab662a29c2f6d2dbd2119c7e3fa 100644 (file)
@@ -184,6 +184,8 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
       options.sparse_linear_algebra_library_type;
   pp->linear_solver_options.dense_linear_algebra_library_type =
       options.dense_linear_algebra_library_type;
+  pp->linear_solver_options.use_explicit_schur_complement =
+      options.use_explicit_schur_complement;
   pp->linear_solver_options.dynamic_sparsity = options.dynamic_sparsity;
   pp->linear_solver_options.num_threads = options.num_linear_solver_threads;