Update Ceres to version 2.2.0

Brings many performance improvements and bug fixes.

Keyframe selection in bundle-adjustment.blend goes down from
4.5 seconds to 3.0 seconds on an M2 Ultra. The reconstruction itself
stays within 0.2 seconds.

The full change log can be found at http://ceres-solver.org/version_history.html

Pull Request: https://projects.blender.org/blender/blender/pulls/136896
Authored and committed by Sergey Sharybin on 2025-04-03 16:20:38 +02:00
parent 0eccadd452
commit 59991e54f5
371 changed files with 14807 additions and 7059 deletions

View File

@@ -17,11 +17,11 @@ set(INC_SYS
set(SRC
include/ceres/autodiff_cost_function.h
include/ceres/autodiff_first_order_function.h
include/ceres/autodiff_local_parameterization.h
include/ceres/autodiff_manifold.h
include/ceres/c_api.h
include/ceres/ceres.h
include/ceres/conditioned_cost_function.h
include/ceres/constants.h
include/ceres/context.h
include/ceres/cost_function.h
include/ceres/cost_function_to_functor.h
@@ -41,7 +41,6 @@ set(SRC
include/ceres/jet.h
include/ceres/jet_fwd.h
include/ceres/line_manifold.h
include/ceres/local_parameterization.h
include/ceres/loss_function.h
include/ceres/manifold.h
include/ceres/manifold_test_utils.h
@@ -66,6 +65,7 @@ set(SRC
include/ceres/internal/autodiff.h
include/ceres/internal/disable_warnings.h
include/ceres/internal/eigen.h
include/ceres/internal/euler_angles.h
include/ceres/internal/fixed_array.h
include/ceres/internal/householder_vector.h
include/ceres/internal/integer_sequence_algorithm.h
@@ -107,7 +107,6 @@ set(SRC
internal/ceres/canonical_views_clustering.cc
internal/ceres/canonical_views_clustering.h
internal/ceres/casts.h
internal/ceres/cgnr_linear_operator.h
internal/ceres/cgnr_solver.cc
internal/ceres/cgnr_solver.h
internal/ceres/compressed_col_sparse_matrix_utils.cc
@@ -118,7 +117,6 @@ set(SRC
internal/ceres/compressed_row_sparse_matrix.h
internal/ceres/concurrent_queue.h
internal/ceres/conditioned_cost_function.cc
internal/ceres/conjugate_gradients_solver.cc
internal/ceres/conjugate_gradients_solver.h
internal/ceres/context.cc
internal/ceres/context_impl.cc
@@ -131,9 +129,23 @@ set(SRC
internal/ceres/covariance.cc
internal/ceres/covariance_impl.cc
internal/ceres/covariance_impl.h
internal/ceres/cuda_block_sparse_crs_view.cc
internal/ceres/cuda_block_sparse_crs_view.h
internal/ceres/cuda_block_structure.cc
internal/ceres/cuda_block_structure.h
internal/ceres/cuda_buffer.h
internal/ceres/cxsparse.cc
internal/ceres/cxsparse.h
# internal/ceres/cuda_kernels_bsm_to_crs.cu.cc
# internal/ceres/cuda_kernels_bsm_to_crs.h
internal/ceres/cuda_kernels_utils.h
# internal/ceres/cuda_kernels_vector_ops.cu.cc
internal/ceres/cuda_kernels_vector_ops.h
internal/ceres/cuda_partitioned_block_sparse_crs_view.cc
internal/ceres/cuda_partitioned_block_sparse_crs_view.h
internal/ceres/cuda_sparse_matrix.cc
internal/ceres/cuda_sparse_matrix.h
internal/ceres/cuda_streamed_buffer.h
internal/ceres/cuda_vector.cc
internal/ceres/cuda_vector.h
internal/ceres/dense_cholesky.cc
internal/ceres/dense_cholesky.h
internal/ceres/dense_jacobian_writer.h
@@ -156,21 +168,25 @@ set(SRC
internal/ceres/dynamic_compressed_row_sparse_matrix.h
internal/ceres/dynamic_sparse_normal_cholesky_solver.cc
internal/ceres/dynamic_sparse_normal_cholesky_solver.h
internal/ceres/eigen_vector_ops.h
internal/ceres/eigensparse.cc
internal/ceres/eigensparse.h
internal/ceres/evaluation_callback.cc
internal/ceres/evaluator.cc
internal/ceres/evaluator.h
internal/ceres/execution_summary.h
internal/ceres/fake_bundle_adjustment_jacobian.cc
internal/ceres/fake_bundle_adjustment_jacobian.h
internal/ceres/file.cc
internal/ceres/file.h
internal/ceres/first_order_function.cc
internal/ceres/float_cxsparse.cc
internal/ceres/float_cxsparse.h
internal/ceres/float_suitesparse.cc
internal/ceres/float_suitesparse.h
internal/ceres/function_sample.cc
internal/ceres/function_sample.h
internal/ceres/generate_bundle_adjustment_tests.py
internal/ceres/generate_template_specializations.py
internal/ceres/generated
internal/ceres/gradient_checker.cc
internal/ceres/gradient_checking_cost_function.cc
internal/ceres/gradient_checking_cost_function.h
@@ -207,31 +223,34 @@ set(SRC
internal/ceres/linear_operator.h
internal/ceres/linear_solver.cc
internal/ceres/linear_solver.h
internal/ceres/local_parameterization.cc
internal/ceres/loss_function.cc
internal/ceres/low_rank_inverse_hessian.cc
internal/ceres/low_rank_inverse_hessian.h
internal/ceres/manifold.cc
internal/ceres/manifold_adapter.h
internal/ceres/map_util.h
internal/ceres/minimizer.cc
internal/ceres/minimizer.h
internal/ceres/normal_prior.cc
internal/ceres/pair_hash.h
internal/ceres/parallel_for.h
internal/ceres/parallel_for_cxx.cc
internal/ceres/parallel_for_nothreads.cc
internal/ceres/parallel_for_openmp.cc
internal/ceres/parallel_invoke.cc
internal/ceres/parallel_invoke.h
internal/ceres/parallel_utils.cc
internal/ceres/parallel_utils.h
internal/ceres/parallel_vector_ops.cc
internal/ceres/parallel_vector_ops.h
internal/ceres/parameter_block.h
internal/ceres/parameter_block_ordering.cc
internal/ceres/parameter_block_ordering.h
internal/ceres/partition_range_for_parallel_for.h
internal/ceres/partitioned_matrix_view.cc
internal/ceres/partitioned_matrix_view.h
internal/ceres/partitioned_matrix_view_impl.h
internal/ceres/partitioned_matrix_view_template.py
internal/ceres/polynomial.cc
internal/ceres/polynomial.h
internal/ceres/power_series_expansion_preconditioner.cc
internal/ceres/power_series_expansion_preconditioner.h
internal/ceres/preconditioner.cc
internal/ceres/preconditioner.h
internal/ceres/preprocessor.cc
@@ -242,7 +261,6 @@ set(SRC
internal/ceres/program.cc
internal/ceres/program.h
internal/ceres/program_evaluator.h
internal/ceres/random.h
internal/ceres/reorder_program.cc
internal/ceres/reorder_program.h
internal/ceres/residual_block.cc
@@ -254,6 +272,7 @@ set(SRC
internal/ceres/schur_eliminator.cc
internal/ceres/schur_eliminator.h
internal/ceres/schur_eliminator_impl.h
internal/ceres/schur_eliminator_template.py
internal/ceres/schur_jacobi_preconditioner.cc
internal/ceres/schur_jacobi_preconditioner.h
internal/ceres/schur_templates.cc

View File

@@ -1,5 +1,5 @@
Ceres Solver - A fast non-linear least squares minimizer
Copyright 2015 Google Inc. All rights reserved.
Copyright 2023 Google Inc. All rights reserved.
http://ceres-solver.org/
Redistribution and use in source and binary forms, with or without

View File

@@ -1,6 +1,6 @@
Project: Ceres Solver
URL: http://ceres-solver.org/
License: SPDX:BSD-3-Clause
Upstream version 2.1.0
Copyright: Copyright 2015 Google Inc. All rights reserved.
Upstream version 2.2.0
Copyright: Copyright 2023 Google Inc. All rights reserved.
Local modifications: None

View File

@@ -50,9 +50,6 @@
// If defined, Ceres was compiled without SuiteSparse.
#define CERES_NO_SUITESPARSE
// If defined, Ceres was compiled without CXSparse.
#define CERES_NO_CXSPARSE
// If defined, Ceres was compiled without CUDA.
#define CERES_NO_CUDA
@@ -61,7 +58,6 @@
#if defined(CERES_NO_SUITESPARSE) && \
defined(CERES_NO_ACCELERATE_SPARSE) && \
defined(CERES_NO_CXSPARSE) && \
!defined(CERES_USE_EIGEN_SPARSE) // NOLINT
// If defined Ceres was compiled without any sparse linear algebra support.
#define CERES_NO_SPARSE
@@ -74,12 +70,11 @@
// routines.
// #define CERES_NO_CUSTOM_BLAS
// If defined, Ceres was compiled without multithreading support.
// #define CERES_NO_THREADS
// If defined Ceres was compiled with OpenMP multithreading.
// #define CERES_USE_OPENMP
// If defined Ceres was compiled with modern C++ multithreading.
#define CERES_USE_CXX_THREADS
// If defined, Ceres was compiled with a version of SuiteSparse/CHOLMOD without
// the Partition module (requires METIS).
#define CERES_NO_CHOLMOD_PARTITION
// If defined Ceres was compiled without support for METIS via Eigen.
#define CERES_NO_EIGEN_METIS
// If defined, Ceres was compiled with a version MSVC >= 2005 which
// deprecated the standard POSIX names for bessel functions, replacing them
@@ -88,22 +83,6 @@
#define CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS
#endif
#if defined(CERES_USE_OPENMP)
#if defined(CERES_USE_CXX_THREADS) || defined(CERES_NO_THREADS)
#error CERES_USE_OPENMP is mutually exclusive to CERES_USE_CXX_THREADS and CERES_NO_THREADS
#endif
#elif defined(CERES_USE_CXX_THREADS)
#if defined(CERES_USE_OPENMP) || defined(CERES_NO_THREADS)
#error CERES_USE_CXX_THREADS is mutually exclusive to CERES_USE_OPENMP, CERES_USE_CXX_THREADS and CERES_NO_THREADS
#endif
#elif defined(CERES_NO_THREADS)
#if defined(CERES_USE_OPENMP) || defined(CERES_USE_CXX_THREADS)
#error CERES_NO_THREADS is mutually exclusive to CERES_USE_OPENMP and CERES_USE_CXX_THREADS
#endif
#else
# error One of CERES_USE_OPENMP, CERES_USE_CXX_THREADS or CERES_NO_THREADS must be defined.
#endif
// CERES_NO_SPARSE should be automatically defined by config.h if Ceres was
// compiled without any sparse back-end. Verify that it has not subsequently
// been inconsistently redefined.
@@ -111,9 +90,6 @@
#if !defined(CERES_NO_SUITESPARSE)
#error CERES_NO_SPARSE requires CERES_NO_SUITESPARSE.
#endif
#if !defined(CERES_NO_CXSPARSE)
#error CERES_NO_SPARSE requires CERES_NO_CXSPARSE
#endif
#if !defined(CERES_NO_ACCELERATE_SPARSE)
#error CERES_NO_SPARSE requires CERES_NO_ACCELERATE_SPARSE
#endif

View File

@@ -33,6 +33,7 @@
# define CERES_DEPRECATED_NO_EXPORT CERES_NO_EXPORT CERES_DEPRECATED
#endif
/* NOLINTNEXTLINE(readability-avoid-unconditional-preprocessor-if) */
#if 0 /* DEFINE_NO_DEPRECATED */
# ifndef CERES_NO_DEPRECATED
# define CERES_NO_DEPRECATED

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,158 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sergey.vfx@gmail.com (Sergey Sharybin)
// mierle@gmail.com (Keir Mierle)
// sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
#define CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
#include <memory>
#include "ceres/internal/autodiff.h"
#include "ceres/local_parameterization.h"
namespace ceres {
// WARNING: LocalParameterizations are deprecated, so is
// AutoDiffLocalParameterization. They will be removed from Ceres Solver in
// version 2.2.0. Please use Manifolds and AutoDiffManifold instead.
// Create local parameterization with Jacobians computed via automatic
// differentiation. For more information on local parameterizations,
// see include/ceres/local_parameterization.h
//
// To get an auto differentiated local parameterization, you must define
// a class with a templated operator() (a functor) that computes
//
// x_plus_delta = Plus(x, delta);
//
// the template parameter T. The autodiff framework substitutes appropriate
// "Jet" objects for T in order to compute the derivative when necessary, but
// this is hidden, and you should write the function as if T were a scalar type
// (e.g. a double-precision floating point number).
//
// The function must write the computed value in the last argument (the only
// non-const one) and return true to indicate success.
//
// For example, Quaternions have a three dimensional local
// parameterization. It's plus operation can be implemented as (taken
// from internal/ceres/auto_diff_local_parameterization_test.cc)
//
// struct QuaternionPlus {
// template<typename T>
// bool operator()(const T* x, const T* delta, T* x_plus_delta) const {
// const T squared_norm_delta =
// delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
//
// T q_delta[4];
// if (squared_norm_delta > T(0.0)) {
// T norm_delta = sqrt(squared_norm_delta);
// const T sin_delta_by_delta = sin(norm_delta) / norm_delta;
// q_delta[0] = cos(norm_delta);
// q_delta[1] = sin_delta_by_delta * delta[0];
// q_delta[2] = sin_delta_by_delta * delta[1];
// q_delta[3] = sin_delta_by_delta * delta[2];
// } else {
// // We do not just use q_delta = [1,0,0,0] here because that is a
// // constant and when used for automatic differentiation will
// // lead to a zero derivative. Instead we take a first order
// // approximation and evaluate it at zero.
// q_delta[0] = T(1.0);
// q_delta[1] = delta[0];
// q_delta[2] = delta[1];
// q_delta[3] = delta[2];
// }
//
// QuaternionProduct(q_delta, x, x_plus_delta);
// return true;
// }
// };
//
// Then given this struct, the auto differentiated local
// parameterization can now be constructed as
//
// LocalParameterization* local_parameterization =
// new AutoDiffLocalParameterization<QuaternionPlus, 4, 3>;
//                                                   |  |
//                        Global Size ---------------+  |
//                        Local Size -------------------+
//
// WARNING: Since the functor will get instantiated with different types for
// T, you must to convert from other numeric types to T before mixing
// computations with other variables of type T. In the example above, this is
// seen where instead of using k_ directly, k_ is wrapped with T(k_).
template <typename Functor, int kGlobalSize, int kLocalSize>
class CERES_DEPRECATED_WITH_MSG("Use AutoDiffManifold instead.")
AutoDiffLocalParameterization : public LocalParameterization {
public:
AutoDiffLocalParameterization() : functor_(new Functor()) {}
// Takes ownership of functor.
explicit AutoDiffLocalParameterization(Functor* functor)
: functor_(functor) {}
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override {
return (*functor_)(x, delta, x_plus_delta);
}
bool ComputeJacobian(const double* x, double* jacobian) const override {
double zero_delta[kLocalSize];
for (int i = 0; i < kLocalSize; ++i) {
zero_delta[i] = 0.0;
}
double x_plus_delta[kGlobalSize];
for (int i = 0; i < kGlobalSize; ++i) {
x_plus_delta[i] = 0.0;
}
const double* parameter_ptrs[2] = {x, zero_delta};
double* jacobian_ptrs[2] = {nullptr, jacobian};
return internal::AutoDifferentiate<
kGlobalSize,
internal::StaticParameterDims<kGlobalSize, kLocalSize>>(
*functor_, parameter_ptrs, kGlobalSize, x_plus_delta, jacobian_ptrs);
}
int GlobalSize() const override { return kGlobalSize; }
int LocalSize() const override { return kLocalSize; }
const Functor& functor() const { return *functor_; }
private:
std::unique_ptr<Functor> functor_;
};
} // namespace ceres
#endif // CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
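
Code still using this deleted header ports to the Manifold API. A minimal
sketch of the equivalent, assuming the quaternion conventions of
ceres/rotation.h (the functor name is illustrative, and Ceres 2.2 also ships
a ready-made ceres::QuaternionManifold for this exact case):

#include "ceres/autodiff_manifold.h"
#include "ceres/rotation.h"

// A Manifold functor supplies Plus and its inverse, Minus; autodiff
// produces the Jacobians of both.
struct QuaternionFunctor {
  template <typename T>
  bool Plus(const T* x, const T* delta, T* x_plus_delta) const {
    T q_delta[4];
    ceres::AngleAxisToQuaternion(delta, q_delta);  // exp map of the tangent
    ceres::QuaternionProduct(q_delta, x, x_plus_delta);
    return true;
  }

  template <typename T>
  bool Minus(const T* y, const T* x, T* y_minus_x) const {
    // For a unit quaternion the conjugate is the inverse.
    const T x_inverse[4] = {x[0], -x[1], -x[2], -x[3]};
    T q_delta[4];
    ceres::QuaternionProduct(y, x_inverse, q_delta);
    ceres::QuaternionToAngleAxis(q_delta, y_minus_x);  // log map
    return true;
  }
};

// Ambient size 4, tangent size 3, the same sizes as the deprecated
// AutoDiffLocalParameterization<QuaternionPlus, 4, 3> above.
ceres::Manifold* manifold =
    new ceres::AutoDiffManifold<QuaternionFunctor, 4, 3>;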

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
/* Ceres Solver - A fast non-linear least squares minimizer
* Copyright 2019 Google Inc. All rights reserved.
* Copyright 2023 Google Inc. All rights reserved.
* http://ceres-solver.org/
*
* Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,11 +34,12 @@
#ifndef CERES_PUBLIC_CERES_H_
#define CERES_PUBLIC_CERES_H_
// IWYU pragma: begin_exports
#include "ceres/autodiff_cost_function.h"
#include "ceres/autodiff_first_order_function.h"
#include "ceres/autodiff_local_parameterization.h"
#include "ceres/autodiff_manifold.h"
#include "ceres/conditioned_cost_function.h"
#include "ceres/constants.h"
#include "ceres/context.h"
#include "ceres/cost_function.h"
#include "ceres/cost_function_to_functor.h"
@@ -56,7 +57,6 @@
#include "ceres/iteration_callback.h"
#include "ceres/jet.h"
#include "ceres/line_manifold.h"
#include "ceres/local_parameterization.h"
#include "ceres/loss_function.h"
#include "ceres/manifold.h"
#include "ceres/numeric_diff_cost_function.h"
@@ -70,5 +70,6 @@
#include "ceres/sphere_manifold.h"
#include "ceres/types.h"
#include "ceres/version.h"
// IWYU pragma: end_exports
#endif // CERES_PUBLIC_CERES_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -26,24 +26,17 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
// Author: hellston20a@gmail.com (H S Helson Go)
#include "ceres/float_cxsparse.h"
#ifndef CERES_PUBLIC_CONSTANTS_H_
#define CERES_PUBLIC_CONSTANTS_H_
#include <memory>
// TODO(HSHelson): This header should no longer be necessary once C++20's
// <numbers> (e.g. std::numbers::pi_v) becomes usable
namespace ceres::constants {
template <typename T>
inline constexpr T pi_v(3.141592653589793238462643383279502884);
inline constexpr double pi = pi_v<double>;
} // namespace ceres::constants
#if !defined(CERES_NO_CXSPARSE)
namespace ceres {
namespace internal {
std::unique_ptr<SparseCholesky> FloatCXSparseCholesky::Create(
OrderingType ordering_type) {
LOG(FATAL) << "FloatCXSparseCholesky is not available.";
return {};
}
} // namespace internal
} // namespace ceres
#endif // !defined(CERES_NO_CXSPARSE)
#endif // CERES_PUBLIC_CONSTANTS_H_
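
A small usage sketch of the new constants header (variable names are
illustrative):

#include "ceres/constants.h"

// pi is a plain double; pi_v<T> provides it at other floating-point
// precisions.
constexpr double circumference = 2.0 * ceres::constants::pi * 1.5;
constexpr float pi_f = ceres::constants::pi_v<float>;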

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -120,7 +120,7 @@ class CostFunctionToFunctor {
if (parameter_block_sizes.size() == num_parameter_blocks) {
for (int block = 0; block < num_parameter_blocks; ++block) {
CHECK_EQ(ParameterDims::GetDim(block), parameter_block_sizes[block])
<< "Parameter block size missmatch. The specified static parameter "
<< "Parameter block size mismatch. The specified static parameter "
"block dimension does not match the one from the cost function.";
}
}

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -146,7 +146,7 @@ class CovarianceImpl;
// a. The rank deficiency arises from overparameterization. e.g., a
// four dimensional quaternion used to parameterize SO(3), which is
// a three dimensional manifold. In cases like this, the user should
// use an appropriate LocalParameterization/Manifold. Not only will this lead
// use an appropriate Manifold. Not only will this lead
// to better numerical behaviour of the Solver, it will also expose
// the rank deficiency to the Covariance object so that it can
// handle it correctly.
@@ -246,6 +246,20 @@ class CERES_EXPORT Covariance {
// used.
CovarianceAlgorithmType algorithm_type = SPARSE_QR;
// During QR factorization, if a column with Euclidean norm less
// than column_pivot_threshold is encountered it is treated as
// zero.
//
// If column_pivot_threshold < 0, then an automatic default value
// of 20*(m+n)*eps*sqrt(max(diag(J'*J))) is used. Here m and n are
// the number of rows and columns of the Jacobian (J)
// respectively.
//
// This is an advanced option meant for users who know enough
// about their Jacobian matrices that they can determine a value
// better than the default.
double column_pivot_threshold = -1;
// If the Jacobian matrix is near singular, then inverting J'J
// will result in unreliable results, e.g, if
//
@@ -266,7 +280,7 @@ class CERES_EXPORT Covariance {
//
// min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
//
// where min_sigma and max_sigma are the minimum and maxiumum
// where min_sigma and max_sigma are the minimum and maximum
// singular values of J respectively.
//
// 2. SPARSE_QR
@@ -394,11 +408,9 @@ class CERES_EXPORT Covariance {
const double* parameter_block2,
double* covariance_block) const;
// Return the block of the cross-covariance matrix corresponding to
// parameter_block1 and parameter_block2.
// Returns cross-covariance in the tangent space if a local
// parameterization is associated with either parameter block;
// else returns cross-covariance in the ambient space.
// Returns the block of the cross-covariance in the tangent space if a
// manifold is associated with either parameter block; else returns
// cross-covariance in the ambient space.
//
// Compute must be called before the first call to
// GetCovarianceBlock and the pair <parameter_block1,
@@ -430,9 +442,8 @@ class CERES_EXPORT Covariance {
double* covariance_matrix) const;
// Return the covariance matrix corresponding to parameter_blocks
// in the tangent space if a local parameterization is associated
// with one of the parameter blocks else returns the covariance
// matrix in the ambient space.
// in the tangent space if a manifold is associated with one of the parameter
// blocks else returns the covariance matrix in the ambient space.
//
// Compute must be called before calling GetCovarianceMatrix and all
// parameter_blocks must have been present in the vector
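
A sketch of how the new column_pivot_threshold option above might be set
(the threshold value here is arbitrary, purely for illustration):

#include "ceres/covariance.h"

void ConfigureCovariance() {
  ceres::Covariance::Options options;
  options.algorithm_type = ceres::SPARSE_QR;
  // Negative (the default) keeps the automatic 20*(m+n)*eps-based value;
  // override only with problem-specific knowledge of the Jacobian.
  options.column_pivot_threshold = 1e-12;
  ceres::Covariance covariance(options);
}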

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -368,7 +368,7 @@ class BiCubicInterpolator {
//
// f001, f002, f011, f012, ...
//
// A commonly occuring example are color images (RGB) where the three
// A commonly occurring example are color images (RGB) where the three
// channels are stored interleaved.
//
// If kInterleaved = false, then it is stored as

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -264,11 +264,23 @@ class DynamicAutoDiffCostFunction final : public DynamicCostFunction {
return true;
}
const CostFunctor& functor() const { return *functor_; }
private:
std::unique_ptr<CostFunctor> functor_;
Ownership ownership_;
};
// Deduction guide that allows the user to avoid explicitly specifying the
// template parameter of DynamicAutoDiffCostFunction. The class can instead be
// instantiated as follows:
//
// new DynamicAutoDiffCostFunction{new MyCostFunctor{}};
//
template <typename CostFunctor>
DynamicAutoDiffCostFunction(CostFunctor* functor, Ownership ownership)
-> DynamicAutoDiffCostFunction<CostFunctor>;
} // namespace ceres
#endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
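
A usage sketch of the new deduction guide (MyCostFunctor is the hypothetical
functor named in the comment above):

#include "ceres/dynamic_autodiff_cost_function.h"

struct MyCostFunctor {
  template <typename T>
  bool operator()(T const* const* parameters, T* residuals) const {
    residuals[0] = T(10.0) - parameters[0][0];
    return true;
  }
};

ceres::DynamicCostFunction* MakeCostFunction() {
  // The functor type is deduced; no explicit template argument needed.
  auto* cost_function =
      new ceres::DynamicAutoDiffCostFunction{new MyCostFunctor{}};
  cost_function->AddParameterBlock(1);
  cost_function->SetNumResiduals(1);
  return cost_function;
}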

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,7 @@ namespace ceres {
// cost_function.AddParameterBlock(5);
// cost_function.AddParameterBlock(10);
// cost_function.SetNumResiduals(21);
template <typename CostFunctor, NumericDiffMethodType method = CENTRAL>
template <typename CostFunctor, NumericDiffMethodType kMethod = CENTRAL>
class DynamicNumericDiffCostFunction final : public DynamicCostFunction {
public:
explicit DynamicNumericDiffCostFunction(
@@ -134,7 +134,7 @@ class DynamicNumericDiffCostFunction final : public DynamicCostFunction {
for (size_t block = 0; block < block_sizes.size(); ++block) {
if (jacobians[block] != nullptr &&
!NumericDiff<CostFunctor,
method,
kMethod,
ceres::DYNAMIC,
internal::DynamicParameterDims,
ceres::DYNAMIC,

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -66,8 +66,12 @@ class CERES_EXPORT EvaluationCallback {
// Called before Ceres requests residuals or jacobians for a given setting of
// the parameters. User parameters (the double* values provided to the cost
// functions) are fixed until the next call to PrepareForEvaluation(). If
// new_evaluation_point == true, then this is a new point that is different
// functions) are fixed until the next call to PrepareForEvaluation().
//
// If evaluate_jacobians == true, then the user provided CostFunctions will be
// asked to evaluate one or more of their Jacobians.
//
// If new_evaluation_point == true, then this is a new point that is different
// from the last evaluated point. Otherwise, it is the same point that was
// evaluated previously (either jacobian or residual) and the user can use
// cached results from previous evaluations.
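
A minimal sketch of a callback built on the clarified semantics (the
shared-state update is left abstract):

#include "ceres/evaluation_callback.h"

class MyEvaluationCallback final : public ceres::EvaluationCallback {
 public:
  void PrepareForEvaluation(bool evaluate_jacobians,
                            bool new_evaluation_point) override {
    if (new_evaluation_point) {
      // Parameters changed: refresh any expensive state shared by the
      // cost functions; otherwise previously cached values stay valid.
      RecomputeSharedState();
    }
    (void)evaluate_jacobians;  // could gate derivative-only setup
  }

 private:
  void RecomputeSharedState() { /* hypothetical */ }
};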

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
// Copyright 2007 Google Inc. All Rights Reserved.
// Copyright 2023 Google Inc. All Rights Reserved.
//
// Authors: wjr@google.com (William Rucklidge),
// keir@google.com (Keir Mierle),
@@ -44,7 +44,6 @@
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/internal/fixed_array.h"
#include "ceres/local_parameterization.h"
#include "ceres/manifold.h"
#include "glog/logging.h"
@@ -59,37 +58,15 @@ namespace ceres {
// ------------------------------------ < relative_precision
// max(J_actual(i, j), J_numeric(i, j))
//
// where J_actual(i, j) is the jacobian as computed by the supplied cost
// function (by the user) multiplied by the local parameterization Jacobian
// and J_numeric is the jacobian as computed by finite differences, multiplied
// by the local parameterization Jacobian as well.
// where J_actual(i, j) is the Jacobian as computed by the supplied cost
// function (by the user) multiplied by the manifold Jacobian and J_numeric is
// the Jacobian as computed by finite differences, multiplied by the manifold
// Jacobian as well.
//
// How to use: Fill in an array of pointers to parameter blocks for your
// CostFunction, and then call Probe(). Check that the return value is 'true'.
class CERES_EXPORT GradientChecker {
public:
// This constructor will not take ownership of the cost function or local
// parameterizations.
//
// function: The cost function to probe.
//
// local_parameterizations: A vector of local parameterizations, one for each
// parameter block. May be nullptr or contain nullptrs to indicate that the
// respective parameter does not have a local parameterization.
//
// options: Options to use for numerical differentiation.
//
// NOTE: This constructor is deprecated and will be removed in the next public
// release of Ceres Solver. Please transition to using the Manifold based
// version.
CERES_DEPRECATED_WITH_MSG(
"Local Parameterizations are deprecated. Use the constructor that uses "
"Manifolds instead.")
GradientChecker(
const CostFunction* function,
const std::vector<const LocalParameterization*>* local_parameterizations,
const NumericDiffOptions& options);
// This will not take ownership of the cost function or manifolds.
//
// function: The cost function to probe.
@@ -102,7 +79,6 @@ class CERES_EXPORT GradientChecker {
GradientChecker(const CostFunction* function,
const std::vector<const Manifold*>* manifolds,
const NumericDiffOptions& options);
~GradientChecker();
// Contains results from a call to Probe for later inspection.
struct CERES_EXPORT ProbeResults {
@@ -166,17 +142,6 @@ class CERES_EXPORT GradientChecker {
GradientChecker(const GradientChecker&) = delete;
void operator=(const GradientChecker&) = delete;
// This bool is used to determine whether the constructor with the
// LocalParameterizations is called or the one with Manifolds is called. If
// the former, then the vector of manifolds is a vector of ManifoldAdapter
// objects which we own and should be deleted. If the latter then they are
// real Manifold objects owned by the caller and will not be deleted.
//
// This bool is only needed during the LocalParameterization to Manifold
// transition, once this transition is complete the LocalParameterization
// based constructor and this bool will be removed.
const bool delete_manifolds_ = false;
std::vector<const Manifold*> manifolds_;
const CostFunction* function_;
std::unique_ptr<CostFunction> finite_diff_cost_function_;
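
The probing workflow described above, as a hedged sketch: assume
cost_function is an existing CostFunction with a single 4-parameter block
constrained to the unit quaternions.

#include <vector>
#include "ceres/gradient_checker.h"
#include "glog/logging.h"

bool CheckGradients(const ceres::CostFunction& cost_function) {
  ceres::QuaternionManifold manifold;
  std::vector<const ceres::Manifold*> manifolds = {&manifold};
  ceres::NumericDiffOptions numeric_diff_options;
  ceres::GradientChecker checker(
      &cost_function, &manifolds, numeric_diff_options);

  double parameters[4] = {1.0, 0.0, 0.0, 0.0};  // identity quaternion
  double* parameter_blocks[1] = {parameters};
  ceres::GradientChecker::ProbeResults results;
  if (!checker.Probe(parameter_blocks, 1e-9, &results)) {
    LOG(ERROR) << "Gradient check failed:\n" << results.error_log;
    return false;
  }
  return true;
}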

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,6 @@
#include "ceres/first_order_function.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/local_parameterization.h"
#include "ceres/manifold.h"
namespace ceres {
@@ -90,47 +89,19 @@ class FirstOrderFunction;
// };
//
// ceres::GradientProblem problem(new Rosenbrock());
//
// NOTE: We are currently in the process of transitioning from
// LocalParameterization to Manifolds in the Ceres API. During this period,
// GradientProblem will support using both Manifold and LocalParameterization
// objects interchangably. For methods in the API affected by this change, see
// their documentation below.
class CERES_EXPORT GradientProblem {
public:
// Takes ownership of the function.
explicit GradientProblem(FirstOrderFunction* function);
// Takes ownership of the function and the parameterization.
//
// NOTE: This constructor is deprecated and will be removed in the next public
// release of Ceres Solver. Please move to using the Manifold based
// constructor.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Please use the constructor that "
"uses Manifold instead.")
GradientProblem(FirstOrderFunction* function,
LocalParameterization* parameterization);
// Takes ownership of the function and the manifold.
GradientProblem(FirstOrderFunction* function, Manifold* manifold);
int NumParameters() const;
// Dimension of the manifold (and its tangent space).
//
// During the transition from LocalParameterization to Manifold, this method
// reports the LocalSize of the LocalParameterization or the TangentSize of
// the Manifold object associated with this problem.
int NumTangentParameters() const;
// Dimension of the manifold (and its tangent space).
//
// NOTE: This method is deprecated and will be removed in the next public
// release of Ceres Solver. Please move to using NumTangentParameters()
// instead.
int NumLocalParameters() const { return NumTangentParameters(); }
// This call is not thread safe.
bool Evaluate(const double* parameters, double* cost, double* gradient) const;
bool Plus(const double* x, const double* delta, double* x_plus_delta) const;
@@ -138,42 +109,11 @@ class CERES_EXPORT GradientProblem {
const FirstOrderFunction* function() const { return function_.get(); }
FirstOrderFunction* mutable_function() { return function_.get(); }
// NOTE: During the transition from LocalParameterization to Manifold we need
// to support both The LocalParameterization and Manifold based constructors.
//
// When the user uses the LocalParameterization, internally the solver will
// wrap it in a ManifoldAdapter object and return it when manifold or
// mutable_manifold are called.
//
// As a result this method will return a non-nullptr result if a Manifold or a
// LocalParameterization was used when constructing the GradientProblem.
const Manifold* manifold() const { return manifold_.get(); }
Manifold* mutable_manifold() { return manifold_.get(); }
// If the problem is constructed without a LocalParameterization or with a
// Manifold this method will return a nullptr.
//
// NOTE: This method is deprecated and will be removed in the next public
// release of Ceres Solver.
CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.")
const LocalParameterization* parameterization() const {
return parameterization_.get();
}
// If the problem is constructed without a LocalParameterization or with a
// Manifold this method will return a nullptr.
//
// NOTE: This method is deprecated and will be removed in the next public
// release of Ceres Solver.
CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.")
LocalParameterization* mutable_parameterization() {
return parameterization_.get();
}
private:
std::unique_ptr<FirstOrderFunction> function_;
CERES_DEPRECATED_WITH_MSG("")
std::unique_ptr<LocalParameterization> parameterization_;
std::unique_ptr<Manifold> manifold_;
std::unique_ptr<double[]> scratch_;
};
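
With the LocalParameterization overloads gone, construction uses Manifolds
only. A sketch (Rosenbrock is the FirstOrderFunction from the example
comment above; the Euclidean manifold choice is purely illustrative):

#include "ceres/gradient_problem.h"

// Unconstrained problem: the manifold argument can simply be omitted.
ceres::GradientProblem unconstrained(new Rosenbrock());

// Problem whose 2 parameters live on a (here trivial) manifold.
ceres::GradientProblem constrained(
    new Rosenbrock(), new ceres::EuclideanManifold<2>());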

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -305,10 +305,6 @@ class CERES_EXPORT GradientProblemSolver {
// Number of parameters in the problem.
int num_parameters = -1;
// Dimension of the tangent space of the problem.
CERES_DEPRECATED_WITH_MSG("Use num_tangent_parameters.")
int num_local_parameters = -1;
// Dimension of the tangent space of the problem.
int num_tangent_parameters = -1;

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2020 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/internal/fixed_array.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// StaticFixedArray selects the best array implementation based on template
// arguments. If the size is not known at compile-time, pass
@@ -91,7 +90,6 @@ struct ArraySelector<T, num_elements, max_num_elements_on_stack, false, false>
}
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_ARRAY_SELECTOR_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -164,8 +164,7 @@
#define CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK 20
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
// Extends src by a 1st order perturbation for every dimension and puts it in
// dst. The size of src is N. Since this is also used for perturbations in
@@ -359,7 +358,6 @@ inline bool AutoDifferentiate(const Functor& functor,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_AUTODIFF_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -0,0 +1,199 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#ifndef CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
#define CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
#include <type_traits>
namespace ceres {
namespace internal {
// The EulerSystem struct represents an Euler Angle Convention in compile time.
// It acts like a trait structure and is also used as a tag for dispatching
// Euler angle conversion function templates
//
// Internally, it implements the convention laid out in "Euler angle
// conversion", Ken Shoemake, Graphics Gems IV, where a choice of axis for the
// first rotation (out of 3) and 3 binary choices compactly specify all 24
// rotation conventions
//
// - InnerAxis: Axis for the first rotation. This is specified by struct tags
// axis::X, axis::Y, and axis::Z
//
// - Parity: Defines the parity of the axis permutation. The axis sequence has
// Even parity if the second axis of rotation is 'greater-than' the first axis
// of rotation according to the order X<Y<Z<X, otherwise it has Odd parity.
// This is specified by struct tags Even and Odd
//
// - AngleConvention: Defines whether Proper Euler Angles (originally defined
// by Euler, which has the last axis repeated, i.e. ZYZ, ZXZ, etc), or
// Tait-Bryan Angles (introduced by the nautical and aerospace fields, i.e.
// using ZYX for roll-pitch-yaw) are used. This is specified by struct Tags
// ProperEuler and TaitBryan.
//
// - FrameConvention: Defines whether the three rotations are in a global
// frame of reference (extrinsic) or in a body centred frame of reference
// (intrinsic). This is specified by struct tags Extrinsic and Intrinsic
namespace axis {
struct X : std::integral_constant<int, 0> {};
struct Y : std::integral_constant<int, 1> {};
struct Z : std::integral_constant<int, 2> {};
} // namespace axis
struct Even;
struct Odd;
struct ProperEuler;
struct TaitBryan;
struct Extrinsic;
struct Intrinsic;
template <typename InnerAxisType,
typename ParityType,
typename AngleConventionType,
typename FrameConventionType>
struct EulerSystem {
static constexpr bool kIsParityOdd = std::is_same_v<ParityType, Odd>;
static constexpr bool kIsProperEuler =
std::is_same_v<AngleConventionType, ProperEuler>;
static constexpr bool kIsIntrinsic =
std::is_same_v<FrameConventionType, Intrinsic>;
static constexpr int kAxes[3] = {
InnerAxisType::value,
(InnerAxisType::value + 1 + static_cast<int>(kIsParityOdd)) % 3,
(InnerAxisType::value + 2 - static_cast<int>(kIsParityOdd)) % 3};
};
} // namespace internal
// Define human readable aliases to the type of the tags
using ExtrinsicXYZ = internal::EulerSystem<internal::axis::X,
internal::Even,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicXYX = internal::EulerSystem<internal::axis::X,
internal::Even,
internal::ProperEuler,
internal::Extrinsic>;
using ExtrinsicXZY = internal::EulerSystem<internal::axis::X,
internal::Odd,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicXZX = internal::EulerSystem<internal::axis::X,
internal::Odd,
internal::ProperEuler,
internal::Extrinsic>;
using ExtrinsicYZX = internal::EulerSystem<internal::axis::Y,
internal::Even,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicYZY = internal::EulerSystem<internal::axis::Y,
internal::Even,
internal::ProperEuler,
internal::Extrinsic>;
using ExtrinsicYXZ = internal::EulerSystem<internal::axis::Y,
internal::Odd,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicYXY = internal::EulerSystem<internal::axis::Y,
internal::Odd,
internal::ProperEuler,
internal::Extrinsic>;
using ExtrinsicZXY = internal::EulerSystem<internal::axis::Z,
internal::Even,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicZXZ = internal::EulerSystem<internal::axis::Z,
internal::Even,
internal::ProperEuler,
internal::Extrinsic>;
using ExtrinsicZYX = internal::EulerSystem<internal::axis::Z,
internal::Odd,
internal::TaitBryan,
internal::Extrinsic>;
using ExtrinsicZYZ = internal::EulerSystem<internal::axis::Z,
internal::Odd,
internal::ProperEuler,
internal::Extrinsic>;
/* Rotating axes */
using IntrinsicZYX = internal::EulerSystem<internal::axis::X,
internal::Even,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicXYX = internal::EulerSystem<internal::axis::X,
internal::Even,
internal::ProperEuler,
internal::Intrinsic>;
using IntrinsicYZX = internal::EulerSystem<internal::axis::X,
internal::Odd,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicXZX = internal::EulerSystem<internal::axis::X,
internal::Odd,
internal::ProperEuler,
internal::Intrinsic>;
using IntrinsicXZY = internal::EulerSystem<internal::axis::Y,
internal::Even,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicYZY = internal::EulerSystem<internal::axis::Y,
internal::Even,
internal::ProperEuler,
internal::Intrinsic>;
using IntrinsicZXY = internal::EulerSystem<internal::axis::Y,
internal::Odd,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicYXY = internal::EulerSystem<internal::axis::Y,
internal::Odd,
internal::ProperEuler,
internal::Intrinsic>;
using IntrinsicYXZ = internal::EulerSystem<internal::axis::Z,
internal::Even,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicZXZ = internal::EulerSystem<internal::axis::Z,
internal::Even,
internal::ProperEuler,
internal::Intrinsic>;
using IntrinsicXYZ = internal::EulerSystem<internal::axis::Z,
internal::Odd,
internal::TaitBryan,
internal::Intrinsic>;
using IntrinsicZYZ = internal::EulerSystem<internal::axis::Z,
internal::Odd,
internal::ProperEuler,
internal::Intrinsic>;
} // namespace ceres
#endif // CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
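
Because everything above is constexpr, the conventions can be sanity-checked
at compile time; e.g., the roll-pitch-yaw convention resolves to the Z, Y, X
axis sequence (a sketch exercising only the public aliases):

#include "ceres/internal/euler_angles.h"

static_assert(ceres::ExtrinsicZYX::kAxes[0] == 2 &&  // Z first
              ceres::ExtrinsicZYX::kAxes[1] == 1 &&  // then Y
              ceres::ExtrinsicZYX::kAxes[2] == 0);   // then X
static_assert(!ceres::ExtrinsicZYX::kIsIntrinsic);
// Proper Euler conventions repeat the first axis last, e.g. ZXZ.
static_assert(ceres::ExtrinsicZXZ::kIsProperEuler);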

View File

@@ -41,8 +41,7 @@
#include "ceres/internal/memory.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
constexpr static auto kFixedArrayUseDefault = static_cast<size_t>(-1);
@@ -372,8 +371,8 @@ class FixedArray {
return std::addressof(ptr->array);
}
static_assert(sizeof(StorageElement) == sizeof(value_type), "");
static_assert(alignof(StorageElement) == alignof(value_type), "");
static_assert(sizeof(StorageElement) == sizeof(value_type));
static_assert(alignof(StorageElement) == alignof(value_type));
class NonEmptyInlinedStorage {
public:
@@ -461,7 +460,6 @@ template <typename T, size_t N, typename A>
constexpr typename FixedArray<T, N, A>::size_type
FixedArray<T, N, A>::inline_elements;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_FIXED_ARRAY_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,8 +34,7 @@
#include "Eigen/Core"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Algorithm 5.1.1 from 'Matrix Computations' by Golub et al. (Johns Hopkins
// Studies in Mathematical Sciences) but using the nth element of the input
@@ -90,7 +89,6 @@ typename Derived::PlainObject ApplyHouseholderVector(
return (y - v * (beta * (v.transpose() * y)));
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_HOUSEHOLDER_VECTOR_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,70 +40,7 @@
#include "ceres/jet_fwd.h"
namespace ceres {
namespace internal {
// Implementation of calculating the sum of an integer sequence.
// Recursively instantiate SumImpl and calculate the sum of the N first
// numbers. This reduces the number of instantiations and speeds up
// compilation.
//
// Examples:
// 1) integer_sequence<int, 5>:
// Value = 5
//
// 2) integer_sequence<int, 4, 2>:
// Value = 4 + 2 + SumImpl<integer_sequence<int>>::Value
// Value = 4 + 2 + 0
//
// 3) integer_sequence<int, 2, 1, 4>:
// Value = 2 + 1 + SumImpl<integer_sequence<int, 4>>::Value
// Value = 2 + 1 + 4
template <typename Seq>
struct SumImpl;
// Strip of and sum the first number.
template <typename T, T N, T... Ns>
struct SumImpl<std::integer_sequence<T, N, Ns...>> {
static constexpr T Value =
N + SumImpl<std::integer_sequence<T, Ns...>>::Value;
};
// Strip of and sum the first two numbers.
template <typename T, T N1, T N2, T... Ns>
struct SumImpl<std::integer_sequence<T, N1, N2, Ns...>> {
static constexpr T Value =
N1 + N2 + SumImpl<std::integer_sequence<T, Ns...>>::Value;
};
// Strip of and sum the first four numbers.
template <typename T, T N1, T N2, T N3, T N4, T... Ns>
struct SumImpl<std::integer_sequence<T, N1, N2, N3, N4, Ns...>> {
static constexpr T Value =
N1 + N2 + N3 + N4 + SumImpl<std::integer_sequence<T, Ns...>>::Value;
};
// Only one number is left. 'Value' is just that number ('recursion' ends).
template <typename T, T N>
struct SumImpl<std::integer_sequence<T, N>> {
static constexpr T Value = N;
};
// No number is left. 'Value' is the identity element (for sum this is zero).
template <typename T>
struct SumImpl<std::integer_sequence<T>> {
static constexpr T Value = T(0);
};
// Calculate the sum of an integer sequence. The resulting sum will be stored in
// 'Value'.
template <typename Seq>
class Sum {
using T = typename Seq::value_type;
public:
static constexpr T Value = SumImpl<Seq>::Value;
};
namespace ceres::internal {
// Implementation of calculating an exclusive scan (exclusive prefix sum) of an
// integer sequence. Exclusive means that the i-th input element is not included
@@ -232,40 +169,11 @@ struct RemoveValue
template <typename Sequence, typename Sequence::value_type ValueToRemove>
using RemoveValue_t = typename RemoveValue<Sequence, ValueToRemove>::type;
// Determines whether the values of an integer sequence are all the same.
// Returns true if all elements of Values are equal to HeadValue.
//
// The integer sequence must contain at least one value. The predicate is
// undefined for empty sequences. The evaluation result of the predicate for a
// sequence containing only one value is defined to be true.
template <typename... Sequence>
struct AreAllEqual;
// The predicate result for a sequence containing one element is defined to be
// true.
template <typename T, T Value>
struct AreAllEqual<std::integer_sequence<T, Value>> : std::true_type {};
// Recursion end.
template <typename T, T Value1, T Value2>
struct AreAllEqual<std::integer_sequence<T, Value1, Value2>>
: std::integral_constant<bool, Value1 == Value2> {};
// Recursion for sequences containing at least two elements.
template <typename T, T Value1, T Value2, T... Values>
// clang-format off
struct AreAllEqual<std::integer_sequence<T, Value1, Value2, Values...> >
: std::integral_constant
<
bool,
AreAllEqual<std::integer_sequence<T, Value1, Value2> >::value &&
AreAllEqual<std::integer_sequence<T, Value2, Values...> >::value
>
// clang-format on
{};
// Convenience variable template for AreAllEqual.
template <class Sequence>
constexpr bool AreAllEqual_v = AreAllEqual<Sequence>::value;
// Returns true if Values is empty.
template <typename T, T HeadValue, T... Values>
inline constexpr bool AreAllEqual_v = ((HeadValue == Values) && ...);
// Predicate determining whether an integer sequence is either empty or all
// values are equal.
@@ -279,13 +187,13 @@ struct IsEmptyOrAreAllEqual<std::integer_sequence<T>> : std::true_type {};
// General case for sequences containing at least one value.
template <typename T, T HeadValue, T... Values>
struct IsEmptyOrAreAllEqual<std::integer_sequence<T, HeadValue, Values...>>
: AreAllEqual<std::integer_sequence<T, HeadValue, Values...>> {};
: std::integral_constant<bool, AreAllEqual_v<T, HeadValue, Values...>> {};
// Convenience variable template for IsEmptyOrAreAllEqual.
template <class Sequence>
constexpr bool IsEmptyOrAreAllEqual_v = IsEmptyOrAreAllEqual<Sequence>::value;
inline constexpr bool IsEmptyOrAreAllEqual_v =
IsEmptyOrAreAllEqual<Sequence>::value;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_INTEGER_SEQUENCE_ALGORITHM_H_
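
The removed recursive SumImpl/AreAllEqual machinery collapses into fold
expressions with identical behaviour; a compile-time sketch:

#include "ceres/internal/integer_sequence_algorithm.h"

// All values equal to the head value -> true.
static_assert(ceres::internal::AreAllEqual_v<int, 3, 3, 3>);
// Any mismatch -> false.
static_assert(!ceres::internal::AreAllEqual_v<int, 3, 3, 4>);
// A lone head value: the fold over && is empty, hence true.
static_assert(ceres::internal::AreAllEqual_v<int, 7>);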

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,17 +42,6 @@
namespace ceres {
namespace internal {
// Predicate that determines whether T is a Jet.
template <typename T, typename E = void>
struct IsJet : std::false_type {};
template <typename T, int N>
struct IsJet<Jet<T, N>> : std::true_type {};
// Convenience variable template for IsJet.
template <typename T>
constexpr bool IsJet_v = IsJet<T>::value;
// Predicate that determines whether any of the Types is a Jet.
template <typename... Types>
struct AreAnyJet : std::false_type {};
@@ -65,7 +54,7 @@ struct AreAnyJet<Jet<T, N>, Types...> : std::true_type {};
// Convenience variable template for AreAnyJet.
template <typename... Types>
constexpr bool AreAnyJet_v = AreAnyJet<Types...>::value;
inline constexpr bool AreAnyJet_v = AreAnyJet<Types...>::value;
// Extracts the underlying floating-point from a type T.
template <typename T, typename E = void>
@@ -84,27 +73,8 @@ using UnderlyingScalar_t = typename UnderlyingScalar<T>::type;
//
// Specifically, the predicate applies std::is_same recursively to pairs of
// Types in the pack.
//
// The predicate is defined only for template packs containing at least two
// types.
template <typename T1, typename T2, typename... Types>
// clang-format off
struct AreAllSame : std::integral_constant
<
bool,
AreAllSame<T1, T2>::value &&
AreAllSame<T2, Types...>::value
>
// clang-format on
{};
// AreAllSame pairwise test.
template <typename T1, typename T2>
struct AreAllSame<T1, T2> : std::is_same<T1, T2> {};
// Convenience variable template for AreAllSame.
template <typename... Types>
constexpr bool AreAllSame_v = AreAllSame<Types...>::value;
template <typename T1, typename... Types>
inline constexpr bool AreAllSame_v = (std::is_same<T1, Types>::value && ...);
// Determines the rank of a type. This allows to ensure that types passed as
// arguments are compatible to each other. The rank of Jet is determined by the
@@ -124,7 +94,7 @@ struct Rank<Jet<T, N>> : std::integral_constant<int, N> {};
// Convenience variable template for Rank.
template <typename T>
constexpr int Rank_v = Rank<T>::value;
inline constexpr int Rank_v = Rank<T>::value;
// Constructs an integer sequence of ranks for each of the Types in the pack.
template <typename... Types>
@@ -186,7 +156,8 @@ struct CompatibleJetOperands<> : std::false_type {};
// This trait is a candidate for a concept definition once C++20 features can
// be used.
template <typename... Types>
constexpr bool CompatibleJetOperands_v = CompatibleJetOperands<Types...>::value;
inline constexpr bool CompatibleJetOperands_v =
CompatibleJetOperands<Types...>::value;
// Type trait ensuring at least one of the types is a Jet,
// the underlying scalar types are compatible among each other and Jet
@@ -216,7 +187,8 @@ struct PromotableJetOperands : std::integral_constant
// This trait is a candidate for a concept definition once C++20 features can
// be used.
template <typename... Types>
constexpr bool PromotableJetOperands_v = PromotableJetOperands<Types...>::value;
inline constexpr bool PromotableJetOperands_v =
PromotableJetOperands<Types...>::value;
} // namespace ceres
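The fold expression replaces the recursive pairwise AreAllSame specializations with a single pack expansion. A minimal standalone sketch of the equivalent pattern (the name here is hypothetical):

#include <type_traits>

// True iff every type in Types is the same as T1; an empty pack is
// trivially true.
template <typename T1, typename... Types>
inline constexpr bool AllSameAs_v = (std::is_same<T1, Types>::value && ...);

static_assert(AllSameAs_v<double, double, double>);
static_assert(!AllSameAs_v<double, float, double>);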

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2020 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -40,8 +40,7 @@
} while (false)
#endif // CERES_HAVE_EXCEPTIONS
namespace ceres {
namespace internal {
namespace ceres::internal {
template <typename Allocator, typename Iterator, typename... Args>
void ConstructRange(Allocator& alloc,
@@ -84,7 +83,6 @@ void CopyRange(Allocator& alloc,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_MEMORY_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// This is split from the main class because C++ doesn't allow partial template
// specializations for member functions. The alternative is to repeat the main
@@ -502,7 +501,6 @@ struct EvaluateJacobianForParameterBlocks<ParameterDims,
}
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_NUMERIC_DIFF_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,22 +36,7 @@
#include "ceres/internal/integer_sequence_algorithm.h"
namespace ceres {
namespace internal {
// Checks, whether the given parameter block sizes are valid. Valid means every
// dimension is bigger than zero.
constexpr bool IsValidParameterDimensionSequence(std::integer_sequence<int>) {
return true;
}
template <int N, int... Ts>
constexpr bool IsValidParameterDimensionSequence(
std::integer_sequence<int, N, Ts...>) {
return (N <= 0) ? false
: IsValidParameterDimensionSequence(
std::integer_sequence<int, Ts...>());
}
namespace ceres::internal {
// Helper class that represents the parameter dimensions. The parameter
// dimensions are either dynamic or the sizes are known at compile time. It is
@@ -70,8 +55,7 @@ class ParameterDims {
// The parameter dimensions are only valid if all parameter block dimensions
// are greater than zero.
static constexpr bool kIsValid =
IsValidParameterDimensionSequence(Parameters());
static constexpr bool kIsValid = ((Ns > 0) && ...);
static_assert(kIsValid,
"Invalid parameter block dimension detected. Each parameter "
"block dimension must be bigger than zero.");
@@ -81,8 +65,7 @@ class ParameterDims {
static_assert(kIsDynamic || kNumParameterBlocks > 0,
"At least one parameter block must be specified.");
static constexpr int kNumParameters =
Sum<std::integer_sequence<int, Ns...>>::Value;
static constexpr int kNumParameters = (Ns + ... + 0);
static constexpr int GetDim(int dim) { return params_[dim]; }
@@ -118,7 +101,6 @@ template <int... Ns>
using StaticParameterDims = ParameterDims<false, Ns...>;
using DynamicParameterDims = ParameterDims<true>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_PARAMETER_DIMS_H_
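Both recursive helpers collapse into C++17 fold expressions. A minimal standalone sketch of the two patterns used above (the struct name is hypothetical):

template <int... Ns>
struct Dims {
  static constexpr bool kIsValid = ((Ns > 0) && ...);    // all positive
  static constexpr int kNumParameters = (Ns + ... + 0);  // sum, 0 if empty
};

static_assert(Dims<4, 3, 3>::kIsValid);
static_assert(Dims<4, 3, 3>::kNumParameters == 10);
static_assert(!Dims<4, 0>::kIsValid);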

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,14 +47,6 @@
#define CERES_GET_FLAG(X) X
#endif
// Indicates whether C++17 is currently active
#ifndef CERES_HAS_CPP17
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#define CERES_HAS_CPP17
#endif // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >=
// 201703L)
#endif // !defined(CERES_HAS_CPP17)
// Indicates whether C++20 is currently active
#ifndef CERES_HAS_CPP20
#if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
@@ -85,4 +77,15 @@
//
#define CERES_PREVENT_MACRO_SUBSTITUTION // Yes, it's empty
// CERES_DISABLE_DEPRECATED_WARNING and CERES_RESTORE_DEPRECATED_WARNING allow
// to temporarily disable deprecation warnings
#if defined(_MSC_VER)
#define CERES_DISABLE_DEPRECATED_WARNING \
_Pragma("warning(push)") _Pragma("warning(disable : 4996)")
#define CERES_RESTORE_DEPRECATED_WARNING _Pragma("warning(pop)")
#else // defined(_MSC_VER)
#define CERES_DISABLE_DEPRECATED_WARNING
#define CERES_RESTORE_DEPRECATED_WARNING
#endif // defined(_MSC_VER)
#endif // CERES_PUBLIC_INTERNAL_PORT_H_
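A minimal sketch of how the new macro pair is intended to wrap a deprecated call (the function here is hypothetical; on non-MSVC compilers both macros expand to nothing):

[[deprecated("use NewApi() instead")]] void OldApi();

inline void CallWithoutWarning() {
  CERES_DISABLE_DEPRECATED_WARNING
  OldApi();  // suppresses MSVC C4996 for this call
  CERES_RESTORE_DEPRECATED_WARNING
}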

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,7 @@
#ifndef CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_
#define CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_
#include "ceres/constants.h"
#include "ceres/internal/householder_vector.h"
// This module contains functions to compute the SphereManifold plus and minus
@@ -58,26 +59,23 @@
// used in order to allow also Eigen::Ref and Eigen block expressions to
// be passed to the function.
namespace ceres {
namespace internal {
namespace ceres::internal {
template <typename VT, typename XT, typename DeltaT, typename XPlusDeltaT>
inline void ComputeSphereManifoldPlus(const VT& v,
double beta,
const XT& x,
const DeltaT& delta,
double norm_delta,
const double norm_delta,
XPlusDeltaT* x_plus_delta) {
constexpr int AmbientDim = VT::RowsAtCompileTime;
// Map the delta from the minimum representation to the over parameterized
// homogeneous vector. See B.2 p.25 equation (106) - (107) for more details.
const double norm_delta_div_2 = 0.5 * norm_delta;
const double sin_delta_by_delta =
std::sin(norm_delta_div_2) / norm_delta_div_2;
const double sin_delta_by_delta = std::sin(norm_delta) / norm_delta;
Eigen::Matrix<double, AmbientDim, 1> y(v.size());
y << 0.5 * sin_delta_by_delta * delta, std::cos(norm_delta_div_2);
y << sin_delta_by_delta * delta, std::cos(norm_delta);
// Apply the delta update to remain on the sphere.
*x_plus_delta = x.norm() * ApplyHouseholderVector(y, v, beta);
@@ -99,11 +97,11 @@ inline void ComputeSphereManifoldPlusJacobian(const VT& x,
// have trouble deducing the type of v automatically.
ComputeHouseholderVector<VT, double, AmbientSpaceDim>(x, &v, &beta);
// The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the
// The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the
// Householder matrix (H = I - beta * v * v').
for (int i = 0; i < tangent_size; ++i) {
(*jacobian).col(i) = -0.5 * beta * v(i) * v;
(*jacobian)(i, i) += 0.5;
(*jacobian).col(i) = -beta * v(i) * v;
(*jacobian)(i, i) += 1.0;
}
(*jacobian) *= x.norm();
}
@@ -116,18 +114,19 @@ inline void ComputeSphereManifoldMinus(
AmbientSpaceDim == Eigen::Dynamic ? Eigen::Dynamic : AmbientSpaceDim - 1;
using AmbientVector = Eigen::Matrix<double, AmbientSpaceDim, 1>;
const int tanget_size = v.size() - 1;
const int tangent_size = v.size() - 1;
const AmbientVector hy = ApplyHouseholderVector(y, v, beta) / x.norm();
// Calculate y - x. See B.2 p.25 equation (108).
double y_last = hy[tanget_size];
double hy_norm = hy.template head<TangentSpaceDim>(tanget_size).norm();
const double y_last = hy[tangent_size];
const double hy_norm = hy.template head<TangentSpaceDim>(tangent_size).norm();
if (hy_norm == 0.0) {
y_minus_x->setZero();
y_minus_x->data()[tangent_size - 1] = y_last >= 0 ? 0.0 : constants::pi;
} else {
*y_minus_x = 2.0 * std::atan2(hy_norm, y_last) / hy_norm *
hy.template head<TangentSpaceDim>(tanget_size);
*y_minus_x = std::atan2(hy_norm, y_last) / hy_norm *
hy.template head<TangentSpaceDim>(tangent_size);
}
}
@@ -147,16 +146,18 @@ inline void ComputeSphereManifoldMinusJacobian(const VT& x,
// have trouble deducing the type of v automatically.
ComputeHouseholderVector<VT, double, AmbientSpaceDim>(x, &v, &beta);
// The Jacobian is equal to J = 2.0 * H.leftCols(size_ - 1) where H is the
// The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the
// Householder matrix (H = I - beta * v * v').
for (int i = 0; i < tangent_size; ++i) {
(*jacobian).row(i) = -2.0 * beta * v(i) * v;
(*jacobian)(i, i) += 2.0;
// NOTE: The transpose is used for correctness (the product is expected to
// be a row vector), although here there seems to be no difference between
// transposing or not for Eigen (possibly a compile-time auto fix).
(*jacobian).row(i) = -beta * v(i) * v.transpose();
(*jacobian)(i, i) += 1.0;
}
(*jacobian) /= x.norm();
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif
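The change drops the half-angle convention: the tangent vector now maps through sin(|delta|)/|delta| and cos(|delta|) directly, and the Plus/Minus Jacobians lose their 0.5/2.0 scaling accordingly. A minimal standalone sketch of the updated mapping for a 2-D tangent space (assuming Eigen; the zero-delta guard is added here for illustration):

#include <cmath>
#include <Eigen/Core>

Eigen::Vector3d SphereManifoldY(const Eigen::Vector2d& delta) {
  const double norm_delta = delta.norm();
  // Guard the removable singularity at |delta| == 0, where sinc(0) == 1.
  const double sinc_delta =
      norm_delta == 0.0 ? 1.0 : std::sin(norm_delta) / norm_delta;
  Eigen::Vector3d y;
  y << sinc_delta * delta, std::cos(norm_delta);
  return y;
}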

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/cost_function.h"
#include "ceres/internal/parameter_dims.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// For fixed size cost functors
template <typename Functor, typename T, int... Indices>
@@ -50,7 +49,7 @@ inline bool VariadicEvaluateImpl(const Functor& functor,
T* output,
std::false_type /*is_dynamic*/,
std::integer_sequence<int, Indices...>) {
static_assert(sizeof...(Indices),
static_assert(sizeof...(Indices) > 0,
"Invalid number of parameter blocks. At least one parameter "
"block must be specified.");
return functor(input[Indices]..., output);
@@ -107,7 +106,29 @@ inline bool VariadicEvaluate(const Functor& functor,
return VariadicEvaluateImpl<ParameterDims>(functor, input, output, &functor);
}
} // namespace internal
} // namespace ceres
// When differentiating dynamically sized CostFunctions, VariadicEvaluate
// expects a functor with the signature:
//
// bool operator()(double const* const* parameters, double* cost) const
//
// However for NumericDiffFirstOrderFunction, the functor has the signature
//
// bool operator()(double const* parameters, double* cost) const
//
// This thin wrapper adapts the latter to the former.
template <typename Functor>
class FirstOrderFunctorAdapter {
public:
explicit FirstOrderFunctorAdapter(const Functor& functor)
: functor_(functor) {}
bool operator()(double const* const* parameters, double* cost) const {
return functor_(*parameters, cost);
}
private:
const Functor& functor_;
};
} // namespace ceres::internal
#endif // CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_
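A minimal sketch of the adapter in use (the functor is hypothetical):

// A functor with the NumericDiffFirstOrderFunction signature:
// bool operator()(double const* parameters, double* cost) const.
struct QuadraticCost {
  bool operator()(double const* x, double* cost) const {
    *cost = x[0] * x[0] + x[1] * x[1];
    return true;
  }
};

inline void AdapterExample() {
  QuadraticCost functor;
  ceres::internal::FirstOrderFunctorAdapter<QuadraticCost> adapter(functor);
  const double x[2] = {1.0, 2.0};
  const double* blocks[1] = {x};
  double cost = 0.0;
  adapter(blocks, &cost);  // forwards blocks[0] as functor(x, &cost)
}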

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -724,7 +724,6 @@ inline Jet<T, N> hypot(const Jet<T, N>& x, const Jet<T, N>& y) {
return Jet<T, N>(tmp, x.a / tmp * x.v + y.a / tmp * y.v);
}
#ifdef CERES_HAS_CPP17
// Like sqrt(x^2 + y^2 + z^2),
// but acts to prevent underflow/overflow for small/large x/y/z.
// Note that the function is non-smooth at x=y=z=0,
@@ -744,7 +743,6 @@ inline Jet<T, N> hypot(const Jet<T, N>& x,
const T tmp = hypot(x.a, y.a, z.a);
return Jet<T, N>(tmp, x.a / tmp * x.v + y.a / tmp * y.v + z.a / tmp * z.v);
}
#endif // defined(CERES_HAS_CPP17)
// Like x * y + z but rounded only once.
template <typename T, int N>
@@ -757,28 +755,76 @@ inline Jet<T, N> fma(const Jet<T, N>& x,
return Jet<T, N>(fma(x.a, y.a, z.a), y.a * x.v + x.a * y.v + z.v);
}
// Returns the larger of the two arguments. NaNs are treated as missing data.
// Return value of fmax() and fmin() on equality
// ---------------------------------------------
//
// There is arguably no good answer to what fmax() & fmin() should return on
// equality, which for Jets by definition ONLY compares the scalar parts. We
// choose what we think is the least worst option (averaging as Jets) which
// minimises undesirable/unexpected behaviour in practice, and also supports client
// code written against Ceres versions prior to type promotion being supported
// in Jet comparisons (< v2.1).
//
// The std::max() convention of returning the first argument on equality is
// problematic, as it means that the derivative component may or may not be
// preserved (when comparing a Jet with a scalar) depending upon the ordering.
//
// Always returning the Jet in {Jet, scalar} cases on equality is problematic
// as it is inconsistent with the behaviour that would be obtained if the scalar
// was first cast to Jet and the {Jet, Jet} case was used. Prior to type
// promotion (Ceres v2.1) client code would typically cast constants to Jets
// e.g: fmax(x, T(2.0)) which means the {Jet, Jet} case predominates, and we
// still want the result to be order independent.
//
// Our intuition is that preserving a non-zero derivative is best, even if
// its value does not match either of the inputs. Averaging achieves this
// whilst ensuring argument ordering independence. This is also the approach
// used by the Jax library, and TensorFlow's reduce_max().
// Returns the larger of the two arguments, with Jet averaging on equality.
// NaNs are treated as missing data.
//
// NOTE: This function is NOT subject to any of the error conditions specified
// in `math_errhandling`.
// in `math_errhandling`.
template <typename Lhs,
typename Rhs,
std::enable_if_t<CompatibleJetOperands_v<Lhs, Rhs>>* = nullptr>
inline decltype(auto) fmax(const Lhs& f, const Rhs& g) {
inline decltype(auto) fmax(const Lhs& x, const Rhs& y) {
using J = std::common_type_t<Lhs, Rhs>;
return (isnan(g) || isgreater(f, g)) ? J{f} : J{g};
// As x == y may set FP exceptions in the presence of NaNs when used with
// non-default compiler options, we avoid its use here.
if (isnan(x) || isnan(y) || islessgreater(x, y)) {
return isnan(x) || isless(x, y) ? J{y} : J{x};
}
// x == y (scalar parts) return the average of their Jet representations.
#if defined(CERES_HAS_CPP20)
return midpoint(J{x}, J{y});
#else
return (J{x} + J{y}) * typename J::Scalar(0.5);
#endif // defined(CERES_HAS_CPP20)
}
// Returns the smaller of the two arguments. NaNs are treated as missing data.
// Returns the smaller of the two arguments, with Jet averaging on equality.
// NaNs are treated as missing data.
//
// NOTE: This function is NOT subject to any of the error conditions specified
// in `math_errhandling`.
// in `math_errhandling`.
template <typename Lhs,
typename Rhs,
std::enable_if_t<CompatibleJetOperands_v<Lhs, Rhs>>* = nullptr>
inline decltype(auto) fmin(const Lhs& f, const Rhs& g) {
inline decltype(auto) fmin(const Lhs& x, const Rhs& y) {
using J = std::common_type_t<Lhs, Rhs>;
return (isnan(f) || isless(g, f)) ? J{g} : J{f};
// As x == y may set FP exceptions in the presence of NaNs when used with
// non-default compiler options, we avoid its use here.
if (isnan(x) || isnan(y) || islessgreater(x, y)) {
return isnan(x) || isgreater(x, y) ? J{y} : J{x};
}
// x == y (scalar parts) return the average of their Jet representations.
#if defined(CERES_HAS_CPP20)
return midpoint(J{x}, J{y});
#else
return (J{x} + J{y}) * typename J::Scalar(0.5);
#endif // defined(CERES_HAS_CPP20)
}
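A minimal sketch of the equal-scalar-part averaging described above (values chosen for illustration):

#include "ceres/jet.h"

inline void FmaxAveragingExample() {
  using J = ceres::Jet<double, 1>;
  const J x(2.0, 0);  // value 2, unit derivative w.r.t. parameter 0
  const J y(2.0);     // value 2, zero derivative (a constant)
  // Scalar parts are equal, so the result averages the two Jets:
  const J m = fmax(x, y);  // m.a == 2.0, m.v[0] == 0.5
  (void)m;
}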
// Returns the positive difference (f - g) of two arguments and zero if f <= g.
@@ -804,7 +850,7 @@ template <typename T, int N>
inline Jet<T, N> erf(const Jet<T, N>& x) {
// We evaluate the constant as follows:
// 2 / sqrt(pi) = 1 / sqrt(atan(1.))
// On POSIX sytems it is defined as M_2_SQRTPI, but this is not
// On POSIX systems it is defined as M_2_SQRTPI, but this is not
// portable and the type may not be T. The above expression
// evaluates to full precision with IEEE arithmetic and, since it's
// constant, the compiler can generate exactly the same code. gcc
@@ -828,25 +874,19 @@ inline Jet<T, N> erfc(const Jet<T, N>& x) {
// function errors in client code (the specific warning is suppressed when
// Ceres itself is built).
inline double BesselJ0(double x) {
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
return _j0(x);
#else
CERES_DISABLE_DEPRECATED_WARNING
return j0(x);
#endif
CERES_RESTORE_DEPRECATED_WARNING
}
inline double BesselJ1(double x) {
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
return _j1(x);
#else
CERES_DISABLE_DEPRECATED_WARNING
return j1(x);
#endif
CERES_RESTORE_DEPRECATED_WARNING
}
inline double BesselJn(int n, double x) {
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
return _jn(n, x);
#else
CERES_DISABLE_DEPRECATED_WARNING
return jn(n, x);
#endif
CERES_RESTORE_DEPRECATED_WARNING
}
// For the formulae of the derivatives of the Bessel functions see the book:
@@ -1264,8 +1304,13 @@ struct numeric_limits<ceres::Jet<T, N>> {
static constexpr bool is_bounded = std::numeric_limits<T>::is_bounded;
static constexpr bool is_modulo = std::numeric_limits<T>::is_modulo;
// has_denorm (and has_denorm_loss, not defined for Jet) has been deprecated
// in C++23, though without an intent to remove the declaration. Disable
// deprecation warnings temporarily just for the corresponding symbols.
CERES_DISABLE_DEPRECATED_WARNING
static constexpr std::float_denorm_style has_denorm =
std::numeric_limits<T>::has_denorm;
CERES_RESTORE_DEPRECATED_WARNING
static constexpr std::float_round_style round_style =
std::numeric_limits<T>::round_style;
@@ -1335,6 +1380,7 @@ struct NumTraits<ceres::Jet<T, N>> {
}
static inline int digits10() { return NumTraits<T>::digits10(); }
static inline int max_digits10() { return NumTraits<T>::max_digits10(); }
enum {
IsComplex = 0,

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -156,7 +156,7 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
//
// The direction update function Plus_d is the same as for the SphereManifold:
//
// d* = H_{v(d)} [0.5 sinc(0.5 |delta_d|) delta_d, cos(0.5 |delta_d|)]^T
// d* = H_{v(d)} [sinc(|delta_d|) delta_d, cos(|delta_d|)]^T
//
// where H is the householder matrix
// H_{v} = I - (2 / |v|^2) v v^T
@@ -165,7 +165,7 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
//
// The origin point update function Plus_o is defined as
//
// o* = o + H_{v(d)} [0.5 delta_o, 0]^T.
// o* = o + H_{v(d)} [delta_o, 0]^T.
Eigen::Map<const AmbientVector> o(x_ptr, size_);
Eigen::Map<const AmbientVector> d(x_ptr + size_, size_);
@@ -208,11 +208,8 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
// perpendicular to the line direction. This is achieved by using the
// householder matrix of the direction and allow only movements
// perpendicular to e_n.
//
// The factor of 0.5 is used to be consistent with the line direction
// update.
AmbientVector y(size_);
y << 0.5 * delta_o, 0;
y << delta_o, 0;
o_plus_delta += internal::ApplyHouseholderVector(y, v, beta);
return true;
@@ -266,7 +263,7 @@ bool LineManifold<AmbientSpaceDimension>::Minus(const double* y_ptr,
AmbientVector delta_o = y_o - x_o;
const AmbientVector h_delta_o =
2.0 * internal::ApplyHouseholderVector(delta_o, v, beta);
internal::ApplyHouseholderVector(delta_o, v, beta);
y_minus_x_o = h_delta_o.template head<TangentSpaceDimension>(size_ - 1);
return true;

View File

@@ -1,371 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)
// sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
#define CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
#include <array>
#include <memory>
#include <vector>
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/internal/port.h"
namespace ceres {
// WARNING: LocalParameterizations are deprecated. They will be removed from
// Ceres Solver in version 2.2.0. Please use Manifolds instead.
// Purpose: Sometimes parameter blocks x can overparameterize a problem
//
// min f(x)
// x
//
// In that case it is desirable to choose a parameterization for the
// block itself to remove the null directions of the cost. More
// generally, if x lies on a manifold of a smaller dimension than the
// ambient space that it is embedded in, then it is numerically and
// computationally more effective to optimize it using a
// parameterization that lives in the tangent space of that manifold
// at each point.
//
// For example, a sphere in three dimensions is a 2 dimensional
// manifold, embedded in a three dimensional space. At each point on
// the sphere, the plane tangent to it defines a two dimensional
// tangent space. For a cost function defined on this sphere, given a
// point x, moving in the direction normal to the sphere at that point
// is not useful. Thus a better way to do a local optimization is to
// optimize over two dimensional vector delta in the tangent space at
// that point and then "move" to the point x + delta, where the move
// operation involves projecting back onto the sphere. Doing so
// removes a redundant dimension from the optimization, making it
// numerically more robust and efficient.
//
// More generally we can define a function
//
// x_plus_delta = Plus(x, delta),
//
// where x_plus_delta has the same size as x, and delta is of size
// less than or equal to that of x. The function Plus generalizes the
// definition of vector addition. Thus it satisfies the identity
//
// Plus(x, 0) = x, for all x.
//
// A trivial version of Plus is when delta is of the same size as x
// and
//
// Plus(x, delta) = x + delta
//
// A more interesting case is when x is a two dimensional vector and the
// user wishes to hold the first coordinate constant. Then delta is a
// scalar and Plus is defined as
//
// Plus(x, delta) = x + [0] * delta
// [1]
//
// An example that occurs commonly in Structure from Motion problems
// is when camera rotations are parameterized using Quaternion. There,
// it is useful to only make updates orthogonal to that 4-vector
// defining the quaternion. One way to do this is to let delta be a 3
// dimensional vector and define Plus to be
//
// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x
//
// The multiplication between the two 4-vectors on the RHS is the
// standard quaternion product.
//
// Given f and a point x, optimizing f can now be restated as
//
// min f(Plus(x, delta))
// delta
//
// Given a solution delta to this problem, the optimal value is then
// given by
//
// x* = Plus(x, delta)
//
// The class LocalParameterization defines the function Plus and its
// Jacobian which is needed to compute the Jacobian of f w.r.t delta.
class CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations will be removed from the Ceres Solver API in "
"version 2.2.0. Use Manifolds instead.")
CERES_EXPORT LocalParameterization {
public:
virtual ~LocalParameterization();
// Generalization of the addition operation,
//
// x_plus_delta = Plus(x, delta)
//
// with the condition that Plus(x, 0) = x.
//
virtual bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const = 0;
// The jacobian of Plus(x, delta) w.r.t delta at delta = 0.
//
// jacobian is a row-major GlobalSize() x LocalSize() matrix.
virtual bool ComputeJacobian(const double* x, double* jacobian) const = 0;
// local_matrix = global_matrix * jacobian
//
// global_matrix is a num_rows x GlobalSize row major matrix.
// local_matrix is a num_rows x LocalSize row major matrix.
// jacobian(x) is the matrix returned by ComputeJacobian at x.
//
// This is only used by GradientProblem. For most normal uses, it is
// okay to use the default implementation.
virtual bool MultiplyByJacobian(const double* x,
const int num_rows,
const double* global_matrix,
double* local_matrix) const;
// Size of x.
virtual int GlobalSize() const = 0;
// Size of delta.
virtual int LocalSize() const = 0;
};
// Some basic parameterizations
// Identity Parameterization: Plus(x, delta) = x + delta
class CERES_DEPRECATED_WITH_MSG("Use EuclideanManifold instead.")
CERES_EXPORT IdentityParameterization : public LocalParameterization {
public:
explicit IdentityParameterization(int size);
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
bool MultiplyByJacobian(const double* x,
const int num_cols,
const double* global_matrix,
double* local_matrix) const override;
int GlobalSize() const override { return size_; }
int LocalSize() const override { return size_; }
private:
const int size_;
};
// Hold a subset of the parameters inside a parameter block constant.
class CERES_DEPRECATED_WITH_MSG("Use SubsetManifold instead.")
CERES_EXPORT SubsetParameterization : public LocalParameterization {
public:
explicit SubsetParameterization(int size,
const std::vector<int>& constant_parameters);
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
bool MultiplyByJacobian(const double* x,
const int num_cols,
const double* global_matrix,
double* local_matrix) const override;
int GlobalSize() const override {
return static_cast<int>(constancy_mask_.size());
}
int LocalSize() const override { return local_size_; }
private:
const int local_size_;
std::vector<char> constancy_mask_;
};
// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x
// with * being the quaternion multiplication operator. Here we assume
// that the first element of the quaternion vector is the real (cos
// theta) part.
class CERES_DEPRECATED_WITH_MSG("Use QuaternionManifold instead.")
CERES_EXPORT QuaternionParameterization : public LocalParameterization {
public:
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
int GlobalSize() const override { return 4; }
int LocalSize() const override { return 3; }
};
// Implements the quaternion local parameterization for Eigen's representation
// of the quaternion. Eigen uses a different internal memory layout for the
// elements of the quaternion than what is commonly used. Specifically, Eigen
// stores the elements in memory as [x, y, z, w] where the real part is last
// whereas it is typically stored first. Note, when creating an Eigen quaternion
// through the constructor the elements are accepted in w, x, y, z order. Since
// Ceres operates on parameter blocks which are raw double pointers this
// difference is important and requires a different parameterization.
//
// Plus(x, delta) = [sin(|delta|) delta / |delta|, cos(|delta|)] * x
// with * being the quaternion multiplication operator.
class CERES_DEPRECATED_WITH_MSG("Use EigenQuaternionManifold instead.")
CERES_EXPORT EigenQuaternionParameterization
: public ceres::LocalParameterization {
public:
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
int GlobalSize() const override { return 4; }
int LocalSize() const override { return 3; }
};
// This provides a parameterization for homogeneous vectors which are commonly
// used in Structure from Motion problems. One example where they are used is
// in representing points whose triangulation is ill-conditioned. Here it is
// advantageous to use an over-parameterization since homogeneous vectors can
// represent points at infinity.
//
// The plus operator is defined as
// Plus(x, delta) =
// [sin(0.5 * |delta|) * delta / |delta|, cos(0.5 * |delta|)] * x
//
// with * defined as an operator which applies the update orthogonal to x to
// remain on the sphere. We assume that the last element of x is the scalar
// component. The size of the homogeneous vector is required to be greater than
// 1.
class CERES_DEPRECATED_WITH_MSG("Use SphereManifold instead.") CERES_EXPORT
HomogeneousVectorParameterization : public LocalParameterization {
public:
explicit HomogeneousVectorParameterization(int size);
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
int GlobalSize() const override { return size_; }
int LocalSize() const override { return size_ - 1; }
private:
const int size_;
};
// This provides a parameterization for lines, where the line is
// over-parameterized by an origin point and a direction vector. So the
// parameter vector size needs to be two times the ambient space dimension,
// where the first half is interpreted as the origin point and the second half
// as the direction.
//
// The plus operator for the line direction is the same as for the
// HomogeneousVectorParameterization. The update of the origin point is
// perpendicular to the line direction before the update.
//
// This local parameterization is a special case of the affine Grassmannian
// manifold (see https://en.wikipedia.org/wiki/Affine_Grassmannian_(manifold))
// for the case Graff_1(R^n).
template <int AmbientSpaceDimension>
class CERES_DEPRECATED_WITH_MSG("Use LineManifold instead.")
LineParameterization : public LocalParameterization {
public:
static_assert(AmbientSpaceDimension >= 2,
"The ambient space must be at least 2");
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
int GlobalSize() const override { return 2 * AmbientSpaceDimension; }
int LocalSize() const override { return 2 * (AmbientSpaceDimension - 1); }
};
// Construct a local parameterization by taking the Cartesian product
// of a number of other local parameterizations. This is useful, when
// a parameter block is the cartesian product of two or more
// manifolds. For example the parameters of a camera consist of a
// rotation and a translation, i.e., SO(3) x R^3.
//
// Example usage:
//
// ProductParameterization product_param(new QuaternionParameterization(),
// new IdentityParameterization(3));
//
// is the local parameterization for a rigid transformation, where the
// rotation is represented using a quaternion.
//
class CERES_DEPRECATED_WITH_MSG("Use ProductManifold instead.")
CERES_EXPORT ProductParameterization : public LocalParameterization {
public:
ProductParameterization(const ProductParameterization&) = delete;
ProductParameterization& operator=(const ProductParameterization&) = delete;
//
// NOTE: The constructor takes ownership of the input local
// parameterizations.
//
template <typename... LocalParams>
explicit ProductParameterization(LocalParams*... local_params)
: local_params_(sizeof...(LocalParams)) {
constexpr int kNumLocalParams = sizeof...(LocalParams);
static_assert(kNumLocalParams >= 2,
"At least two local parameterizations must be specified.");
using LocalParameterizationPtr = std::unique_ptr<LocalParameterization>;
// Wrap all raw pointers into std::unique_ptr for exception safety.
std::array<LocalParameterizationPtr, kNumLocalParams> local_params_array{
LocalParameterizationPtr(local_params)...};
// Initialize internal state.
for (int i = 0; i < kNumLocalParams; ++i) {
LocalParameterizationPtr& param = local_params_[i];
param = std::move(local_params_array[i]);
buffer_size_ =
std::max(buffer_size_, param->LocalSize() * param->GlobalSize());
global_size_ += param->GlobalSize();
local_size_ += param->LocalSize();
}
}
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override;
bool ComputeJacobian(const double* x, double* jacobian) const override;
int GlobalSize() const override { return global_size_; }
int LocalSize() const override { return local_size_; }
private:
std::vector<std::unique_ptr<LocalParameterization>> local_params_;
int local_size_{0};
int global_size_{0};
int buffer_size_{0};
};
} // namespace ceres
// clang-format off
#include "ceres/internal/reenable_warnings.h"
// clang-format on
#include "ceres/internal/line_parameterization.h"
#endif // CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
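With this header removed, code that attached a LocalParameterization to a parameter block migrates to the corresponding Manifold. A minimal sketch of the quaternion case:

#include "ceres/manifold.h"
#include "ceres/problem.h"

inline void AttachQuaternionManifold(ceres::Problem& problem, double* q) {
  // Previously: problem.AddParameterBlock(q, 4,
  //                 new ceres::QuaternionParameterization());
  problem.AddParameterBlock(q, 4, new ceres::QuaternionManifold());
}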

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,24 +42,54 @@
namespace ceres {
// Matchers and macros for help with testing Manifold objects.
// Matchers and macros to simplify testing of custom Manifold objects using the
// gtest testing framework.
//
// Testing a Manifold has two parts.
//
// 1. Checking that Manifold::Plus is correctly defined. This requires per
// manifold tests.
// 1. Checking that Manifold::Plus() and Manifold::Minus() are correctly
// defined. This requires per manifold tests.
//
// 2. The other methods of the manifold have mathematical properties that make
// it compatible with Plus, as described in:
// them compatible with Plus() and Minus(), as described in [1].
//
// "Integrating Generic Sensor Fusion Algorithms with Sound State
// Representations through Encapsulation of Manifolds"
// By C. Hertzberg, R. Wagner, U. Frese and L. Schroder
// https://arxiv.org/pdf/1107.1119.pdf
// To verify these general requirements for a custom Manifold, use the
// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD() macro from within a gtest test. Note
// that additional domain-specific tests may also be prudent, e.g. to verify the
// behaviour of a Quaternion Manifold about pi.
//
// These tests are implemented using generic matchers defined below which can
// all be called by the macro EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x,
// delta, y, tolerance). See manifold_test.cc for example usage.
// [1] "Integrating Generic Sensor Fusion Algorithms with Sound State
// Representations through Encapsulation of Manifolds", C. Hertzberg,
// R. Wagner, U. Frese and L. Schroder, https://arxiv.org/pdf/1107.1119.pdf
// Verifies the general requirements for a custom Manifold are satisfied to
// within the specified (numerical) tolerance.
//
// Example usage for a custom Manifold, ExampleManifold:
//
// TEST(ExampleManifold, ManifoldInvariantsHold) {
// constexpr double kTolerance = 1.0e-9;
// ExampleManifold manifold;
// ceres::Vector x = ceres::Vector::Zero(manifold.AmbientSize());
// ceres::Vector y = ceres::Vector::Zero(manifold.AmbientSize());
// ceres::Vector delta = ceres::Vector::Zero(manifold.TangentSize());
// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, kTolerance);
// }
#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \
::ceres::Vector zero_tangent = \
::ceres::Vector::Zero(manifold.TangentSize()); \
EXPECT_THAT(manifold, ::ceres::XPlusZeroIsXAt(x, tolerance)); \
EXPECT_THAT(manifold, ::ceres::XMinusXIsZeroAt(x, tolerance)); \
EXPECT_THAT(manifold, ::ceres::MinusPlusIsIdentityAt(x, delta, tolerance)); \
EXPECT_THAT(manifold, \
::ceres::MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \
EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, x, tolerance)); \
EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, y, tolerance)); \
EXPECT_THAT(manifold, ::ceres::HasCorrectPlusJacobianAt(x, tolerance)); \
EXPECT_THAT(manifold, ::ceres::HasCorrectMinusJacobianAt(x, tolerance)); \
EXPECT_THAT(manifold, ::ceres::MinusPlusJacobianIsIdentityAt(x, tolerance)); \
EXPECT_THAT(manifold, \
::ceres::HasCorrectRightMultiplyByPlusJacobianAt(x, tolerance));
// Checks that the invariant Plus(x, 0) == x holds.
MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") {
@@ -69,7 +99,7 @@ MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") {
Vector actual = Vector::Zero(ambient_size);
Vector zero = Vector::Zero(tangent_size);
EXPECT_TRUE(arg.Plus(x.data(), zero.data(), actual.data()));
const double n = (actual - x).norm();
const double n = (actual - Vector{x}).norm();
const double d = x.norm();
const double diffnorm = (d == 0.0) ? n : (n / d);
if (diffnorm > tolerance) {
@@ -159,7 +189,7 @@ MATCHER_P3(MinusPlusIsIdentityAt, x, delta, tolerance, "") {
Vector actual = Vector::Zero(tangent_size);
EXPECT_TRUE(arg.Minus(x_plus_delta.data(), x.data(), actual.data()));
const double n = (actual - delta).norm();
const double n = (actual - Vector{delta}).norm();
const double d = delta.norm();
const double diffnorm = (d == 0.0) ? n : (n / d);
if (diffnorm > tolerance) {
@@ -184,7 +214,7 @@ MATCHER_P3(PlusMinusIsIdentityAt, x, y, tolerance, "") {
Vector actual = Vector::Zero(ambient_size);
EXPECT_TRUE(arg.Plus(x.data(), y_minus_x.data(), actual.data()));
const double n = (actual - y).norm();
const double n = (actual - Vector{y}).norm();
const double d = y.norm();
const double diffnorm = (d == 0.0) ? n : (n / d);
if (diffnorm > tolerance) {
@@ -312,17 +342,4 @@ MATCHER_P2(HasCorrectRightMultiplyByPlusJacobianAt, x, tolerance, "") {
return true;
}
#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \
Vector zero_tangent = Vector::Zero(manifold.TangentSize()); \
EXPECT_THAT(manifold, XPlusZeroIsXAt(x, tolerance)); \
EXPECT_THAT(manifold, XMinusXIsZeroAt(x, tolerance)); \
EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, delta, tolerance)); \
EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \
EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, x, tolerance)); \
EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, y, tolerance)); \
EXPECT_THAT(manifold, HasCorrectPlusJacobianAt(x, tolerance)); \
EXPECT_THAT(manifold, HasCorrectMinusJacobianAt(x, tolerance)); \
EXPECT_THAT(manifold, MinusPlusJacobianIsIdentityAt(x, tolerance)); \
EXPECT_THAT(manifold, HasCorrectRightMultiplyByPlusJacobianAt(x, tolerance));
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -61,7 +61,7 @@ class CERES_EXPORT NormalPrior final : public CostFunction {
public:
// Check that the number of rows in the vector b are the same as the
// number of columns in the matrix A, crash otherwise.
NormalPrior(const Matrix& A, const Vector& b);
NormalPrior(const Matrix& A, Vector b);
bool Evaluate(double const* const* parameters,
double* residuals,
double** jacobians) const override;
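Taking b by value makes it a sink parameter, so callers can move instead of copy. A minimal usage sketch:

#include <utility>
#include "ceres/normal_prior.h"

inline ceres::CostFunction* MakePrior(const ceres::Matrix& A,
                                      ceres::Vector b) {
  // b is moved into the prior rather than copied.
  return new ceres::NormalPrior(A, std::move(b));
}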

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -176,7 +176,7 @@
namespace ceres {
template <typename CostFunctor,
NumericDiffMethodType method = CENTRAL,
NumericDiffMethodType kMethod = CENTRAL,
int kNumResiduals = 0, // Number of residuals, or ceres::DYNAMIC
int... Ns> // Parameters dimensions for each block.
class NumericDiffCostFunction final
@@ -236,7 +236,7 @@ class NumericDiffCostFunction final
}
internal::EvaluateJacobianForParameterBlocks<ParameterDims>::
template Apply<method, kNumResiduals>(
template Apply<kMethod, kNumResiduals>(
functor_.get(),
residuals,
options_,

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,7 @@
#include "ceres/internal/variadic_evaluate.h"
#include "ceres/numeric_diff_options.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
@@ -99,19 +100,55 @@ namespace ceres {
// "QuadraticCostFunctor", "CENTRAL, 4", describe the finite
// differencing scheme as "central differencing" and the functor as
// computing its cost from a 4 dimensional input.
//
// If the size of the parameter vector is not known at compile time, then an
// alternate construction syntax can be used:
//
// FirstOrderFunction* function
// = new NumericDiffFirstOrderFunction<MyScalarCostFunctor, CENTRAL>(
// new QuadraticCostFunctor(1.0), 4);
//
// Note that instead of passing 4 as a template argument, it is now passed as
// the second argument to the constructor.
template <typename FirstOrderFunctor,
NumericDiffMethodType method,
int kNumParameters>
NumericDiffMethodType kMethod,
int kNumParameters = DYNAMIC>
class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
public:
// Constructor for the case where the parameter size is known at compile time.
explicit NumericDiffFirstOrderFunction(
FirstOrderFunctor* functor,
Ownership ownership = TAKE_OWNERSHIP,
const NumericDiffOptions& options = NumericDiffOptions())
: functor_(functor), ownership_(ownership), options_(options) {
: functor_(functor),
num_parameters_(kNumParameters),
ownership_(ownership),
options_(options) {
static_assert(kNumParameters != DYNAMIC,
"Number of parameters must be static when defined via the "
"template parameter. Use the other constructor for "
"dynamically sized functions.");
static_assert(kNumParameters > 0, "kNumParameters must be positive");
}
// Constructor for the case where the parameter size is specified at run time.
explicit NumericDiffFirstOrderFunction(
FirstOrderFunctor* functor,
int num_parameters,
Ownership ownership = TAKE_OWNERSHIP,
const NumericDiffOptions& options = NumericDiffOptions())
: functor_(functor),
num_parameters_(num_parameters),
ownership_(ownership),
options_(options) {
static_assert(
kNumParameters == DYNAMIC,
"Template parameter must be DYNAMIC when using this constructor. If "
"you want to provide the number of parameters statically use the other "
"constructor.");
CHECK_GT(num_parameters, 0);
}
~NumericDiffFirstOrderFunction() override {
if (ownership_ != TAKE_OWNERSHIP) {
functor_.release();
@@ -121,12 +158,8 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
bool Evaluate(const double* const parameters,
double* cost,
double* gradient) const override {
using ParameterDims = internal::StaticParameterDims<kNumParameters>;
constexpr int kNumResiduals = 1;
// Get the function value (cost) at the point to evaluate.
if (!internal::VariadicEvaluate<ParameterDims>(
*functor_, &parameters, cost)) {
if (!(*functor_)(parameters, cost)) {
return false;
}
@@ -135,27 +168,47 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
}
// Create a copy of the parameters which will get mutated.
internal::FixedArray<double, 32> parameters_copy(kNumParameters);
std::copy_n(parameters, kNumParameters, parameters_copy.data());
internal::FixedArray<double, 32> parameters_copy(num_parameters_);
std::copy_n(parameters, num_parameters_, parameters_copy.data());
double* parameters_ptr = parameters_copy.data();
internal::EvaluateJacobianForParameterBlocks<
ParameterDims>::template Apply<method, kNumResiduals>(functor_.get(),
cost,
options_,
kNumResiduals,
&parameters_ptr,
&gradient);
return true;
constexpr int kNumResiduals = 1;
if constexpr (kNumParameters == DYNAMIC) {
internal::FirstOrderFunctorAdapter<FirstOrderFunctor> fofa(*functor_);
return internal::NumericDiff<
internal::FirstOrderFunctorAdapter<FirstOrderFunctor>,
kMethod,
kNumResiduals,
internal::DynamicParameterDims,
0,
DYNAMIC>::EvaluateJacobianForParameterBlock(&fofa,
cost,
options_,
kNumResiduals,
0,
num_parameters_,
&parameters_ptr,
gradient);
} else {
return internal::EvaluateJacobianForParameterBlocks<
internal::StaticParameterDims<kNumParameters>>::
template Apply<kMethod, 1>(functor_.get(),
cost,
options_,
kNumResiduals,
&parameters_ptr,
&gradient);
}
}
int NumParameters() const override { return kNumParameters; }
int NumParameters() const override { return num_parameters_; }
const FirstOrderFunctor& functor() const { return *functor_; }
private:
std::unique_ptr<FirstOrderFunctor> functor_;
Ownership ownership_;
NumericDiffOptions options_;
const int num_parameters_;
const Ownership ownership_;
const NumericDiffOptions options_;
};
} // namespace ceres
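A minimal sketch of the new run-time-sized construction alongside the static one (the functor is hypothetical):

struct Rosenbrock {
  bool operator()(const double* x, double* cost) const {
    const double a = 10.0 * (x[1] - x[0] * x[0]);
    const double b = 1.0 - x[0];
    *cost = a * a + b * b;
    return true;
  }
};

inline ceres::FirstOrderFunction* MakeStatic() {
  // Size fixed at compile time via the template parameter.
  return new ceres::NumericDiffFirstOrderFunction<Rosenbrock, ceres::CENTRAL,
                                                  2>(new Rosenbrock);
}

inline ceres::FirstOrderFunction* MakeDynamic() {
  // kNumParameters defaults to DYNAMIC; the size becomes a constructor
  // argument instead.
  return new ceres::NumericDiffFirstOrderFunction<Rosenbrock, ceres::CENTRAL>(
      new Rosenbrock, /*num_parameters=*/2);
}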

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -53,7 +53,6 @@ namespace ceres {
class CostFunction;
class EvaluationCallback;
class LossFunction;
class LocalParameterization;
class Manifold;
class Solver;
struct CRSMatrix;
@@ -118,29 +117,17 @@ using ResidualBlockId = internal::ResidualBlock*;
// problem.AddResidualBlock(new MyBinaryCostFunction(...), nullptr, x2, x3);
//
// Please see cost_function.h for details of the CostFunction object.
//
// NOTE: We are currently in the process of transitioning from
// LocalParameterization to Manifolds in the Ceres API. During this period,
// Problem will support using both Manifold and LocalParameterization objects
// interchangably. In particular, adding a LocalParameterization to a parameter
// block is the same as adding a Manifold to that parameter block. For methods
// in the API affected by this change, see their documentation below.
class CERES_EXPORT Problem {
public:
struct CERES_EXPORT Options {
// These flags control whether the Problem object owns the CostFunctions,
// LossFunctions, LocalParameterizations, and Manifolds passed into the
// Problem.
// LossFunctions, and Manifolds passed into the Problem.
//
// If set to TAKE_OWNERSHIP, then the problem object will delete the
// corresponding object on destruction. The destructor is careful to delete
// the pointers only once, since sharing objects is allowed.
Ownership cost_function_ownership = TAKE_OWNERSHIP;
Ownership loss_function_ownership = TAKE_OWNERSHIP;
CERES_DEPRECATED_WITH_MSG(
"Local Parameterizations are deprecated. Use Manifold and "
"manifold_ownership instead.")
Ownership local_parameterization_ownership = TAKE_OWNERSHIP;
Ownership manifold_ownership = TAKE_OWNERSHIP;
// If true, trades memory for faster RemoveResidualBlock() and
@@ -271,66 +258,23 @@ class CERES_EXPORT Problem {
// pointer but a different size will result in a crash.
void AddParameterBlock(double* values, int size);
// Add a parameter block with appropriate size and parameterization to the
// problem. It is okay for local_parameterization to be nullptr.
//
// Repeated calls with the same arguments are ignored. Repeated calls
// with the same double pointer but a different size results in a crash
// (unless Solver::Options::diable_all_safety_checks is set to true).
//
// Repeated calls with the same double pointer and size but different
// LocalParameterization is equivalent to calling
// SetParameterization(local_parameterization), i.e., any previously
// associated LocalParameterization or Manifold object will be replaced with
// the local_parameterization.
//
// NOTE:
// ----
//
// This method is deprecated and will be removed in the next public
// release of Ceres Solver. Please move to using the Manifold based version of
// AddParameterBlock.
//
// During the transition from LocalParameterization to Manifold, internally
// the LocalParameterization is treated as a Manifold by wrapping it using a
// ManifoldAdapter object. So HasManifold() will return true, GetManifold()
// will return the wrapped object and ParameterBlockTangentSize() will return
// the LocalSize of the LocalParameterization.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Use the version with Manifolds "
"instead.")
void AddParameterBlock(double* values,
int size,
LocalParameterization* local_parameterization);
// Add a parameter block with appropriate size and Manifold to the
// problem. It is okay for manifold to be nullptr.
//
// Repeated calls with the same arguments are ignored. Repeated calls
// with the same double pointer but a different size results in a crash
// (unless Solver::Options::diable_all_safety_checks is set to true).
// (unless Solver::Options::disable_all_safety_checks is set to true).
//
// Repeated calls with the same double pointer and size but different Manifold
// is equivalent to calling SetManifold(manifold), i.e., any previously
// associated LocalParameterization or Manifold object will be replaced with
// the manifold.
//
// Note:
// ----
//
// During the transition from LocalParameterization to Manifold, calling
// AddParameterBlock with a Manifold when a LocalParameterization is already
// associated with the parameter block is okay. It is equivalent to calling
// SetManifold(manifold), i.e., any previously associated
// LocalParameterization or Manifold object will be replaced with the
// manifold.
// associated Manifold object will be replaced with the manifold.
void AddParameterBlock(double* values, int size, Manifold* manifold);
// Remove a parameter block from the problem. The LocalParameterization or
// Manifold of the parameter block, if it exists, will persist until the
// deletion of the problem (similar to cost/loss functions in residual block
// removal). Any residual blocks that depend on the parameter are also
// removed, as described above in RemoveResidualBlock().
// Remove a parameter block from the problem. The Manifold of the parameter
// block, if it exists, will persist until the deletion of the problem
// (similar to cost/loss functions in residual block removal). Any residual
// blocks that depend on the parameter are also removed, as described above
// in RemoveResidualBlock().
//
// If Problem::Options::enable_fast_removal is true, then the removal is fast
// (almost constant time). Otherwise, removing a parameter block will incur a
@@ -361,76 +305,15 @@ class CERES_EXPORT Problem {
// Returns true if a parameter block is set constant, and false otherwise. A
// parameter block may be set constant in two ways: either by calling
// SetParameterBlockConstant or by associating a LocalParameterization or
// Manifold with a zero dimensional tangent space with it.
// SetParameterBlockConstant or by associating a Manifold with a zero
// dimensional tangent space with it.
bool IsParameterBlockConstant(const double* values) const;
// Set the LocalParameterization for the parameter block. Calling
// SetParameterization with nullptr will clear any previously set
// LocalParameterization or Manifold for the parameter block.
//
// Repeated calls will cause any previously associated LocalParameterization
// or Manifold object to be replaced with the local_parameterization.
//
// The local_parameterization is owned by the Problem by default (See
// Problem::Options to override this behaviour).
//
// It is acceptable to set the same LocalParameterization for multiple
// parameter blocks; the destructor is careful to delete
// LocalParamaterizations only once.
//
// NOTE:
// ----
//
// This method is deprecated and will be removed in the next public
// release of Ceres Solver. Please move to using the SetManifold instead.
//
// During the transition from LocalParameterization to Manifold, internally
// the LocalParameterization is treated as a Manifold by wrapping it using a
// ManifoldAdapter object. So HasManifold() will return true, GetManifold()
// will return the wrapped object and ParameterBlockTangentSize will return
// the same value of ParameterBlockLocalSize.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Use SetManifold instead.")
void SetParameterization(double* values,
LocalParameterization* local_parameterization);
// Get the LocalParameterization object associated with this parameter block.
// If there is no LocalParameterization associated then nullptr is returned.
//
// NOTE: This method is deprecated and will be removed in the next public
// release of Ceres Solver. Use GetManifold instead.
//
// Note also that if a LocalParameterization is associated with a parameter
// block, HasManifold will return true and GetManifold will return the
// LocalParameterization wrapped in a ManifoldAdapter.
//
// The converse is NOT true, i.e., if a Manifold is associated with a
// parameter block, HasParameterization will return false and
// GetParameterization will return a nullptr.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Use GetManifold "
"instead.")
const LocalParameterization* GetParameterization(const double* values) const;
// Returns true if a LocalParameterization is associated with this parameter
// block, false otherwise.
//
// NOTE: This method is deprecated and will be removed in the next public
// release of Ceres Solver. Use HasManifold instead.
//
// Note also that if a Manifold is associated with the parameter block, this
// method will return false.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Use HasManifold instead.")
bool HasParameterization(const double* values) const;
// Set the Manifold for the parameter block. Calling SetManifold with nullptr
// will clear any previously set LocalParameterization or Manifold for the
// parameter block.
// will clear any previously set Manifold for the parameter block.
//
// Repeated calls will result in any previously associated
// LocalParameterization or Manifold object to be replaced with the manifold.
// Repeated calls will result in any previously associated Manifold object to
// be replaced with the manifold.
//
// The manifold is owned by the Problem by default (See Problem::Options to
// override this behaviour).
@@ -440,18 +323,11 @@ class CERES_EXPORT Problem {
// Get the Manifold object associated with this parameter block.
//
// If there is no Manifold Or LocalParameterization object associated then
// nullptr is returned.
//
// NOTE: During the transition from LocalParameterization to Manifold,
// internally the LocalParameterization is treated as a Manifold by wrapping
// it using a ManifoldAdapter object. So calling GetManifold on a parameter
// block with a LocalParameterization associated with it will return the
// LocalParameterization wrapped in a ManifoldAdapter
// If there is no Manifold object associated then nullptr is returned.
const Manifold* GetManifold(const double* values) const;
// Returns true if a Manifold or a LocalParameterization is associated with
// this parameter block, false otherwise.
// Returns true if a Manifold is associated with this parameter block, false
// otherwise.
bool HasManifold(const double* values) const;
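//
// Editorial sketch (not part of the upstream header): attaching a Manifold to
// a hypothetical 4-element quaternion block q and querying it:
//
//   double q[4] = {1.0, 0.0, 0.0, 0.0};
//   problem.AddParameterBlock(q, 4, new ceres::QuaternionManifold);
//   CHECK(problem.HasManifold(q));
//   CHECK_EQ(problem.ParameterBlockTangentSize(q), 3);  // ambient size is 4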
// Set the lower/upper bound for the parameter at position "index".
@@ -484,19 +360,9 @@ class CERES_EXPORT Problem {
// The size of the parameter block.
int ParameterBlockSize(const double* values) const;
// The dimension of the tangent space of the LocalParameterization or Manifold
// for the parameter block. If there is no LocalParameterization or Manifold
// associated with this parameter block, then ParameterBlockLocalSize =
// ParameterBlockSize.
CERES_DEPRECATED_WITH_MSG(
"LocalParameterizations are deprecated. Use ParameterBlockTangentSize "
"instead.")
int ParameterBlockLocalSize(const double* values) const;
// The dimenion of the tangent space of the LocalParameterization or Manifold
// for the parameter block. If there is no LocalParameterization or Manifold
// associated with this parameter block, then ParameterBlockTangentSize =
// ParameterBlockSize.
// The dimension of the tangent space of the Manifold for the parameter block.
// If there is no Manifold associated with this parameter block, then
// ParameterBlockTangentSize = ParameterBlockSize.
int ParameterBlockTangentSize(const double* values) const;
// Is the given parameter block present in this problem or not?
@@ -596,11 +462,11 @@ class CERES_EXPORT Problem {
//
// is the way to do so.
//
// Note 2: If no LocalParameterizations or Manifolds are used, then the size
// of the gradient vector (and the number of columns in the jacobian) is the
// sum of the sizes of all the parameter blocks. If a parameter block has a
// LocalParameterization or Manifold, then it contributes "TangentSize"
// entries to the gradient vector (and the number of columns in the jacobian).
// Note 2: If no Manifolds are used, then the size of the gradient vector (and
// the number of columns in the jacobian) is the sum of the sizes of all the
// parameter blocks. If a parameter block has a Manifold, then it contributes
// "TangentSize" entries to the gradient vector (and the number of columns in
// the jacobian).
//
// Note 3: This function cannot be called while the problem is being solved,
// for example it cannot be called from an IterationCallback at the end of an
@@ -631,11 +497,10 @@ class CERES_EXPORT Problem {
// returns false, the caller should expect the output memory locations to have
// been modified.
//
// The returned cost and jacobians have had robustification and
// LocalParameterization/Manifold applied already; for example, the jacobian
// for a 4-dimensional quaternion parameter using the
// "QuaternionParameterization" is num_residuals by 3 instead of num_residuals
// by 4.
// The returned cost and jacobians have had robustification and Manifold
// applied already; for example, the jacobian for a 4-dimensional quaternion
// parameter using the "QuaternionParameterization" is num_residuals by 3
// instead of num_residuals by 4.
//
// apply_loss_function as the name implies allows the user to switch the
// application of the loss function on and off.
@@ -672,9 +537,13 @@ class CERES_EXPORT Problem {
double* residuals,
double** jacobians) const;
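//
// Editorial sketch (not part of the upstream header): evaluating a single
// residual block at the current parameter values. residual_block_id is a
// hypothetical ResidualBlockId returned by AddResidualBlock; jacobians may be
// nullptr when only the cost and residuals are needed.
//
//   double cost;
//   double residuals[2];  // size of this residual block, illustrative
//   problem.EvaluateResidualBlock(residual_block_id,
//                                 /*apply_loss_function=*/true,
//                                 &cost, residuals, /*jacobians=*/nullptr);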
// Returns reference to the options with which the Problem was constructed.
const Options& options() const;
// Returns pointer to Problem implementation
internal::ProblemImpl* mutable_impl();
private:
friend class Solver;
friend class Covariance;
std::unique_ptr<internal::ProblemImpl> impl_;
};

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -257,28 +257,21 @@ class ProductManifold final : public Manifold {
template <typename T, std::size_t N>
static std::array<T, N> ExclusiveScan(const std::array<T, N>& values) {
std::array<T, N> result;
// TODO Replace with std::exclusive_scan once all platforms have full C++17
// STL support.
T init = 0;
// TODO Replace by std::exclusive_scan once C++17 is available
for (std::size_t i = 0; i != N; ++i) {
result[i] = init;
init += values[i];
}
return result;
}
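// For example (editorial note), ExclusiveScan applied to {3, 1, 4} yields
// {0, 3, 4}: each output entry is the sum of all preceding input entries.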
// TODO Replace by std::void_t once C++17 is available
template <typename... Types>
struct Void {
using type = void;
};
template <typename T, typename E = void>
struct IsDereferenceable : std::false_type {};
template <typename T>
struct IsDereferenceable<T, typename Void<decltype(*std::declval<T>())>::type>
struct IsDereferenceable<T, std::void_t<decltype(*std::declval<T>())>>
: std::true_type {};
template <typename T,
@@ -311,7 +304,6 @@ class ProductManifold final : public Manifold {
int tangent_size_;
};
#ifdef CERES_HAS_CPP17
// C++17 deduction guide that allows the user to avoid explicitly specifying
// the template parameters of ProductManifold. The class can instead be
// instantiated as follows:
@@ -321,7 +313,6 @@ class ProductManifold final : public Manifold {
template <typename Manifold0, typename Manifold1, typename... Manifolds>
ProductManifold(Manifold0&&, Manifold1&&, Manifolds&&...)
-> ProductManifold<Manifold0, Manifold1, Manifolds...>;
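// For example (editorial sketch), both template arguments are deduced in
//
//   ceres::ProductManifold manifold{ceres::QuaternionManifold{},
//                                   ceres::EuclideanManifold<3>{}};
//
// which instantiates ProductManifold<QuaternionManifold, EuclideanManifold<3>>.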
#endif
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,9 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include "ceres/constants.h"
#include "ceres/internal/euler_angles.h"
#include "glog/logging.h"
namespace ceres {
@@ -60,7 +61,7 @@ namespace ceres {
//
// the expression M(i, j) is equivalent to
//
// arrary[i * row_stride + j * col_stride]
// array[i * row_stride + j * col_stride]
//
// Conversion functions to and from rotation matrices accept
// MatrixAdapters to permit using row-major and column-major layouts,
@@ -136,6 +137,71 @@ template <typename T, int row_stride, int col_stride>
void EulerAnglesToRotationMatrix(
const T* euler, const MatrixAdapter<T, row_stride, col_stride>& R);
// Convert a generic Euler Angle sequence (in radians) to a 3x3 rotation matrix.
//
// Euler Angles define a sequence of 3 rotations about a sequence of axes,
// typically taken to be the X, Y, or Z axes. The last axis may be the same as
// the first axis (e.g. ZYZ) per Euler's original definition of his angles
// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in
// the nautical and aerospace fields (Tait-Bryan angles). The three rotations
// may be in a global frame of reference (Extrinsic) or in a body fixed frame of
// reference (Intrinsic) that moves with the rotating object.
//
// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from
// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the
// first rotation and 3 binary choices:
// 1. Parity of the axis permutation. The axis sequence has Even parity if the
// second axis of rotation is 'greater-than' the first axis of rotation
// according to the order X<Y<Z<X, otherwise it has Odd parity.
// 2. Proper Euler Angles vs. Tait-Bryan Angles
// 3. Extrinsic Rotations vs. Intrinsic Rotations
// compactly represent all 24 possible Euler Angle Conventions.
//
// One template parameter: EulerSystem must be explicitly given. This parameter
// is a tag named by 'Extrinsic' or 'Intrinsic' followed by three characters in
// the set '[XYZ]', specifying the axis sequence, e.g. ceres::ExtrinsicYZY
// (robotic arms), ceres::IntrinsicZYX (for aerospace), etc.
//
// The order of elements in the input array 'euler' follows the axis sequence.
template <typename EulerSystem, typename T>
inline void EulerAnglesToRotation(const T* euler, T* R);
template <typename EulerSystem, typename T, int row_stride, int col_stride>
void EulerAnglesToRotation(const T* euler,
const MatrixAdapter<T, row_stride, col_stride>& R);
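//
// Editorial sketch (not part of the upstream header): intrinsic ZYX
// (yaw-pitch-roll) angles, in radians, to a row-major rotation matrix. The
// angle values are illustrative.
//
//   const double euler[3] = {0.1, 0.2, 0.3};  // order follows the axis tag
//   double R[9];  // row-major 3x3
//   ceres::EulerAnglesToRotation<ceres::IntrinsicZYX>(euler, R);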
// Convert a 3x3 rotation matrix to a generic Euler Angle sequence (in radians)
//
// Euler Angles define a sequence of 3 rotations about a sequence of axes,
// typically taken to be the X, Y, or Z axes. The last axis may be the same as
// the first axis (e.g. ZYZ) per Euler's original definition of his angles
// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in
// the nautical and aerospace fields (Tait-Bryan angles). The three rotations
// may be in a global frame of reference (Extrinsic) or in a body fixed frame of
// reference (Intrinsic) that moves with the rotating object.
//
// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from
// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the
// first rotation and 3 binary choices:
// 1. Oddness of the axis permutation, which defines whether the second axis is
// 'greater-than' the first axis according to the order X>Y>Z>X
// 2. Proper Euler Angles vs. Tait-Bryan Angles
// 3. Extrinsic Rotations vs. Intrinsic Rotations
// compactly represent all 24 possible Euler Angle Conventions.
//
// One template parameter: EulerSystem must be explicitly given. This parameter
// is a tag named by 'Extrinsic' or 'Intrinsic' followed by three characters in
// the set '[XYZ]', specifying the axis sequence, e.g. ceres::ExtrinsicYZY
// (robotic arms), ceres::IntrinsicZYX (for aerospace), etc.
//
// The order of elements in the output array 'euler' follows the axis sequence.
template <typename EulerSystem, typename T>
inline void RotationMatrixToEulerAngles(const T* R, T* euler);
template <typename EulerSystem, typename T, int row_stride, int col_stride>
void RotationMatrixToEulerAngles(
const MatrixAdapter<const T, row_stride, col_stride>& R, T* euler);
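//
// Editorial sketch (not part of the upstream header): recovering the angles
// from the matrix produced above; for the same EulerSystem tag this inverts
// EulerAnglesToRotation, up to angle-range normalization.
//
//   double recovered[3];
//   ceres::RotationMatrixToEulerAngles<ceres::IntrinsicZYX>(R, recovered);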
// Convert a 4-vector to a 3x3 scaled rotation matrix.
//
// The choice of rotation is such that the quaternion [1 0 0 0] goes to an
@@ -247,14 +313,15 @@ MatrixAdapter<T, 3, 1> RowMajorAdapter3x3(T* pointer) {
template <typename T>
inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) {
using std::fpclassify;
using std::hypot;
const T& a0 = angle_axis[0];
const T& a1 = angle_axis[1];
const T& a2 = angle_axis[2];
const T theta_squared = a0 * a0 + a1 * a1 + a2 * a2;
const T theta = hypot(a0, a1, a2);
// For points not at the origin, the full conversion is numerically stable.
if (theta_squared > T(0.0)) {
const T theta = sqrt(theta_squared);
if (fpclassify(theta) != FP_ZERO) {
const T half_theta = theta * T(0.5);
const T k = sin(half_theta) / theta;
quaternion[0] = cos(half_theta);
@@ -276,15 +343,16 @@ inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) {
template <typename T>
inline void QuaternionToAngleAxis(const T* quaternion, T* angle_axis) {
using std::fpclassify;
using std::hypot;
const T& q1 = quaternion[1];
const T& q2 = quaternion[2];
const T& q3 = quaternion[3];
const T sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3;
const T sin_theta = hypot(q1, q2, q3);
// For quaternions representing non-zero rotation, the conversion
// is numerically stable.
if (sin_squared_theta > T(0.0)) {
const T sin_theta = sqrt(sin_squared_theta);
if (fpclassify(sin_theta) != FP_ZERO) {
const T& cos_theta = quaternion[0];
// If cos_theta is negative, theta is greater than pi/2, which
@@ -385,13 +453,14 @@ inline void AngleAxisToRotationMatrix(const T* angle_axis, T* R) {
template <typename T, int row_stride, int col_stride>
void AngleAxisToRotationMatrix(
const T* angle_axis, const MatrixAdapter<T, row_stride, col_stride>& R) {
using std::fpclassify;
using std::hypot;
static const T kOne = T(1.0);
const T theta2 = DotProduct(angle_axis, angle_axis);
if (theta2 > T(std::numeric_limits<double>::epsilon())) {
const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]);
if (fpclassify(theta) != FP_ZERO) {
// We want to be careful to only evaluate the square root if the
// norm of the angle_axis vector is greater than zero. Otherwise
// we get a division by zero.
const T theta = sqrt(theta2);
const T wx = angle_axis[0] / theta;
const T wy = angle_axis[1] / theta;
const T wz = angle_axis[2] / theta;
@@ -411,7 +480,7 @@ void AngleAxisToRotationMatrix(
R(2, 2) = costheta + wz*wz*(kOne - costheta);
// clang-format on
} else {
// Near zero, we switch to using the first order Taylor expansion.
// At zero, we switch to using the first order Taylor expansion.
R(0, 0) = kOne;
R(1, 0) = angle_axis[2];
R(2, 0) = -angle_axis[1];
@@ -424,6 +493,141 @@ void AngleAxisToRotationMatrix(
}
}
template <typename EulerSystem, typename T>
inline void EulerAnglesToRotation(const T* euler, T* R) {
EulerAnglesToRotation<EulerSystem>(euler, RowMajorAdapter3x3(R));
}
template <typename EulerSystem, typename T, int row_stride, int col_stride>
void EulerAnglesToRotation(const T* euler,
const MatrixAdapter<T, row_stride, col_stride>& R) {
using std::cos;
using std::sin;
const auto [i, j, k] = EulerSystem::kAxes;
T ea[3];
ea[1] = euler[1];
if constexpr (EulerSystem::kIsIntrinsic) {
ea[0] = euler[2];
ea[2] = euler[0];
} else {
ea[0] = euler[0];
ea[2] = euler[2];
}
if constexpr (EulerSystem::kIsParityOdd) {
ea[0] = -ea[0];
ea[1] = -ea[1];
ea[2] = -ea[2];
}
const T ci = cos(ea[0]);
const T cj = cos(ea[1]);
const T ch = cos(ea[2]);
const T si = sin(ea[0]);
const T sj = sin(ea[1]);
const T sh = sin(ea[2]);
const T cc = ci * ch;
const T cs = ci * sh;
const T sc = si * ch;
const T ss = si * sh;
if constexpr (EulerSystem::kIsProperEuler) {
R(i, i) = cj;
R(i, j) = sj * si;
R(i, k) = sj * ci;
R(j, i) = sj * sh;
R(j, j) = -cj * ss + cc;
R(j, k) = -cj * cs - sc;
R(k, i) = -sj * ch;
R(k, j) = cj * sc + cs;
R(k, k) = cj * cc - ss;
} else {
R(i, i) = cj * ch;
R(i, j) = sj * sc - cs;
R(i, k) = sj * cc + ss;
R(j, i) = cj * sh;
R(j, j) = sj * ss + cc;
R(j, k) = sj * cs - sc;
R(k, i) = -sj;
R(k, j) = cj * si;
R(k, k) = cj * ci;
}
}
template <typename EulerSystem, typename T>
inline void RotationMatrixToEulerAngles(const T* R, T* euler) {
RotationMatrixToEulerAngles<EulerSystem>(RowMajorAdapter3x3(R), euler);
}
template <typename EulerSystem, typename T, int row_stride, int col_stride>
void RotationMatrixToEulerAngles(
const MatrixAdapter<const T, row_stride, col_stride>& R, T* euler) {
using std::atan2;
using std::fpclassify;
using std::hypot;
const auto [i, j, k] = EulerSystem::kAxes;
T ea[3];
if constexpr (EulerSystem::kIsProperEuler) {
const T sy = hypot(R(i, j), R(i, k));
if (fpclassify(sy) != FP_ZERO) {
ea[0] = atan2(R(i, j), R(i, k));
ea[1] = atan2(sy, R(i, i));
ea[2] = atan2(R(j, i), -R(k, i));
} else {
ea[0] = atan2(-R(j, k), R(j, j));
ea[1] = atan2(sy, R(i, i));
ea[2] = T(0.0);
}
} else {
const T cy = hypot(R(i, i), R(j, i));
if (fpclassify(cy) != FP_ZERO) {
ea[0] = atan2(R(k, j), R(k, k));
ea[1] = atan2(-R(k, i), cy);
ea[2] = atan2(R(j, i), R(i, i));
} else {
ea[0] = atan2(-R(j, k), R(j, j));
ea[1] = atan2(-R(k, i), cy);
ea[2] = T(0.0);
}
}
if constexpr (EulerSystem::kIsParityOdd) {
ea[0] = -ea[0];
ea[1] = -ea[1];
ea[2] = -ea[2];
}
euler[1] = ea[1];
if constexpr (EulerSystem::kIsIntrinsic) {
euler[0] = ea[2];
euler[2] = ea[0];
} else {
euler[0] = ea[0];
euler[2] = ea[2];
}
// Proper Euler angles are defined for angles in
// [-pi, pi) x [0, pi / 2) x [-pi, pi),
// which is enforced here.
if constexpr (EulerSystem::kIsProperEuler) {
const T kPi(constants::pi);
const T kTwoPi(2.0 * kPi);
if (euler[1] < T(0.0) || ea[1] > kPi) {
euler[0] += kPi;
euler[1] = -euler[1];
euler[2] -= kPi;
}
for (int i = 0; i < 3; ++i) {
if (euler[i] < -kPi) {
euler[i] += kTwoPi;
} else if (euler[i] > kPi) {
euler[i] -= kTwoPi;
}
}
}
}
template <typename T>
inline void EulerAnglesToRotationMatrix(const T* euler,
const int row_stride_parameter,
@@ -589,9 +793,12 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
const T pt[3],
T result[3]) {
DCHECK_NE(pt, result) << "Inplace rotation is not supported.";
using std::fpclassify;
using std::hypot;
const T theta2 = DotProduct(angle_axis, angle_axis);
if (theta2 > T(std::numeric_limits<double>::epsilon())) {
const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]);
if (fpclassify(theta) != FP_ZERO) {
// Away from zero, use the Rodrigues formula
//
// result = pt costheta +
@@ -602,7 +809,6 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
// norm of the angle_axis vector is greater than zero. Otherwise
// we get a division by zero.
//
const T theta = sqrt(theta2);
const T costheta = cos(theta);
const T sintheta = sin(theta);
const T theta_inverse = T(1.0) / theta;
@@ -623,7 +829,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
result[1] = pt[1] * costheta + w_cross_pt[1] * sintheta + w[1] * tmp;
result[2] = pt[2] * costheta + w_cross_pt[2] * sintheta + w[2] * tmp;
} else {
// Near zero, the first order Taylor approximation of the rotation
// At zero, the first order Taylor approximation of the rotation
// matrix R corresponding to a vector w and angle theta is
//
// R = I + hat(w) * sin(theta)
@@ -635,7 +841,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
// and actually performing multiplication with the point pt, gives us
// R * pt = pt + angle_axis x pt.
//
// Switching to the Taylor expansion near zero provides meaningful
// Switching to the Taylor expansion at zero provides meaningful
// derivatives when evaluated using Jets.
//
// Explicitly inlined evaluation of the cross product for

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -64,8 +64,6 @@ class CERES_EXPORT Solver {
// with a message describing the problem.
bool IsValid(std::string* error) const;
// Minimizer options ----------------------------------------
// Ceres supports the two major families of optimization strategies -
// Trust Region and Line Search.
//
@@ -378,88 +376,144 @@ class CERES_EXPORT Solver {
DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN;
// Ceres supports using multiple sparse linear algebra libraries for sparse
// matrix ordering and factorizations. Currently, SUITE_SPARSE and CX_SPARSE
// are the valid choices, depending on whether they are linked into Ceres at
// build time.
// matrix ordering and factorizations.
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
#if !defined(CERES_NO_SUITESPARSE)
SUITE_SPARSE;
#elif defined(CERES_USE_EIGEN_SPARSE)
EIGEN_SPARSE;
#elif !defined(CERES_NO_CXSPARSE)
CX_SPARSE;
#elif !defined(CERES_NO_ACCELERATE_SPARSE)
ACCELERATE_SPARSE;
#elif defined(CERES_USE_EIGEN_SPARSE)
EIGEN_SPARSE;
#else
NO_SPARSE;
#endif
// The order in which variables are eliminated in a linear solver
// can have a significant of impact on the efficiency and accuracy
// of the method. e.g., when doing sparse Cholesky factorization,
// can have a significant impact on the efficiency and accuracy of
// the method. e.g., when doing sparse Cholesky factorization,
// there are matrices for which a good ordering will give a
// Cholesky factor with O(n) storage, whereas a bad ordering will
// result in a completely dense factor.
//
// Ceres allows the user to provide varying amounts of hints to
// the solver about the variable elimination ordering to use. This
// can range from no hints, where the solver is free to decide the
// best possible ordering based on the user's choices like the
// linear solver being used, to an exact order in which the
// variables should be eliminated, and a variety of possibilities
// in between.
// Sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
// SPARSE_SCHUR use a fill reducing ordering of the columns and
// rows of the matrix being factorized before computing the
// numeric factorization.
//
// Instances of the ParameterBlockOrdering class are used to
// communicate this information to Ceres.
// This enum controls the type of algorithm used to compute
// this fill reducing ordering. There is no single algorithm
// that works on all matrices, so determining which algorithm
// works better is a matter of empirical experimentation.
//
// Formally an ordering is an ordered partitioning of the
// parameter blocks, i.e, each parameter block belongs to exactly
// one group, and each group has a unique non-negative integer
// associated with it, that determines its order in the set of
// groups.
// The exact behaviour of this setting is affected by the value of
// linear_solver_ordering as described below.
LinearSolverOrderingType linear_solver_ordering_type = AMD;
// Besides specifying the fill reducing ordering via
// linear_solver_ordering_type, Ceres allows the user to provide varying
// amounts of hints to the linear solver about the variable elimination
// ordering to use. This can range from no hints, where the solver is free
// to decide the best possible ordering based on the user's choices like the
// linear solver being used, to an exact order in which the variables should
// be eliminated, and a variety of possibilities in between.
//
// Given such an ordering, Ceres ensures that the parameter blocks in
// the lowest numbered group are eliminated first, and then the
// parameter blocks in the next lowest numbered group and so on. Within
// each group, Ceres is free to order the parameter blocks as it
// chooses.
// Instances of the ParameterBlockOrdering class are used to communicate
// this information to Ceres.
//
// If nullptr, then all parameter blocks are assumed to be in the
// same group and the solver is free to decide the best
// ordering.
// Formally an ordering is an ordered partitioning of the parameter blocks,
// i.e., each parameter block belongs to exactly one group, and each group
// has a unique non-negative integer associated with it, that determines its
// order in the set of groups.
//
// e.g. Consider the linear system
//
// x + y = 3
// 2x + 3y = 7
//
// There are two ways in which it can be solved. First eliminating x
// from the two equations, solving for y and then back substituting
// for x, or first eliminating y, solving for x and back substituting
// for y. The user can construct three orderings here.
// There are two ways in which it can be solved. First eliminating x from
// the two equations, solving for y and then back substituting for x, or
// first eliminating y, solving for x and back substituting for y. The user
// can construct three orderings here.
//
// {0: x}, {1: y} - eliminate x first.
// {0: y}, {1: x} - eliminate y first.
// {0: x, y} - Solver gets to decide the elimination order.
//
// Thus, to have Ceres determine the ordering automatically using
// heuristics, put all the variables in group 0 and to control the
// ordering for every variable, create groups 0..N-1, one per
// variable, in the desired order.
// Thus, to have Ceres determine the ordering automatically, put all the
// variables in group 0 and to control the ordering for every variable
// create groups 0 ... N-1, one per variable, in the desired
// order.
//
// linear_solver_ordering == nullptr and an ordering where all the parameter
// blocks are in one elimination group mean the same thing - the solver is
// free to choose what it thinks is the best elimination ordering. Therefore
// in the following we will only consider the case where
// linear_solver_ordering is nullptr.
//
// The exact interpretation of this information depends on the values of
// linear_solver_ordering_type and linear_solver_type/preconditioner_type
// and sparse_linear_algebra_library_type.
//
// Bundle Adjustment
// -----------------
// =================
//
// A particular case of interest is bundle adjustment, where the user
// has two options. The default is to not specify an ordering at all,
// the solver will see that the user wants to use a Schur type solver
// and figure out the right elimination ordering.
// If the user is using one of the Schur solvers (DENSE_SCHUR,
// SPARSE_SCHUR, ITERATIVE_SCHUR) and chooses to specify an
// ordering, it must have one important property. The lowest
// numbered elimination group must form an independent set in the
// graph corresponding to the Hessian, or in other words, no two
// parameter blocks in the first elimination group should
// co-occur in the same residual block. For the best performance,
// this elimination group should be as large as possible. For
// standard bundle adjustment problems, this corresponds to the
// first elimination group containing all the 3d points, and the
// second containing all the camera parameter blocks.
//
// But if the user already knows what parameter blocks are points and
// what are cameras, they can save preprocessing time by partitioning
// the parameter blocks into two groups, one for the points and one
// for the cameras, where the group containing the points has an id
// smaller than the group containing cameras.
// If the user leaves the choice to Ceres, then the solver uses an
// approximate maximum independent set algorithm to identify the first
// elimination group.
//
// sparse_linear_algebra_library_type = SUITE_SPARSE
// =================================================
//
// linear_solver_ordering_type = AMD
// ---------------------------------
//
// A Constrained Approximate Minimum Degree (CAMD) ordering is used where the
// parameter blocks in the lowest numbered group are eliminated first, and
// then the parameter blocks in the next lowest numbered group and so
// on. Within each group, CAMD is free to order the parameter blocks as it
// chooses.
//
// linear_solver_ordering_type = NESDIS
// -------------------------------------
//
// a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
// linear_solver_type = CGNR and preconditioner_type = SUBSET
//
// The value of linear_solver_ordering is ignored and a Nested Dissection
// algorithm is used to compute a fill reducing ordering.
//
// b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
//
// ONLY the lowest group is used to compute the Schur complement, and
// Nested Dissection is used to compute a fill reducing ordering for the
// Schur Complement (or its preconditioner).
//
// sparse_linear_algebra_library_type = EIGEN_SPARSE or ACCELERATE_SPARSE
// ======================================================================
//
// a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
// linear_solver_type = CGNR and preconditioner_type = SUBSET
//
// then the value of linear_solver_ordering is ignored and AMD or NESDIS is
// used to compute a fill reducing ordering as requested by the user.
//
// b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
//
// ONLY the lowest group is used to compute the Schur complement, and AMD
// or NESDIS is used to compute a fill reducing ordering for the Schur
// Complement (or its preconditioner).
std::shared_ptr<ParameterBlockOrdering> linear_solver_ordering;
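//
// Editorial sketch (not part of the upstream header): a bundle adjustment
// style ordering with all point blocks in group 0 (eliminated first) and all
// camera blocks in group 1. points and cameras are hypothetical containers
// of parameter block pointers.
//
//   auto ordering = std::make_shared<ceres::ParameterBlockOrdering>();
//   for (double* point : points) ordering->AddElementToGroup(point, 0);
//   for (double* camera : cameras) ordering->AddElementToGroup(camera, 1);
//   options.linear_solver_ordering = ordering;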
// Use an explicitly computed Schur complement matrix with
@@ -500,12 +554,6 @@ class CERES_EXPORT Solver {
// Jacobian matrix and generally speaking, there is no performance
// penalty for doing so.
// In some rare cases, it is worth using a more complicated
// reordering algorithm which has slightly better runtime
// performance at the expense of an extra copy of the Jacobian
// matrix. Setting use_postordering to true enables this tradeoff.
bool use_postordering = false;
// Some non-linear least squares problems are symbolically dense but
// numerically sparse. i.e. at any given state only a small number
// of jacobian entries are non-zero, but the position and number of
@@ -521,11 +569,6 @@ class CERES_EXPORT Solver {
// This setting only affects the SPARSE_NORMAL_CHOLESKY solver.
bool dynamic_sparsity = false;
// TODO(sameeragarwal): Further expand the documentation for the
// following two options.
// NOTE1: EXPERIMENTAL FEATURE, UNDER DEVELOPMENT, USE AT YOUR OWN RISK.
//
// If use_mixed_precision_solves is true, the Gauss-Newton matrix
// is computed in double precision, but its factorization is
// computed in single precision. This can result in significant
@@ -536,16 +579,57 @@ class CERES_EXPORT Solver {
// If use_mixed_precision_solves is true, we recommend setting
// max_num_refinement_iterations to 2-3.
//
// NOTE2: The following two options are currently only applicable
// if sparse_linear_algebra_library_type is EIGEN_SPARSE or
// ACCELERATE_SPARSE, and linear_solver_type is SPARSE_NORMAL_CHOLESKY
// or SPARSE_SCHUR.
// This option is available when the linear solver uses sparse or dense
// Cholesky factorization, except when sparse_linear_algebra_library_type =
// SUITE_SPARSE.
bool use_mixed_precision_solves = false;
// Number of steps of the iterative refinement process to run when
// computing the Gauss-Newton step.
int max_num_refinement_iterations = 0;
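//
// Editorial sketch (not part of the upstream header): enabling mixed
// precision solves with the 2-3 refinement iterations recommended above.
//
//   options.use_mixed_precision_solves = true;
//   options.max_num_refinement_iterations = 3;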
// Minimum number of iterations for which the linear solver should
// run, even if the convergence criterion is satisfied.
int min_linear_solver_iterations = 0;
// Maximum number of iterations for which the linear solver should
// run. If the solver does not converge in less than
// max_linear_solver_iterations, then it returns MAX_ITERATIONS,
// as its termination type.
int max_linear_solver_iterations = 500;
// Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION.
// Each iteration corresponds to one more term in the power series expansion
// od the inverse of the Schur complement. This value controls the maximum
// number of iterations whether it is used as a preconditioner or just to
// initialize the solution for ITERATIVE_SCHUR.
int max_num_spse_iterations = 5;
// Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for
// ITERATIVE_SCHUR. This option can be set true regardless of what
// preconditioner is being used.
bool use_spse_initialization = false;
// When use_spse_initialization is true, this parameter along with
// max_num_spse_iterations controls the number of
// SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It
// is not used to control the preconditioner.
double spse_tolerance = 0.1;
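//
// Editorial sketch (not part of the upstream header): using the power series
// expansion both as the preconditioner and to initialize ITERATIVE_SCHUR.
//
//   options.linear_solver_type = ceres::ITERATIVE_SCHUR;
//   options.preconditioner_type = ceres::SCHUR_POWER_SERIES_EXPANSION;
//   options.use_spse_initialization = true;
//   options.max_num_spse_iterations = 5;
//   options.spse_tolerance = 0.1;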
// Forcing sequence parameter. The truncated Newton solver uses
// this number to control the relative accuracy with which the
// Newton step is computed.
//
// This constant is passed to ConjugateGradientsSolver which uses
// it to terminate the iterations when
//
// (Q_i - Q_{i-1})/Q_i < eta/i
double eta = 1e-1;
// Normalize the jacobian using Jacobi scaling before calling
// the linear least squares solver.
bool jacobi_scaling = true;
// Some non-linear least squares problems have additional
// structure in the way the parameter blocks interact that it is
// beneficial to modify the way the trust region step is computed.
@@ -629,32 +713,6 @@ class CERES_EXPORT Solver {
// iterations is disabled.
double inner_iteration_tolerance = 1e-3;
// Minimum number of iterations for which the linear solver should
// run, even if the convergence criterion is satisfied.
int min_linear_solver_iterations = 0;
// Maximum number of iterations for which the linear solver should
// run. If the solver does not converge in less than
// max_linear_solver_iterations, then it returns MAX_ITERATIONS,
// as its termination type.
int max_linear_solver_iterations = 500;
// Forcing sequence parameter. The truncated Newton solver uses
// this number to control the relative accuracy with which the
// Newton step is computed.
//
// This constant is passed to ConjugateGradientsSolver which uses
// it to terminate the iterations when
//
// (Q_i - Q_{i-1})/Q_i < eta/i
double eta = 1e-1;
// Normalize the jacobian using Jacobi scaling before calling
// the linear least squares solver.
bool jacobi_scaling = true;
// Logging options ---------------------------------------------------------
LoggingType logging_type = PER_MINIMIZER_ITERATION;
// By default the Minimizer progress is logged to VLOG(1), which
@@ -791,10 +849,9 @@ class CERES_EXPORT Solver {
// IterationSummary for each minimizer iteration in order.
std::vector<IterationSummary> iterations;
// Number of minimizer iterations in which the step was
// accepted. Unless use_non_monotonic_steps is true this is also
// the number of steps in which the objective function value/cost
// went down.
// Number of minimizer iterations in which the step was accepted. Unless
// use_nonmonotonic_steps is true this is also the number of steps in which
// the objective function value/cost went down.
int num_successful_steps = -1;
// Number of minimizer iterations in which the step was rejected
@@ -884,7 +941,7 @@ class CERES_EXPORT Solver {
// Dimension of the tangent space of the problem (or the number of
// columns in the Jacobian for the problem). This is different
// from num_parameters if a parameter block is associated with a
// LocalParameterization/Manifold.
// Manifold.
int num_effective_parameters = -1;
// Number of residual blocks in the problem.
@@ -905,7 +962,7 @@ class CERES_EXPORT Solver {
// number of columns in the Jacobian for the reduced
// problem). This is different from num_parameters_reduced if a
// parameter block in the reduced problem is associated with a
// LocalParameterization/Manifold.
// Manifold.
int num_effective_parameters_reduced = -1;
// Number of residual blocks in the reduced problem.
@@ -922,8 +979,7 @@ class CERES_EXPORT Solver {
int num_threads_given = -1;
// Number of threads actually used by the solver for Jacobian and
// residual evaluation. This number is not equal to
// num_threads_given if OpenMP is not available.
// residual evaluation.
int num_threads_used = -1;
// Type of the linear solver requested by the user.
@@ -946,6 +1002,10 @@ class CERES_EXPORT Solver {
SPARSE_NORMAL_CHOLESKY;
#endif
bool mixed_precision_solves_used = false;
LinearSolverOrderingType linear_solver_ordering_type = AMD;
// Size of the elimination groups given by the user as hints to
// the linear solver.
std::vector<int> linear_solver_ordering_given;
@@ -1005,7 +1065,7 @@ class CERES_EXPORT Solver {
PreconditionerType preconditioner_type_used = IDENTITY;
// Type of clustering algorithm used for visibility based
// preconditioning. Only meaningful when the preconditioner_type
// preconditioning. Only meaningful when the preconditioner_type_used
// is CLUSTER_JACOBI or CLUSTER_TRIDIAGONAL.
VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS;

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -114,12 +114,17 @@ class SphereManifold final : public Manifold {
static constexpr int TangentSpaceDimension =
AmbientSpaceDimension > 0 ? AmbientSpaceDimension - 1 : Eigen::Dynamic;
// NOTE: Eigen does not allow a RowMajor column vector; in that case the
// storage order is changed.
static constexpr int SafeRowMajor =
TangentSpaceDimension == 1 ? Eigen::ColMajor : Eigen::RowMajor;
using AmbientVector = Eigen::Matrix<double, AmbientSpaceDimension, 1>;
using TangentVector = Eigen::Matrix<double, TangentSpaceDimension, 1>;
using MatrixPlusJacobian = Eigen::Matrix<double,
AmbientSpaceDimension,
TangentSpaceDimension,
Eigen::RowMajor>;
SafeRowMajor>;
using MatrixMinusJacobian = Eigen::Matrix<double,
TangentSpaceDimension,
AmbientSpaceDimension,

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -248,10 +248,9 @@ class TinySolver {
jtj_regularized_ = jtj_;
const Scalar min_diagonal = 1e-6;
const Scalar max_diagonal = 1e32;
for (int i = 0; i < lm_diagonal_.rows(); ++i) {
lm_diagonal_[i] = std::sqrt(
u * (std::min)((std::max)(jtj_(i, i), min_diagonal), max_diagonal));
jtj_regularized_(i, i) += lm_diagonal_[i] * lm_diagonal_[i];
for (int i = 0; i < dx_.rows(); ++i) {
jtj_regularized_(i, i) +=
u * (std::min)((std::max)(jtj_(i, i), min_diagonal), max_diagonal);
}
// TODO(sameeragarwal): Check for failure and deal with it.
@@ -338,7 +337,7 @@ class TinySolver {
// linear system. This allows reusing the intermediate storage across solves.
LinearSolver linear_solver_;
Scalar cost_;
Parameters dx_, x_new_, g_, jacobi_scaling_, lm_diagonal_, lm_step_;
Parameters dx_, x_new_, g_, jacobi_scaling_, lm_step_;
Eigen::Matrix<Scalar, NUM_RESIDUALS, 1> residuals_, f_x_new_;
Eigen::Matrix<Scalar, NUM_RESIDUALS, NUM_PARAMETERS> jacobian_;
Eigen::Matrix<Scalar, NUM_PARAMETERS, NUM_PARAMETERS> jtj_, jtj_regularized_;
@@ -385,7 +384,6 @@ class TinySolver {
x_new_.resize(num_parameters);
g_.resize(num_parameters);
jacobi_scaling_.resize(num_parameters);
lm_diagonal_.resize(num_parameters);
lm_step_.resize(num_parameters);
residuals_.resize(num_residuals);
f_x_new_.resize(num_residuals);

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -171,7 +171,7 @@ class TinySolverAutoDiffFunction {
const CostFunctor& cost_functor_;
// The number of residuals at runtime.
// This will be overriden if NUM_RESIDUALS == Eigen::Dynamic.
// This will be overridden if NUM_RESIDUALS == Eigen::Dynamic.
int num_residuals_ = kNumResiduals;
// To evaluate the cost function with jets, temporary storage is needed. These

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -67,8 +67,7 @@ enum LinearSolverType {
// Eigen.
DENSE_QR,
// Solve the normal equations using a sparse cholesky solver; requires
// SuiteSparse or CXSparse.
// Solve the normal equations using a sparse cholesky solver;
SPARSE_NORMAL_CHOLESKY,
// Specialized solvers, specific to problems with a generalized
@@ -98,7 +97,7 @@ enum PreconditionerType {
// Block diagonal of the Gauss-Newton Hessian.
JACOBI,
// Note: The following three preconditioners can only be used with
// Note: The following four preconditioners can only be used with
// the ITERATIVE_SCHUR solver. They are well suited for Structure
// from Motion problems.
@@ -106,6 +105,10 @@ enum PreconditionerType {
// only be used with the ITERATIVE_SCHUR solver.
SCHUR_JACOBI,
// Use power series expansion to approximate the inversion of Schur complement
// as a preconditioner.
SCHUR_POWER_SERIES_EXPANSION,
// Visibility clustering based preconditioners.
//
// The following two preconditioners use the visibility structure of
@@ -134,7 +137,7 @@ enum PreconditionerType {
// well the matrix Q approximates J'J, or how well the chosen
// residual blocks approximate the non-linear least squares
// problem.
SUBSET,
SUBSET
};
enum VisibilityClusteringType {
@@ -165,11 +168,6 @@ enum SparseLinearAlgebraLibraryType {
// minimum degree ordering.
SUITE_SPARSE,
// A lightweight replacement for SuiteSparse, which does not require
// a LAPACK/BLAS implementation. Consequently, its performance is
// also a bit lower than SuiteSparse.
CX_SPARSE,
// Eigen's sparse linear algebra routines. In particular Ceres uses
// the Simplicial LDLT routines.
EIGEN_SPARSE,
@@ -177,12 +175,39 @@ enum SparseLinearAlgebraLibraryType {
// Apple's Accelerate framework sparse linear algebra routines.
ACCELERATE_SPARSE,
// Nvidia's cuSPARSE library.
CUDA_SPARSE,
// No sparse linear solver should be used. This does not necessarily
// imply that Ceres was built without any sparse library, although that
// is the likely use case, merely that one should not be used.
NO_SPARSE
};
// The order in which variables are eliminated in a linear solver
// can have a significant impact on the efficiency and accuracy
// of the method. e.g., when doing sparse Cholesky factorization,
// there are matrices for which a good ordering will give a
// Cholesky factor with O(n) storage, whereas a bad ordering will
// result in a completely dense factor.
//
// So sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
// SPARSE_SCHUR and preconditioners like SUBSET, CLUSTER_JACOBI &
// CLUSTER_TRIDIAGONAL use a fill reducing ordering of the columns and
// rows of the matrix being factorized before actually computing the
// numeric factorization.
//
// This enum controls the class of algorithm used to compute this
// fill reducing ordering. There is no single algorithm that works
// on all matrices, so determining which algorithm works better is a
// matter of empirical experimentation.
enum LinearSolverOrderingType {
// Approximate Minimum Degree.
AMD,
// Nested Dissection.
NESDIS
};
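// For example (editorial sketch), requesting Nested Dissection:
//
//   options.linear_solver_ordering_type = ceres::NESDIS;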
enum DenseLinearAlgebraLibraryType {
EIGEN,
LAPACK,
@@ -467,6 +492,11 @@ CERES_EXPORT const char* SparseLinearAlgebraLibraryTypeToString(
CERES_EXPORT bool StringToSparseLinearAlgebraLibraryType(
std::string value, SparseLinearAlgebraLibraryType* type);
CERES_EXPORT const char* LinearSolverOrderingTypeToString(
LinearSolverOrderingType type);
CERES_EXPORT bool StringToLinearSolverOrderingType(
std::string value, LinearSolverOrderingType* type);
CERES_EXPORT const char* DenseLinearAlgebraLibraryTypeToString(
DenseLinearAlgebraLibraryType type);
CERES_EXPORT bool StringToDenseLinearAlgebraLibraryType(

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#define CERES_PUBLIC_VERSION_H_
#define CERES_VERSION_MAJOR 2
#define CERES_VERSION_MINOR 1
#define CERES_VERSION_MINOR 2
#define CERES_VERSION_REVISION 0
// Classic CPP stringification; the extra level of indirection allows the

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -61,7 +61,7 @@ const char* SparseStatusToString(SparseStatus_t status) {
CASESTR(SparseParameterError);
CASESTR(SparseStatusReleased);
default:
return "UKNOWN";
return "UNKNOWN";
}
}
} // namespace.
@@ -114,12 +114,12 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
// Accelerate's columnStarts is a long*, not an int*. These types might be
// different (e.g. ARM on iOS) so always make a copy.
column_starts_.resize(A->num_rows() + 1); // +1 for final column length.
std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]);
std::copy_n(A->rows(), column_starts_.size(), column_starts_.data());
ASSparseMatrix At;
At.structure.rowCount = A->num_cols();
At.structure.columnCount = A->num_rows();
At.structure.columnStarts = &column_starts_[0];
At.structure.columnStarts = column_starts_.data();
At.structure.rowIndices = A->mutable_cols();
At.structure.attributes.transpose = false;
At.structure.attributes.triangle = SparseUpperTriangle;
@@ -127,8 +127,8 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
At.structure.attributes._reserved = 0;
At.structure.attributes._allocatedBySparse = 0;
At.structure.blockSize = 1;
if (std::is_same<Scalar, double>::value) {
At.data = reinterpret_cast<Scalar*>(A->mutable_values());
if constexpr (std::is_same_v<Scalar, double>) {
At.data = A->mutable_values();
} else {
values_ =
ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>();
@@ -139,8 +139,23 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
template <typename Scalar>
typename AccelerateSparse<Scalar>::SymbolicFactorization
AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) {
return SparseFactor(SparseFactorizationCholesky, A->structure);
AccelerateSparse<Scalar>::AnalyzeCholesky(OrderingType ordering_type,
ASSparseMatrix* A) {
SparseSymbolicFactorOptions sfoption;
sfoption.control = SparseDefaultControl;
sfoption.orderMethod = SparseOrderDefault;
sfoption.order = nullptr;
sfoption.ignoreRowsAndColumns = nullptr;
sfoption.malloc = malloc;
sfoption.free = free;
sfoption.reportError = nullptr;
if (ordering_type == OrderingType::AMD) {
sfoption.orderMethod = SparseOrderAMD;
} else if (ordering_type == OrderingType::NESDIS) {
sfoption.orderMethod = SparseOrderMetis;
}
return SparseFactor(SparseFactorizationCholesky, A->structure, sfoption);
}
template <typename Scalar>
@@ -190,7 +205,7 @@ AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() {
template <typename Scalar>
CompressedRowSparseMatrix::StorageType
AppleAccelerateCholesky<Scalar>::StorageType() const {
return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR;
}
template <typename Scalar>
@@ -199,7 +214,7 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
CHECK_EQ(lhs->storage_type(), StorageType());
if (lhs == nullptr) {
*message = "Failure: Input lhs is nullptr.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs =
as_.CreateSparseMatrixTransposeView(lhs);
@@ -207,13 +222,14 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
if (!symbolic_factor_) {
symbolic_factor_ = std::make_unique<
typename SparseTypesTrait<Scalar>::SymbolicFactorization>(
as_.AnalyzeCholesky(&as_lhs));
as_.AnalyzeCholesky(ordering_type_, &as_lhs));
if (symbolic_factor_->status != SparseStatusOK) {
*message = StringPrintf(
"Apple Accelerate Failure : Symbolic factorisation failed: %s",
SparseStatusToString(symbolic_factor_->status));
FreeSymbolicFactorization();
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
}
@@ -230,10 +246,10 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
"Apple Accelerate Failure : Numeric factorisation failed: %s",
SparseStatusToString(numeric_factor_->status));
FreeNumericFactorization();
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
template <typename Scalar>
@@ -246,8 +262,8 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution;
as_rhs_and_solution.count = num_cols;
if (std::is_same<Scalar, double>::value) {
as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution);
if constexpr (std::is_same_v<Scalar, double>) {
as_rhs_and_solution.data = solution;
std::copy_n(rhs, num_cols, solution);
} else {
scalar_rhs_and_solution_ =
@@ -259,7 +275,7 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
VectorRef(solution, num_cols) =
scalar_rhs_and_solution_.template cast<double>();
}
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
template <typename Scalar>

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -55,18 +55,18 @@ struct SparseTypesTrait {};
template <>
struct SparseTypesTrait<double> {
typedef DenseVector_Double DenseVector;
typedef SparseMatrix_Double SparseMatrix;
typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
typedef SparseOpaqueFactorization_Double NumericFactorization;
using DenseVector = DenseVector_Double;
using SparseMatrix = SparseMatrix_Double;
using SymbolicFactorization = SparseOpaqueSymbolicFactorization;
using NumericFactorization = SparseOpaqueFactorization_Double;
};
template <>
struct SparseTypesTrait<float> {
typedef DenseVector_Float DenseVector;
typedef SparseMatrix_Float SparseMatrix;
typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
typedef SparseOpaqueFactorization_Float NumericFactorization;
using DenseVector = DenseVector_Float;
using SparseMatrix = SparseMatrix_Float;
using SymbolicFactorization = SparseOpaqueSymbolicFactorization;
using NumericFactorization = SparseOpaqueFactorization_Float;
};
template <typename Scalar>
@@ -91,7 +91,8 @@ class AccelerateSparse {
// objects internally).
ASSparseMatrix CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A);
// Computes a symbolic factorisation of A that can be used in Solve().
SymbolicFactorization AnalyzeCholesky(ASSparseMatrix* A);
SymbolicFactorization AnalyzeCholesky(OrderingType ordering_type,
ASSparseMatrix* A);
// Compute the numeric Cholesky factorization of A, given its
// symbolic factorization.
NumericFactorization Cholesky(ASSparseMatrix* A,

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,14 +38,12 @@
#include "ceres/stringprintf.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
using std::string;
namespace ceres::internal {
bool IsArrayValid(const int size, const double* x) {
bool IsArrayValid(const int64_t size, const double* x) {
if (x != nullptr) {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) {
return false;
}
@@ -54,12 +52,12 @@ bool IsArrayValid(const int size, const double* x) {
return true;
}
int FindInvalidValue(const int size, const double* x) {
int64_t FindInvalidValue(const int64_t size, const double* x) {
if (x == nullptr) {
return size;
}
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) {
return i;
}
@@ -68,16 +66,18 @@ int FindInvalidValue(const int size, const double* x) {
return size;
}
void InvalidateArray(const int size, double* x) {
void InvalidateArray(const int64_t size, double* x) {
if (x != nullptr) {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
x[i] = kImpossibleValue;
}
}
}
void AppendArrayToString(const int size, const double* x, string* result) {
for (int i = 0; i < size; ++i) {
void AppendArrayToString(const int64_t size,
const double* x,
std::string* result) {
for (int64_t i = 0; i < size; ++i) {
if (x == nullptr) {
StringAppendF(result, "Not Computed ");
} else {
@@ -90,18 +90,17 @@ void AppendArrayToString(const int size, const double* x, string* result) {
}
}
void MapValuesToContiguousRange(const int size, int* array) {
void MapValuesToContiguousRange(const int64_t size, int* array) {
std::vector<int> unique_values(array, array + size);
std::sort(unique_values.begin(), unique_values.end());
unique_values.erase(std::unique(unique_values.begin(), unique_values.end()),
unique_values.end());
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
array[i] =
std::lower_bound(unique_values.begin(), unique_values.end(), array[i]) -
unique_values.begin();
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,30 +43,30 @@
#ifndef CERES_INTERNAL_ARRAY_UTILS_H_
#define CERES_INTERNAL_ARRAY_UTILS_H_
#include <cstdint>
#include <string>
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Fill the array x with an impossible value that the user code is
// never expected to compute.
CERES_NO_EXPORT void InvalidateArray(int size, double* x);
CERES_NO_EXPORT void InvalidateArray(const int64_t size, double* x);
// Check if all the entries of the array x are valid, i.e. all the
// values in the array should be finite and none of them should be
// equal to the "impossible" value used by InvalidateArray.
CERES_NO_EXPORT bool IsArrayValid(int size, const double* x);
CERES_NO_EXPORT bool IsArrayValid(const int64_t size, const double* x);
// If the array contains an invalid value, return the index for it,
// otherwise return size.
CERES_NO_EXPORT int FindInvalidValue(const int size, const double* x);
CERES_NO_EXPORT int64_t FindInvalidValue(const int64_t size, const double* x);
// Utility routine to print an array of doubles to a string. If the
// array pointer is nullptr, it is treated as an array of zeros.
CERES_NO_EXPORT void AppendArrayToString(const int size,
CERES_NO_EXPORT void AppendArrayToString(const int64_t size,
const double* x,
std::string* result);
@@ -83,10 +83,9 @@ CERES_NO_EXPORT void AppendArrayToString(const int size,
// gets mapped to
//
// [1 0 2 3 0 1 3]
CERES_NO_EXPORT void MapValuesToContiguousRange(int size, int* array);
CERES_NO_EXPORT void MapValuesToContiguousRange(const int64_t size, int* array);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/residual_block.h"
#include "ceres/sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
void BlockEvaluatePreparer::Init(int const* const* jacobian_layout,
int max_derivatives_per_residual_block) {
@@ -78,5 +77,4 @@ void BlockEvaluatePreparer::Prepare(const ResidualBlock* residual_block,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/internal/export.h"
#include "ceres/scratch_evaluate_preparer.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ResidualBlock;
class SparseMatrix;
@@ -72,7 +71,6 @@ class CERES_NO_EXPORT BlockEvaluatePreparer {
ScratchEvaluatePreparer scratch_evaluate_preparer_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_BLOCK_EVALUATE_PREPARER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,71 +30,197 @@
#include "ceres/block_jacobi_preconditioner.h"
#include <memory>
#include <mutex>
#include <utility>
#include <vector>
#include "Eigen/Dense"
#include "ceres/block_random_access_diagonal_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/casts.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/small_blas.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
BlockJacobiPreconditioner::BlockJacobiPreconditioner(
const BlockSparseMatrix& A) {
const CompressedRowBlockStructure* bs = A.block_structure();
std::vector<int> blocks(bs->cols.size());
for (int i = 0; i < blocks.size(); ++i) {
blocks[i] = bs->cols[i].size;
}
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks);
BlockSparseJacobiPreconditioner::BlockSparseJacobiPreconditioner(
Preconditioner::Options options, const BlockSparseMatrix& A)
: options_(std::move(options)) {
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
A.block_structure()->cols, options_.context, options_.num_threads);
}
BlockJacobiPreconditioner::~BlockJacobiPreconditioner() = default;
BlockSparseJacobiPreconditioner::~BlockSparseJacobiPreconditioner() = default;
bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
const double* D) {
bool BlockSparseJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
const double* D) {
const CompressedRowBlockStructure* bs = A.block_structure();
const double* values = A.values();
m_->SetZero();
for (int i = 0; i < bs->rows.size(); ++i) {
const int row_block_size = bs->rows[i].block.size;
const std::vector<Cell>& cells = bs->rows[i].cells;
for (const auto& cell : cells) {
const int block_id = cell.block_id;
const int col_block_size = bs->cols[block_id].size;
int r, c, row_stride, col_stride;
CellInfo* cell_info =
m_->GetCell(block_id, block_id, &r, &c, &row_stride, &col_stride);
MatrixRef m(cell_info->values, row_stride, col_stride);
ConstMatrixRef b(values + cell.position, row_block_size, col_block_size);
m.block(r, c, col_block_size, col_block_size) += b.transpose() * b;
}
}
ParallelFor(options_.context,
0,
bs->rows.size(),
options_.num_threads,
[this, bs, values](int i) {
const int row_block_size = bs->rows[i].block.size;
const std::vector<Cell>& cells = bs->rows[i].cells;
for (const auto& cell : cells) {
const int block_id = cell.block_id;
const int col_block_size = bs->cols[block_id].size;
int r, c, row_stride, col_stride;
CellInfo* cell_info = m_->GetCell(
block_id, block_id, &r, &c, &row_stride, &col_stride);
MatrixRef m(cell_info->values, row_stride, col_stride);
ConstMatrixRef b(
values + cell.position, row_block_size, col_block_size);
auto lock =
MakeConditionalLock(options_.num_threads, cell_info->m);
// clang-format off
MatrixTransposeMatrixMultiply<Eigen::Dynamic, Eigen::Dynamic,
Eigen::Dynamic,Eigen::Dynamic, 1>(
values + cell.position, row_block_size,col_block_size,
values + cell.position, row_block_size,col_block_size,
cell_info->values,r, c,row_stride,col_stride);
// clang-format on
}
});
if (D != nullptr) {
// Add the diagonal.
int position = 0;
for (int i = 0; i < bs->cols.size(); ++i) {
const int block_size = bs->cols[i].size;
int r, c, row_stride, col_stride;
CellInfo* cell_info = m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
MatrixRef m(cell_info->values, row_stride, col_stride);
m.block(r, c, block_size, block_size).diagonal() +=
ConstVectorRef(D + position, block_size).array().square().matrix();
position += block_size;
}
ParallelFor(options_.context,
0,
bs->cols.size(),
options_.num_threads,
[this, bs, D](int i) {
const int block_size = bs->cols[i].size;
int r, c, row_stride, col_stride;
CellInfo* cell_info =
m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
MatrixRef m(cell_info->values, row_stride, col_stride);
m.block(r, c, block_size, block_size).diagonal() +=
ConstVectorRef(D + bs->cols[i].position, block_size)
.array()
.square()
.matrix();
});
}
m_->Invert();
return true;
}
void BlockJacobiPreconditioner::RightMultiply(const double* x,
double* y) const {
m_->RightMultiply(x, y);
BlockCRSJacobiPreconditioner::BlockCRSJacobiPreconditioner(
Preconditioner::Options options, const CompressedRowSparseMatrix& A)
: options_(std::move(options)), locks_(A.col_blocks().size()) {
auto& col_blocks = A.col_blocks();
// Compute the number of non-zeros in the preconditioner. This is needed so
// that we can construct the CompressedRowSparseMatrix.
const int m_nnz = SumSquaredSizes(col_blocks);
m_ = std::make_unique<CompressedRowSparseMatrix>(
A.num_cols(), A.num_cols(), m_nnz);
const int num_col_blocks = col_blocks.size();
// Populate the sparsity structure of the preconditioner matrix.
int* m_cols = m_->mutable_cols();
int* m_rows = m_->mutable_rows();
m_rows[0] = 0;
for (int i = 0, idx = 0; i < num_col_blocks; ++i) {
// For each column block populate a diagonal block in the preconditioner.
// Note that because of the way the CompressedRowSparseMatrix format
// works, the entire diagonal block is laid out contiguously in memory as a
// row-major matrix. We will use this when updating the block.
auto& block = col_blocks[i];
for (int j = 0; j < block.size; ++j) {
for (int k = 0; k < block.size; ++k, ++idx) {
m_cols[idx] = block.position + k;
}
m_rows[block.position + j + 1] = idx;
}
}
// In reality we only need num_col_blocks locks, however that would require
// that in UpdateImpl we are able to look up the column block from its
// first column. To spare ourselves that map we will instead spend a few
// extra lock objects.
std::vector<std::mutex> locks(A.num_cols());
locks_.swap(locks);
CHECK_EQ(m_rows[A.num_cols()], m_nnz);
}
} // namespace internal
} // namespace ceres
BlockCRSJacobiPreconditioner::~BlockCRSJacobiPreconditioner() = default;
bool BlockCRSJacobiPreconditioner::UpdateImpl(
const CompressedRowSparseMatrix& A, const double* D) {
const auto& col_blocks = A.col_blocks();
const auto& row_blocks = A.row_blocks();
const int num_col_blocks = col_blocks.size();
const int num_row_blocks = row_blocks.size();
const int* a_rows = A.rows();
const int* a_cols = A.cols();
const double* a_values = A.values();
double* m_values = m_->mutable_values();
const int* m_rows = m_->rows();
m_->SetZero();
ParallelFor(
options_.context,
0,
num_row_blocks,
options_.num_threads,
[this, row_blocks, a_rows, a_cols, a_values, m_values, m_rows](int i) {
const int row = row_blocks[i].position;
const int row_block_size = row_blocks[i].size;
const int row_nnz = a_rows[row + 1] - a_rows[row];
ConstMatrixRef row_block(
a_values + a_rows[row], row_block_size, row_nnz);
int c = 0;
while (c < row_nnz) {
const int idx = a_rows[row] + c;
const int col = a_cols[idx];
const int col_block_size = m_rows[col + 1] - m_rows[col];
// We make use of the fact that the entire diagonal block is
// stored contiguously in memory as a row-major matrix.
MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
// We do not have a row_stride version of
// MatrixTransposeMatrixMultiply, otherwise we could use it
// here to further speed up the following expression.
auto b = row_block.middleCols(c, col_block_size);
auto lock = MakeConditionalLock(options_.num_threads, locks_[col]);
m.noalias() += b.transpose() * b;
c += col_block_size;
}
});
ParallelFor(
options_.context,
0,
num_col_blocks,
options_.num_threads,
[col_blocks, m_rows, m_values, D](int i) {
const int col = col_blocks[i].position;
const int col_block_size = col_blocks[i].size;
MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
if (D != nullptr) {
m.diagonal() +=
ConstVectorRef(D + col, col_block_size).array().square().matrix();
}
// TODO(sameeragarwal): Deal with Cholesky inversion failure here and
// elsewhere.
m = m.llt().solve(Matrix::Identity(col_block_size, col_block_size));
});
return true;
}
} // namespace ceres::internal
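In equation form, a sketch of what UpdateImpl above computes for each column block j of A (with b ranging over the cells of that column block and d_j the matching slice of D):

\[
M_j \;=\; \sum_{b \,\in\, \text{column block } j} b^\top b \;+\; \operatorname{diag}(d_j)^2,
\qquad
\texttt{Invert()}: \; M_j \leftarrow M_j^{-1} \ \ (\text{via Cholesky, } M_j = L L^\top).
\]

Applying the preconditioner is then the blockwise product y_j += M_j x_j with the inverted blocks, which is why both loops can run as independent ParallelFor iterations over blocks.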

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,34 +38,30 @@
#include "ceres/internal/export.h"
#include "ceres/preconditioner.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
struct CompressedRowBlockStructure;
class CompressedRowSparseMatrix;
// A block Jacobi preconditioner. This is intended for use with
// conjugate gradients, or other iterative symmetric solvers. To use
// the preconditioner, create one by passing a BlockSparseMatrix "A"
// to the constructor. This fixes the sparsity pattern to the pattern
// of the matrix A^TA.
// conjugate gradients, or other iterative symmetric solvers.
// This version of the preconditioner is for use with BlockSparseMatrix
// Jacobians.
//
// Before each use of the preconditioner in a solve with conjugate gradients,
// update the matrix by running Update(A, D). The values of the matrix A are
// inspected to construct the preconditioner. The vector D is applied as the
// D^TD diagonal term.
class CERES_NO_EXPORT BlockJacobiPreconditioner
// TODO(https://github.com/ceres-solver/ceres-solver/issues/936):
// BlockSparseJacobiPreconditioner::RightMultiply will benefit from
// multithreading
class CERES_NO_EXPORT BlockSparseJacobiPreconditioner
: public BlockSparseMatrixPreconditioner {
public:
// A must remain valid while the BlockJacobiPreconditioner is.
explicit BlockJacobiPreconditioner(const BlockSparseMatrix& A);
BlockJacobiPreconditioner(const BlockJacobiPreconditioner&) = delete;
void operator=(const BlockJacobiPreconditioner&) = delete;
~BlockJacobiPreconditioner() override;
// Preconditioner interface
void RightMultiply(const double* x, double* y) const final;
explicit BlockSparseJacobiPreconditioner(Preconditioner::Options,
const BlockSparseMatrix& A);
~BlockSparseJacobiPreconditioner() override;
void RightMultiplyAndAccumulate(const double* x, double* y) const final {
return m_->RightMultiplyAndAccumulate(x, y);
}
int num_rows() const final { return m_->num_rows(); }
int num_cols() const final { return m_->num_rows(); }
const BlockRandomAccessDiagonalMatrix& matrix() const { return *m_; }
@@ -73,11 +69,35 @@ class CERES_NO_EXPORT BlockJacobiPreconditioner
private:
bool UpdateImpl(const BlockSparseMatrix& A, const double* D) final;
Preconditioner::Options options_;
std::unique_ptr<BlockRandomAccessDiagonalMatrix> m_;
};
} // namespace internal
} // namespace ceres
// This version of the preconditioner is for use with CompressedRowSparseMatrix
// Jacobians.
class CERES_NO_EXPORT BlockCRSJacobiPreconditioner
: public CompressedRowSparseMatrixPreconditioner {
public:
// A must remain valid while the BlockCRSJacobiPreconditioner is.
explicit BlockCRSJacobiPreconditioner(Preconditioner::Options options,
const CompressedRowSparseMatrix& A);
~BlockCRSJacobiPreconditioner() override;
void RightMultiplyAndAccumulate(const double* x, double* y) const final {
m_->RightMultiplyAndAccumulate(x, y);
}
int num_rows() const final { return m_->num_rows(); }
int num_cols() const final { return m_->num_rows(); }
const CompressedRowSparseMatrix& matrix() const { return *m_; }
private:
bool UpdateImpl(const CompressedRowSparseMatrix& A, const double* D) final;
Preconditioner::Options options_;
std::vector<std::mutex> locks_;
std::unique_ptr<CompressedRowSparseMatrix> m_;
};
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
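Both preconditioner updates above serialize concurrent writes to a block with MakeConditionalLock. Its definition lives elsewhere in Ceres; a plausible model of the semantics relied on here (this sketch is an assumption, not the actual implementation):

#include <mutex>

// Assumed semantics: take the lock only when the update actually runs on
// multiple threads, so single-threaded solves pay no synchronization cost.
inline std::unique_lock<std::mutex> MakeConditionalLock(const int num_threads,
                                                        std::mutex& m) {
  return num_threads <= 1 ? std::unique_lock<std::mutex>()
                          : std::unique_lock<std::mutex>(m);
}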

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,7 @@
#include <algorithm>
#include <memory>
#include <vector>
#include "ceres/block_evaluate_preparer.h"
#include "ceres/block_sparse_matrix.h"
@@ -41,10 +42,7 @@
#include "ceres/program.h"
#include "ceres/residual_block.h"
namespace ceres {
namespace internal {
using std::vector;
namespace ceres::internal {
namespace {
@@ -56,19 +54,27 @@ namespace {
// the first num_eliminate_blocks parameter blocks as indicated by the parameter
// block ordering. The remaining parameter blocks are the F blocks.
//
// In order to simplify handling of block-sparse to CRS conversion, cells
// within a row-block of the non-partitioned matrix are stored in memory
// sequentially in the order of increasing column-block id. In the case of
// partitioned matrices, cells corresponding to the F sub-matrix are stored
// sequentially in the order of increasing column-block id (with cells
// corresponding to the E sub-matrix stored separately).
//
// TODO(keir): Consider if we should use a boolean for each parameter block
// instead of num_eliminate_blocks.
void BuildJacobianLayout(const Program& program,
bool BuildJacobianLayout(const Program& program,
int num_eliminate_blocks,
vector<int*>* jacobian_layout,
vector<int>* jacobian_layout_storage) {
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
std::vector<int*>* jacobian_layout,
std::vector<int>* jacobian_layout_storage) {
const std::vector<ResidualBlock*>& residual_blocks =
program.residual_blocks();
// Iterate over all the active residual blocks and determine how many E blocks
// are there. This will determine where the F blocks start in the jacobian
// matrix. Also compute the number of jacobian blocks.
int f_block_pos = 0;
int num_jacobian_blocks = 0;
unsigned int f_block_pos = 0;
unsigned int num_jacobian_blocks = 0;
for (auto* residual_block : residual_blocks) {
const int num_residuals = residual_block->NumResiduals();
const int num_parameter_blocks = residual_block->NumParameterBlocks();
@@ -84,6 +90,11 @@ void BuildJacobianLayout(const Program& program,
}
}
}
if (num_jacobian_blocks > std::numeric_limits<int>::max()) {
LOG(ERROR) << "Overlow error. Too many blocks in the jacobian matrix : "
<< num_jacobian_blocks;
return false;
}
}
// We now know that the E blocks are laid out starting at zero, and the F
@@ -95,65 +106,103 @@ void BuildJacobianLayout(const Program& program,
jacobian_layout_storage->resize(num_jacobian_blocks);
int e_block_pos = 0;
int* jacobian_pos = &(*jacobian_layout_storage)[0];
int* jacobian_pos = jacobian_layout_storage->data();
std::vector<std::pair<int, int>> active_parameter_blocks;
for (int i = 0; i < residual_blocks.size(); ++i) {
const ResidualBlock* residual_block = residual_blocks[i];
const int num_residuals = residual_block->NumResiduals();
const int num_parameter_blocks = residual_block->NumParameterBlocks();
(*jacobian_layout)[i] = jacobian_pos;
// Cells from the F sub-matrix are to be stored sequentially with increasing
// column block id. For each non-constant parameter block, a pair of indices
// (index in the list of active parameter blocks and index in the list of
// all parameter blocks) is computed, and the index pairs are sorted by the
// id of the corresponding column block.
active_parameter_blocks.clear();
active_parameter_blocks.reserve(num_parameter_blocks);
for (int j = 0; j < num_parameter_blocks; ++j) {
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
const int parameter_block_index = parameter_block->index();
if (parameter_block->IsConstant()) {
continue;
}
const int k = active_parameter_blocks.size();
active_parameter_blocks.emplace_back(k, j);
}
std::sort(active_parameter_blocks.begin(),
active_parameter_blocks.end(),
[&residual_block](const std::pair<int, int>& a,
const std::pair<int, int>& b) {
return residual_block->parameter_blocks()[a.second]->index() <
residual_block->parameter_blocks()[b.second]->index();
});
// Cell positions for each active parameter block are filled in the order of
// active parameter block indices sorted by column block index. This
// guarantees that cells are laid out sequentially with increasing column
// block indices.
for (const auto& indices : active_parameter_blocks) {
const auto [k, j] = indices;
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
const int parameter_block_index = parameter_block->index();
const int jacobian_block_size =
num_residuals * parameter_block->TangentSize();
if (parameter_block_index < num_eliminate_blocks) {
*jacobian_pos = e_block_pos;
jacobian_pos[k] = e_block_pos;
e_block_pos += jacobian_block_size;
} else {
*jacobian_pos = f_block_pos;
jacobian_pos[k] = static_cast<int>(f_block_pos);
f_block_pos += jacobian_block_size;
if (f_block_pos > std::numeric_limits<int>::max()) {
LOG(ERROR)
<< "Overlow error. Too many entries in the Jacobian matrix.";
return false;
}
}
jacobian_pos++;
}
jacobian_pos += active_parameter_blocks.size();
}
return true;
}
} // namespace
BlockJacobianWriter::BlockJacobianWriter(const Evaluator::Options& options,
Program* program)
: program_(program) {
: options_(options), program_(program) {
CHECK_GE(options.num_eliminate_blocks, 0)
<< "num_eliminate_blocks must be greater than 0.";
BuildJacobianLayout(*program,
options.num_eliminate_blocks,
&jacobian_layout_,
&jacobian_layout_storage_);
jacobian_layout_is_valid_ = BuildJacobianLayout(*program,
options.num_eliminate_blocks,
&jacobian_layout_,
&jacobian_layout_storage_);
}
// Create evaluate preparers that point directly into the final jacobian. This
// makes the final Write() a nop.
std::unique_ptr<BlockEvaluatePreparer[]>
BlockJacobianWriter::CreateEvaluatePreparers(int num_threads) {
int max_derivatives_per_residual_block =
BlockJacobianWriter::CreateEvaluatePreparers(unsigned num_threads) {
const int max_derivatives_per_residual_block =
program_->MaxDerivativesPerResidualBlock();
auto preparers = std::make_unique<BlockEvaluatePreparer[]>(num_threads);
for (int i = 0; i < num_threads; i++) {
preparers[i].Init(&jacobian_layout_[0], max_derivatives_per_residual_block);
for (unsigned i = 0; i < num_threads; i++) {
preparers[i].Init(jacobian_layout_.data(),
max_derivatives_per_residual_block);
}
return preparers;
}
std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
if (!jacobian_layout_is_valid_) {
LOG(ERROR) << "Unable to create Jacobian matrix. Too many entries in the "
"Jacobian matrix.";
return nullptr;
}
auto* bs = new CompressedRowBlockStructure;
const vector<ParameterBlock*>& parameter_blocks =
const std::vector<ParameterBlock*>& parameter_blocks =
program_->parameter_blocks();
// Construct the column blocks.
@@ -167,7 +216,8 @@ std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
}
// Construct the cells in each row.
const vector<ResidualBlock*>& residual_blocks = program_->residual_blocks();
const std::vector<ResidualBlock*>& residual_blocks =
program_->residual_blocks();
int row_block_position = 0;
bs->rows.resize(residual_blocks.size());
for (int i = 0; i < residual_blocks.size(); ++i) {
@@ -206,8 +256,8 @@ std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
std::sort(row->cells.begin(), row->cells.end(), CellLessThan);
}
return std::make_unique<BlockSparseMatrix>(bs);
return std::make_unique<BlockSparseMatrix>(
bs, options_.sparse_linear_algebra_library_type == CUDA_SPARSE);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
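BuildJacobianLayout above guards two distinct 32-bit overflows: the number of jacobian blocks and the running f_block_pos offset. The same pattern in isolation (the function and variable names are illustrative, and a 64-bit accumulator is used for clarity):

#include <cstdint>
#include <limits>
#include <vector>

#include "glog/logging.h"

// Returns false instead of silently wrapping when the accumulated size no
// longer fits the 32-bit indices used by the sparse-matrix storage.
bool AccumulateSizesChecked(const std::vector<int>& block_sizes, int* total) {
  int64_t sum = 0;
  for (const int size : block_sizes) {
    sum += size;
    if (sum > std::numeric_limits<int>::max()) {
      LOG(ERROR) << "Overflow error. Total size does not fit an int: " << sum;
      return false;
    }
  }
  *total = static_cast<int>(sum);
  return true;
}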

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,16 +44,26 @@
#include "ceres/evaluator.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockEvaluatePreparer;
class Program;
class SparseMatrix;
// TODO(sameeragarwal): This class needs documemtation.
// TODO(sameeragarwal): This class needs documentation.
class CERES_NO_EXPORT BlockJacobianWriter {
public:
// Pre-computes positions of cells in block-sparse jacobian.
// Two possible memory layouts are implemented:
// - Non-partitioned case
// - Partitioned case (for Schur type linear solver)
//
// In non-partitioned case, cells are stored sequentially in the
// lexicographic order of (row block id, column block id).
//
// In the case of a partitioned matrix, cells of each sub-matrix (E and F) are
// stored sequentially in the lexicographic order of (row block id, column
// block id) and cells from E sub-matrix precede cells from F sub-matrix.
BlockJacobianWriter(const Evaluator::Options& options, Program* program);
// JacobianWriter interface.
@@ -61,7 +71,7 @@ class CERES_NO_EXPORT BlockJacobianWriter {
// Create evaluate preparers that point directly into the final jacobian.
// This makes the final Write() a nop.
std::unique_ptr<BlockEvaluatePreparer[]> CreateEvaluatePreparers(
int num_threads);
unsigned num_threads);
std::unique_ptr<SparseMatrix> CreateJacobian() const;
@@ -75,12 +85,13 @@ class CERES_NO_EXPORT BlockJacobianWriter {
}
private:
Evaluator::Options options_;
Program* program_;
// Stores the position of each residual / parameter jacobian.
//
// The block sparse matrix that this writer writes to is stored as a set of
// contiguos dense blocks, one after each other; see BlockSparseMatrix. The
// contiguous dense blocks, one after each other; see BlockSparseMatrix. The
// "double* values_" member of the block sparse matrix contains all of these
// blocks. Given a pointer to the first element of a block and the size of
// that block, it's possible to write to it.
@@ -122,9 +133,14 @@ class CERES_NO_EXPORT BlockJacobianWriter {
// The pointers in jacobian_layout_ point directly into this vector.
std::vector<int> jacobian_layout_storage_;
// The constructor computes the layout of the Jacobian, and this bool keeps
// track of whether the computation of the layout completed successfully. If
// it is false, then jacobian_layout and jacobian_layout_storage are both in
// an invalid state.
bool jacobian_layout_is_valid_ = false;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_BLOCK_JACOBIAN_WRITER_H_
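To make the two layouts concrete, consider a toy problem (illustrative, not from the source): num_eliminate_blocks = 1, column blocks {e0, f0, f1}, and two residual blocks whose non-constant parameters touch (e0, f1) and (f0, f1) respectively. The E region of the values array then holds residual block 0's e0 cell starting at position zero, and the F region that follows holds, in order, residual block 0's f1 cell, then residual block 1's f0 and f1 cells. Within each sub-matrix the cells thus appear in lexicographic (row block id, column block id) order, each stored as a contiguous row-major dense block.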

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,26 +30,21 @@
#include "ceres/block_random_access_dense_matrix.h"
#include <utility>
#include <vector>
#include "ceres/internal/eigen.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
const std::vector<int>& blocks) {
const int num_blocks = blocks.size();
block_layout_.resize(num_blocks, 0);
num_rows_ = 0;
for (int i = 0; i < num_blocks; ++i) {
block_layout_[i] = num_rows_;
num_rows_ += blocks[i];
}
std::vector<Block> blocks, ContextImpl* context, int num_threads)
: blocks_(std::move(blocks)), context_(context), num_threads_(num_threads) {
const int num_blocks = blocks_.size();
num_rows_ = NumScalarEntries(blocks_);
values_ = std::make_unique<double[]>(num_rows_ * num_rows_);
cell_infos_ = std::make_unique<CellInfo[]>(num_blocks * num_blocks);
for (int i = 0; i < num_blocks * num_blocks; ++i) {
cell_infos_[i].values = values_.get();
@@ -58,30 +53,23 @@ BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
SetZero();
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
BlockRandomAccessDenseMatrix::~BlockRandomAccessDenseMatrix() = default;
CellInfo* BlockRandomAccessDenseMatrix::GetCell(const int row_block_id,
const int col_block_id,
int* row,
int* col,
int* row_stride,
int* col_stride) {
*row = block_layout_[row_block_id];
*col = block_layout_[col_block_id];
*row = blocks_[row_block_id].position;
*col = blocks_[col_block_id].position;
*row_stride = num_rows_;
*col_stride = num_rows_;
return &cell_infos_[row_block_id * block_layout_.size() + col_block_id];
return &cell_infos_[row_block_id * blocks_.size() + col_block_id];
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
void BlockRandomAccessDenseMatrix::SetZero() {
if (num_rows_) {
VectorRef(values_.get(), num_rows_ * num_rows_).setZero();
}
ParallelSetZero(context_, num_threads_, values_.get(), num_rows_ * num_rows_);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
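Since GetCell above hands back whole-matrix strides, indexing into a cell is plain row-major arithmetic over the single shared array. A standalone model of that addressing (the function name is illustrative):

#include <vector>

// One dense num_rows x num_rows row-major array backs every cell; the cell
// for block pair (i, j) starts at (blocks[i].position, blocks[j].position)
// and row_stride == col_stride == num_rows.
double& CellEntry(std::vector<double>& values, int num_rows,
                  int block_i_position, int block_j_position, int r, int c) {
  return values[(block_i_position + r) * num_rows + (block_j_position + c)];
}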

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,11 +35,12 @@
#include <vector>
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A square block random accessible matrix with the same row and
// column block structure. All cells are stored in the same single
@@ -56,13 +57,11 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
public:
// blocks is a vector of block sizes. The resulting matrix has
// blocks.size() * blocks.size() cells.
explicit BlockRandomAccessDenseMatrix(const std::vector<int>& blocks);
BlockRandomAccessDenseMatrix(const BlockRandomAccessDenseMatrix&) = delete;
void operator=(const BlockRandomAccessDenseMatrix&) = delete;
explicit BlockRandomAccessDenseMatrix(std::vector<Block> blocks,
ContextImpl* context,
int num_threads);
// The destructor is not thread safe. It assumes that no one is
// modifying any cells when the matrix is being destroyed.
~BlockRandomAccessDenseMatrix() override;
~BlockRandomAccessDenseMatrix() override = default;
// BlockRandomAccessMatrix interface.
CellInfo* GetCell(int row_block_id,
@@ -72,8 +71,6 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
int* row_stride,
int* col_stride) final;
// This is not a thread safe method, it assumes that no cell is
// locked.
void SetZero() final;
// Since the matrix is square with the same row and column block
@@ -86,14 +83,15 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
double* mutable_values() { return values_.get(); }
private:
int num_rows_;
std::vector<int> block_layout_;
std::vector<Block> blocks_;
ContextImpl* context_ = nullptr;
int num_threads_ = -1;
int num_rows_ = -1;
std::unique_ptr<double[]> values_;
std::unique_ptr<CellInfo[]> cell_infos_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,61 +37,26 @@
#include <vector>
#include "Eigen/Dense"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/internal/export.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/stl_util.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::vector;
// TODO(sameeragarwal): Drop the dependence on TripletSparseMatrix.
namespace ceres::internal {
BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix(
const vector<int>& blocks)
: blocks_(blocks) {
// Build the row/column layout vector and count the number of scalar
// rows/columns.
int num_cols = 0;
int num_nonzeros = 0;
vector<int> block_positions;
for (int block_size : blocks_) {
block_positions.push_back(num_cols);
num_cols += block_size;
num_nonzeros += block_size * block_size;
const std::vector<Block>& blocks, ContextImpl* context, int num_threads)
: context_(context), num_threads_(num_threads) {
m_ = CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(nullptr, blocks);
double* values = m_->mutable_values();
layout_.reserve(blocks.size());
for (auto& block : blocks) {
layout_.emplace_back(std::make_unique<CellInfo>(values));
values += block.size * block.size;
}
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
<< num_nonzeros;
tsm_ =
std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros);
tsm_->set_num_nonzeros(num_nonzeros);
int* rows = tsm_->mutable_rows();
int* cols = tsm_->mutable_cols();
double* values = tsm_->mutable_values();
int pos = 0;
for (int i = 0; i < blocks_.size(); ++i) {
const int block_size = blocks_[i];
layout_.push_back(new CellInfo(values + pos));
const int block_begin = block_positions[i];
for (int r = 0; r < block_size; ++r) {
for (int c = 0; c < block_size; ++c, ++pos) {
rows[pos] = block_begin + r;
cols[pos] = block_begin + c;
}
}
}
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
BlockRandomAccessDiagonalMatrix::~BlockRandomAccessDiagonalMatrix() {
STLDeleteContainerPointers(layout_.begin(), layout_.end());
}
CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id,
@@ -103,47 +68,51 @@ CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id,
if (row_block_id != col_block_id) {
return nullptr;
}
const int stride = blocks_[row_block_id];
auto& blocks = m_->row_blocks();
const int stride = blocks[row_block_id].size;
// Each cell is stored contiguously as its own little dense matrix.
*row = 0;
*col = 0;
*row_stride = stride;
*col_stride = stride;
return layout_[row_block_id];
return layout_[row_block_id].get();
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
void BlockRandomAccessDiagonalMatrix::SetZero() {
if (tsm_->num_nonzeros()) {
VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero();
}
ParallelSetZero(
context_, num_threads_, m_->mutable_values(), m_->num_nonzeros());
}
void BlockRandomAccessDiagonalMatrix::Invert() {
double* values = tsm_->mutable_values();
for (int block_size : blocks_) {
MatrixRef block(values, block_size, block_size);
block = block.selfadjointView<Eigen::Upper>().llt().solve(
Matrix::Identity(block_size, block_size));
values += block_size * block_size;
}
auto& blocks = m_->row_blocks();
const int num_blocks = blocks.size();
ParallelFor(context_, 0, num_blocks, num_threads_, [this, blocks](int i) {
auto* cell_info = layout_[i].get();
auto& block = blocks[i];
MatrixRef b(cell_info->values, block.size, block.size);
b = b.selfadjointView<Eigen::Upper>().llt().solve(
Matrix::Identity(block.size, block.size));
});
}
void BlockRandomAccessDiagonalMatrix::RightMultiply(const double* x,
double* y) const {
void BlockRandomAccessDiagonalMatrix::RightMultiplyAndAccumulate(
const double* x, double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
const double* values = tsm_->values();
for (int block_size : blocks_) {
ConstMatrixRef block(values, block_size, block_size);
VectorRef(y, block_size).noalias() += block * ConstVectorRef(x, block_size);
x += block_size;
y += block_size;
values += block_size * block_size;
}
auto& blocks = m_->row_blocks();
const int num_blocks = blocks.size();
ParallelFor(
context_, 0, num_blocks, num_threads_, [this, blocks, x, y](int i) {
auto* cell_info = layout_[i].get();
auto& block = blocks[i];
ConstMatrixRef b(cell_info->values, block.size, block.size);
VectorRef(y + block.position, block.size).noalias() +=
b * ConstVectorRef(x + block.position, block.size);
});
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
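Because the matrix is block diagonal, every method above decomposes into independent per-block work, which is what the lock-free ParallelFor loops exploit:

\[
M = \operatorname{blockdiag}(M_1, \dots, M_k), \qquad
\texttt{Invert()}: \; M_i \leftarrow M_i^{-1} \ (\text{via } M_i = L L^\top), \qquad
\texttt{RightMultiplyAndAccumulate()}: \; y_i \mathrel{+}= M_i\, x_i .
\]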

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,33 +32,30 @@
#define CERES_INTERNAL_BLOCK_RANDOM_ACCESS_DIAGONAL_MATRIX_H_
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A thread safe block diagonal matrix implementation of
// BlockRandomAccessMatrix.
// A BlockRandomAccessMatrix which only stores the block diagonal.
// BlockRandomAccessSparseMatrix can also be used to do this, but this class is
// more efficient in time and in space.
class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix
: public BlockRandomAccessMatrix {
public:
// blocks is an array of block sizes.
explicit BlockRandomAccessDiagonalMatrix(const std::vector<int>& blocks);
BlockRandomAccessDiagonalMatrix(const BlockRandomAccessDiagonalMatrix&) =
delete;
void operator=(const BlockRandomAccessDiagonalMatrix&) = delete;
// The destructor is not thread safe. It assumes that no one is
// modifying any cells when the matrix is being destroyed.
~BlockRandomAccessDiagonalMatrix() override;
BlockRandomAccessDiagonalMatrix(const std::vector<Block>& blocks,
ContextImpl* context,
int num_threads);
~BlockRandomAccessDiagonalMatrix() override = default;
// BlockRandomAccessMatrix Interface.
CellInfo* GetCell(int row_block_id,
@@ -68,36 +65,30 @@ class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix
int* row_stride,
int* col_stride) final;
// This is not a thread safe method, it assumes that no cell is
// locked.
// m = 0
void SetZero() final;
// Invert the matrix assuming that each block is positive definite.
// m = m^{-1}
void Invert();
// y += S * x
void RightMultiply(const double* x, double* y) const;
// y += m * x
void RightMultiplyAndAccumulate(const double* x, double* y) const;
// Since the matrix is square, num_rows() == num_cols().
int num_rows() const final { return tsm_->num_rows(); }
int num_cols() const final { return tsm_->num_cols(); }
int num_rows() const final { return m_->num_rows(); }
int num_cols() const final { return m_->num_cols(); }
const TripletSparseMatrix* matrix() const { return tsm_.get(); }
TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
const CompressedRowSparseMatrix* matrix() const { return m_.get(); }
CompressedRowSparseMatrix* mutable_matrix() { return m_.get(); }
private:
// row/column block sizes.
const std::vector<int> blocks_;
std::vector<CellInfo*> layout_;
// The underlying matrix object which actually stores the cells.
std::unique_ptr<TripletSparseMatrix> tsm_;
friend class BlockRandomAccessDiagonalMatrixTest;
ContextImpl* context_ = nullptr;
const int num_threads_ = 1;
std::unique_ptr<CompressedRowSparseMatrix> m_;
std::vector<std::unique_ptr<CellInfo>> layout_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,10 +30,8 @@
#include "ceres/block_random_access_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
BlockRandomAccessMatrix::~BlockRandomAccessMatrix() = default;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A matrix implementing the BlockRandomAccessMatrix interface is a
// matrix whose rows and columns are divided into blocks. For example
@@ -123,7 +122,6 @@ class CERES_NO_EXPORT BlockRandomAccessMatrix {
virtual int num_cols() const = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_BLOCK_RANDOM_ACCESS_MATRIX_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,87 +37,63 @@
#include <vector>
#include "ceres/internal/export.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::make_pair;
using std::pair;
using std::set;
using std::vector;
namespace ceres::internal {
BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix(
const vector<int>& blocks, const set<pair<int, int>>& block_pairs)
: kMaxRowBlocks(10 * 1000 * 1000), blocks_(blocks) {
CHECK_LT(blocks.size(), kMaxRowBlocks);
const std::vector<Block>& blocks,
const std::set<std::pair<int, int>>& block_pairs,
ContextImpl* context,
int num_threads)
: blocks_(blocks), context_(context), num_threads_(num_threads) {
CHECK_LE(blocks.size(), std::numeric_limits<std::int32_t>::max());
// Build the row/column layout vector and count the number of scalar
// rows/columns.
int num_cols = 0;
block_positions_.reserve(blocks_.size());
for (int block_size : blocks_) {
block_positions_.push_back(num_cols);
num_cols += block_size;
const int num_cols = NumScalarEntries(blocks);
const int num_blocks = blocks.size();
std::vector<int> num_cells_at_row(num_blocks);
for (auto& p : block_pairs) {
++num_cells_at_row[p.first];
}
// Count the number of scalar non-zero entries and build the layout
// object for looking into the values array of the
// TripletSparseMatrix.
auto block_structure_ = new CompressedRowBlockStructure;
block_structure_->cols = blocks;
block_structure_->rows.resize(num_blocks);
auto p = block_pairs.begin();
int num_nonzeros = 0;
for (const auto& block_pair : block_pairs) {
const int row_block_size = blocks_[block_pair.first];
const int col_block_size = blocks_[block_pair.second];
num_nonzeros += row_block_size * col_block_size;
}
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
<< num_nonzeros;
tsm_ =
std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros);
tsm_->set_num_nonzeros(num_nonzeros);
int* rows = tsm_->mutable_rows();
int* cols = tsm_->mutable_cols();
double* values = tsm_->mutable_values();
int pos = 0;
for (const auto& block_pair : block_pairs) {
const int row_block_size = blocks_[block_pair.first];
const int col_block_size = blocks_[block_pair.second];
cell_values_.emplace_back(block_pair, values + pos);
layout_[IntPairToLong(block_pair.first, block_pair.second)] =
new CellInfo(values + pos);
pos += row_block_size * col_block_size;
}
// Fill the sparsity pattern of the underlying matrix.
for (const auto& block_pair : block_pairs) {
const int row_block_id = block_pair.first;
const int col_block_id = block_pair.second;
const int row_block_size = blocks_[row_block_id];
const int col_block_size = blocks_[col_block_id];
int pos =
layout_[IntPairToLong(row_block_id, col_block_id)]->values - values;
for (int r = 0; r < row_block_size; ++r) {
for (int c = 0; c < col_block_size; ++c, ++pos) {
rows[pos] = block_positions_[row_block_id] + r;
cols[pos] = block_positions_[col_block_id] + c;
values[pos] = 1.0;
DCHECK_LT(rows[pos], tsm_->num_rows());
DCHECK_LT(cols[pos], tsm_->num_rows());
}
// Pairs of block indices are sorted lexicographically, thus pairs
// corresponding to a single row-block are stored in segments of index pairs
// with constant row-block index and increasing column-block index.
// CompressedRowBlockStructure is created by traversing the block_pairs set.
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
auto& row = block_structure_->rows[row_block_id];
row.block = blocks[row_block_id];
row.cells.reserve(num_cells_at_row[row_block_id]);
const int row_block_size = blocks[row_block_id].size;
// Process all index pairs corresponding to the current row block. Because
// index pairs are sorted lexicographically, cells are appended to the
// current row block until the first change in row-block index.
for (; p != block_pairs.end() && row_block_id == p->first; ++p) {
const int col_block_id = p->second;
row.cells.emplace_back(col_block_id, num_nonzeros);
num_nonzeros += row_block_size * blocks[col_block_id].size;
}
}
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
BlockRandomAccessSparseMatrix::~BlockRandomAccessSparseMatrix() {
for (const auto& entry : layout_) {
delete entry.second;
bsm_ = std::make_unique<BlockSparseMatrix>(block_structure_);
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
<< num_nonzeros;
double* values = bsm_->mutable_values();
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
const auto& cells = block_structure_->rows[row_block_id].cells;
for (auto& c : cells) {
const int col_block_id = c.block_id;
double* const data = values + c.position;
layout_[IntPairToInt64(row_block_id, col_block_id)] =
std::make_unique<CellInfo>(data);
}
}
}
@@ -127,8 +103,7 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id,
int* col,
int* row_stride,
int* col_stride) {
const LayoutType::iterator it =
layout_.find(IntPairToLong(row_block_id, col_block_id));
const auto it = layout_.find(IntPairToInt64(row_block_id, col_block_id));
if (it == layout_.end()) {
return nullptr;
}
@@ -136,44 +111,49 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id,
// Each cell is stored contiguously as its own little dense matrix.
*row = 0;
*col = 0;
*row_stride = blocks_[row_block_id];
*col_stride = blocks_[col_block_id];
return it->second;
*row_stride = blocks_[row_block_id].size;
*col_stride = blocks_[col_block_id].size;
return it->second.get();
}
// Assume that the user does not hold any locks on any cell blocks
// when they are calling SetZero.
void BlockRandomAccessSparseMatrix::SetZero() {
if (tsm_->num_nonzeros()) {
VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero();
}
bsm_->SetZero(context_, num_threads_);
}
void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
double* y) const {
for (const auto& cell_position_and_data : cell_values_) {
const int row = cell_position_and_data.first.first;
const int row_block_size = blocks_[row];
const int row_block_pos = block_positions_[row];
void BlockRandomAccessSparseMatrix::SymmetricRightMultiplyAndAccumulate(
const double* x, double* y) const {
const auto bs = bsm_->block_structure();
const auto values = bsm_->values();
const int num_blocks = blocks_.size();
const int col = cell_position_and_data.first.second;
const int col_block_size = blocks_[col];
const int col_block_pos = block_positions_[col];
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
const auto& row_block = bs->rows[row_block_id];
const int row_block_size = row_block.block.size;
const int row_block_pos = row_block.block.position;
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
cell_position_and_data.second,
row_block_size,
col_block_size,
x + col_block_pos,
y + row_block_pos);
for (auto& c : row_block.cells) {
const int col_block_id = c.block_id;
const int col_block_size = blocks_[col_block_id].size;
const int col_block_pos = blocks_[col_block_id].position;
// Since the matrix is symmetric, but only the upper triangular
// part is stored, if the block being accessed is not a diagonal
// block, then use the same block to do the corresponding lower
// triangular multiply also.
if (row != col) {
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + c.position,
row_block_size,
col_block_size,
x + col_block_pos,
y + row_block_pos);
if (col_block_id == row_block_id) {
continue;
}
// Since the matrix is symmetric, but only the upper triangular
// part is stored, if the block being accessed is not a diagonal
// block, then use the same block to do the corresponding lower
// triangular multiply also
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
cell_position_and_data.second,
values + c.position,
row_block_size,
col_block_size,
x + row_block_pos,
@@ -182,5 +162,4 @@ void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
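Since only the upper triangular half of the symmetric matrix is stored, each stored off-diagonal cell above contributes to two segments of the result; per stored cell (i, j) with i <= j the accumulation is:

\[
y_i \mathrel{+}= M_{ij}\, x_j, \qquad y_j \mathrel{+}= M_{ij}^\top\, x_i \quad (i \neq j).
\]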

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,17 +39,18 @@
#include <vector>
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A thread safe square block sparse implementation of
// BlockRandomAccessMatrix. Internally a TripletSparseMatrix is used
// BlockRandomAccessMatrix. Internally a BlockSparseMatrix is used
// for doing the actual storage. This class augments this matrix with
// an unordered_map that allows random read/write access.
class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
@@ -59,14 +60,14 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
// <row_block_id, col_block_id> pairs to identify the non-zero cells
// of this matrix.
BlockRandomAccessSparseMatrix(
const std::vector<int>& blocks,
const std::set<std::pair<int, int>>& block_pairs);
BlockRandomAccessSparseMatrix(const BlockRandomAccessSparseMatrix&) = delete;
void operator=(const BlockRandomAccessSparseMatrix&) = delete;
const std::vector<Block>& blocks,
const std::set<std::pair<int, int>>& block_pairs,
ContextImpl* context,
int num_threads);
// The destructor is not thread safe. It assumes that no one is
// modifying any cells when the matrix is being destroyed.
~BlockRandomAccessSparseMatrix() override;
~BlockRandomAccessSparseMatrix() override = default;
// BlockRandomAccessMatrix Interface.
CellInfo* GetCell(int row_block_id,
@@ -80,53 +81,49 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
// locked.
void SetZero() final;
// Assume that the matrix is symmetric and only one half of the
// matrix is stored.
// Assume that the matrix is symmetric and only one half of the matrix is
// stored.
//
// y += S * x
void SymmetricRightMultiply(const double* x, double* y) const;
void SymmetricRightMultiplyAndAccumulate(const double* x, double* y) const;
// Since the matrix is square, num_rows() == num_cols().
int num_rows() const final { return tsm_->num_rows(); }
int num_cols() const final { return tsm_->num_cols(); }
int num_rows() const final { return bsm_->num_rows(); }
int num_cols() const final { return bsm_->num_cols(); }
// Access to the underlying matrix object.
const TripletSparseMatrix* matrix() const { return tsm_.get(); }
TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
const BlockSparseMatrix* matrix() const { return bsm_.get(); }
BlockSparseMatrix* mutable_matrix() { return bsm_.get(); }
private:
int64_t IntPairToLong(int row, int col) const {
return row * kMaxRowBlocks + col;
int64_t IntPairToInt64(int row, int col) const {
return row * kRowShift + col;
}
void LongToIntPair(int64_t index, int* row, int* col) const {
*row = index / kMaxRowBlocks;
*col = index % kMaxRowBlocks;
void Int64ToIntPair(int64_t index, int* row, int* col) const {
*row = index / kRowShift;
*col = index % kRowShift;
}
const int64_t kMaxRowBlocks;
constexpr static int64_t kRowShift{1ll << 32};
// row/column block sizes.
const std::vector<int> blocks_;
std::vector<int> block_positions_;
const std::vector<Block> blocks_;
ContextImpl* context_ = nullptr;
const int num_threads_ = 1;
// A mapping from <row_block_id, col_block_id> to the position in
// the values array of bsm_ where the block is stored.
using LayoutType = std::unordered_map<long, CellInfo*>;
using LayoutType = std::unordered_map<int64_t, std::unique_ptr<CellInfo>>;
LayoutType layout_;
// In order traversal of contents of the matrix. This allows us to
// implement a matrix-vector which is 20% faster than using the
// iterator in the Layout object instead.
std::vector<std::pair<std::pair<int, int>, double*>> cell_values_;
// The underlying matrix object which actually stores the cells.
std::unique_ptr<TripletSparseMatrix> tsm_;
std::unique_ptr<BlockSparseMatrix> bsm_;
friend class BlockRandomAccessSparseMatrixTest;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
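The layout keys above pack a (row block, column block) index pair into one 64-bit integer with kRowShift = 2^32. A standalone round-trip check of that encoding (a sketch mirroring IntPairToInt64 and Int64ToIntPair):

#include <cassert>
#include <cstdint>

constexpr int64_t kRowShift = int64_t{1} << 32;

int64_t Pack(int row, int col) { return row * kRowShift + col; }

void Unpack(int64_t key, int* row, int* col) {
  *row = static_cast<int>(key / kRowShift);
  *col = static_cast<int>(key % kRowShift);
}

int main() {
  int row = 0;
  int col = 0;
  Unpack(Pack(123, 456), &row, &col);
  assert(row == 123 && col == 456);  // Holds for all non-negative 32-bit ids.
  return 0;
}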

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,23 +33,151 @@
#include <algorithm>
#include <cstddef>
#include <memory>
#include <numeric>
#include <random>
#include <vector>
#include "ceres/block_structure.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/random.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif
using std::vector;
namespace ceres::internal {
namespace {
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
if (rows.empty()) {
return;
}
rows[0].cumulative_nnz = rows[0].nnz;
for (int c = 1; c < rows.size(); ++c) {
const int curr_nnz = rows[c].nnz;
rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
}
}
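// For ComputeCumulativeNumberOfNonZeros above: per-row-block nnz values
// {4, 2, 3} yield cumulative_nnz values {4, 6, 9}; the last entry is the
// total number of non-zeros.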
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
const double* values,
int num_rows,
int num_cols,
int num_nonzeros,
const CompressedRowBlockStructure* block_structure) {
auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
num_rows, num_cols, num_nonzeros);
auto crs_cols = crs_matrix->mutable_cols();
auto crs_rows = crs_matrix->mutable_rows();
int value_offset = 0;
const int num_row_blocks = block_structure->rows.size();
const auto& cols = block_structure->cols;
*crs_rows++ = 0;
for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
const auto& row_block = block_structure->rows[row_block_id];
// Empty row block: only requires setting row offsets
if (row_block.cells.empty()) {
std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
crs_rows += row_block.block.size;
continue;
}
int row_nnz = 0;
if constexpr (transpose) {
// The transposed block structure comes with the nnz of each row-block
// filled in.
row_nnz = row_block.nnz / row_block.block.size;
} else {
// The nnz field of the non-transposed block structure is not filled in, and
// it can have a non-sequential structure (consider the case of the jacobian
// for a Schur-complement solver: E and F blocks are stored separately).
for (auto& c : row_block.cells) {
row_nnz += cols[c.block_id].size;
}
}
// Row-wise setup of matrix structure
for (int row = 0; row < row_block.block.size; ++row) {
value_offset += row_nnz;
*crs_rows++ = value_offset;
for (auto& c : row_block.cells) {
const int col_block_size = cols[c.block_id].size;
const int col_position = cols[c.block_id].position;
std::iota(crs_cols, crs_cols + col_block_size, col_position);
crs_cols += col_block_size;
}
}
}
return crs_matrix;
}
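// Worked example (illustrative, not from the source): a single row block of
// size 2 with cells in column blocks of sizes {2, 1} at positions {0, 2}
// gives row_nnz = 3, crs_rows = [0, 3, 6] and crs_cols = [0, 1, 2, 0, 1, 2];
// every scalar row of a row block repeats the same column pattern.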
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
CompressedRowSparseMatrix* crs_matrix,
const double* values,
const CompressedRowBlockStructure* block_structure) {
auto crs_values = crs_matrix->mutable_values();
auto crs_rows = crs_matrix->mutable_rows();
const int num_row_blocks = block_structure->rows.size();
const auto& cols = block_structure->cols;
for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
const auto& row_block = block_structure->rows[row_block_id];
const int row_block_size = row_block.block.size;
const int row_nnz = crs_rows[1] - crs_rows[0];
crs_rows += row_block_size;
if (row_nnz == 0) {
continue;
}
MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
int col_offset = 0;
for (auto& c : row_block.cells) {
const int col_block_size = cols[c.block_id].size;
auto crs_cell =
crs_row_block.block(0, col_offset, row_block_size, col_block_size);
if constexpr (transpose) {
// The transposed matrix is filled using the transposed block-structure
ConstMatrixRef cell(
values + c.position, col_block_size, row_block_size);
crs_cell = cell.transpose();
} else {
ConstMatrixRef cell(
values + c.position, row_block_size, col_block_size);
crs_cell = cell;
}
col_offset += col_block_size;
}
crs_values += row_nnz * row_block_size;
}
}
void SetBlockStructureOfCompressedRowSparseMatrix(
CompressedRowSparseMatrix* crs_matrix,
CompressedRowBlockStructure* block_structure) {
const int num_row_blocks = block_structure->rows.size();
auto& row_blocks = *crs_matrix->mutable_row_blocks();
row_blocks.resize(num_row_blocks);
for (int i = 0; i < num_row_blocks; ++i) {
row_blocks[i] = block_structure->rows[i].block;
}
auto& col_blocks = *crs_matrix->mutable_col_blocks();
col_blocks = block_structure->cols;
}
} // namespace
BlockSparseMatrix::BlockSparseMatrix(
CompressedRowBlockStructure* block_structure)
: num_rows_(0),
CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
: use_page_locked_memory_(use_page_locked_memory),
num_rows_(0),
num_cols_(0),
num_nonzeros_(0),
block_structure_(block_structure) {
@@ -66,7 +194,7 @@ BlockSparseMatrix::BlockSparseMatrix(
int row_block_size = block_structure_->rows[i].block.size;
num_rows_ += row_block_size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
const std::vector<Cell>& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
@@ -79,51 +207,138 @@ BlockSparseMatrix::BlockSparseMatrix(
CHECK_GE(num_nonzeros_, 0);
VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
<< " bytes."; // NOLINT
values_ = std::make_unique<double[]>(num_nonzeros_);
values_ = AllocateValues(num_nonzeros_);
max_num_nonzeros_ = num_nonzeros_;
CHECK(values_ != nullptr);
AddTransposeBlockStructure();
}
void BlockSparseMatrix::SetZero() {
std::fill(values_.get(), values_.get() + num_nonzeros_, 0.0);
}
BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }
void BlockSparseMatrix::RightMultiply(const double* x, double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values_.get() + cell.position,
row_block_size,
col_block_size,
x + col_block_pos,
y + row_block_pos);
}
void BlockSparseMatrix::AddTransposeBlockStructure() {
if (transpose_block_structure_ == nullptr) {
transpose_block_structure_ = CreateTranspose(*block_structure_);
}
}
void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
void BlockSparseMatrix::SetZero() {
std::fill(values_, values_ + num_nonzeros_, 0.0);
}
void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
double* y) const {
RightMultiplyAndAccumulate(x, y, nullptr, 1);
}
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
const auto values = values_;
const auto block_structure = block_structure_.get();
const auto num_row_blocks = block_structure->rows.size();
ParallelFor(context,
0,
num_row_blocks,
num_threads,
[values, block_structure, x, y](int row_block_id) {
const int row_block_pos =
block_structure->rows[row_block_id].block.position;
const int row_block_size =
block_structure->rows[row_block_id].block.size;
const auto& cells = block_structure->rows[row_block_id].cells;
for (const auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_size =
block_structure->cols[col_block_id].size;
const int col_block_pos =
block_structure->cols[col_block_id].position;
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position,
row_block_size,
col_block_size,
x + col_block_pos,
y + row_block_pos);
}
});
}
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const {
// While utilizing the transposed structure allows left-multiplication by a
// dense vector to be performed in parallel, it scatters the access pattern to
// the matrix elements. Thus, multiplication using the transposed structure is
// only useful for parallel execution
CHECK(x != nullptr);
CHECK(y != nullptr);
if (transpose_block_structure_ == nullptr || num_threads == 1) {
LeftMultiplyAndAccumulate(x, y);
return;
}
auto transpose_bs = transpose_block_structure_.get();
const auto values = values_;
const int num_col_blocks = transpose_bs->rows.size();
if (!num_col_blocks) {
return;
}
// Use the non-zero count as the iteration cost for the guided parallel-for loop
ParallelFor(
context,
0,
num_col_blocks,
num_threads,
[values, transpose_bs, x, y](int row_block_id) {
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
int row_block_size = transpose_bs->rows[row_block_id].block.size;
auto& cells = transpose_bs->rows[row_block_id].cells;
for (auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_size = transpose_bs->cols[col_block_id].size;
const int col_block_pos = transpose_bs->cols[col_block_id].position;
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position,
col_block_size,
row_block_size,
x + col_block_pos,
y + row_block_pos);
}
},
transpose_bs->rows.data(),
[](const CompressedRow& row) { return row.cumulative_nnz; });
}
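Taken together, the two overloads above give callers one entry point for parallel sparse-dense products. A minimal usage sketch, assuming a valid ContextImpl is supplied by the solver (the function and variable names here are hypothetical):

// Sketch only: accumulate y += A x and z += A^T w using num_threads
// workers. `context` must outlive both calls.
void AccumulateProducts(const BlockSparseMatrix& A,
                        const double* x, double* y,
                        const double* w, double* z,
                        ContextImpl* context, int num_threads) {
  // Row-major traversal of the block structure; cache-friendly.
  A.RightMultiplyAndAccumulate(x, y, context, num_threads);
  // Uses the transposed block structure so that column blocks can be
  // processed independently; falls back to the serial path when
  // num_threads == 1 or no transpose structure was built.
  A.LeftMultiplyAndAccumulate(w, z, context, num_threads);
}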
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
// Single-threaded left products are always computed using the non-transposed
// block structure, because it has a linear access pattern to the matrix
// elements
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
const auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values_.get() + cell.position,
values_ + cell.position,
row_block_size,
col_block_size,
x + row_block_pos,
@@ -137,35 +352,144 @@ void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
VectorRef(x, num_cols_).setZero();
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
const MatrixRef m(
values_.get() + cell.position, row_block_size, col_block_size);
values_ + cell.position, row_block_size, col_block_size);
VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
}
}
}
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition
void BlockSparseMatrix::SquaredColumnNorm(double* x,
ContextImpl* context,
int num_threads) const {
if (transpose_block_structure_ == nullptr || num_threads == 1) {
SquaredColumnNorm(x);
return;
}
CHECK(x != nullptr);
ParallelSetZero(context, num_threads, x, num_cols_);
auto transpose_bs = transpose_block_structure_.get();
const auto values = values_;
const int num_col_blocks = transpose_bs->rows.size();
ParallelFor(
context,
0,
num_col_blocks,
num_threads,
[values, transpose_bs, x](int row_block_id) {
const auto& row = transpose_bs->rows[row_block_id];
for (auto& cell : row.cells) {
const auto& col = transpose_bs->cols[cell.block_id];
const MatrixRef m(values + cell.position, col.size, row.block.size);
VectorRef(x + row.block.position, row.block.size) +=
m.colwise().squaredNorm();
}
},
transpose_bs->rows.data(),
[](const CompressedRow& row) { return row.cumulative_nnz; });
}
void BlockSparseMatrix::ScaleColumns(const double* scale) {
CHECK(scale != nullptr);
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
MatrixRef m(
values_.get() + cell.position, row_block_size, col_block_size);
MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
}
}
}
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition
void BlockSparseMatrix::ScaleColumns(const double* scale,
ContextImpl* context,
int num_threads) {
if (transpose_block_structure_ == nullptr || num_threads == 1) {
ScaleColumns(scale);
return;
}
CHECK(scale != nullptr);
auto transpose_bs = transpose_block_structure_.get();
auto values = values_;
const int num_col_blocks = transpose_bs->rows.size();
ParallelFor(
context,
0,
num_col_blocks,
num_threads,
[values, transpose_bs, scale](int row_block_id) {
const auto& row = transpose_bs->rows[row_block_id];
for (auto& cell : row.cells) {
const auto& col = transpose_bs->cols[cell.block_id];
MatrixRef m(values + cell.position, col.size, row.block.size);
m *= ConstVectorRef(scale + row.block.position, row.block.size)
.asDiagonal();
}
},
transpose_bs->rows.data(),
[](const CompressedRow& row) { return row.cumulative_nnz; });
}
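All three parallel methods above pass `transpose_bs->rows.data()` together with a cost functor returning `cumulative_nnz`, so ParallelFor can partition column blocks by the number of non-zeros they touch rather than by block count. A sketch of that partitioning idea under the same assumption (this is not the actual ParallelFor implementation; it needs <algorithm> and <vector>):

// Sketch: split row blocks into num_threads contiguous chunks of
// roughly equal nnz using the cumulative_nnz prefix sums stored on
// each CompressedRow.
std::vector<int> SplitByCumulativeNnz(const std::vector<CompressedRow>& rows,
                                      int num_threads) {
  std::vector<int> boundaries = {0};
  const int total_nnz = rows.empty() ? 0 : rows.back().cumulative_nnz;
  for (int t = 1; t < num_threads; ++t) {
    const int target = total_nnz * t / num_threads;
    // First row block whose cumulative nnz exceeds the target.
    auto it = std::upper_bound(rows.begin(),
                               rows.end(),
                               target,
                               [](int value, const CompressedRow& row) {
                                 return value < row.cumulative_nnz;
                               });
    boundaries.push_back(static_cast<int>(it - rows.begin()));
  }
  boundaries.push_back(static_cast<int>(rows.size()));
  return boundaries;
}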
std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
auto bs = transpose_block_structure_.get();
auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
values(), num_cols_, num_rows_, num_nonzeros_, bs);
SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);
UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
return crs_matrix;
}
std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get());
SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
block_structure_.get());
UpdateCompressedRowSparseMatrix(crs_matrix.get());
return crs_matrix;
}
void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
CompressedRowSparseMatrix* crs_matrix) const {
CHECK(crs_matrix != nullptr);
CHECK_EQ(crs_matrix->num_rows(), num_cols_);
CHECK_EQ(crs_matrix->num_cols(), num_rows_);
CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
UpdateCompressedRowSparseMatrixImpl<true>(
crs_matrix, values(), transpose_block_structure_.get());
}
void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
CompressedRowSparseMatrix* crs_matrix) const {
CHECK(crs_matrix != nullptr);
CHECK_EQ(crs_matrix->num_rows(), num_rows_);
CHECK_EQ(crs_matrix->num_cols(), num_cols_);
CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
UpdateCompressedRowSparseMatrixImpl<false>(
crs_matrix, values(), block_structure_.get());
}
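The Update* methods above exist so that the CRS mirror of a block-sparse Jacobian can be built once and then refreshed in place on every linearization, which is the pattern the CUDA solvers rely on. A hypothetical usage sketch (ReevaluateJacobian and kMaxIterations are illustrative only):

// Build the CRS view once; only the values change between iterations.
auto crs = jacobian.ToCompressedRowSparseMatrix();
for (int i = 0; i < kMaxIterations; ++i) {
  ReevaluateJacobian(&jacobian);  // hypothetical re-linearization step
  // Structure is unchanged, so this is a pure value copy.
  jacobian.UpdateCompressedRowSparseMatrix(crs.get());
  // ... hand crs to a solver that consumes CRS matrices ...
}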
void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
CHECK(dense_matrix != nullptr);
@@ -176,14 +500,14 @@ void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
int jac_pos = cell.position;
m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
MatrixRef(values_.get() + jac_pos, row_block_size, col_block_size);
MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
}
}
}
@@ -199,7 +523,7 @@ void BlockSparseMatrix::ToTripletSparseMatrix(
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
const auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
int col_block_id = cell.block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
@@ -223,12 +547,19 @@ const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
return block_structure_.get();
}
// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
return transpose_block_structure_.get();
}
void BlockSparseMatrix::ToTextFile(FILE* file) const {
CHECK(file != nullptr);
for (int i = 0; i < block_structure_->rows.size(); ++i) {
const int row_block_pos = block_structure_->rows[i].block.position;
const int row_block_size = block_structure_->rows[i].block.size;
const vector<Cell>& cells = block_structure_->rows[i].cells;
const auto& cells = block_structure_->rows[i].cells;
for (const auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_size = block_structure_->cols[col_block_id].size;
@@ -293,34 +624,51 @@ void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
for (int i = 0; i < m_bs->rows.size(); ++i) {
const CompressedRow& m_row = m_bs->rows[i];
CompressedRow& row = block_structure_->rows[old_num_row_blocks + i];
const int row_block_id = old_num_row_blocks + i;
CompressedRow& row = block_structure_->rows[row_block_id];
row.block.size = m_row.block.size;
row.block.position = num_rows_;
num_rows_ += m_row.block.size;
row.cells.resize(m_row.cells.size());
if (transpose_block_structure_) {
transpose_block_structure_->cols.emplace_back(row.block);
}
for (int c = 0; c < m_row.cells.size(); ++c) {
const int block_id = m_row.cells[c].block_id;
row.cells[c].block_id = block_id;
row.cells[c].position = num_nonzeros_;
num_nonzeros_ += m_row.block.size * m_bs->cols[block_id].size;
const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
if (transpose_block_structure_) {
transpose_block_structure_->rows[block_id].cells.emplace_back(
row_block_id, num_nonzeros_);
transpose_block_structure_->rows[block_id].nnz += cell_nnz;
}
num_nonzeros_ += cell_nnz;
}
}
if (num_nonzeros_ > max_num_nonzeros_) {
std::unique_ptr<double[]> new_values =
std::make_unique<double[]>(num_nonzeros_);
std::copy_n(values_.get(), old_num_nonzeros, new_values.get());
values_ = std::move(new_values);
double* old_values = values_;
values_ = AllocateValues(num_nonzeros_);
std::copy_n(old_values, old_num_nonzeros, values_);
max_num_nonzeros_ = num_nonzeros_;
FreeValues(old_values);
}
std::copy(m.values(),
m.values() + m.num_nonzeros(),
values_.get() + old_num_nonzeros);
std::copy(
m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);
if (transpose_block_structure_ == nullptr) {
return;
}
ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
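AppendRows keeps the transpose structure consistent: every appended row block becomes a new column of the transpose, its cells are appended to the corresponding transpose rows, and the cumulative counters are rebuilt. A small sketch of the invariant that ComputeCumulativeNumberOfNonZeros establishes (the checker function is illustrative, not part of Ceres):

// cumulative_nnz must be the prefix sum of nnz over the row blocks.
bool CumulativeNnzIsConsistent(const CompressedRowBlockStructure& bs) {
  int running_nnz = 0;
  for (const auto& row : bs.rows) {
    running_nnz += row.nnz;
    if (row.cumulative_nnz != running_nnz) {
      return false;
    }
  }
  return true;
}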
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
const int num_row_blocks = block_structure_->rows.size();
const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
int delta_num_nonzeros = 0;
int delta_num_rows = 0;
const std::vector<Block>& column_blocks = block_structure_->cols;
@@ -330,15 +678,40 @@ void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
for (int c = 0; c < row.cells.size(); ++c) {
const Cell& cell = row.cells[c];
delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;
if (transpose_block_structure_) {
auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
while (!col_cells.empty() &&
col_cells.back().block_id >= new_num_row_blocks) {
const int del_block_id = col_cells.back().block_id;
const int del_block_rows =
block_structure_->rows[del_block_id].block.size;
const int del_block_cols = column_blocks[cell.block_id].size;
const int del_cell_nnz = del_block_rows * del_block_cols;
transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
col_cells.pop_back();
}
}
}
}
num_nonzeros_ -= delta_num_nonzeros;
num_rows_ -= delta_num_rows;
block_structure_->rows.resize(num_row_blocks - delta_row_blocks);
block_structure_->rows.resize(new_num_row_blocks);
if (transpose_block_structure_ == nullptr) {
return;
}
for (int i = 0; i < delta_row_blocks; ++i) {
transpose_block_structure_->cols.pop_back();
}
ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
const BlockSparseMatrix::RandomMatrixOptions& options) {
const BlockSparseMatrix::RandomMatrixOptions& options,
std::mt19937& prng,
bool use_page_locked_memory) {
CHECK_GT(options.num_row_blocks, 0);
CHECK_GT(options.min_row_block_size, 0);
CHECK_GT(options.max_row_block_size, 0);
@@ -346,7 +719,11 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
CHECK_GT(options.block_density, 0.0);
CHECK_LE(options.block_density, 1.0);
auto* bs = new CompressedRowBlockStructure();
std::uniform_int_distribution<int> col_distribution(
options.min_col_block_size, options.max_col_block_size);
std::uniform_int_distribution<int> row_distribution(
options.min_row_block_size, options.max_row_block_size);
auto bs = std::make_unique<CompressedRowBlockStructure>();
if (options.col_blocks.empty()) {
CHECK_GT(options.num_col_blocks, 0);
CHECK_GT(options.min_col_block_size, 0);
@@ -356,10 +733,7 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
// Generate the col block structure.
int col_block_position = 0;
for (int i = 0; i < options.num_col_blocks; ++i) {
// Generate a random integer in [min_col_block_size, max_col_block_size]
const int delta_block_size =
Uniform(options.max_col_block_size - options.min_col_block_size);
const int col_block_size = options.min_col_block_size + delta_block_size;
const int col_block_size = col_distribution(prng);
bs->cols.emplace_back(col_block_size, col_block_position);
col_block_position += col_block_size;
}
@@ -368,22 +742,21 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
}
bool matrix_has_blocks = false;
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
while (!matrix_has_blocks) {
VLOG(1) << "Clearing";
bs->rows.clear();
int row_block_position = 0;
int value_position = 0;
for (int r = 0; r < options.num_row_blocks; ++r) {
const int delta_block_size =
Uniform(options.max_row_block_size - options.min_row_block_size);
const int row_block_size = options.min_row_block_size + delta_block_size;
const int row_block_size = row_distribution(prng);
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = row_block_size;
row.block.position = row_block_position;
row_block_position += row_block_size;
for (int c = 0; c < bs->cols.size(); ++c) {
if (RandDouble() > options.block_density) continue;
if (uniform01(prng) > options.block_density) continue;
row.cells.emplace_back();
Cell& cell = row.cells.back();
@@ -395,14 +768,76 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
}
}
auto matrix = std::make_unique<BlockSparseMatrix>(bs);
auto matrix =
std::make_unique<BlockSparseMatrix>(bs.release(), use_page_locked_memory);
double* values = matrix->mutable_values();
for (int i = 0; i < matrix->num_nonzeros(); ++i) {
values[i] = RandNormal();
}
std::normal_distribution<double> standard_normal_distribution;
std::generate_n(
values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
return standard_normal_distribution(prng);
});
return matrix;
}
} // namespace internal
} // namespace ceres
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
const CompressedRowBlockStructure& bs) {
auto transpose = std::make_unique<CompressedRowBlockStructure>();
transpose->rows.resize(bs.cols.size());
for (int i = 0; i < bs.cols.size(); ++i) {
transpose->rows[i].block = bs.cols[i];
transpose->rows[i].nnz = 0;
}
transpose->cols.resize(bs.rows.size());
for (int i = 0; i < bs.rows.size(); ++i) {
auto& row = bs.rows[i];
transpose->cols[i] = row.block;
const int nrows = row.block.size;
for (auto& cell : row.cells) {
transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
const int ncols = transpose->rows[cell.block_id].block.size;
transpose->rows[cell.block_id].nnz += nrows * ncols;
}
}
ComputeCumulativeNumberOfNonZeros(transpose->rows);
return transpose;
}
double* BlockSparseMatrix::AllocateValues(int size) {
if (!use_page_locked_memory_) {
return new double[size];
}
#ifndef CERES_NO_CUDA
double* values = nullptr;
CHECK_EQ(cudaSuccess,
cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
return values;
#else
LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
<< "This is a Ceres bug; please contact the developers!";
return nullptr;
#endif
}
void BlockSparseMatrix::FreeValues(double*& values) {
if (!use_page_locked_memory_) {
delete[] values;
values = nullptr;
return;
}
#ifndef CERES_NO_CUDA
CHECK_EQ(cudaSuccess, cudaFreeHost(values));
values = nullptr;
#else
LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
<< "This is a Ceres bug; please contact the developers!";
#endif
}
} // namespace ceres::internal
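The allocation helpers above are what make the `use_page_locked_memory` constructor flag work: pinned (page-locked) host memory lets CUDA copy the values array to the device asynchronously, without an intermediate staging buffer. A hypothetical construction sketch (MakeBlockStructure is illustrative only):

// Request page-locked backing storage when the matrix will be consumed
// by the CUDA solvers (requires a CUDA-enabled build).
CompressedRowBlockStructure* bs = MakeBlockStructure();  // hypothetical
BlockSparseMatrix jacobian(bs, /*use_page_locked_memory=*/true);
// jacobian.mutable_values() now points at cudaHostAlloc'd memory, so
// host-to-device copies of the values can overlap with CPU work.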

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,15 +35,17 @@
#define CERES_INTERNAL_BLOCK_SPARSE_MATRIX_H_
#include <memory>
#include <random>
#include "ceres/block_structure.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class TripletSparseMatrix;
@@ -63,31 +65,64 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix {
//
// TODO(sameeragarwal): Add a function which will validate legal
// CompressedRowBlockStructure objects.
explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure);
explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure,
bool use_page_locked_memory = false);
~BlockSparseMatrix();
BlockSparseMatrix();
BlockSparseMatrix(const BlockSparseMatrix&) = delete;
void operator=(const BlockSparseMatrix&) = delete;
// Implementation of SparseMatrix interface.
void SetZero() final;
void RightMultiply(const double* x, double* y) const final;
void LeftMultiply(const double* x, double* y) const final;
void SetZero() override final;
void SetZero(ContextImpl* context, int num_threads) override final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const final;
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
void LeftMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const final;
void SquaredColumnNorm(double* x) const final;
void SquaredColumnNorm(double* x,
ContextImpl* context,
int num_threads) const final;
void ScaleColumns(const double* scale) final;
void ScaleColumns(const double* scale,
ContextImpl* context,
int num_threads) final;
// Convert to CompressedRowSparseMatrix
std::unique_ptr<CompressedRowSparseMatrix> ToCompressedRowSparseMatrix()
const;
// Create CompressedRowSparseMatrix corresponding to transposed matrix
std::unique_ptr<CompressedRowSparseMatrix>
ToCompressedRowSparseMatrixTranspose() const;
// Copy values to CompressedRowSparseMatrix that has compatible structure
void UpdateCompressedRowSparseMatrix(
CompressedRowSparseMatrix* crs_matrix) const;
// Copy values to CompressedRowSparseMatrix that has structure of transposed
// matrix
void UpdateCompressedRowSparseMatrixTranspose(
CompressedRowSparseMatrix* crs_matrix) const;
void ToDenseMatrix(Matrix* dense_matrix) const final;
void ToTextFile(FILE* file) const final;
void AddTransposeBlockStructure();
// clang-format off
int num_rows() const final { return num_rows_; }
int num_cols() const final { return num_cols_; }
int num_nonzeros() const final { return num_nonzeros_; }
const double* values() const final { return values_.get(); }
double* mutable_values() final { return values_.get(); }
const double* values() const final { return values_; }
double* mutable_values() final { return values_; }
// clang-format on
void ToTripletSparseMatrix(TripletSparseMatrix* matrix) const;
const CompressedRowBlockStructure* block_structure() const;
const CompressedRowBlockStructure* transpose_block_structure() const;
// Append the contents of m to the bottom of this matrix. m must
// have the same column blocks structure as this matrix.
@@ -122,15 +157,22 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix {
// distributed and whose structure is determined by
// RandomMatrixOptions.
static std::unique_ptr<BlockSparseMatrix> CreateRandomMatrix(
const RandomMatrixOptions& options);
const RandomMatrixOptions& options,
std::mt19937& prng,
bool use_page_locked_memory = false);
private:
double* AllocateValues(int size);
void FreeValues(double*& values);
const bool use_page_locked_memory_;
int num_rows_;
int num_cols_;
int num_nonzeros_;
int max_num_nonzeros_;
std::unique_ptr<double[]> values_;
double* values_;
std::unique_ptr<CompressedRowBlockStructure> block_structure_;
std::unique_ptr<CompressedRowBlockStructure> transpose_block_structure_;
};
// A number of algorithms like the SchurEliminator do not need
@@ -158,8 +200,10 @@ class CERES_NO_EXPORT BlockSparseMatrixData {
const double* values_;
};
} // namespace internal
} // namespace ceres
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
const CompressedRowBlockStructure& bs);
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,11 @@
#include "ceres/block_structure.h"
namespace ceres {
namespace internal {
#include <vector>
#include "glog/logging.h"
namespace ceres::internal {
bool CellLessThan(const Cell& lhs, const Cell& rhs) {
if (lhs.block_id == rhs.block_id) {
@@ -40,5 +43,28 @@ bool CellLessThan(const Cell& lhs, const Cell& rhs) {
return (lhs.block_id < rhs.block_id);
}
} // namespace internal
} // namespace ceres
std::vector<Block> Tail(const std::vector<Block>& blocks, int n) {
CHECK_LE(n, blocks.size());
std::vector<Block> tail;
const int num_blocks = blocks.size();
const int start = num_blocks - n;
int position = 0;
tail.reserve(n);
for (int i = start; i < num_blocks; ++i) {
tail.emplace_back(blocks[i].size, position);
position += blocks[i].size;
}
return tail;
}
int SumSquaredSizes(const std::vector<Block>& blocks) {
int sum = 0;
for (const auto& b : blocks) {
sum += b.size * b.size;
}
return sum;
}
} // namespace ceres::internal
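Tail re-bases the positions of the last n blocks to start at zero, and SumSquaredSizes gives the number of scalar entries in a block-diagonal matrix with the given blocks. A small worked example:

std::vector<Block> blocks = {{2, 0}, {3, 2}, {4, 5}};  // sizes 2, 3, 4
std::vector<Block> tail = Tail(blocks, 2);
// tail == {{3, 0}, {4, 3}}: the last two blocks, positions restarted at 0.
int n = SumSquaredSizes(blocks);  // 2*2 + 3*3 + 4*4 = 29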

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,9 @@
#include "ceres/internal/export.h"
// This file is being included into source files that are compiled with nvcc.
// nvcc shipped with Ubuntu 20.04 does not support some features of C++17,
// including nested namespace definitions.
namespace ceres {
namespace internal {
@@ -50,15 +53,19 @@ using BlockSize = int32_t;
struct CERES_NO_EXPORT Block {
Block() = default;
Block(int size_, int position_) : size(size_), position(position_) {}
Block(int size_, int position_) noexcept : size(size_), position(position_) {}
BlockSize size{-1};
int position{-1}; // Position along the row/column.
};
inline bool operator==(const Block& left, const Block& right) noexcept {
return (left.size == right.size) && (left.position == right.position);
}
struct CERES_NO_EXPORT Cell {
Cell() = default;
Cell(int block_id_, int position_)
Cell(int block_id_, int position_) noexcept
: block_id(block_id_), position(position_) {}
// Column or row block id as the case maybe.
@@ -75,14 +82,95 @@ struct CERES_NO_EXPORT CompressedList {
// Construct a CompressedList with the cells containing num_cells
// entries.
explicit CompressedList(int num_cells) : cells(num_cells) {}
explicit CompressedList(int num_cells) noexcept : cells(num_cells) {}
Block block;
std::vector<Cell> cells;
// Number of non-zeros in cells of this row block
int nnz{-1};
// Number of non-zeros in the cells of this and every preceding row block
// in the block-sparse matrix
int cumulative_nnz{-1};
};
using CompressedRow = CompressedList;
using CompressedColumn = CompressedList;
// CompressedRowBlockStructure specifies the storage structure of a row block
// sparse matrix.
//
// Consider the following matrix A:
// A = [A_11 A_12 ...
// A_21 A_22 ...
// ...
// A_m1 A_m2 ... ]
//
// A row block sparse matrix is a matrix where the following properties hold:
// 1. The number of rows in every block A_ij and A_ik is the same.
// 2. The number of columns in every block A_ij and A_kj is the same.
// 3. The number of rows in A_ij and A_kj may be different (i != k).
// 4. The number of columns in A_ij and A_ik may be different (j != k).
// 5. Any block A_ij may be all 0s, in which case the block is not stored.
//
// The structure of the matrix is stored as follows:
//
// The `rows' array contains the following information for each row block:
// - rows[i].block.size: The number of rows in each block A_ij in the row block.
// - rows[i].block.position: The starting row in the full matrix A of the
// row block i.
// - rows[i].cells[j].block_id: The index into the `cols' array corresponding to
// the non-zero blocks A_ij.
// - rows[i].cells[j].position: The index in the `values' array for the contents
// of block A_ij.
//
// The `cols' array contains the following information for each block:
// - cols[.].size: The number of columns spanned by the block.
// - cols[.].position: The starting column in the full matrix A of the block.
//
//
// Example of a row block sparse matrix:
// block_id: | 0 |1|2 |3 |
// rows[0]: [ 1 2 0 3 4 0 ]
// [ 5 6 0 7 8 0 ]
// rows[1]: [ 0 0 9 0 0 0 ]
//
// This matrix is stored as follows:
//
// There are four column blocks:
// cols[0].size = 2
// cols[0].position = 0
// cols[1].size = 1
// cols[1].position = 2
// cols[2].size = 2
// cols[2].position = 3
// cols[3].size = 1
// cols[3].position = 5
// The first row block spans two rows, starting at row 0:
// rows[0].block.size = 2 // This row block spans two rows.
// rows[0].block.position = 0 // It starts at row 0.
// rows[0] has two cells, at column blocks 0 and 2:
// rows[0].cells[0].block_id = 0 // This cell is in column block 0.
// rows[0].cells[0].position = 0 // See below for an explanation of this.
// rows[0].cells[1].block_id = 2 // This cell is in column block 2.
// rows[0].cells[1].position = 4 // See below for an explanation of this.
//
// The second row block spans one row, starting at row 2:
// rows[1].block.size = 1 // This row block spans one row.
// rows[1].block.position = 2 // It starts at row 2.
// rows[1] has one cell at column block 1:
// rows[1].cells[0].block_id = 1 // This cell is in column block 1.
// rows[1].cells[0].position = 8 // See below for an explanation of this.
//
// The values in each block are stored contiguously in row-major order.
// However, there is no unique way to order the blocks -- it is usually
// optimized to promote cache coherent access, e.g. ordering it so that
// Jacobian blocks of parameters of the same type are stored nearby.
// This is one possible way to store the values of the blocks in a values array:
// values = { 1, 2, 5, 6, 3, 4, 7, 8, 9 }
// | | | | // The three blocks.
// ^ rows[0].cells[0].position = 0
// ^ rows[0].cells[1].position = 4
// ^ rows[1].cells[0].position = 8
struct CERES_NO_EXPORT CompressedRowBlockStructure {
std::vector<Block> cols;
std::vector<CompressedRow> rows;
@@ -93,6 +181,18 @@ struct CERES_NO_EXPORT CompressedColumnBlockStructure {
std::vector<CompressedColumn> cols;
};
inline int NumScalarEntries(const std::vector<Block>& blocks) {
if (blocks.empty()) {
return 0;
}
auto& block = blocks.back();
return block.position + block.size;
}
std::vector<Block> Tail(const std::vector<Block>& blocks, int n);
int SumSquaredSizes(const std::vector<Block>& blocks);
} // namespace internal
} // namespace ceres
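For reference, the example documented in the comment block above can be written out directly; this sketch builds exactly that structure by hand:

// The 3x6 example matrix from the comment, built by hand.
auto bs = std::make_unique<CompressedRowBlockStructure>();
bs->cols = {{2, 0}, {1, 2}, {2, 3}, {1, 5}};  // four column blocks
bs->rows.resize(2);
bs->rows[0].block = {2, 0};            // two rows, starting at row 0
bs->rows[0].cells = {{0, 0}, {2, 4}};  // cells in column blocks 0 and 2
bs->rows[1].block = {1, 2};            // one row, starting at row 2
bs->rows[1].cells = {{1, 8}};          // cell in column block 1
// values = {1, 2, 5, 6, 3, 4, 7, 8, 9} as in the layout shown above.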

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,15 +32,13 @@
#include <algorithm>
#include <iostream> // NO LINT
#include <string>
#include "ceres/program.h"
#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::string;
namespace ceres::internal {
StateUpdatingCallback::StateUpdatingCallback(Program* program,
double* parameters)
@@ -49,7 +47,7 @@ StateUpdatingCallback::StateUpdatingCallback(Program* program,
StateUpdatingCallback::~StateUpdatingCallback() = default;
CallbackReturnType StateUpdatingCallback::operator()(
const IterationSummary& summary) {
const IterationSummary& /*summary*/) {
program_->StateVectorToParameterBlocks(parameters_);
program_->CopyParameterBlockStateToUserState();
return SOLVER_CONTINUE;
@@ -83,7 +81,7 @@ LoggingCallback::~LoggingCallback() = default;
CallbackReturnType LoggingCallback::operator()(
const IterationSummary& summary) {
string output;
std::string output;
if (minimizer_type == LINE_SEARCH) {
output = StringPrintf(
"% 4d: f:% 8e d:% 3.2e g:% 3.2e h:% 3.2e s:% 3.2e e:% 3d it:% 3.2e "
@@ -127,5 +125,4 @@ CallbackReturnType LoggingCallback::operator()(
return SOLVER_CONTINUE;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,7 @@
#include "ceres/internal/export.h"
#include "ceres/iteration_callback.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
@@ -84,7 +83,6 @@ class CERES_NO_EXPORT LoggingCallback final : public IterationCallback {
const bool log_to_stdout_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_CALLBACKS_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,16 +33,14 @@
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "ceres/graph.h"
#include "ceres/internal/export.h"
#include "ceres/map_util.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::vector;
namespace ceres::internal {
using IntMap = std::unordered_map<int, int>;
using IntSet = std::unordered_set<int>;
@@ -59,15 +57,15 @@ class CERES_NO_EXPORT CanonicalViewsClustering {
// are assigned to a cluster with id = kInvalidClusterId.
void ComputeClustering(const CanonicalViewsClusteringOptions& options,
const WeightedGraph<int>& graph,
vector<int>* centers,
std::vector<int>* centers,
IntMap* membership);
private:
void FindValidViews(IntSet* valid_views) const;
double ComputeClusteringQualityDifference(const int candidate,
const vector<int>& centers) const;
double ComputeClusteringQualityDifference(
int candidate, const std::vector<int>& centers) const;
void UpdateCanonicalViewAssignments(const int canonical_view);
void ComputeClusterMembership(const vector<int>& centers,
void ComputeClusterMembership(const std::vector<int>& centers,
IntMap* membership) const;
CanonicalViewsClusteringOptions options_;
@@ -82,7 +80,7 @@ class CERES_NO_EXPORT CanonicalViewsClustering {
void ComputeCanonicalViewsClustering(
const CanonicalViewsClusteringOptions& options,
const WeightedGraph<int>& graph,
vector<int>* centers,
std::vector<int>* centers,
IntMap* membership) {
time_t start_time = time(nullptr);
CanonicalViewsClustering cv;
@@ -95,7 +93,7 @@ void ComputeCanonicalViewsClustering(
void CanonicalViewsClustering::ComputeClustering(
const CanonicalViewsClusteringOptions& options,
const WeightedGraph<int>& graph,
vector<int>* centers,
std::vector<int>* centers,
IntMap* membership) {
options_ = options;
CHECK(centers != nullptr);
@@ -151,7 +149,7 @@ void CanonicalViewsClustering::FindValidViews(IntSet* valid_views) const {
// Computes the difference in the quality score if 'candidate' were
// added to the set of canonical views.
double CanonicalViewsClustering::ComputeClusteringQualityDifference(
const int candidate, const vector<int>& centers) const {
const int candidate, const std::vector<int>& centers) const {
// View score.
double difference =
options_.view_score_weight * graph_->VertexWeight(candidate);
@@ -198,7 +196,7 @@ void CanonicalViewsClustering::UpdateCanonicalViewAssignments(
// Assign a cluster id to each view.
void CanonicalViewsClustering::ComputeClusterMembership(
const vector<int>& centers, IntMap* membership) const {
const std::vector<int>& centers, IntMap* membership) const {
CHECK(membership != nullptr);
membership->clear();
@@ -222,5 +220,4 @@ void CanonicalViewsClustering::ComputeClusterMembership(
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -48,8 +48,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct CanonicalViewsClusteringOptions;
@@ -120,8 +119,7 @@ struct CERES_NO_EXPORT CanonicalViewsClusteringOptions {
double view_score_weight = 0.0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,123 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)
#ifndef CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_
#define CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_
#include <algorithm>
#include <memory>
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/linear_operator.h"
namespace ceres {
namespace internal {
class SparseMatrix;
// A linear operator which takes a matrix A and a diagonal vector D and
// performs products of the form
//
// (A^T A + D^T D)x
//
// This is used to implement iterative general sparse linear solving with
// conjugate gradients, where A is the Jacobian and D is a regularizing
// parameter. A brief proof that D^T D is the correct regularizer:
//
// Given a regularized least squares problem:
//
// min ||Ax - b||^2 + ||Dx||^2
// x
//
// First expand into matrix notation:
//
// (Ax - b)^T (Ax - b) + xD^TDx
//
// Then multiply out to get:
//
// = xA^TAx - 2b^T Ax + b^Tb + xD^TDx
//
// Take the derivative:
//
// 0 = 2A^TAx - 2A^T b + 2 D^TDx
// 0 = A^TAx - A^T b + D^TDx
// 0 = (A^TA + D^TD)x - A^T b
//
// Thus, the symmetric system we need to solve for CGNR is
//
// Sx = z
//
// with S = A^TA + D^TD
// and z = A^T b
//
// Note: This class is not thread safe, since it uses some temporary storage.
class CERES_NO_EXPORT CgnrLinearOperator final : public LinearOperator {
public:
CgnrLinearOperator(const LinearOperator& A, const double* D)
: A_(A), D_(D), z_(new double[A.num_rows()]) {}
void RightMultiply(const double* x, double* y) const final {
std::fill(z_.get(), z_.get() + A_.num_rows(), 0.0);
// z = Ax
A_.RightMultiply(x, z_.get());
// y = y + Atz
A_.LeftMultiply(z_.get(), y);
// y = y + DtDx
if (D_ != nullptr) {
int n = A_.num_cols();
VectorRef(y, n).array() +=
ConstVectorRef(D_, n).array().square() * ConstVectorRef(x, n).array();
}
}
void LeftMultiply(const double* x, double* y) const final {
RightMultiply(x, y);
}
int num_rows() const final { return A_.num_cols(); }
int num_cols() const final { return A_.num_cols(); }
private:
const LinearOperator& A_;
const double* D_;
std::unique_ptr<double[]> z_;
};
} // namespace internal
} // namespace ceres
#include "ceres/internal/reenable_warnings.h"
#endif // CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,16 +34,92 @@
#include <utility>
#include "ceres/block_jacobi_preconditioner.h"
#include "ceres/cgnr_linear_operator.h"
#include "ceres/conjugate_gradients_solver.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/subset_preconditioner.h"
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A linear operator which takes a matrix A and a diagonal vector D and
// performs products of the form
//
// (A^T A + D^T D)x
//
// This is used to implement iterative general sparse linear solving with
// conjugate gradients, where A is the Jacobian and D is a regularizing
// parameter. A brief proof that D^T D is the correct regularizer:
//
// Given a regularized least squares problem:
//
// min ||Ax - b||^2 + ||Dx||^2
// x
//
// First expand into matrix notation:
//
// (Ax - b)^T (Ax - b) + x^T D^T D x
//
// Then multiply out to get:
//
// = x^T A^T A x - 2 b^T A x + b^T b + x^T D^T D x
//
// Take the derivative:
//
// 0 = 2A^TAx - 2A^T b + 2 D^TDx
// 0 = A^TAx - A^T b + D^TDx
// 0 = (A^TA + D^TD)x - A^T b
//
// Thus, the symmetric system we need to solve for CGNR is
//
// Sx = z
//
// with S = A^TA + D^TD
// and z = A^T b
//
// Note: This class is not thread safe, since it uses some temporary storage.
class CERES_NO_EXPORT CgnrLinearOperator final
: public ConjugateGradientsLinearOperator<Vector> {
public:
CgnrLinearOperator(const LinearOperator& A,
const double* D,
ContextImpl* context,
int num_threads)
: A_(A),
D_(D),
z_(Vector::Zero(A.num_rows())),
context_(context),
num_threads_(num_threads) {}
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
// z = Ax
// y = y + Atz
z_.setZero();
A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_);
A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_);
// y = y + DtDx
if (D_ != nullptr) {
int n = A_.num_cols();
ParallelAssign(
context_,
num_threads_,
y,
y.array() + ConstVectorRef(D_, n).array().square() * x.array());
}
}
private:
const LinearOperator& A_;
const double* D_;
Vector z_;
ContextImpl* context_;
int num_threads_;
};
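The operator above never forms A^T A explicitly; it applies A and then A^T, and folds in D^T D as an element-wise product. A dense Eigen sanity check of the same algebra, independent of the Ceres code path:

// y = (A^T A + D^T D) x, computed the way CgnrLinearOperator does it.
Eigen::VectorXd ApplyNormalEquations(const Eigen::MatrixXd& A,
                                     const Eigen::VectorXd& D,
                                     const Eigen::VectorXd& x) {
  Eigen::VectorXd z = A * x;              // z = A x
  Eigen::VectorXd y = A.transpose() * z;  // y = A^T A x
  // D is diagonal, so D^T D x is an element-wise product.
  y.array() += D.array().square() * x.array();
  return y;
}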
CgnrSolver::CgnrSolver(LinearSolver::Options options)
: options_(std::move(options)) {
@@ -57,7 +133,14 @@ CgnrSolver::CgnrSolver(LinearSolver::Options options)
}
}
CgnrSolver::~CgnrSolver() = default;
CgnrSolver::~CgnrSolver() {
for (int i = 0; i < 4; ++i) {
if (scratch_[i]) {
delete scratch_[i];
scratch_[i] = nullptr;
}
}
}
LinearSolver::Summary CgnrSolver::SolveImpl(
BlockSparseMatrix* A,
@@ -65,48 +148,244 @@ LinearSolver::Summary CgnrSolver::SolveImpl(
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
EventLogger event_logger("CgnrSolver::Solve");
// Form z = Atb.
Vector z(A->num_cols());
z.setZero();
A->LeftMultiply(b, z.data());
if (!preconditioner_) {
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.subset_preconditioner_start_row_block =
options_.subset_preconditioner_start_row_block;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.ordering_type = options_.ordering_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.context = options_.context;
if (options_.preconditioner_type == JACOBI) {
preconditioner_ = std::make_unique<BlockJacobiPreconditioner>(*A);
preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
preconditioner_options, *A);
} else if (options_.preconditioner_type == SUBSET) {
Preconditioner::Options preconditioner_options;
preconditioner_options.type = SUBSET;
preconditioner_options.subset_preconditioner_start_row_block =
options_.subset_preconditioner_start_row_block;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.use_postordering = options_.use_postordering;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.context = options_.context;
preconditioner_ =
std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
} else {
preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
}
}
preconditioner_->Update(*A, per_solve_options.D);
if (preconditioner_) {
preconditioner_->Update(*A, per_solve_options.D);
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
cg_options.residual_reset_period = options_.residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
cg_options.context = options_.context;
cg_options.num_threads = options_.num_threads;
// lhs = AtA + DtD
CgnrLinearOperator lhs(
*A, per_solve_options.D, options_.context, options_.num_threads);
// rhs = Atb.
Vector rhs(A->num_cols());
rhs.setZero();
A->LeftMultiplyAndAccumulate(
b, rhs.data(), options_.context, options_.num_threads);
cg_solution_ = Vector::Zero(A->num_cols());
for (int i = 0; i < 4; ++i) {
if (scratch_[i] == nullptr) {
scratch_[i] = new Vector(A->num_cols());
}
}
LinearSolver::PerSolveOptions cg_per_solve_options = per_solve_options;
cg_per_solve_options.preconditioner = preconditioner_.get();
// Solve (AtA + DtD)x = z (= Atb).
VectorRef(x, A->num_cols()).setZero();
CgnrLinearOperator lhs(*A, per_solve_options.D);
event_logger.AddEvent("Setup");
ConjugateGradientsSolver conjugate_gradient_solver(options_);
LinearSolver::Summary summary =
conjugate_gradient_solver.Solve(&lhs, z.data(), cg_per_solve_options, x);
LinearOperatorAdapter preconditioner(*preconditioner_);
auto summary = ConjugateGradientsSolver(
cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
VectorRef(x, A->num_cols()) = cg_solution_;
event_logger.AddEvent("Solve");
return summary;
}
} // namespace internal
} // namespace ceres
#ifndef CERES_NO_CUDA
// A linear operator which takes a matrix A and a diagonal vector D and
// performs products of the form
//
// (A^T A + D^T D)x
//
// This is used to implement iterative general sparse linear solving with
// conjugate gradients, where A is the Jacobian and D is a regularizing
// parameter. A brief proof is included with CgnrLinearOperator above.
class CERES_NO_EXPORT CudaCgnrLinearOperator final
: public ConjugateGradientsLinearOperator<CudaVector> {
public:
CudaCgnrLinearOperator(CudaSparseMatrix& A,
const CudaVector& D,
CudaVector* z)
: A_(A), D_(D), z_(z) {}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
// z = Ax
z_->SetZero();
A_.RightMultiplyAndAccumulate(x, z_);
// y = y + Atz
// = y + AtAx
A_.LeftMultiplyAndAccumulate(*z_, &y);
// y = y + DtDx
y.DtDxpy(D_, x);
}
private:
CudaSparseMatrix& A_;
const CudaVector& D_;
CudaVector* z_ = nullptr;
};
class CERES_NO_EXPORT CudaIdentityPreconditioner final
: public CudaPreconditioner {
public:
void Update(const CompressedRowSparseMatrix& A, const double* D) final {}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
y.Axpby(1.0, x, 1.0);
}
};
// This class wraps the existing CPU Jacobi preconditioner, caches the structure
// of the block diagonal, and for each CGNR solve updates the values on the CPU
// and then copies them over to the GPU.
class CERES_NO_EXPORT CudaJacobiPreconditioner final
: public CudaPreconditioner {
public:
explicit CudaJacobiPreconditioner(Preconditioner::Options options,
const CompressedRowSparseMatrix& A)
: options_(std::move(options)),
cpu_preconditioner_(options_, A),
m_(options_.context, cpu_preconditioner_.matrix()) {}
~CudaJacobiPreconditioner() = default;
void Update(const CompressedRowSparseMatrix& A, const double* D) final {
cpu_preconditioner_.Update(A, D);
m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
m_.RightMultiplyAndAccumulate(x, &y);
}
private:
Preconditioner::Options options_;
BlockCRSJacobiPreconditioner cpu_preconditioner_;
CudaSparseMatrix m_;
};
CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
: options_(std::move(options)) {}
CudaCgnrSolver::~CudaCgnrSolver() {
for (int i = 0; i < 4; ++i) {
if (scratch_[i]) {
delete scratch_[i];
scratch_[i] = nullptr;
}
}
}
std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create(
LinearSolver::Options options, std::string* error) {
CHECK(error != nullptr);
if (options.preconditioner_type != IDENTITY &&
options.preconditioner_type != JACOBI) {
*error =
"CudaCgnrSolver does not support preconditioner type " +
std::string(PreconditionerTypeToString(options.preconditioner_type)) +
". ";
return nullptr;
}
CHECK(options.context->IsCudaInitialized())
<< "CudaCgnrSolver requires CUDA initialization.";
auto solver = std::make_unique<CudaCgnrSolver>(options);
return solver;
}
void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
const double* b,
const double* D) {
if (A_ == nullptr) {
// Assume structure is not cached, do an initialization and structural copy.
A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.subset_preconditioner_start_row_block =
options_.subset_preconditioner_start_row_block;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.ordering_type = options_.ordering_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.context = options_.context;
if (options_.preconditioner_type == JACOBI) {
preconditioner_ =
std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
} else {
preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
}
for (int i = 0; i < 4; ++i) {
scratch_[i] = new CudaVector(options_.context, A.num_cols());
}
} else {
// Assume structure is cached, do a value copy.
A_->CopyValuesFromCpu(A);
}
b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
}
LinearSolver::Summary CudaCgnrSolver::SolveImpl(
CompressedRowSparseMatrix* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
EventLogger event_logger("CudaCgnrSolver::Solve");
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
CpuToGpuTransfer(*A, b, per_solve_options.D);
event_logger.AddEvent("CPU to GPU Transfer");
preconditioner_->Update(*A, per_solve_options.D);
event_logger.AddEvent("Preconditioner Update");
// Form z = Atb.
Atb_->SetZero();
A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());
// Solve (AtA + DtD)x = z (= Atb).
x_->SetZero();
CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());
event_logger.AddEvent("Setup");
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
cg_options.residual_reset_period = options_.residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
summary = ConjugateGradientsSolver(
cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
x_->CopyTo(x);
event_logger.AddEvent("Solve");
return summary;
}
#endif // CERES_NO_CUDA
} // namespace ceres::internal
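Unlike the CHECK on CUDA initialization, an unsupported preconditioner is reported through the error string, so callers can fall back gracefully. A hypothetical call site (`options` is assumed to carry an initialized CUDA context):

std::string error;
auto cuda_solver = CudaCgnrSolver::Create(options, &error);
if (cuda_solver == nullptr) {
  // Only IDENTITY and JACOBI preconditioning are supported on CUDA.
  LOG(WARNING) << "Falling back to the CPU CGNR solver: " << error;
}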

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,11 +33,13 @@
#include <memory>
#include "ceres/conjugate_gradients_solver.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Preconditioner;
@@ -65,9 +67,50 @@ class CERES_NO_EXPORT CgnrSolver final : public BlockSparseMatrixSolver {
private:
const LinearSolver::Options options_;
std::unique_ptr<Preconditioner> preconditioner_;
Vector cg_solution_;
Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
};
} // namespace internal
} // namespace ceres
#ifndef CERES_NO_CUDA
class CudaPreconditioner : public ConjugateGradientsLinearOperator<CudaVector> {
public:
virtual void Update(const CompressedRowSparseMatrix& A, const double* D) = 0;
virtual ~CudaPreconditioner() = default;
};
// A Cuda-accelerated version of CgnrSolver.
// This solver assumes that the sparsity structure of A remains constant for its
// lifetime.
class CERES_NO_EXPORT CudaCgnrSolver final
: public CompressedRowSparseMatrixSolver {
public:
explicit CudaCgnrSolver(LinearSolver::Options options);
static std::unique_ptr<CudaCgnrSolver> Create(LinearSolver::Options options,
std::string* error);
~CudaCgnrSolver() override;
Summary SolveImpl(CompressedRowSparseMatrix* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) final;
private:
void CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
const double* b,
const double* D);
LinearSolver::Options options_;
std::unique_ptr<CudaSparseMatrix> A_;
std::unique_ptr<CudaVector> b_;
std::unique_ptr<CudaVector> x_;
std::unique_ptr<CudaVector> Atb_;
std::unique_ptr<CudaVector> Ax_;
std::unique_ptr<CudaVector> D_;
std::unique_ptr<CudaPreconditioner> preconditioner_;
CudaVector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
};
#endif // CERES_NO_CUDA
} // namespace ceres::internal
#endif // CERES_INTERNAL_CGNR_SOLVER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,30 +36,21 @@
#include "ceres/internal/export.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
using std::vector;
void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows,
const int* scalar_cols,
const vector<int>& row_blocks,
const vector<int>& col_blocks,
vector<int>* block_rows,
vector<int>* block_cols) {
void CompressedColumnScalarMatrixToBlockMatrix(
const int* scalar_rows,
const int* scalar_cols,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::vector<int>* block_rows,
std::vector<int>* block_cols) {
CHECK(block_rows != nullptr);
CHECK(block_cols != nullptr);
block_rows->clear();
block_cols->clear();
const int num_row_blocks = row_blocks.size();
const int num_col_blocks = col_blocks.size();
vector<int> row_block_starts(num_row_blocks);
for (int i = 0, cursor = 0; i < num_row_blocks; ++i) {
row_block_starts[i] = cursor;
cursor += row_blocks[i];
}
// This loop extracts the block sparsity of the scalar sparse matrix
// It does so by iterating over the columns, but only considering
// the columns corresponding to the first element of each column
@@ -71,52 +62,46 @@ void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows,
for (int col_block = 0; col_block < num_col_blocks; ++col_block) {
int column_size = 0;
for (int idx = scalar_cols[c]; idx < scalar_cols[c + 1]; ++idx) {
vector<int>::const_iterator it = std::lower_bound(
row_block_starts.begin(), row_block_starts.end(), scalar_rows[idx]);
// Since we are using lower_bound, it will return the row id
// where the row block starts. For everything but the first row
// of the block, where these values will be the same, we can
// skip, as we only need the first row to detect the presence of
// the block.
auto it = std::lower_bound(row_blocks.begin(),
row_blocks.end(),
scalar_rows[idx],
[](const Block& block, double value) {
return block.position < value;
});
// Since we are using lower_bound, it will return the row id where the row
// block starts. For everything but the first row of the block, where
// these values will be the same, we can skip, as we only need the first
// row to detect the presence of the block.
//
// For rows all but the first row in the last row block,
// lower_bound will return row_block_starts.end(), but those can
// be skipped like the rows in other row blocks too.
if (it == row_block_starts.end() || *it != scalar_rows[idx]) {
// For all rows but the first row in the last row block, lower_bound will
// return row_blocks.end(), but those can be skipped like the rows in
// other row blocks too.
if (it == row_blocks.end() || it->position != scalar_rows[idx]) {
continue;
}
block_rows->push_back(it - row_block_starts.begin());
block_rows->push_back(it - row_blocks.begin());
++column_size;
}
block_cols->push_back(block_cols->back() + column_size);
c += col_blocks[col_block];
c += col_blocks[col_block].size;
}
}
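As a self-contained illustration of the lower_bound pattern above (a sketch, assuming Block carries {size, position} as in block_structure.h):
// With row blocks {size, position} = {2, 0}, {3, 2}, {1, 5}, scalar row 2
// is the first row of the second block and is kept; scalar rows 3 and 4
// are interior rows of that block and are skipped.
std::vector<Block> blocks = {{2, 0}, {3, 2}, {1, 5}};
const int scalar_row = 2;
auto it = std::lower_bound(blocks.begin(),
                           blocks.end(),
                           scalar_row,
                           [](const Block& block, int value) {
                             return block.position < value;
                           });
const bool is_block_start =
    it != blocks.end() && it->position == scalar_row;  // true here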
void BlockOrderingToScalarOrdering(const vector<int>& blocks,
const vector<int>& block_ordering,
vector<int>* scalar_ordering) {
void BlockOrderingToScalarOrdering(const std::vector<Block>& blocks,
const std::vector<int>& block_ordering,
std::vector<int>* scalar_ordering) {
CHECK_EQ(blocks.size(), block_ordering.size());
const int num_blocks = blocks.size();
// block_starts = [0, block1, block1 + block2 ..]
vector<int> block_starts(num_blocks);
for (int i = 0, cursor = 0; i < num_blocks; ++i) {
block_starts[i] = cursor;
cursor += blocks[i];
}
scalar_ordering->resize(block_starts.back() + blocks.back());
scalar_ordering->resize(NumScalarEntries(blocks));
int cursor = 0;
for (int i = 0; i < num_blocks; ++i) {
const int block_id = block_ordering[i];
const int block_size = blocks[block_id];
int block_position = block_starts[block_id];
const int block_size = blocks[block_id].size;
int block_position = blocks[block_id].position;
for (int j = 0; j < block_size; ++j) {
(*scalar_ordering)[cursor++] = block_position++;
}
}
}
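A concrete example of the block-to-scalar mapping implemented above:
// blocks = {{2, 0}, {3, 2}} (sizes 2 and 3 at positions 0 and 2) and
// block_ordering = {1, 0} put the second block's scalars first.
std::vector<Block> blocks = {{2, 0}, {3, 2}};
std::vector<int> block_ordering = {1, 0};
std::vector<int> scalar_ordering;
BlockOrderingToScalarOrdering(blocks, block_ordering, &scalar_ordering);
// scalar_ordering == {2, 3, 4, 0, 1}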
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,11 +34,11 @@
#include <algorithm>
#include <vector>
#include "ceres/block_structure.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Extract the block sparsity pattern of the scalar compressed columns
// matrix and return it in compressed column form. The compressed
@@ -53,8 +53,8 @@ namespace internal {
CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix(
const int* scalar_rows,
const int* scalar_cols,
const std::vector<int>& row_blocks,
const std::vector<int>& col_blocks,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::vector<int>* block_rows,
std::vector<int>* block_cols);
@@ -62,7 +62,7 @@ CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix(
// the corresponding "scalar" ordering, where the scalar ordering of
// size sum(blocks).
CERES_NO_EXPORT void BlockOrderingToScalarOrdering(
const std::vector<int>& blocks,
const std::vector<Block>& blocks,
const std::vector<int>& block_ordering,
std::vector<int>* scalar_ordering);
@@ -141,8 +141,7 @@ void SolveRTRWithSparseRHS(IntegerType num_cols,
SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,44 +44,42 @@
#include "ceres/residual_block.h"
#include "ceres/scratch_evaluate_preparer.h"
namespace ceres {
namespace internal {
using std::adjacent_find;
using std::make_pair;
using std::pair;
using std::vector;
namespace ceres::internal {
void CompressedRowJacobianWriter::PopulateJacobianRowAndColumnBlockVectors(
const Program* program, CompressedRowSparseMatrix* jacobian) {
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
vector<int>& col_blocks = *(jacobian->mutable_col_blocks());
const auto& parameter_blocks = program->parameter_blocks();
auto& col_blocks = *(jacobian->mutable_col_blocks());
col_blocks.resize(parameter_blocks.size());
int col_pos = 0;
for (int i = 0; i < parameter_blocks.size(); ++i) {
col_blocks[i] = parameter_blocks[i]->TangentSize();
col_blocks[i].size = parameter_blocks[i]->TangentSize();
col_blocks[i].position = col_pos;
col_pos += col_blocks[i].size;
}
const vector<ResidualBlock*>& residual_blocks = program->residual_blocks();
vector<int>& row_blocks = *(jacobian->mutable_row_blocks());
const auto& residual_blocks = program->residual_blocks();
auto& row_blocks = *(jacobian->mutable_row_blocks());
row_blocks.resize(residual_blocks.size());
int row_pos = 0;
for (int i = 0; i < residual_blocks.size(); ++i) {
row_blocks[i] = residual_blocks[i]->NumResiduals();
row_blocks[i].size = residual_blocks[i]->NumResiduals();
row_blocks[i].position = row_pos;
row_pos += row_blocks[i].size;
}
}
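To make the new Block bookkeeping concrete, a small worked example of what the loops above produce:
// Parameter blocks with tangent sizes {3, 4} yield
//   col_blocks = {{.size = 3, .position = 0}, {.size = 4, .position = 3}}
// and residual blocks with {2, 2} residuals yield
//   row_blocks = {{.size = 2, .position = 0}, {.size = 2, .position = 2}},
// i.e. each Block now records its offset as well as its size.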
void CompressedRowJacobianWriter::GetOrderedParameterBlocks(
const Program* program,
int residual_id,
vector<pair<int, int>>* evaluated_jacobian_blocks) {
const ResidualBlock* residual_block = program->residual_blocks()[residual_id];
std::vector<std::pair<int, int>>* evaluated_jacobian_blocks) {
auto residual_block = program->residual_blocks()[residual_id];
const int num_parameter_blocks = residual_block->NumParameterBlocks();
for (int j = 0; j < num_parameter_blocks; ++j) {
const ParameterBlock* parameter_block =
residual_block->parameter_blocks()[j];
auto parameter_block = residual_block->parameter_blocks()[j];
if (!parameter_block->IsConstant()) {
evaluated_jacobian_blocks->push_back(
make_pair(parameter_block->index(), j));
std::make_pair(parameter_block->index(), j));
}
}
std::sort(evaluated_jacobian_blocks->begin(),
@@ -90,20 +88,29 @@ void CompressedRowJacobianWriter::GetOrderedParameterBlocks(
std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
const {
const vector<ResidualBlock*>& residual_blocks = program_->residual_blocks();
const auto& residual_blocks = program_->residual_blocks();
int total_num_residuals = program_->NumResiduals();
int total_num_effective_parameters = program_->NumEffectiveParameters();
const int total_num_residuals = program_->NumResiduals();
const int total_num_effective_parameters = program_->NumEffectiveParameters();
// Count the number of jacobian nonzeros.
int num_jacobian_nonzeros = 0;
//
// We use an unsigned int here so that we can compare it to INT_MAX without
// triggering overflow behaviour.
unsigned int num_jacobian_nonzeros = total_num_effective_parameters;
for (auto* residual_block : residual_blocks) {
const int num_residuals = residual_block->NumResiduals();
const int num_parameter_blocks = residual_block->NumParameterBlocks();
for (int j = 0; j < num_parameter_blocks; ++j) {
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
auto parameter_block = residual_block->parameter_blocks()[j];
if (!parameter_block->IsConstant()) {
num_jacobian_nonzeros += num_residuals * parameter_block->TangentSize();
if (num_jacobian_nonzeros > std::numeric_limits<int>::max()) {
LOG(ERROR) << "Unable to create Jacobian matrix: Too many entries in "
"the Jacobian matrix. num_jacobian_nonzeros = "
<< num_jacobian_nonzeros;
return nullptr;
}
}
}
}
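A minimal sketch of the failure mode the unsigned counter guards against; the sizes are illustrative:
// 250000 residual blocks, each with 30 residuals against 300 effective
// parameters, contribute 250000 * 30 * 300 = 2.25e9 nonzeros: past
// INT_MAX (~2.147e9) but still representable in a 32-bit unsigned int,
// so the comparison stays well defined instead of being signed overflow.
unsigned int count = 2250000000u;  // needs <limits> for the check below
const bool too_big =
    count > static_cast<unsigned int>(std::numeric_limits<int>::max());
// too_big == true -> CreateJacobian() returns nullptr.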
@@ -112,14 +119,14 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
// Allocate more space than needed to store the jacobian so that when the LM
// algorithm adds the diagonal, no reallocation is necessary. This reduces
// peak memory usage significantly.
std::unique_ptr<CompressedRowSparseMatrix> jacobian =
std::make_unique<CompressedRowSparseMatrix>(
total_num_residuals,
total_num_effective_parameters,
num_jacobian_nonzeros + total_num_effective_parameters);
auto jacobian = std::make_unique<CompressedRowSparseMatrix>(
total_num_residuals,
total_num_effective_parameters,
static_cast<int>(num_jacobian_nonzeros));
// At this stage, the CompressedRowSparseMatrix is an invalid state. But this
// seems to be the only way to construct it without doing a memory copy.
// At this stage, the CompressedRowSparseMatrix is in an invalid state. But
// this seems to be the only way to construct it without doing a memory
// copy.
int* rows = jacobian->mutable_rows();
int* cols = jacobian->mutable_cols();
@@ -131,9 +138,9 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
// Count the number of derivatives for a row of this residual block and
// build a list of active parameter block indices.
int num_derivatives = 0;
vector<int> parameter_indices;
std::vector<int> parameter_indices;
for (int j = 0; j < num_parameter_blocks; ++j) {
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
auto parameter_block = residual_block->parameter_blocks()[j];
if (!parameter_block->IsConstant()) {
parameter_indices.push_back(parameter_block->index());
num_derivatives += parameter_block->TangentSize();
@@ -141,12 +148,12 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
}
// Sort the parameters by their position in the state vector.
sort(parameter_indices.begin(), parameter_indices.end());
std::sort(parameter_indices.begin(), parameter_indices.end());
if (adjacent_find(parameter_indices.begin(), parameter_indices.end()) !=
parameter_indices.end()) {
std::string parameter_block_description;
for (int j = 0; j < num_parameter_blocks; ++j) {
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
auto parameter_block = residual_block->parameter_blocks()[j];
parameter_block_description += parameter_block->ToString() + "\n";
}
LOG(FATAL) << "Ceres internal error: "
@@ -168,15 +175,13 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
// values are updated.
int col_pos = 0;
for (int parameter_index : parameter_indices) {
ParameterBlock* parameter_block =
program_->parameter_blocks()[parameter_index];
auto parameter_block = program_->parameter_blocks()[parameter_index];
const int parameter_block_size = parameter_block->TangentSize();
for (int r = 0; r < num_residuals; ++r) {
// This is the position in the values array of the jacobian where this
// row of the jacobian block should go.
const int column_block_begin = rows[row_pos + r] + col_pos;
for (int c = 0; c < parameter_block_size; ++c) {
cols[column_block_begin + c] = parameter_block->delta_offset() + c;
}
@@ -185,7 +190,8 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
}
row_pos += num_residuals;
}
CHECK_EQ(num_jacobian_nonzeros, rows[total_num_residuals]);
CHECK_EQ(num_jacobian_nonzeros - total_num_effective_parameters,
rows[total_num_residuals]);
PopulateJacobianRowAndColumnBlockVectors(program_, jacobian.get());
@@ -201,11 +207,10 @@ void CompressedRowJacobianWriter::Write(int residual_id,
double* jacobian_values = jacobian->mutable_values();
const int* jacobian_rows = jacobian->rows();
const ResidualBlock* residual_block =
program_->residual_blocks()[residual_id];
auto residual_block = program_->residual_blocks()[residual_id];
const int num_residuals = residual_block->NumResiduals();
vector<pair<int, int>> evaluated_jacobian_blocks;
std::vector<std::pair<int, int>> evaluated_jacobian_blocks;
GetOrderedParameterBlocks(program_, residual_id, &evaluated_jacobian_blocks);
// Where in the current row does the jacobian for a parameter block begin.
@@ -214,7 +219,7 @@ void CompressedRowJacobianWriter::Write(int residual_id,
// Iterate over the jacobian blocks in increasing order of their
// positions in the reduced parameter vector.
for (auto& evaluated_jacobian_block : evaluated_jacobian_blocks) {
const ParameterBlock* parameter_block =
auto parameter_block =
program_->parameter_blocks()[evaluated_jacobian_block.first];
const int argument = evaluated_jacobian_block.second;
const int parameter_block_size = parameter_block->TangentSize();
@@ -238,5 +243,4 @@ void CompressedRowJacobianWriter::Write(int residual_id,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -41,8 +41,7 @@
#include "ceres/internal/export.h"
#include "ceres/scratch_evaluate_preparer.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CompressedRowSparseMatrix;
class Program;
@@ -107,7 +106,6 @@ class CERES_NO_EXPORT CompressedRowJacobianWriter {
Program* program_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_COMPRESSED_ROW_JACOBIAN_WRITER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,25 +31,24 @@
#include "ceres/compressed_row_sparse_matrix.h"
#include <algorithm>
#include <functional>
#include <memory>
#include <numeric>
#include <random>
#include <vector>
#include "ceres/context_impl.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/export.h"
#include "ceres/random.h"
#include "ceres/parallel_for.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::vector;
namespace ceres::internal {
namespace {
// Helper functor used by the constructor for reordering the contents
// of a TripletSparseMatrix. This comparator assumes thay there are no
// of a TripletSparseMatrix. This comparator assumes that there are no
// duplicates in the pair of arrays rows and cols, i.e., there is no
// indices i and j (not equal to each other) s.t.
//
@@ -119,10 +118,12 @@ void TransposeForCompressedRowSparseStructure(const int num_rows,
transpose_rows[0] = 0;
}
template <class RandomNormalFunctor>
void AddRandomBlock(const int num_rows,
const int num_cols,
const int row_block_begin,
const int col_block_begin,
RandomNormalFunctor&& randn,
std::vector<int>* rows,
std::vector<int>* cols,
std::vector<double>* values) {
@@ -130,19 +131,21 @@ void AddRandomBlock(const int num_rows,
for (int c = 0; c < num_cols; ++c) {
rows->push_back(row_block_begin + r);
cols->push_back(col_block_begin + c);
values->push_back(RandNormal());
values->push_back(randn());
}
}
}
template <class RandomNormalFunctor>
void AddSymmetricRandomBlock(const int num_rows,
const int row_block_begin,
RandomNormalFunctor&& randn,
std::vector<int>* rows,
std::vector<int>* cols,
std::vector<double>* values) {
for (int r = 0; r < num_rows; ++r) {
for (int c = r; c < num_rows; ++c) {
const double v = RandNormal();
const double v = randn();
rows->push_back(row_block_begin + r);
cols->push_back(row_block_begin + c);
values->push_back(v);
@@ -163,7 +166,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(int num_rows,
int max_num_nonzeros) {
num_rows_ = num_rows;
num_cols_ = num_cols;
storage_type_ = UNSYMMETRIC;
storage_type_ = StorageType::UNSYMMETRIC;
rows_.resize(num_rows + 1, 0);
cols_.resize(max_num_nonzeros, 0);
values_.resize(max_num_nonzeros, 0.0);
@@ -202,7 +205,7 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix(
}
// index is the list of indices into the TripletSparseMatrix input.
vector<int> index(input.num_nonzeros(), 0);
std::vector<int> index(input.num_nonzeros(), 0);
for (int i = 0; i < input.num_nonzeros(); ++i) {
index[i] = i;
}
@@ -217,9 +220,8 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix(
input.num_nonzeros() * sizeof(int) + // NOLINT
input.num_nonzeros() * sizeof(double)); // NOLINT
std::unique_ptr<CompressedRowSparseMatrix> output =
std::make_unique<CompressedRowSparseMatrix>(
num_rows, num_cols, input.num_nonzeros());
auto output = std::make_unique<CompressedRowSparseMatrix>(
num_rows, num_cols, input.num_nonzeros());
if (num_rows == 0) {
// No data to copy.
@@ -255,7 +257,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(const double* diagonal,
num_rows_ = num_rows;
num_cols_ = num_rows;
storage_type_ = UNSYMMETRIC;
storage_type_ = StorageType::UNSYMMETRIC;
rows_.resize(num_rows + 1);
cols_.resize(num_rows);
values_.resize(num_rows);
@@ -276,22 +278,37 @@ void CompressedRowSparseMatrix::SetZero() {
std::fill(values_.begin(), values_.end(), 0);
}
// TODO(sameeragarwal): Make RightMultiply and LeftMultiply
// block-aware for higher performance.
void CompressedRowSparseMatrix::RightMultiply(const double* x,
double* y) const {
// TODO(sameeragarwal): Make RightMultiplyAndAccumulate and
// LeftMultiplyAndAccumulate block-aware for higher performance.
void CompressedRowSparseMatrix::RightMultiplyAndAccumulate(
const double* x, double* y, ContextImpl* context, int num_threads) const {
if (storage_type_ != StorageType::UNSYMMETRIC) {
RightMultiplyAndAccumulate(x, y);
return;
}
auto values = values_.data();
auto rows = rows_.data();
auto cols = cols_.data();
ParallelFor(
context, 0, num_rows_, num_threads, [values, rows, cols, x, y](int row) {
for (int idx = rows[row]; idx < rows[row + 1]; ++idx) {
const int c = cols[idx];
const double v = values[idx];
y[row] += v * x[c];
}
});
}
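An illustrative call of the new threaded overload; the matrix, the input vector x, the ContextImpl and the thread count are assumptions, and y must be pre-initialized because the product is accumulated into it:
Vector y = Vector::Zero(matrix.num_rows());
matrix.RightMultiplyAndAccumulate(
    x.data(), y.data(), &context, /*num_threads=*/4);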
void CompressedRowSparseMatrix::RightMultiplyAndAccumulate(const double* x,
double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
if (storage_type_ == UNSYMMETRIC) {
for (int r = 0; r < num_rows_; ++r) {
for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) {
const int c = cols_[idx];
const double v = values_[idx];
y[r] += v * x[c];
}
}
} else if (storage_type_ == UPPER_TRIANGULAR) {
if (storage_type_ == StorageType::UNSYMMETRIC) {
RightMultiplyAndAccumulate(x, y, nullptr, 1);
} else if (storage_type_ == StorageType::UPPER_TRIANGULAR) {
// Because of their block structure, we will have entries that lie
// above (below) the diagonal for lower (upper) triangular matrices,
// so the loops below need to account for this.
@@ -317,7 +334,7 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x,
}
}
}
} else if (storage_type_ == LOWER_TRIANGULAR) {
} else if (storage_type_ == StorageType::LOWER_TRIANGULAR) {
for (int r = 0; r < num_rows_; ++r) {
int idx = rows_[r];
const int idx_end = rows_[r + 1];
@@ -340,19 +357,21 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x,
}
}
void CompressedRowSparseMatrix::LeftMultiply(const double* x, double* y) const {
void CompressedRowSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
if (storage_type_ == UNSYMMETRIC) {
if (storage_type_ == StorageType::UNSYMMETRIC) {
for (int r = 0; r < num_rows_; ++r) {
for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) {
y[cols_[idx]] += values_[idx] * x[r];
}
}
} else {
// Since the matrix is symmetric, LeftMultiply = RightMultiply.
RightMultiply(x, y);
// Since the matrix is symmetric, LeftMultiplyAndAccumulate =
// RightMultiplyAndAccumulate.
RightMultiplyAndAccumulate(x, y);
}
}
@@ -360,11 +379,11 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const {
CHECK(x != nullptr);
std::fill(x, x + num_cols_, 0.0);
if (storage_type_ == UNSYMMETRIC) {
if (storage_type_ == StorageType::UNSYMMETRIC) {
for (int idx = 0; idx < rows_[num_rows_]; ++idx) {
x[cols_[idx]] += values_[idx] * values_[idx];
}
} else if (storage_type_ == UPPER_TRIANGULAR) {
} else if (storage_type_ == StorageType::UPPER_TRIANGULAR) {
// Because of their block structure, we will have entries that lie
// above (below) the diagonal for lower (upper) triangular
// matrices, so the loops below need to account for this.
@@ -390,7 +409,7 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const {
}
}
}
} else if (storage_type_ == LOWER_TRIANGULAR) {
} else if (storage_type_ == StorageType::LOWER_TRIANGULAR) {
for (int r = 0; r < num_rows_; ++r) {
int idx = rows_[r];
const int idx_end = rows_[r + 1];
@@ -435,7 +454,7 @@ void CompressedRowSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
CHECK_GE(delta_rows, 0);
CHECK_LE(delta_rows, num_rows_);
CHECK_EQ(storage_type_, UNSYMMETRIC);
CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC);
num_rows_ -= delta_rows;
rows_.resize(num_rows_ + 1);
@@ -451,7 +470,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
int num_row_blocks = 0;
int num_rows = 0;
while (num_row_blocks < row_blocks_.size() && num_rows < num_rows_) {
num_rows += row_blocks_[num_row_blocks];
num_rows += row_blocks_[num_row_blocks].size;
++num_row_blocks;
}
@@ -459,7 +478,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
}
void CompressedRowSparseMatrix::AppendRows(const CompressedRowSparseMatrix& m) {
CHECK_EQ(storage_type_, UNSYMMETRIC);
CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC);
CHECK_EQ(m.num_cols(), num_cols_);
CHECK((row_blocks_.empty() && m.row_blocks().empty()) ||
@@ -539,17 +558,15 @@ void CompressedRowSparseMatrix::SetMaxNumNonZeros(int num_nonzeros) {
std::unique_ptr<CompressedRowSparseMatrix>
CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
const double* diagonal, const vector<int>& blocks) {
int num_rows = 0;
const double* diagonal, const std::vector<Block>& blocks) {
const int num_rows = NumScalarEntries(blocks);
int num_nonzeros = 0;
for (int block_size : blocks) {
num_rows += block_size;
num_nonzeros += block_size * block_size;
for (auto& block : blocks) {
num_nonzeros += block.size * block.size;
}
std::unique_ptr<CompressedRowSparseMatrix> matrix =
std::make_unique<CompressedRowSparseMatrix>(
num_rows, num_rows, num_nonzeros);
auto matrix = std::make_unique<CompressedRowSparseMatrix>(
num_rows, num_rows, num_nonzeros);
int* rows = matrix->mutable_rows();
int* cols = matrix->mutable_cols();
@@ -558,15 +575,17 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
int idx_cursor = 0;
int col_cursor = 0;
for (int block_size : blocks) {
for (int r = 0; r < block_size; ++r) {
for (auto& block : blocks) {
for (int r = 0; r < block.size; ++r) {
*(rows++) = idx_cursor;
values[idx_cursor + r] = diagonal[col_cursor + r];
for (int c = 0; c < block_size; ++c, ++idx_cursor) {
if (diagonal != nullptr) {
values[idx_cursor + r] = diagonal[col_cursor + r];
}
for (int c = 0; c < block.size; ++c, ++idx_cursor) {
*(cols++) = col_cursor + c;
}
}
col_cursor += block_size;
col_cursor += block.size;
}
*rows = idx_cursor;
@@ -580,19 +599,18 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
std::unique_ptr<CompressedRowSparseMatrix>
CompressedRowSparseMatrix::Transpose() const {
std::unique_ptr<CompressedRowSparseMatrix> transpose =
std::make_unique<CompressedRowSparseMatrix>(
num_cols_, num_rows_, num_nonzeros());
auto transpose = std::make_unique<CompressedRowSparseMatrix>(
num_cols_, num_rows_, num_nonzeros());
switch (storage_type_) {
case UNSYMMETRIC:
transpose->set_storage_type(UNSYMMETRIC);
case StorageType::UNSYMMETRIC:
transpose->set_storage_type(StorageType::UNSYMMETRIC);
break;
case LOWER_TRIANGULAR:
transpose->set_storage_type(UPPER_TRIANGULAR);
case StorageType::LOWER_TRIANGULAR:
transpose->set_storage_type(StorageType::UPPER_TRIANGULAR);
break;
case UPPER_TRIANGULAR:
transpose->set_storage_type(LOWER_TRIANGULAR);
case StorageType::UPPER_TRIANGULAR:
transpose->set_storage_type(StorageType::LOWER_TRIANGULAR);
break;
default:
LOG(FATAL) << "Unknown storage type: " << storage_type_;
@@ -621,13 +639,14 @@ CompressedRowSparseMatrix::Transpose() const {
std::unique_ptr<CompressedRowSparseMatrix>
CompressedRowSparseMatrix::CreateRandomMatrix(
CompressedRowSparseMatrix::RandomMatrixOptions options) {
CompressedRowSparseMatrix::RandomMatrixOptions options,
std::mt19937& prng) {
CHECK_GT(options.num_row_blocks, 0);
CHECK_GT(options.min_row_block_size, 0);
CHECK_GT(options.max_row_block_size, 0);
CHECK_LE(options.min_row_block_size, options.max_row_block_size);
if (options.storage_type == UNSYMMETRIC) {
if (options.storage_type == StorageType::UNSYMMETRIC) {
CHECK_GT(options.num_col_blocks, 0);
CHECK_GT(options.min_col_block_size, 0);
CHECK_GT(options.max_col_block_size, 0);
@@ -642,33 +661,42 @@ CompressedRowSparseMatrix::CreateRandomMatrix(
CHECK_GT(options.block_density, 0.0);
CHECK_LE(options.block_density, 1.0);
vector<int> row_blocks;
vector<int> col_blocks;
std::vector<Block> row_blocks;
row_blocks.reserve(options.num_row_blocks);
std::vector<Block> col_blocks;
col_blocks.reserve(options.num_col_blocks);
std::uniform_int_distribution<int> col_distribution(
options.min_col_block_size, options.max_col_block_size);
std::uniform_int_distribution<int> row_distribution(
options.min_row_block_size, options.max_row_block_size);
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
std::normal_distribution<double> standard_normal_distribution;
// Generate the row block structure.
int row_pos = 0;
for (int i = 0; i < options.num_row_blocks; ++i) {
// Generate a random integer in [min_row_block_size, max_row_block_size]
const int delta_block_size =
Uniform(options.max_row_block_size - options.min_row_block_size);
row_blocks.push_back(options.min_row_block_size + delta_block_size);
row_blocks.emplace_back(row_distribution(prng), row_pos);
row_pos += row_blocks.back().size;
}
if (options.storage_type == UNSYMMETRIC) {
if (options.storage_type == StorageType::UNSYMMETRIC) {
// Generate the col block structure.
int col_pos = 0;
for (int i = 0; i < options.num_col_blocks; ++i) {
// Generate a random integer in [min_col_block_size, max_col_block_size]
const int delta_block_size =
Uniform(options.max_col_block_size - options.min_col_block_size);
col_blocks.push_back(options.min_col_block_size + delta_block_size);
col_blocks.emplace_back(col_distribution(prng), col_pos);
col_pos += col_blocks.back().size;
}
} else {
// Symmetric matrices (LOWER_TRIANGULAR or UPPER_TRIANGULAR);
col_blocks = row_blocks;
}
vector<int> tsm_rows;
vector<int> tsm_cols;
vector<double> tsm_values;
std::vector<int> tsm_rows;
std::vector<int> tsm_cols;
std::vector<double> tsm_values;
// For ease of construction, we are going to generate the
// CompressedRowSparseMatrix by generating it as a
@@ -687,51 +715,55 @@ CompressedRowSparseMatrix::CreateRandomMatrix(
for (int r = 0; r < options.num_row_blocks; ++r) {
int col_block_begin = 0;
for (int c = 0; c < options.num_col_blocks; ++c) {
if (((options.storage_type == UPPER_TRIANGULAR) && (r > c)) ||
((options.storage_type == LOWER_TRIANGULAR) && (r < c))) {
col_block_begin += col_blocks[c];
if (((options.storage_type == StorageType::UPPER_TRIANGULAR) &&
(r > c)) ||
((options.storage_type == StorageType::LOWER_TRIANGULAR) &&
(r < c))) {
col_block_begin += col_blocks[c].size;
continue;
}
// Randomly determine if this block is present or not.
if (RandDouble() <= options.block_density) {
if (uniform01(prng) <= options.block_density) {
auto randn = [&standard_normal_distribution, &prng] {
return standard_normal_distribution(prng);
};
// If the matrix is symmetric, then we take care to generate
// symmetric diagonal blocks.
if (options.storage_type == UNSYMMETRIC || r != c) {
AddRandomBlock(row_blocks[r],
col_blocks[c],
if (options.storage_type == StorageType::UNSYMMETRIC || r != c) {
AddRandomBlock(row_blocks[r].size,
col_blocks[c].size,
row_block_begin,
col_block_begin,
randn,
&tsm_rows,
&tsm_cols,
&tsm_values);
} else {
AddSymmetricRandomBlock(row_blocks[r],
AddSymmetricRandomBlock(row_blocks[r].size,
row_block_begin,
randn,
&tsm_rows,
&tsm_cols,
&tsm_values);
}
}
col_block_begin += col_blocks[c];
col_block_begin += col_blocks[c].size;
}
row_block_begin += row_blocks[r];
row_block_begin += row_blocks[r].size;
}
}
const int num_rows = std::accumulate(row_blocks.begin(), row_blocks.end(), 0);
const int num_cols = std::accumulate(col_blocks.begin(), col_blocks.end(), 0);
const int num_rows = NumScalarEntries(row_blocks);
const int num_cols = NumScalarEntries(col_blocks);
const bool kDoNotTranspose = false;
std::unique_ptr<CompressedRowSparseMatrix> matrix =
CompressedRowSparseMatrix::FromTripletSparseMatrix(
TripletSparseMatrix(
num_rows, num_cols, tsm_rows, tsm_cols, tsm_values),
kDoNotTranspose);
auto matrix = CompressedRowSparseMatrix::FromTripletSparseMatrix(
TripletSparseMatrix(num_rows, num_cols, tsm_rows, tsm_cols, tsm_values),
kDoNotTranspose);
(*matrix->mutable_row_blocks()) = row_blocks;
(*matrix->mutable_col_blocks()) = col_blocks;
matrix->set_storage_type(options.storage_type);
return matrix;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,10 @@
#define CERES_INTERNAL_COMPRESSED_ROW_SPARSE_MATRIX_H_
#include <memory>
#include <random>
#include <vector>
#include "ceres/block_structure.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/sparse_matrix.h"
@@ -46,11 +48,12 @@ struct CRSMatrix;
namespace internal {
class ContextImpl;
class TripletSparseMatrix;
class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
public:
enum StorageType {
enum class StorageType {
UNSYMMETRIC,
// Matrix is assumed to be symmetric but only the lower triangular
// part of the matrix is stored.
@@ -100,8 +103,12 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
// SparseMatrix interface.
~CompressedRowSparseMatrix() override;
void SetZero() final;
void RightMultiply(const double* x, double* y) const final;
void LeftMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const final;
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
void SquaredColumnNorm(double* x) const final;
void ScaleColumns(const double* scale) final;
void ToDenseMatrix(Matrix* dense_matrix) const final;
@@ -109,8 +116,8 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
int num_rows() const final { return num_rows_; }
int num_cols() const final { return num_cols_; }
int num_nonzeros() const final { return rows_[num_rows_]; }
const double* values() const final { return &values_[0]; }
double* mutable_values() final { return &values_[0]; }
const double* values() const final { return values_.data(); }
double* mutable_values() final { return values_.data(); }
// Delete the bottom delta_rows.
// num_rows -= delta_rows
@@ -132,28 +139,28 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
void set_num_cols(const int num_cols) { num_cols_ = num_cols; }
// Low level access methods that expose the structure of the matrix.
const int* cols() const { return &cols_[0]; }
int* mutable_cols() { return &cols_[0]; }
const int* cols() const { return cols_.data(); }
int* mutable_cols() { return cols_.data(); }
const int* rows() const { return &rows_[0]; }
int* mutable_rows() { return &rows_[0]; }
const int* rows() const { return rows_.data(); }
int* mutable_rows() { return rows_.data(); }
StorageType storage_type() const { return storage_type_; }
void set_storage_type(const StorageType storage_type) {
storage_type_ = storage_type;
}
const std::vector<int>& row_blocks() const { return row_blocks_; }
std::vector<int>* mutable_row_blocks() { return &row_blocks_; }
const std::vector<Block>& row_blocks() const { return row_blocks_; }
std::vector<Block>* mutable_row_blocks() { return &row_blocks_; }
const std::vector<int>& col_blocks() const { return col_blocks_; }
std::vector<int>* mutable_col_blocks() { return &col_blocks_; }
const std::vector<Block>& col_blocks() const { return col_blocks_; }
std::vector<Block>* mutable_col_blocks() { return &col_blocks_; }
// Create a block diagonal CompressedRowSparseMatrix with the given
// block structure. The individual blocks are assumed to be laid out
// contiguously in the diagonal array, one block at a time.
static std::unique_ptr<CompressedRowSparseMatrix> CreateBlockDiagonalMatrix(
const double* diagonal, const std::vector<int>& blocks);
const double* diagonal, const std::vector<Block>& blocks);
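// Example: diagonal = {1, 2, 3} with blocks {{2, 0}, {1, 2}} yields the
// 3x3 block-diagonal matrix
//   [1 0 0]
//   [0 2 0]
//   [0 0 3]
// stored with 2*2 + 1*1 = 5 nonzeros, since the zeros inside the 2x2
// diagonal block are stored explicitly.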
// Options struct to control the generation of random block sparse
// matrices in compressed row sparse format.
@@ -165,7 +172,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
// given bounds.
//
// Then we walk the block structure of the resulting matrix, and with
// probability block_density detemine whether they are structurally
// probability block_density determine whether they are structurally
// zero or not. If the answer is no, then we generate entries for the
// block which are distributed normally.
struct RandomMatrixOptions {
@@ -176,7 +183,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
// (lower triangular) part. In this case, num_col_blocks,
// min_col_block_size and max_col_block_size will be ignored and
// assumed to be equal to the corresponding row settings.
StorageType storage_type = UNSYMMETRIC;
StorageType storage_type = StorageType::UNSYMMETRIC;
int num_row_blocks = 0;
int min_row_block_size = 0;
@@ -195,7 +202,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
// normally distributed and whose structure is determined by
// RandomMatrixOptions.
static std::unique_ptr<CompressedRowSparseMatrix> CreateRandomMatrix(
RandomMatrixOptions options);
RandomMatrixOptions options, std::mt19937& prng);
private:
static std::unique_ptr<CompressedRowSparseMatrix> FromTripletSparseMatrix(
@@ -209,14 +216,31 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
StorageType storage_type_;
// If the matrix has an underlying block structure, then it can also
// carry with it row and column block sizes. This is auxilliary and
// carry with it row and column block sizes. This is auxiliary and
// optional information for use by algorithms operating on the
// matrix. The class itself does not make use of this information in
// any way.
std::vector<int> row_blocks_;
std::vector<int> col_blocks_;
std::vector<Block> row_blocks_;
std::vector<Block> col_blocks_;
};
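A usage sketch for the updated CreateRandomMatrix API, which now takes a caller-owned std::mt19937; the option values here are arbitrary:
CompressedRowSparseMatrix::RandomMatrixOptions options;
options.num_row_blocks = 4;
options.min_row_block_size = 1;
options.max_row_block_size = 3;
options.num_col_blocks = 4;
options.min_col_block_size = 1;
options.max_col_block_size = 3;
options.block_density = 0.5;
std::mt19937 prng(42);  // deterministic seed, reproducible structure
std::unique_ptr<CompressedRowSparseMatrix> matrix =
    CompressedRowSparseMatrix::CreateRandomMatrix(options, prng);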
inline std::ostream& operator<<(std::ostream& s,
CompressedRowSparseMatrix::StorageType type) {
switch (type) {
case CompressedRowSparseMatrix::StorageType::UNSYMMETRIC:
s << "UNSYMMETRIC";
break;
case CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR:
s << "UPPER_TRIANGULAR";
break;
case CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR:
s << "LOWER_TRIANGULAR";
break;
default:
s << "UNKNOWN CompressedRowSparseMatrix::StorageType";
}
return s;
}
} // namespace internal
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A thread-safe multi-producer, multi-consumer queue for queueing items that
// are typically handled asynchronously by multiple threads. The ConcurrentQueue
@@ -152,7 +151,6 @@ class ConcurrentQueue {
bool wait_{true};
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_CONCURRENT_QUEUE_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,253 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// A preconditioned conjugate gradients solver
// (ConjugateGradientsSolver) for positive semidefinite linear
// systems.
//
// We have also augmented the termination criterion used by this
// solver to support not just residual based termination but also
// termination based on decrease in the value of the quadratic model
// that CG optimizes.
#include "ceres/conjugate_gradients_solver.h"
#include <cmath>
#include <cstddef>
#include <utility>
#include "ceres/internal/eigen.h"
#include "ceres/linear_operator.h"
#include "ceres/stringprintf.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace {
bool IsZeroOrInfinity(double x) { return ((x == 0.0) || std::isinf(x)); }
} // namespace
ConjugateGradientsSolver::ConjugateGradientsSolver(
LinearSolver::Options options)
: options_(std::move(options)) {}
LinearSolver::Summary ConjugateGradientsSolver::Solve(
LinearOperator* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) {
CHECK(A != nullptr);
CHECK(x != nullptr);
CHECK(b != nullptr);
CHECK_EQ(A->num_rows(), A->num_cols());
LinearSolver::Summary summary;
summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE;
summary.message = "Maximum number of iterations reached.";
summary.num_iterations = 0;
const int num_cols = A->num_cols();
VectorRef xref(x, num_cols);
ConstVectorRef bref(b, num_cols);
const double norm_b = bref.norm();
if (norm_b == 0.0) {
xref.setZero();
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.message = "Convergence. |b| = 0.";
return summary;
}
Vector r(num_cols);
Vector p(num_cols);
Vector z(num_cols);
Vector tmp(num_cols);
const double tol_r = per_solve_options.r_tolerance * norm_b;
tmp.setZero();
A->RightMultiply(x, tmp.data());
r = bref - tmp;
double norm_r = r.norm();
if (options_.min_num_iterations == 0 && norm_r <= tol_r) {
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.message =
StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r);
return summary;
}
double rho = 1.0;
// Initial value of the quadratic model Q = x'Ax - 2 * b'x.
double Q0 = -1.0 * xref.dot(bref + r);
for (summary.num_iterations = 1;; ++summary.num_iterations) {
// Apply preconditioner
if (per_solve_options.preconditioner != nullptr) {
z.setZero();
per_solve_options.preconditioner->RightMultiply(r.data(), z.data());
} else {
z = r;
}
double last_rho = rho;
rho = r.dot(z);
if (IsZeroOrInfinity(rho)) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho);
break;
}
if (summary.num_iterations == 1) {
p = z;
} else {
double beta = rho / last_rho;
if (IsZeroOrInfinity(beta)) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.message = StringPrintf(
"Numerical failure. beta = rho_n / rho_{n-1} = %e, "
"rho_n = %e, rho_{n-1} = %e",
beta,
rho,
last_rho);
break;
}
p = z + beta * p;
}
Vector& q = z;
q.setZero();
A->RightMultiply(p.data(), q.data());
const double pq = p.dot(q);
if ((pq <= 0) || std::isinf(pq)) {
summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE;
summary.message = StringPrintf(
"Matrix is indefinite, no more progress can be made. "
"p'q = %e. |p| = %e, |q| = %e",
pq,
p.norm(),
q.norm());
break;
}
const double alpha = rho / pq;
if (std::isinf(alpha)) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.message = StringPrintf(
"Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.",
alpha,
rho,
pq);
break;
}
xref = xref + alpha * p;
// Ideally we would just use the update r = r - alpha*q to keep
// track of the residual vector. However this estimate tends to
// drift over time due to round off errors. Thus every
// residual_reset_period iterations, we calculate the residual as
// r = b - Ax. We do not do this every iteration because this
// requires an additional matrix vector multiply which would
// double the complexity of the CG algorithm.
if (summary.num_iterations % options_.residual_reset_period == 0) {
tmp.setZero();
A->RightMultiply(x, tmp.data());
r = bref - tmp;
} else {
r = r - alpha * q;
}
// Quadratic model based termination.
// Q1 = x'Ax - 2 * b' x.
const double Q1 = -1.0 * xref.dot(bref + r);
// For PSD matrices A, let
//
// Q(x) = x'Ax - 2b'x
//
// be the cost of the quadratic function defined by A and b. Then,
// the solver terminates at iteration i if
//
// i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance.
//
// This termination criterion is more useful when using CG to
// solve the Newton step. This particular convergence test comes
// from Stephen Nash's work on truncated Newton
// methods. References:
//
// 1. Stephen G. Nash & Ariela Sofer, Assessing A Search
// Direction Within A Truncated Newton Method, Operations
// Research Letters 9 (1990), 219-221.
//
// 2. Stephen G. Nash, A Survey of Truncated Newton Methods,
// Journal of Computational and Applied Mathematics,
// 124(1-2), 45-59, 2000.
//
const double zeta = summary.num_iterations * (Q1 - Q0) / Q1;
if (zeta < per_solve_options.q_tolerance &&
summary.num_iterations >= options_.min_num_iterations) {
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.message =
StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e",
summary.num_iterations,
zeta,
per_solve_options.q_tolerance,
r.norm());
break;
}
Q0 = Q1;
// Residual based termination.
norm_r = r.norm();
if (norm_r <= tol_r &&
summary.num_iterations >= options_.min_num_iterations) {
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.message =
StringPrintf("Iteration: %d Convergence. |r| = %e <= %e.",
summary.num_iterations,
norm_r,
tol_r);
break;
}
if (summary.num_iterations >= options_.max_num_iterations) {
break;
}
}
return summary;
}
} // namespace internal
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,42 +34,277 @@
#ifndef CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_
#define CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_
#include <cmath>
#include <cstddef>
#include <utility>
#include "ceres/eigen_vector_ops.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/linear_operator.h"
#include "ceres/linear_solver.h"
#include "ceres/stringprintf.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class LinearOperator;
// This class implements the now classical Conjugate Gradients
// algorithm of Hestenes & Stiefel for solving positive semidefinite
// linear systems. Optionally it can also use a preconditioner to
// reduce the condition number of the linear system and improve the
// convergence rate. Modern references for Conjugate Gradients are the
// books by Yousef Saad and Trefethen & Bau. This implementation of CG
// has been augmented with additional termination tests that are
// needed for forcing early termination when used as part of an
// inexact Newton solver.
//
// For more details see the documentation for
// LinearSolver::PerSolveOptions::r_tolerance and
// LinearSolver::PerSolveOptions::q_tolerance in linear_solver.h.
class CERES_NO_EXPORT ConjugateGradientsSolver final : public LinearSolver {
// Interface for the linear operator used by ConjugateGradientsSolver.
template <typename DenseVectorType>
class ConjugateGradientsLinearOperator {
public:
explicit ConjugateGradientsSolver(LinearSolver::Options options);
Summary Solve(LinearOperator* A,
const double* b,
const LinearSolver::PerSolveOptions& per_solve_options,
double* x) final;
private:
const LinearSolver::Options options_;
~ConjugateGradientsLinearOperator() = default;
virtual void RightMultiplyAndAccumulate(const DenseVectorType& x,
DenseVectorType& y) = 0;
};
} // namespace internal
} // namespace ceres
// Adapter class that makes LinearOperator appear like an instance of
// ConjugateGradientsLinearOperator.
class LinearOperatorAdapter : public ConjugateGradientsLinearOperator<Vector> {
public:
LinearOperatorAdapter(LinearOperator& linear_operator)
: linear_operator_(linear_operator) {}
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
linear_operator_.RightMultiplyAndAccumulate(x, y);
}
private:
LinearOperator& linear_operator_;
};
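A minimal sketch of a concrete operator: an identity preconditioner, i.e. plain unpreconditioned CG. The class name is hypothetical; the important detail is that RightMultiplyAndAccumulate accumulates into y rather than overwriting it:
class IdentityPreconditioner final
    : public ConjugateGradientsLinearOperator<Vector> {
 public:
  // y += I * x
  void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
    y += x;
  }
};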
// Options to control the ConjugateGradientsSolver. For detailed documentation
// for each of these options see linear_solver.h
struct ConjugateGradientsSolverOptions {
int min_num_iterations = 1;
int max_num_iterations = 1;
int residual_reset_period = 10;
double r_tolerance = 0.0;
double q_tolerance = 0.0;
ContextImpl* context = nullptr;
int num_threads = 1;
};
// This function implements the now classical Conjugate Gradients algorithm of
// Hestenes & Stiefel for solving positive semidefinite linear systems.
// Optionally it can also use a preconditioner to reduce the condition number of
// the linear system and improve the convergence rate. Modern references for
// Conjugate Gradients are the books by Yousef Saad and Trefethen & Bau. This
// implementation of CG has been augmented with additional termination tests
// that are needed for forcing early termination when used as part of an inexact
// Newton solver.
//
// This implementation is templated over DenseVectorType and then in turn on
// ConjugateGradientsLinearOperator, which allows us to write an abstract
// implementation of the Conjugate Gradients algorithm without worrying about how
// these objects are implemented or where they are stored. In particular it
// allows us to have a single implementation that works on CPU and GPU based
// matrices and vectors.
//
// scratch must contain pointers to four DenseVector objects of the same size as
// rhs and solution. By asking the user for scratch space, we guarantee that we
// will not perform any allocations inside this function.
template <typename DenseVectorType>
LinearSolver::Summary ConjugateGradientsSolver(
const ConjugateGradientsSolverOptions options,
ConjugateGradientsLinearOperator<DenseVectorType>& lhs,
const DenseVectorType& rhs,
ConjugateGradientsLinearOperator<DenseVectorType>& preconditioner,
DenseVectorType* scratch[4],
DenseVectorType& solution) {
auto IsZeroOrInfinity = [](double x) {
return ((x == 0.0) || std::isinf(x));
};
DenseVectorType& p = *scratch[0];
DenseVectorType& r = *scratch[1];
DenseVectorType& z = *scratch[2];
DenseVectorType& tmp = *scratch[3];
LinearSolver::Summary summary;
summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE;
summary.message = "Maximum number of iterations reached.";
summary.num_iterations = 0;
const double norm_rhs = Norm(rhs, options.context, options.num_threads);
if (norm_rhs == 0.0) {
SetZero(solution, options.context, options.num_threads);
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Convergence. |b| = 0.";
return summary;
}
const double tol_r = options.r_tolerance * norm_rhs;
SetZero(tmp, options.context, options.num_threads);
lhs.RightMultiplyAndAccumulate(solution, tmp);
// r = rhs - tmp
Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads);
double norm_r = Norm(r, options.context, options.num_threads);
if (options.min_num_iterations == 0 && norm_r <= tol_r) {
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message =
StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r);
return summary;
}
double rho = 1.0;
// Initial value of the quadratic model Q = x'Ax - 2 * b'x.
// double Q0 = -1.0 * solution.dot(rhs + r);
Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads);
double Q0 = -Dot(solution, tmp, options.context, options.num_threads);
for (summary.num_iterations = 1;; ++summary.num_iterations) {
SetZero(z, options.context, options.num_threads);
preconditioner.RightMultiplyAndAccumulate(r, z);
const double last_rho = rho;
// rho = r.dot(z);
rho = Dot(r, z, options.context, options.num_threads);
if (IsZeroOrInfinity(rho)) {
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho);
break;
}
if (summary.num_iterations == 1) {
Copy(z, p, options.context, options.num_threads);
} else {
const double beta = rho / last_rho;
if (IsZeroOrInfinity(beta)) {
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = StringPrintf(
"Numerical failure. beta = rho_n / rho_{n-1} = %e, "
"rho_n = %e, rho_{n-1} = %e",
beta,
rho,
last_rho);
break;
}
// p = z + beta * p;
Axpby(1.0, z, beta, p, p, options.context, options.num_threads);
}
DenseVectorType& q = z;
SetZero(q, options.context, options.num_threads);
lhs.RightMultiplyAndAccumulate(p, q);
const double pq = Dot(p, q, options.context, options.num_threads);
if ((pq <= 0) || std::isinf(pq)) {
summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE;
summary.message = StringPrintf(
"Matrix is indefinite, no more progress can be made. "
"p'q = %e. |p| = %e, |q| = %e",
pq,
Norm(p, options.context, options.num_threads),
Norm(q, options.context, options.num_threads));
break;
}
const double alpha = rho / pq;
if (std::isinf(alpha)) {
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = StringPrintf(
"Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.",
alpha,
rho,
pq);
break;
}
// solution = solution + alpha * p;
Axpby(1.0,
solution,
alpha,
p,
solution,
options.context,
options.num_threads);
// Ideally we would just use the update r = r - alpha*q to keep
// track of the residual vector. However this estimate tends to
// drift over time due to round off errors. Thus every
// residual_reset_period iterations, we calculate the residual as
// r = b - Ax. We do not do this every iteration because this
// requires an additional matrix vector multiply which would
// double the complexity of the CG algorithm.
if (summary.num_iterations % options.residual_reset_period == 0) {
SetZero(tmp, options.context, options.num_threads);
lhs.RightMultiplyAndAccumulate(solution, tmp);
Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads);
// r = rhs - tmp;
} else {
Axpby(1.0, r, -alpha, q, r, options.context, options.num_threads);
// r = r - alpha * q;
}
// Quadratic model based termination.
// Q1 = x'Ax - 2 * b' x.
// const double Q1 = -1.0 * solution.dot(rhs + r);
Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads);
const double Q1 = -Dot(solution, tmp, options.context, options.num_threads);
// For PSD matrices A, let
//
// Q(x) = x'Ax - 2b'x
//
// be the cost of the quadratic function defined by A and b. Then,
// the solver terminates at iteration i if
//
// i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance.
//
// This termination criterion is more useful when using CG to
// solve the Newton step. This particular convergence test comes
// from Stephen Nash's work on truncated Newton
// methods. References:
//
// 1. Stephen G. Nash & Ariela Sofer, Assessing A Search
// Direction Within A Truncated Newton Method, Operations
// Research Letters 9 (1990), 219-221.
//
// 2. Stephen G. Nash, A Survey of Truncated Newton Methods,
// Journal of Computational and Applied Mathematics,
// 124(1-2), 45-59, 2000.
//
const double zeta = summary.num_iterations * (Q1 - Q0) / Q1;
if (zeta < options.q_tolerance &&
summary.num_iterations >= options.min_num_iterations) {
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message =
StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e",
summary.num_iterations,
zeta,
options.q_tolerance,
Norm(r, options.context, options.num_threads));
break;
}
Q0 = Q1;
// Residual based termination.
norm_r = Norm(r, options.context, options.num_threads);
if (norm_r <= tol_r &&
summary.num_iterations >= options.min_num_iterations) {
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message =
StringPrintf("Iteration: %d Convergence. |r| = %e <= %e.",
summary.num_iterations,
norm_r,
tol_r);
break;
}
if (summary.num_iterations >= options.max_num_iterations) {
break;
}
}
return summary;
}
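Finally, a sketch of a CPU-side call of the templated solver above, reusing the hypothetical IdentityPreconditioner from earlier; jacobian_operator, rhs, num_cols and context are assumed to come from the caller, who also supplies the four scratch vectors so the solver itself never allocates:
ConjugateGradientsSolverOptions options;
options.max_num_iterations = 100;
options.r_tolerance = 1e-9;
options.context = &context;  // hypothetical caller-owned ContextImpl
LinearOperatorAdapter lhs(jacobian_operator);  // wraps a LinearOperator
IdentityPreconditioner preconditioner;
Vector solution = Vector::Zero(num_cols);
Vector s0(num_cols), s1(num_cols), s2(num_cols), s3(num_cols);
Vector* scratch[4] = {&s0, &s1, &s2, &s3};
LinearSolver::Summary summary = ConjugateGradientsSolver(
    options, lhs, rhs, preconditioner, scratch, solution);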
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,8 @@
#include <string>
#include "ceres/internal/config.h"
#include "ceres/stringprintf.h"
#include "ceres/wall_time.h"
#ifndef CERES_NO_CUDA
#include "cublas_v2.h"
@@ -40,69 +42,155 @@
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
namespace ceres {
namespace internal {
namespace ceres::internal {
ContextImpl::ContextImpl() = default;
#ifndef CERES_NO_CUDA
bool ContextImpl::InitCUDA(std::string* message) {
if (cuda_initialized_) {
void ContextImpl::TearDown() {
if (cusolver_handle_ != nullptr) {
cusolverDnDestroy(cusolver_handle_);
cusolver_handle_ = nullptr;
}
if (cublas_handle_ != nullptr) {
cublasDestroy(cublas_handle_);
cublas_handle_ = nullptr;
}
if (cusparse_handle_ != nullptr) {
cusparseDestroy(cusparse_handle_);
cusparse_handle_ = nullptr;
}
for (auto& s : streams_) {
if (s != nullptr) {
cudaStreamDestroy(s);
s = nullptr;
}
}
is_cuda_initialized_ = false;
}
std::string ContextImpl::CudaConfigAsString() const {
return ceres::internal::StringPrintf(
"======================= CUDA Device Properties ======================\n"
"Cuda version : %d.%d\n"
"Device ID : %d\n"
"Device name : %s\n"
"Total GPU memory : %6.f MiB\n"
"GPU memory available : %6.f MiB\n"
"Compute capability : %d.%d\n"
"Warp size : %d\n"
"Max threads per block : %d\n"
"Max threads per dim : %d %d %d\n"
"Max grid size : %d %d %d\n"
"Multiprocessor count : %d\n"
"cudaMallocAsync supported : %s\n"
"====================================================================",
cuda_version_major_,
cuda_version_minor_,
gpu_device_id_in_use_,
gpu_device_properties_.name,
gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
GpuMemoryAvailable() / 1024.0 / 1024.0,
gpu_device_properties_.major,
gpu_device_properties_.minor,
gpu_device_properties_.warpSize,
gpu_device_properties_.maxThreadsPerBlock,
gpu_device_properties_.maxThreadsDim[0],
gpu_device_properties_.maxThreadsDim[1],
gpu_device_properties_.maxThreadsDim[2],
gpu_device_properties_.maxGridSize[0],
gpu_device_properties_.maxGridSize[1],
gpu_device_properties_.maxGridSize[2],
gpu_device_properties_.multiProcessorCount,
// In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it
// is not available in older versions
is_cuda_memory_pools_supported_ ? "Yes" : "No");
}
size_t ContextImpl::GpuMemoryAvailable() const {
size_t free, total;
cudaMemGetInfo(&free, &total);
return free;
}
bool ContextImpl::InitCuda(std::string* message) {
if (is_cuda_initialized_) {
return true;
}
CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
int cuda_version;
CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
cuda_version_major_ = cuda_version / 1000;
cuda_version_minor_ = (cuda_version % 1000) / 10;
CHECK_EQ(
cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_),
cudaSuccess);
#if CUDART_VERSION >= 11020
int is_cuda_memory_pools_supported;
CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported,
cudaDevAttrMemoryPoolsSupported,
gpu_device_id_in_use_),
cudaSuccess);
is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1;
#endif
VLOG(3) << "\n" << CudaConfigAsString();
EventLogger event_logger("InitCuda");
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
*message = "cuBLAS::cublasCreate failed.";
cublas_handle_ = nullptr;
return false;
}
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnCreate failed.";
cusolver_handle_ = nullptr;
cublasDestroy(cublas_handle_);
cublas_handle_ = nullptr;
return false;
}
if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
cudaSuccess) {
*message = "CUDA::cudaStreamCreateWithFlags failed.";
cusolverDnDestroy(cusolver_handle_);
cublasDestroy(cublas_handle_);
cusolver_handle_ = nullptr;
cublas_handle_ = nullptr;
stream_ = nullptr;
return false;
}
if (cusolverDnSetStream(cusolver_handle_, stream_) !=
CUSOLVER_STATUS_SUCCESS ||
cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) {
*message =
"cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed.";
cusolverDnDestroy(cusolver_handle_);
cublasDestroy(cublas_handle_);
cudaStreamDestroy(stream_);
cusolver_handle_ = nullptr;
"CUDA initialization failed because cuBLAS::cublasCreate failed.";
cublas_handle_ = nullptr;
stream_ = nullptr;
return false;
}
cuda_initialized_ = true;
event_logger.AddEvent("cublasCreate");
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
*message =
"CUDA initialization failed because cuSolverDN::cusolverDnCreate "
"failed.";
TearDown();
return false;
}
event_logger.AddEvent("cusolverDnCreate");
if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
*message =
"CUDA initialization failed because cuSPARSE::cusparseCreate failed.";
TearDown();
return false;
}
event_logger.AddEvent("cusparseCreate");
for (auto& s : streams_) {
if (cudaStreamCreateWithFlags(&s, cudaStreamNonBlocking) != cudaSuccess) {
*message =
"CUDA initialization failed because CUDA::cudaStreamCreateWithFlags "
"failed.";
TearDown();
return false;
}
}
event_logger.AddEvent("cudaStreamCreateWithFlags");
if (cusolverDnSetStream(cusolver_handle_, DefaultStream()) !=
CUSOLVER_STATUS_SUCCESS ||
cublasSetStream(cublas_handle_, DefaultStream()) !=
CUBLAS_STATUS_SUCCESS ||
cusparseSetStream(cusparse_handle_, DefaultStream()) !=
CUSPARSE_STATUS_SUCCESS) {
*message = "CUDA initialization failed because SetStream failed.";
TearDown();
return false;
}
event_logger.AddEvent("SetStream");
is_cuda_initialized_ = true;
return true;
}
#endif // CERES_NO_CUDA
ContextImpl::~ContextImpl() {
#ifndef CERES_NO_CUDA
if (cuda_initialized_) {
cusolverDnDestroy(cusolver_handle_);
cublasDestroy(cublas_handle_);
cudaStreamDestroy(stream_);
}
TearDown();
#endif // CERES_NO_CUDA
}
void ContextImpl::EnsureMinimumThreads(int num_threads) {
#ifdef CERES_USE_CXX_THREADS
thread_pool.Resize(num_threads);
#endif // CERES_USE_CXX_THREADS
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
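A hedged sketch of how the initialization path above is driven; the include path is an assumption, and the pattern mirrors the CHECK in CudaBlockSparseCRSViewTest::SetUp later in this commit:

#include <string>

#include <glog/logging.h>

#include "ceres/context_impl.h"  // include path assumed

void InitCudaOrDie() {
  ceres::internal::ContextImpl context;
  std::string message;
  // On failure, `message` carries the human-readable reason assembled above,
  // e.g. "CUDA initialization failed because cuBLAS::cublasCreate failed."
  CHECK(context.InitCuda(&message)) << message;
  // Repeated calls are cheap: is_cuda_initialized_ short-circuits to true.
  VLOG(3) << context.CudaConfigAsString();
}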

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,14 +46,12 @@
#include "cublas_v2.h"
#include "cuda_runtime.h"
#include "cusolverDn.h"
#include "cusparse.h"
#endif // CERES_NO_CUDA
#ifdef CERES_USE_CXX_THREADS
#include "ceres/thread_pool.h"
#endif // CERES_USE_CXX_THREADS
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT ContextImpl final : public Context {
public:
@@ -67,30 +65,82 @@ class CERES_NO_EXPORT ContextImpl final : public Context {
// defined by the hardware. Otherwise this call is a no-op.
void EnsureMinimumThreads(int num_threads);
#ifdef CERES_USE_CXX_THREADS
ThreadPool thread_pool;
#endif // CERES_USE_CXX_THREADS
#ifndef CERES_NO_CUDA
// Initializes the cuSolverDN context, creates an asynchronous stream, and
// associates the stream with cuSolverDN. Returns true iff initialization was
// successful, else it returns false and a human-readable error message is
// returned.
bool InitCUDA(std::string* message);
// Note on Ceres' use of CUDA Devices on multi-GPU systems:
// 1. On a multi-GPU system, if nothing special is done, the "default" CUDA
// device will be used, which is device 0.
// 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment
// variable, Ceres will still use device 0 visible to the program, but
// device 0 will be the first GPU indicated in the environment variable.
// 3. If the user explicitly selects a GPU in the host process before calling
// Ceres, Ceres will use that GPU.
// Note on Ceres' use of CUDA Streams:
// Most operations on the GPU are performed using a single stream. In
// those cases DefaultStream() should be used. This ensures that operations
// are stream-ordered, and can overlap with CPU processing with no
// additional effort.
//
// a. Single-stream workloads
// - Only use default stream
// - Return control to the callee without synchronization whenever possible
// - Stream synchronization occurs only after GPU to CPU transfers, and is
// handled by CudaBuffer
//
// b. Multi-stream workloads
// Multi-stream workloads are more restricted, in order to make it harder
// to introduce race conditions.
// - Should always synchronize the default stream on entry
// - Should always synchronize all utilized streams on exit
// - Should not make any assumptions on one of streams_[] being default
//
// With those rules in place
// - All single-stream asynchronous workloads are serialized using default
// stream
// - Multi-stream workloads always wait for single-stream workloads to
// finish and leave no running computations on exit.
// This slightly penalizes multi-stream workloads, but makes it easier to
// avoid race conditions when a multi-stream workload depends on the results
// of any preceding GPU computations.
// Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates an
// asynchronous CUDA stream, and associates the stream with the contexts.
// Returns true iff initialization was successful, else it returns false and a
// human-readable error message is returned.
bool InitCuda(std::string* message);
void TearDown();
inline bool IsCudaInitialized() const { return is_cuda_initialized_; }
// Returns a human-readable string describing the capabilities of the current
// CUDA device. CudaConfigAsString can only be called after InitCuda has been
// called.
std::string CudaConfigAsString() const;
// Returns the number of bytes of available global memory on the current CUDA
// device. If it is called before InitCuda, it returns 0.
size_t GpuMemoryAvailable() const;
// Handle to the cuSOLVER context.
cusolverDnHandle_t cusolver_handle_ = nullptr;
// Handle to cuBLAS context.
cublasHandle_t cublas_handle_ = nullptr;
// CUDA device stream.
cudaStream_t stream_ = nullptr;
// Indicates whether all the CUDA resources have been initialized.
bool cuda_initialized_ = false;
// Default stream.
// Kernel invocations and memory copies on this stream can be left without
// synchronization.
cudaStream_t DefaultStream() { return streams_[0]; }
static constexpr int kNumCudaStreams = 2;
cudaStream_t streams_[kNumCudaStreams] = {0};
cusparseHandle_t cusparse_handle_ = nullptr;
bool is_cuda_initialized_ = false;
int gpu_device_id_in_use_ = -1;
cudaDeviceProp gpu_device_properties_;
bool is_cuda_memory_pools_supported_ = false;
int cuda_version_major_ = 0;
int cuda_version_minor_ = 0;
#endif // CERES_NO_CUDA
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,11 @@
#include <algorithm>
#include <iterator>
#include <map>
#include <memory>
#include <numeric>
#include <set>
#include <string>
#include <vector>
#include "ceres/evaluator.h"
@@ -49,15 +52,7 @@
#include "ceres/trust_region_minimizer.h"
#include "ceres/trust_region_strategy.h"
namespace ceres {
namespace internal {
using std::map;
using std::max;
using std::min;
using std::set;
using std::string;
using std::vector;
namespace ceres::internal {
CoordinateDescentMinimizer::CoordinateDescentMinimizer(ContextImpl* context)
: context_(context) {
@@ -70,15 +65,19 @@ bool CoordinateDescentMinimizer::Init(
const Program& program,
const ProblemImpl::ParameterMap& parameter_map,
const ParameterBlockOrdering& ordering,
string* error) {
std::string* /*error*/) {
parameter_blocks_.clear();
independent_set_offsets_.clear();
independent_set_offsets_.push_back(0);
// Serialize the OrderedGroups into a vector of parameter block
// offsets for parallel access.
map<ParameterBlock*, int> parameter_block_index;
map<int, set<double*>> group_to_elements = ordering.group_to_elements();
// TODO(sameeragarwal): Investigate if parameter_block_index should be an
// ordered or an unordered container.
std::map<ParameterBlock*, int> parameter_block_index;
std::map<int, std::set<double*>> group_to_elements =
ordering.group_to_elements();
for (const auto& g_t_e : group_to_elements) {
const auto& elements = g_t_e.second;
for (double* parameter_block : elements) {
@@ -93,7 +92,8 @@ bool CoordinateDescentMinimizer::Init(
// The ordering does not have to contain all parameter blocks, so
// assign zero offsets/empty independent sets to these parameter
// blocks.
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program.parameter_blocks();
for (auto* parameter_block : parameter_blocks) {
if (!ordering.IsMember(parameter_block->mutable_user_state())) {
parameter_blocks_.push_back(parameter_block);
@@ -104,7 +104,8 @@ bool CoordinateDescentMinimizer::Init(
// Compute the set of residual blocks that depend on each parameter
// block.
residual_blocks_.resize(parameter_block_index.size());
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
const std::vector<ResidualBlock*>& residual_blocks =
program.residual_blocks();
for (auto* residual_block : residual_blocks) {
const int num_parameter_blocks = residual_block->NumParameterBlocks();
for (int j = 0; j < num_parameter_blocks; ++j) {
@@ -126,7 +127,7 @@ bool CoordinateDescentMinimizer::Init(
void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
double* parameters,
Solver::Summary* summary) {
Solver::Summary* /*summary*/) {
// Set the state and mark all parameter blocks constant.
for (auto* parameter_block : parameter_blocks_) {
parameter_block->SetState(parameters + parameter_block->state_offset());
@@ -135,8 +136,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
std::vector<std::unique_ptr<LinearSolver>> linear_solvers(
options.num_threads);
// std::unique_ptr<LinearSolver*[]> linear_solvers(
// new LinearSolver*[options.num_threads]);
LinearSolver::Options linear_solver_options;
linear_solver_options.type = DENSE_QR;
@@ -155,9 +154,9 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
}
const int num_inner_iteration_threads =
min(options.num_threads, num_problems);
std::min(options.num_threads, num_problems);
evaluator_options_.num_threads =
max(1, options.num_threads / num_inner_iteration_threads);
std::max(1, options.num_threads / num_inner_iteration_threads);
// The parameter blocks in each independent set can be optimized
// in parallel, since they do not co-occur in any residual block.
@@ -170,9 +169,11 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
ParameterBlock* parameter_block = parameter_blocks_[j];
const int old_index = parameter_block->index();
const int old_delta_offset = parameter_block->delta_offset();
const int old_state_offset = parameter_block->state_offset();
parameter_block->SetVarying();
parameter_block->set_index(0);
parameter_block->set_delta_offset(0);
parameter_block->set_state_offset(0);
Program inner_program;
inner_program.mutable_parameter_blocks()->push_back(parameter_block);
@@ -189,11 +190,12 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
Solver::Summary inner_summary;
Solve(&inner_program,
linear_solvers[thread_id].get(),
parameters + parameter_block->state_offset(),
parameters + old_state_offset,
&inner_summary);
parameter_block->set_index(old_index);
parameter_block->set_delta_offset(old_delta_offset);
parameter_block->set_state_offset(old_state_offset);
parameter_block->SetState(parameters +
parameter_block->state_offset());
parameter_block->SetConstant();
@@ -203,10 +205,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
for (auto* parameter_block : parameter_blocks_) {
parameter_block->SetVarying();
}
// for (int i = 0; i < options.num_threads; ++i) {
// delete linear_solvers[i];
//}
}
// Solve the optimization problem for one parameter block.
@@ -218,7 +216,7 @@ void CoordinateDescentMinimizer::Solve(Program* program,
summary->initial_cost = 0.0;
summary->fixed_cost = 0.0;
summary->final_cost = 0.0;
string error;
std::string error;
Minimizer::Options minimizer_options;
minimizer_options.evaluator =
@@ -241,8 +239,10 @@ void CoordinateDescentMinimizer::Solve(Program* program,
bool CoordinateDescentMinimizer::IsOrderingValid(
const Program& program,
const ParameterBlockOrdering& ordering,
string* message) {
const map<int, set<double*>>& group_to_elements =
std::string* message) {
// TODO(sameeragarwal): Investigate if this should be an ordered or an
// unordered group.
const std::map<int, std::set<double*>>& group_to_elements =
ordering.group_to_elements();
// Verify that each group is an independent set
@@ -270,5 +270,4 @@ CoordinateDescentMinimizer::CreateOrdering(const Program& program) {
return ordering;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
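Why independent sets make the parallel loop above safe: two parameter blocks can be optimized simultaneously only if no residual block touches both. A toy greedy coloring of that co-occurrence graph illustrates the idea (purely a sketch; Residual and ColorBlocks are hypothetical names, and Ceres computes its own ordering in CreateOrdering):

#include <vector>

struct Residual { int block_a; int block_b; };  // hypothetical pair residual

// Each color class is one independent set: blocks of equal color share no
// residual and can be optimized in parallel.
std::vector<int> ColorBlocks(int num_blocks,
                             const std::vector<Residual>& residuals) {
  std::vector<std::vector<int>> adjacency(num_blocks);
  for (const Residual& r : residuals) {
    adjacency[r.block_a].push_back(r.block_b);
    adjacency[r.block_b].push_back(r.block_a);
  }
  std::vector<int> color(num_blocks, -1);
  for (int b = 0; b < num_blocks; ++b) {
    std::vector<bool> used(num_blocks + 1, false);
    for (int neighbor : adjacency[b]) {
      if (color[neighbor] >= 0) used[color[neighbor]] = true;
    }
    int c = 0;
    while (used[c]) ++c;  // smallest color unused by any neighbor
    color[b] = c;
  }
  return color;
}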

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
#ifndef CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_
#define CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_
#include <memory>
#include <string>
#include <vector>
@@ -40,8 +41,7 @@
#include "ceres/problem_impl.h"
#include "ceres/solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
class LinearSolver;
@@ -103,7 +103,6 @@ class CERES_NO_EXPORT CoordinateDescentMinimizer final : public Minimizer {
ContextImpl* context_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,7 @@
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
Corrector::Corrector(const double sq_norm, const double rho[3]) {
CHECK_GE(sq_norm, 0.0);
@@ -88,7 +87,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) {
// We now require that the first derivative of the loss function be
// positive only if the second derivative is positive. This is
// because when the second derivative is non-positive, we do not use
// the second order correction suggested by BANS and instead use a
// the second order correction suggested by BAMS and instead use a
// simpler first order strategy which does not use a division by the
// gradient of the loss function.
CHECK_GT(rho[1], 0.0);
@@ -112,7 +111,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) {
void Corrector::CorrectResiduals(const int num_rows, double* residuals) {
DCHECK(residuals != nullptr);
// Equation 11 in BANS.
// Equation 11 in BAMS.
VectorRef(residuals, num_rows) *= residual_scaling_;
}
@@ -129,7 +128,7 @@ void Corrector::CorrectJacobian(const int num_rows,
return;
}
// Equation 11 in BANS.
// Equation 11 in BAMS.
//
// J = sqrt(rho) * (J - alpha^2 r * r' J)
//
@@ -155,5 +154,4 @@ void Corrector::CorrectJacobian(const int num_rows,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
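For cross-checking against the BAMS paper, a dense Eigen restatement of the two corrections, assuming sqrt_rho1, alpha_sq_norm, and residual_scaling have already been computed by the constructor (Ceres applies the same math to sparse Jacobians). The Jacobian must be corrected with the still-uncorrected residuals, hence the ordering:

#include <Eigen/Dense>

void CorrectDense(double sqrt_rho1, double alpha_sq_norm,
                  double residual_scaling,
                  Eigen::VectorXd& residuals, Eigen::MatrixXd& jacobian) {
  // Equation 11 in BAMS: J <- sqrt(rho') * (J - alpha_sq_norm * r r' J).
  jacobian = sqrt_rho1 * (jacobian -
                          alpha_sq_norm * residuals *
                              (residuals.transpose() * jacobian));
  // Then scale the residuals, as CorrectResiduals does above.
  residuals *= residual_scaling;
}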

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,7 @@
//
// Class definition for the object that is responsible for applying a
// second order correction to the Gauss-Newton based on the ideas in
// BANS by Triggs et al.
// BAMS by Triggs et al.
#ifndef CERES_INTERNAL_CORRECTOR_H_
#define CERES_INTERNAL_CORRECTOR_H_
@@ -38,8 +38,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Corrector is responsible for applying the second order correction
// to the residual and jacobian of a least squares problem based on a
@@ -48,7 +47,7 @@ namespace internal {
// The key idea here is to look at the expressions for the robustified
// gauss newton approximation and then take its square root to get the
// corresponding corrections to the residual and jacobian. For the
// full expressions see Eq. 10 and 11 in BANS by Triggs et al.
// full expressions see Eq. 10 and 11 in BAMS by Triggs et al.
class CERES_NO_EXPORT Corrector {
public:
// The constructor takes the squared norm, the value, the first and
@@ -87,8 +86,7 @@ class CERES_NO_EXPORT Corrector {
double residual_scaling_;
double alpha_sq_norm_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,9 +39,6 @@
namespace ceres {
using std::pair;
using std::vector;
Covariance::Covariance(const Covariance::Options& options) {
impl_ = std::make_unique<internal::CovarianceImpl>(options);
}
@@ -49,14 +46,15 @@ Covariance::Covariance(const Covariance::Options& options) {
Covariance::~Covariance() = default;
bool Covariance::Compute(
const vector<pair<const double*, const double*>>& covariance_blocks,
const std::vector<std::pair<const double*, const double*>>&
covariance_blocks,
Problem* problem) {
return impl_->Compute(covariance_blocks, problem->impl_.get());
return impl_->Compute(covariance_blocks, problem->mutable_impl());
}
bool Covariance::Compute(const vector<const double*>& parameter_blocks,
bool Covariance::Compute(const std::vector<const double*>& parameter_blocks,
Problem* problem) {
return impl_->Compute(parameter_blocks, problem->impl_.get());
return impl_->Compute(parameter_blocks, problem->mutable_impl());
}
bool Covariance::GetCovarianceBlock(const double* parameter_block1,
@@ -79,7 +77,7 @@ bool Covariance::GetCovarianceBlockInTangentSpace(
}
bool Covariance::GetCovarianceMatrix(
const vector<const double*>& parameter_blocks,
const std::vector<const double*>& parameter_blocks,
double* covariance_matrix) const {
return impl_->GetCovarianceMatrixInTangentOrAmbientSpace(parameter_blocks,
true, // ambient

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -57,24 +57,12 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::swap;
namespace ceres::internal {
using CovarianceBlocks = std::vector<std::pair<const double*, const double*>>;
CovarianceImpl::CovarianceImpl(const Covariance::Options& options)
: options_(options), is_computed_(false), is_valid_(false) {
#ifdef CERES_NO_THREADS
if (options_.num_threads > 1) {
LOG(WARNING) << "No threading support is compiled into this binary; "
<< "only options.num_threads = 1 is supported. Switching "
<< "to single threaded mode.";
options_.num_threads = 1;
}
#endif
evaluate_options_.num_threads = options_.num_threads;
evaluate_options_.apply_loss_function = options_.apply_loss_function;
}
@@ -176,7 +164,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace(
const double* parameter_block2 = original_parameter_block2;
const bool transpose = parameter_block1 > parameter_block2;
if (transpose) {
swap(parameter_block1, parameter_block2);
std::swap(parameter_block1, parameter_block2);
}
// Find where in the covariance matrix the block is located.
@@ -190,7 +178,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace(
const int* cols_begin = cols + rows[row_begin];
// The only part that requires work is walking the compressed column
// vector to determine where the set of columns correspnding to the
// vector to determine where the set of columns corresponding to the
// covariance block begin.
int offset = 0;
while (cols_begin[offset] != col_begin && offset < row_size) {
@@ -322,9 +310,8 @@ bool CovarianceImpl::GetCovarianceMatrixInTangentOrAmbientSpace(
// Assemble the blocks in the covariance matrix.
MatrixRef covariance(covariance_matrix, covariance_size, covariance_size);
const int num_threads = options_.num_threads;
std::unique_ptr<double[]> workspace(
new double[num_threads * max_covariance_block_size *
max_covariance_block_size]);
auto workspace = std::make_unique<double[]>(
num_threads * max_covariance_block_size * max_covariance_block_size);
bool success = true;
@@ -481,14 +468,12 @@ bool CovarianceImpl::ComputeCovarianceSparsity(
// Iterate over the covariance blocks contained in this row block
// and count the number of columns in this row block.
int num_col_blocks = 0;
int num_columns = 0;
for (int j = i; j < covariance_blocks.size(); ++j, ++num_col_blocks) {
const std::pair<const double*, const double*>& block_pair =
covariance_blocks[j];
if (block_pair.first != row_block) {
break;
}
num_columns += problem->ParameterBlockTangentSize(block_pair.second);
}
// Fill out all the compressed rows for this parameter block.
@@ -598,9 +583,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
cholmod_jacobian.ncol = num_cols;
cholmod_jacobian.nzmax = num_nonzeros;
cholmod_jacobian.nz = nullptr;
cholmod_jacobian.p = reinterpret_cast<void*>(&transpose_rows[0]);
cholmod_jacobian.i = reinterpret_cast<void*>(&transpose_cols[0]);
cholmod_jacobian.x = reinterpret_cast<void*>(&transpose_values[0]);
cholmod_jacobian.p = reinterpret_cast<void*>(transpose_rows.data());
cholmod_jacobian.i = reinterpret_cast<void*>(transpose_cols.data());
cholmod_jacobian.x = reinterpret_cast<void*>(transpose_values.data());
cholmod_jacobian.z = nullptr;
cholmod_jacobian.stype = 0; // Matrix is not symmetric.
cholmod_jacobian.itype = CHOLMOD_LONG;
@@ -628,13 +613,15 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
// more efficient, both in runtime as well as the quality of
// ordering computed. So, it maybe worth doing that analysis
// separately.
const SuiteSparse_long rank = SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD,
SPQR_DEFAULT_TOL,
cholmod_jacobian.ncol,
&cholmod_jacobian,
&R,
&permutation,
&cc);
const SuiteSparse_long rank = SuiteSparseQR<double>(
SPQR_ORDERING_BESTAMD,
options_.column_pivot_threshold < 0 ? SPQR_DEFAULT_TOL
: options_.column_pivot_threshold,
static_cast<int64_t>(cholmod_jacobian.ncol),
&cholmod_jacobian,
&R,
&permutation,
&cc);
event_logger.AddEvent("Numeric Factorization");
if (R == nullptr) {
LOG(ERROR) << "Something is wrong. SuiteSparseQR returned R = nullptr.";
@@ -678,7 +665,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
// Since the covariance matrix is symmetric, the i^th row and column
// are equal.
const int num_threads = options_.num_threads;
std::unique_ptr<double[]> workspace(new double[num_threads * num_cols]);
auto workspace = std::make_unique<double[]>(num_threads * num_cols);
problem_->context()->EnsureMinimumThreads(num_threads);
ParallelFor(
@@ -830,19 +817,23 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
jacobian.values.data());
event_logger.AddEvent("ConvertToSparseMatrix");
Eigen::SparseQR<EigenSparseMatrix, Eigen::COLAMDOrdering<int>> qr_solver(
sparse_jacobian);
Eigen::SparseQR<EigenSparseMatrix, Eigen::COLAMDOrdering<int>> qr;
if (options_.column_pivot_threshold > 0) {
qr.setPivotThreshold(options_.column_pivot_threshold);
}
qr.compute(sparse_jacobian);
event_logger.AddEvent("QRDecomposition");
if (qr_solver.info() != Eigen::Success) {
if (qr.info() != Eigen::Success) {
LOG(ERROR) << "Eigen::SparseQR decomposition failed.";
return false;
}
if (qr_solver.rank() < jacobian.num_cols) {
if (qr.rank() < jacobian.num_cols) {
LOG(ERROR) << "Jacobian matrix is rank deficient. "
<< "Number of columns: " << jacobian.num_cols
<< " rank: " << qr_solver.rank();
<< " rank: " << qr.rank();
return false;
}
@@ -852,7 +843,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
// Compute the inverse column permutation used by QR factorization.
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> inverse_permutation =
qr_solver.colsPermutation().inverse();
qr.colsPermutation().inverse();
// The following loop exploits the fact that the i^th column of A^{-1}
// is given by the solution to the linear system
@@ -865,7 +856,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
// are equal.
const int num_cols = jacobian.num_cols;
const int num_threads = options_.num_threads;
std::unique_ptr<double[]> workspace(new double[num_threads * num_cols]);
auto workspace = std::make_unique<double[]>(num_threads * num_cols);
problem_->context()->EnsureMinimumThreads(num_threads);
ParallelFor(
@@ -875,9 +866,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
if (row_end != row_begin) {
double* solution = workspace.get() + thread_id * num_cols;
SolveRTRWithSparseRHS<int>(num_cols,
qr_solver.matrixR().innerIndexPtr(),
qr_solver.matrixR().outerIndexPtr(),
&qr_solver.matrixR().data().value(0),
qr.matrixR().innerIndexPtr(),
qr.matrixR().outerIndexPtr(),
&qr.matrixR().data().value(0),
inverse_permutation.indices().coeff(r),
solution);
@@ -895,5 +886,4 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
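The loop above relies on the i-th column of (J'J)^{-1} being the solution of R'R x = e_i, where J = QR. A dense, unpermuted Eigen sketch of that idea (the real code solves against the sparse R and applies the column permutation on both sides):

#include <Eigen/Dense>

Eigen::MatrixXd CovarianceFromR(const Eigen::MatrixXd& R) {
  const int n = R.cols();
  Eigen::MatrixXd covariance(n, n);
  for (int i = 0; i < n; ++i) {
    const Eigen::VectorXd e_i = Eigen::VectorXd::Unit(n, i);
    // Solve R' y = e_i (forward), then R x = y (backward); x is the i-th
    // column of (R'R)^{-1} = (J'J)^{-1}.
    const Eigen::VectorXd y =
        R.transpose().triangularView<Eigen::Lower>().solve(e_i);
    covariance.col(i) = R.triangularView<Eigen::Upper>().solve(y);
  }
  return covariance;
}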

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/problem_impl.h"
#include "ceres/suitesparse.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CompressedRowSparseMatrix;
@@ -96,8 +95,7 @@ class CERES_NO_EXPORT CovarianceImpl {
std::unique_ptr<CompressedRowSparseMatrix> covariance_matrix_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -0,0 +1,103 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_block_sparse_crs_view.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_kernels_bsm_to_crs.h"
namespace ceres::internal {
CudaBlockSparseCRSView::CudaBlockSparseCRSView(const BlockSparseMatrix& bsm,
ContextImpl* context)
: context_(context) {
block_structure_ = std::make_unique<CudaBlockSparseStructure>(
*bsm.block_structure(), context);
CudaBuffer<int32_t> rows(context, bsm.num_rows() + 1);
CudaBuffer<int32_t> cols(context, bsm.num_nonzeros());
FillCRSStructure(block_structure_->num_row_blocks(),
bsm.num_rows(),
block_structure_->first_cell_in_row_block(),
block_structure_->cells(),
block_structure_->row_blocks(),
block_structure_->col_blocks(),
rows.data(),
cols.data(),
context->DefaultStream(),
context->is_cuda_memory_pools_supported_);
is_crs_compatible_ = block_structure_->IsCrsCompatible();
// If the matrix is CRS-compatible, we can drop the block structure and
// don't need streamed_buffer_.
if (is_crs_compatible_) {
VLOG(3) << "Block-sparse matrix is compatible with CRS, discarding "
"block-structure";
block_structure_ = nullptr;
} else {
streamed_buffer_ = std::make_unique<CudaStreamedBuffer<double>>(
context_, kMaxTemporaryArraySize);
}
crs_matrix_ = std::make_unique<CudaSparseMatrix>(
bsm.num_cols(), std::move(rows), std::move(cols), context);
UpdateValues(bsm);
}
void CudaBlockSparseCRSView::UpdateValues(const BlockSparseMatrix& bsm) {
if (is_crs_compatible_) {
// Values of CRS-compatible matrices can be copied as-is
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(crs_matrix_->mutable_values(),
bsm.values(),
bsm.num_nonzeros() * sizeof(double),
cudaMemcpyHostToDevice,
context_->DefaultStream()));
return;
}
streamed_buffer_->CopyToGpu(
bsm.values(),
bsm.num_nonzeros(),
[bs = block_structure_.get(), crs = crs_matrix_.get()](
const double* values, int num_values, int offset, auto stream) {
PermuteToCRS(offset,
num_values,
bs->num_row_blocks(),
bs->first_cell_in_row_block(),
bs->cells(),
bs->row_blocks(),
bs->col_blocks(),
crs->rows(),
values,
crs->mutable_values(),
stream);
});
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,108 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
//
#ifndef CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
#define CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include <memory>
#include "ceres/block_sparse_matrix.h"
#include "ceres/cuda_block_structure.h"
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_streamed_buffer.h"
namespace ceres::internal {
// We use the cuSPARSE library for SpMV operations. However, it does not
// support a block-sparse format with varying block sizes. Thus, we perform
// the following operations in order to compute products of block-sparse
// matrices and dense vectors on the GPU:
// - Once per block-sparse structure update:
// - Compute CRS structure from block-sparse structure and check if values of
// block-sparse matrix would have the same order as values of CRS matrix
// - Once per block-sparse values update:
// - Update values in CRS matrix with values of block-sparse matrix
//
// Only block-sparse matrices with sequential order of cells are supported.
//
// UpdateValues method updates values:
// - In a single host-to-device copy for matrices with CRS-compatible value
// layout
// - Simultaneously transferring and permuting values using CudaStreamedBuffer
// otherwise
class CERES_NO_EXPORT CudaBlockSparseCRSView {
public:
// Initializes the internal CRS matrix using the structure and values of the
// block-sparse matrix. For block-sparse matrices whose value layout differs
// from CRS, the block-sparse structure is stored as well.
CudaBlockSparseCRSView(const BlockSparseMatrix& bsm, ContextImpl* context);
const CudaSparseMatrix* crs_matrix() const { return crs_matrix_.get(); }
CudaSparseMatrix* mutable_crs_matrix() { return crs_matrix_.get(); }
// Update values of crs_matrix_ using values of block-sparse matrix.
// Assumes that bsm has the same block-sparse structure as matrix that was
// used for construction.
void UpdateValues(const BlockSparseMatrix& bsm);
// Returns true if block-sparse matrix had CRS-compatible value layout
bool IsCrsCompatible() const { return is_crs_compatible_; }
void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const {
crs_matrix()->LeftMultiplyAndAccumulate(x, y);
}
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const {
crs_matrix()->RightMultiplyAndAccumulate(x, y);
}
private:
// The value permutation kernel performs a single element-wise operation per
// thread, thus performing the permutation in chunks of 8 MiB (1M doubles) of
// block-sparse values seems reasonable.
static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024;
std::unique_ptr<CudaSparseMatrix> crs_matrix_;
// Only created if block-sparse matrix has non-CRS value layout
std::unique_ptr<CudaStreamedBuffer<double>> streamed_buffer_;
// Only stored if block-sparse matrix has non-CRS value layout
std::unique_ptr<CudaBlockSparseStructure> block_structure_;
bool is_crs_compatible_;
ContextImpl* context_;
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
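Putting the class to work, a hedged usage sketch (mirroring CudaBlockSparseCRSViewTest below; the helper name GpuSpMV and include paths are illustrative):

#include "ceres/cuda_block_sparse_crs_view.h"
#include "ceres/cuda_vector.h"

void GpuSpMV(const ceres::internal::BlockSparseMatrix& bsm,
             ceres::internal::ContextImpl* context) {
  // One-time: derive the CRS structure on the GPU from the block structure;
  // the constructor also uploads the initial values.
  ceres::internal::CudaBlockSparseCRSView view(bsm, context);
  ceres::internal::CudaVector x(context, bsm.num_cols());
  ceres::internal::CudaVector y(context, bsm.num_rows());
  // ... fill x, e.g. via x.CopyFromCpu(...) ...
  y.SetZero();
  view.RightMultiplyAndAccumulate(x, &y);  // y += J x on the GPU
  // Per-iteration: refresh values only; copied as-is when CRS-compatible,
  // otherwise streamed through CudaStreamedBuffer and permuted on the fly.
  view.UpdateValues(bsm);
}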

View File

@@ -0,0 +1,164 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_block_sparse_crs_view.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <numeric>
#ifndef CERES_NO_CUDA
namespace ceres::internal {
class CudaBlockSparseCRSViewTest : public ::testing::Test {
protected:
void SetUp() final {
std::string message;
CHECK(context_.InitCuda(&message))
<< "InitCuda() failed because: " << message;
BlockSparseMatrix::RandomMatrixOptions options;
options.num_row_blocks = 1234;
options.min_row_block_size = 1;
options.max_row_block_size = 10;
options.num_col_blocks = 567;
options.min_col_block_size = 1;
options.max_col_block_size = 10;
options.block_density = 0.2;
std::mt19937 rng;
// Block-sparse matrix with order of values different from CRS
block_sparse_non_crs_compatible_ =
BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
std::iota(block_sparse_non_crs_compatible_->mutable_values(),
block_sparse_non_crs_compatible_->mutable_values() +
block_sparse_non_crs_compatible_->num_nonzeros(),
1);
options.max_row_block_size = 1;
// Block-sparse matrix with CRS order of values (row-blocks are rows)
block_sparse_crs_compatible_rows_ =
BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
block_sparse_crs_compatible_rows_->mutable_values() +
block_sparse_crs_compatible_rows_->num_nonzeros(),
1);
// Block-sparse matrix with CRS order of values (single cell per row-block)
auto bs = std::make_unique<CompressedRowBlockStructure>(
*block_sparse_non_crs_compatible_->block_structure());
int num_nonzeros = 0;
for (auto& r : bs->rows) {
const int num_cells = r.cells.size();
if (num_cells > 1) {
std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
const int selected_cell = uniform_cell(rng);
std::swap(r.cells[0], r.cells[selected_cell]);
r.cells.resize(1);
}
const int row_block_size = r.block.size;
for (auto& c : r.cells) {
c.position = num_nonzeros;
const int col_block_size = bs->cols[c.block_id].size;
num_nonzeros += col_block_size * row_block_size;
}
}
block_sparse_crs_compatible_single_cell_ =
std::make_unique<BlockSparseMatrix>(bs.release());
std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
block_sparse_crs_compatible_single_cell_->mutable_values() +
block_sparse_crs_compatible_single_cell_->num_nonzeros(),
1);
}
void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
ASSERT_EQ(csm.num_cols(), bsm.num_cols());
ASSERT_EQ(csm.num_rows(), bsm.num_rows());
ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
const int num_rows = bsm.num_rows();
const int num_cols = bsm.num_cols();
Vector x(num_cols);
Vector y(num_rows);
CudaVector x_cuda(&context_, num_cols);
CudaVector y_cuda(&context_, num_rows);
Vector y_cuda_host(num_rows);
for (int i = 0; i < num_cols; ++i) {
x.setZero();
y.setZero();
y_cuda.SetZero();
x[i] = 1.;
x_cuda.CopyFromCpu(x);
csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
bsm.RightMultiplyAndAccumulate(
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
y_cuda.CopyTo(&y_cuda_host);
// There will be at most 1 non-zero product per row, and the integer-valued
// products are exactly representable in doubles, so we expect an exact match.
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
}
}
std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
ContextImpl context_;
};
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
auto view =
CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_);
ASSERT_EQ(view.IsCrsCompatible(), false);
auto matrix = view.crs_matrix();
Compare(*block_sparse_non_crs_compatible_, *matrix);
}
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
auto view =
CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_);
ASSERT_EQ(view.IsCrsCompatible(), true);
auto matrix = view.crs_matrix();
Compare(*block_sparse_crs_compatible_rows_, *matrix);
}
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_,
&context_);
ASSERT_EQ(view.IsCrsCompatible(), true);
auto matrix = view.crs_matrix();
Compare(*block_sparse_crs_compatible_single_cell_, *matrix);
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,234 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_block_structure.h"
#ifndef CERES_NO_CUDA
namespace ceres::internal {
namespace {
// Dimension of a sorted array of blocks
inline int Dimension(const std::vector<Block>& blocks) {
if (blocks.empty()) {
return 0;
}
const auto& last = blocks.back();
return last.size + last.position;
}
} // namespace
CudaBlockSparseStructure::CudaBlockSparseStructure(
const CompressedRowBlockStructure& block_structure, ContextImpl* context)
: CudaBlockSparseStructure(block_structure, 0, context) {}
CudaBlockSparseStructure::CudaBlockSparseStructure(
const CompressedRowBlockStructure& block_structure,
const int num_col_blocks_e,
ContextImpl* context)
: first_cell_in_row_block_(context),
value_offset_row_block_f_(context),
cells_(context),
row_blocks_(context),
col_blocks_(context) {
// Row blocks extracted from CompressedRowBlockStructure::rows
std::vector<Block> row_blocks;
// Column blocks can be reused as-is
const auto& col_blocks = block_structure.cols;
// Row block offset is an index of the first cell corresponding to row block
std::vector<int> first_cell_in_row_block;
// Offset of the first value in the first non-empty row-block of F sub-matrix
std::vector<int> value_offset_row_block_f;
// Flat array of all cells from all row-blocks
std::vector<Cell> cells;
int f_values_offset = -1;
num_nonzeros_e_ = 0;
is_crs_compatible_ = true;
num_row_blocks_ = block_structure.rows.size();
num_col_blocks_ = col_blocks.size();
row_blocks.reserve(num_row_blocks_);
first_cell_in_row_block.reserve(num_row_blocks_ + 1);
value_offset_row_block_f.reserve(num_row_blocks_ + 1);
num_nonzeros_ = 0;
// Block-sparse matrices arising from block-jacobian writer are expected to
// have sequential layout (for partitioned matrices - it is expected that both
// E and F sub-matrices have sequential layout).
bool sequential_layout = true;
int row_block_id = 0;
num_row_blocks_e_ = 0;
for (; row_block_id < num_row_blocks_; ++row_block_id) {
const auto& r = block_structure.rows[row_block_id];
const int row_block_size = r.block.size;
const int num_cells = r.cells.size();
if (num_col_blocks_e == 0 || r.cells.size() == 0 ||
r.cells[0].block_id >= num_col_blocks_e) {
break;
}
num_row_blocks_e_ = row_block_id + 1;
// In the E sub-matrix there is exactly one E cell per row.
// Since E cells are stored separately from F cells, CRS-compatibility of
// the F sub-matrix only breaks if there are more than 2 cells in a row
// (that is, more than 1 cell in the F sub-matrix).
if (num_cells > 2 && row_block_size > 1) {
is_crs_compatible_ = false;
}
row_blocks.emplace_back(r.block);
first_cell_in_row_block.push_back(cells.size());
for (int cell_id = 0; cell_id < num_cells; ++cell_id) {
const auto& c = r.cells[cell_id];
const int col_block_size = col_blocks[c.block_id].size;
const int cell_size = col_block_size * row_block_size;
cells.push_back(c);
if (cell_id == 0) {
DCHECK(c.position == num_nonzeros_e_);
num_nonzeros_e_ += cell_size;
} else {
if (f_values_offset == -1) {
num_nonzeros_ = c.position;
f_values_offset = c.position;
}
sequential_layout &= c.position == num_nonzeros_;
num_nonzeros_ += cell_size;
if (cell_id == 1) {
// Correct value_offset_row_block_f for empty row-blocks of F
// preceding this one
for (auto it = value_offset_row_block_f.rbegin();
it != value_offset_row_block_f.rend();
++it) {
if (*it != -1) break;
*it = c.position;
}
value_offset_row_block_f.push_back(c.position);
}
}
}
if (num_cells == 1) {
value_offset_row_block_f.push_back(-1);
}
}
for (; row_block_id < num_row_blocks_; ++row_block_id) {
const auto& r = block_structure.rows[row_block_id];
const int row_block_size = r.block.size;
const int num_cells = r.cells.size();
// After num_row_blocks_e_ row-blocks, there should be no cells in the E
// sub-matrix. Thus CRS-compatibility of the F sub-matrix breaks if there is
// more than one cell in a row-block.
if (num_cells > 1 && row_block_size > 1) {
is_crs_compatible_ = false;
}
row_blocks.emplace_back(r.block);
first_cell_in_row_block.push_back(cells.size());
if (r.cells.empty()) {
value_offset_row_block_f.push_back(-1);
} else {
for (auto it = value_offset_row_block_f.rbegin();
it != value_offset_row_block_f.rend();
++it) {
if (*it != -1) break;
*it = r.cells[0].position;
}
value_offset_row_block_f.push_back(r.cells[0].position);
}
for (const auto& c : r.cells) {
const int col_block_size = col_blocks[c.block_id].size;
const int cell_size = col_block_size * row_block_size;
cells.push_back(c);
DCHECK(c.block_id >= num_col_blocks_e);
if (f_values_offset == -1) {
num_nonzeros_ = c.position;
f_values_offset = c.position;
}
sequential_layout &= c.position == num_nonzeros_;
num_nonzeros_ += cell_size;
}
}
if (f_values_offset == -1) {
f_values_offset = num_nonzeros_e_;
num_nonzeros_ = num_nonzeros_e_;
}
// Fill non-zero offsets for the last rows of F submatrix
for (auto it = value_offset_row_block_f.rbegin();
it != value_offset_row_block_f.rend();
++it) {
if (*it != -1) break;
*it = num_nonzeros_;
}
value_offset_row_block_f.push_back(num_nonzeros_);
CHECK_EQ(num_nonzeros_e_, f_values_offset);
first_cell_in_row_block.push_back(cells.size());
num_cells_ = cells.size();
num_rows_ = Dimension(row_blocks);
num_cols_ = Dimension(col_blocks);
CHECK(sequential_layout);
if (VLOG_IS_ON(3)) {
const size_t first_cell_in_row_block_size =
first_cell_in_row_block.size() * sizeof(int);
const size_t cells_size = cells.size() * sizeof(Cell);
const size_t row_blocks_size = row_blocks.size() * sizeof(Block);
const size_t col_blocks_size = col_blocks.size() * sizeof(Block);
const size_t total_size = first_cell_in_row_block_size + cells_size +
col_blocks_size + row_blocks_size;
const double ratio =
(100. * total_size) / (num_nonzeros_ * (sizeof(int) + sizeof(double)) +
num_rows_ * sizeof(int));
VLOG(3) << "\nCudaBlockSparseStructure:\n"
"\tRow block offsets: "
<< first_cell_in_row_block_size
<< " bytes\n"
"\tColumn blocks: "
<< col_blocks_size
<< " bytes\n"
"\tRow blocks: "
<< row_blocks_size
<< " bytes\n"
"\tCells: "
<< cells_size << " bytes\n\tTotal: " << total_size
<< " bytes of GPU memory (" << ratio << "% of CRS matrix size)";
}
first_cell_in_row_block_.CopyFromCpuVector(first_cell_in_row_block);
cells_.CopyFromCpuVector(cells);
row_blocks_.CopyFromCpuVector(row_blocks);
col_blocks_.CopyFromCpuVector(col_blocks);
if (num_col_blocks_e || num_row_blocks_e_) {
value_offset_row_block_f_.CopyFromCpuVector(value_offset_row_block_f);
}
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,120 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
#define CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include "ceres/block_structure.h"
#include "ceres/cuda_buffer.h"
namespace ceres::internal {
class CudaBlockStructureTest;
// This class stores a read-only block-sparse structure in gpu memory.
// Invariants are the same as those of CompressedRowBlockStructure.
// In order to simplify allocation and copying of data to the GPU, cells from
// all row-blocks are stored sequentially in a single array. The array
// first_cell_in_row_block of size num_row_blocks + 1 identifies the range of
// cells corresponding to each row-block. Cells corresponding to the i-th
// row-block are stored in the sub-array cells[first_cell_in_row_block[i]; ...
// first_cell_in_row_block[i + 1] - 1], and their order is preserved.
class CERES_NO_EXPORT CudaBlockSparseStructure {
public:
// CompressedRowBlockStructure contains a vector of CompressedLists, with
// each CompressedList containing a vector of Cells. We precompute a flat
// array of cells on the CPU and transfer it to the GPU.
CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure,
ContextImpl* context);
// In the case of partitioned matrices, the number of non-zeros in E and the
// layout of F are computed as well.
CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure,
const int num_col_blocks_e,
ContextImpl* context);
int num_rows() const { return num_rows_; }
int num_cols() const { return num_cols_; }
int num_cells() const { return num_cells_; }
int num_nonzeros() const { return num_nonzeros_; }
// When the partitioned-matrix constructor was used, returns the number of
// non-zeros in the E sub-matrix.
int num_nonzeros_e() const { return num_nonzeros_e_; }
int num_row_blocks() const { return num_row_blocks_; }
int num_row_blocks_e() const { return num_row_blocks_e_; }
int num_col_blocks() const { return num_col_blocks_; }
// Returns true if values from the block-sparse matrix (the F sub-matrix in
// the partitioned case) can be copied to the CRS matrix as-is. This is
// possible if each row-block is already stored in CRS order, i.e. it either:
// - consists of a single row, or
// - contains a single cell
bool IsCrsCompatible() const { return is_crs_compatible_; }
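// Illustrative example (editorial, not part of the upstream sources): a
// row-block of height 1 stores its values as cell0-row0, cell1-row0, ...,
// which is exactly CRS order, and a row-block with a single cell stores that
// cell's rows contiguously, which is CRS order as well. A row-block of height
// 2 with two cells, however, stores each cell contiguously while CRS
// interleaves the cells row by row, so a plain copy would be incorrect.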
// Device pointer to array of num_row_blocks + 1 indices of the first cell of
// row block
const int* first_cell_in_row_block() const {
return first_cell_in_row_block_.data();
}
// Device pointer to array of num_row_blocks + 1 indices of the first value in
// this or subsequent row-blocks of submatrix F
const int* value_offset_row_block_f() const {
return value_offset_row_block_f_.data();
}
// Device pointer to array of num_cells cells, sorted by row-block
const Cell* cells() const { return cells_.data(); }
// Device pointer to array of row blocks
const Block* row_blocks() const { return row_blocks_.data(); }
// Device pointer to array of column blocks
const Block* col_blocks() const { return col_blocks_.data(); }
private:
int num_rows_;
int num_cols_;
int num_cells_;
int num_nonzeros_;
int num_nonzeros_e_;
int num_row_blocks_;
int num_row_blocks_e_;
int num_col_blocks_;
bool is_crs_compatible_;
CudaBuffer<int> first_cell_in_row_block_;
CudaBuffer<int> value_offset_row_block_f_;
CudaBuffer<Cell> cells_;
CudaBuffer<Block> row_blocks_;
CudaBuffer<Block> col_blocks_;
friend class CudaBlockStructureTest;
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif  // CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
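
The flattening contract described in the class comment can be pictured with a
short host-side sketch. This is an editorial illustration only, not upstream
code; `bs` stands for an arbitrary CompressedRowBlockStructure:

std::vector<int> first_cell_in_row_block = {0};
std::vector<Cell> flat_cells;
for (const auto& row_block : bs.rows) {
  // Cells of every row-block are appended in order, preserving their layout.
  flat_cells.insert(
      flat_cells.end(), row_block.cells.begin(), row_block.cells.end());
  first_cell_in_row_block.push_back(static_cast<int>(flat_cells.size()));
}
// These two arrays are what the constructor uploads into the CudaBuffers.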

View File

@@ -0,0 +1,144 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <numeric>
#include "ceres/block_sparse_matrix.h"
#include "ceres/cuda_block_structure.h"
namespace ceres::internal {
class CudaBlockStructureTest : public ::testing::Test {
protected:
void SetUp() final {
std::string message;
CHECK(context_.InitCuda(&message))
<< "InitCuda() failed because: " << message;
BlockSparseMatrix::RandomMatrixOptions options;
options.num_row_blocks = 1234;
options.min_row_block_size = 1;
options.max_row_block_size = 10;
options.num_col_blocks = 567;
options.min_col_block_size = 1;
options.max_col_block_size = 10;
options.block_density = 0.2;
std::mt19937 rng;
A_ = BlockSparseMatrix::CreateRandomMatrix(options, rng);
std::iota(
A_->mutable_values(), A_->mutable_values() + A_->num_nonzeros(), 1);
}
std::vector<Cell> GetCells(const CudaBlockSparseStructure& structure) {
const auto& cuda_buffer = structure.cells_;
std::vector<Cell> cells(cuda_buffer.size());
cuda_buffer.CopyToCpu(cells.data(), cells.size());
return cells;
}
std::vector<Block> GetRowBlocks(const CudaBlockSparseStructure& structure) {
const auto& cuda_buffer = structure.row_blocks_;
std::vector<Block> blocks(cuda_buffer.size());
cuda_buffer.CopyToCpu(blocks.data(), blocks.size());
return blocks;
}
std::vector<Block> GetColBlocks(const CudaBlockSparseStructure& structure) {
const auto& cuda_buffer = structure.col_blocks_;
std::vector<Block> blocks(cuda_buffer.size());
cuda_buffer.CopyToCpu(blocks.data(), blocks.size());
return blocks;
}
std::vector<int> GetRowBlockOffsets(
const CudaBlockSparseStructure& structure) {
const auto& cuda_buffer = structure.first_cell_in_row_block_;
std::vector<int> first_cell_in_row_block(cuda_buffer.size());
cuda_buffer.CopyToCpu(first_cell_in_row_block.data(),
first_cell_in_row_block.size());
return first_cell_in_row_block;
}
std::unique_ptr<BlockSparseMatrix> A_;
ContextImpl context_;
};
TEST_F(CudaBlockStructureTest, StructureIdentity) {
auto block_structure = A_->block_structure();
const int num_row_blocks = block_structure->rows.size();
const int num_col_blocks = block_structure->cols.size();
CudaBlockSparseStructure cuda_block_structure(*block_structure, &context_);
ASSERT_EQ(cuda_block_structure.num_rows(), A_->num_rows());
ASSERT_EQ(cuda_block_structure.num_cols(), A_->num_cols());
ASSERT_EQ(cuda_block_structure.num_nonzeros(), A_->num_nonzeros());
ASSERT_EQ(cuda_block_structure.num_row_blocks(), num_row_blocks);
ASSERT_EQ(cuda_block_structure.num_col_blocks(), num_col_blocks);
std::vector<Block> blocks = GetColBlocks(cuda_block_structure);
ASSERT_EQ(blocks.size(), num_col_blocks);
for (int i = 0; i < num_col_blocks; ++i) {
EXPECT_EQ(block_structure->cols[i].position, blocks[i].position);
EXPECT_EQ(block_structure->cols[i].size, blocks[i].size);
}
std::vector<Cell> cells = GetCells(cuda_block_structure);
std::vector<int> first_cell_in_row_block =
GetRowBlockOffsets(cuda_block_structure);
blocks = GetRowBlocks(cuda_block_structure);
ASSERT_EQ(blocks.size(), num_row_blocks);
ASSERT_EQ(first_cell_in_row_block.size(), num_row_blocks + 1);
ASSERT_EQ(first_cell_in_row_block.back(), cells.size());
for (int i = 0; i < num_row_blocks; ++i) {
const int num_cells = block_structure->rows[i].cells.size();
EXPECT_EQ(blocks[i].position, block_structure->rows[i].block.position);
EXPECT_EQ(blocks[i].size, block_structure->rows[i].block.size);
const int first_cell = first_cell_in_row_block[i];
const int last_cell = first_cell_in_row_block[i + 1];
ASSERT_EQ(last_cell - first_cell, num_cells);
for (int j = 0; j < num_cells; ++j) {
EXPECT_EQ(cells[first_cell + j].block_id,
block_structure->rows[i].cells[j].block_id);
EXPECT_EQ(cells[first_cell + j].position,
block_structure->rows[i].cells[j].position);
}
}
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
#ifndef CERES_INTERNAL_CUDA_BUFFER_H_
#define CERES_INTERNAL_CUDA_BUFFER_H_
#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
@@ -40,17 +41,27 @@
#include "cuda_runtime.h"
#include "glog/logging.h"
namespace ceres::internal {
// An encapsulated buffer to maintain GPU memory, and handle transfers between
// GPU and system memory. It is the responsibility of the user to ensure that
// the appropriate GPU device is selected before each subroutine is called. This
// is particularly important when using multiple GPU devices on different CPU
// threads, since active Cuda devices are determined by the cuda runtime on a
// per-thread basis. Note that unless otherwise specified, all methods use the
// default stream, and are synchronous.
// per-thread basis.
template <typename T>
class CudaBuffer {
public:
CudaBuffer() = default;
explicit CudaBuffer(ContextImpl* context) : context_(context) {}
CudaBuffer(ContextImpl* context, int size) : context_(context) {
Reserve(size);
}
CudaBuffer(CudaBuffer&& other)
: data_(other.data_), size_(other.size_), context_(other.context_) {
other.data_ = nullptr;
other.size_ = 0;
}
CudaBuffer(const CudaBuffer&) = delete;
CudaBuffer& operator=(const CudaBuffer&) = delete;
@@ -67,41 +78,95 @@ class CudaBuffer {
if (data_ != nullptr) {
CHECK_EQ(cudaFree(data_), cudaSuccess);
}
CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess);
CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess)
<< "Failed to allocate " << size * sizeof(T)
<< " bytes of GPU memory";
size_ = size;
}
}
// Perform an asynchronous copy from CPU memory to GPU memory using the stream
// provided.
void CopyToGpuAsync(const T* data, const size_t size, cudaStream_t stream) {
// Perform an asynchronous copy from CPU memory to GPU memory managed by this
// CudaBuffer instance using the stream provided.
void CopyFromCpu(const T* data, const size_t size) {
Reserve(size);
CHECK_EQ(cudaMemcpyAsync(
data_, data, size * sizeof(T), cudaMemcpyHostToDevice, stream),
CHECK_EQ(cudaMemcpyAsync(data_,
data,
size * sizeof(T),
cudaMemcpyHostToDevice,
context_->DefaultStream()),
cudaSuccess);
}
// Copy data from the GPU to CPU memory. This is necessarily synchronous since
// any potential GPU kernels that may be writing to the buffer must finish
// before the transfer happens.
void CopyToHost(T* data, const size_t size) {
// Perform an asynchronous copy from a vector in CPU memory to GPU memory
// managed by this CudaBuffer instance.
void CopyFromCpuVector(const std::vector<T>& data) {
Reserve(data.size());
CHECK_EQ(cudaMemcpyAsync(data_,
data.data(),
data.size() * sizeof(T),
cudaMemcpyHostToDevice,
context_->DefaultStream()),
cudaSuccess);
}
// Perform an asynchronous copy from another GPU memory array to the GPU
// memory managed by this CudaBuffer instance using the stream provided.
void CopyFromGPUArray(const T* data, const size_t size) {
Reserve(size);
CHECK_EQ(cudaMemcpyAsync(data_,
data,
size * sizeof(T),
cudaMemcpyDeviceToDevice,
context_->DefaultStream()),
cudaSuccess);
}
// Copy data from the GPU memory managed by this CudaBuffer instance to CPU
// memory. It is the caller's responsibility to ensure that the CPU memory
// pointer is valid, i.e. it is not null, and that it points to memory of
// at least this->size() size. This method ensures all previously dispatched
// GPU operations on the specified stream have completed before copying the
// data to CPU memory.
void CopyToCpu(T* data, const size_t size) const {
CHECK(data_ != nullptr);
CHECK_EQ(cudaMemcpy(data, data_, size * sizeof(T), cudaMemcpyDeviceToHost),
CHECK_EQ(cudaMemcpyAsync(data,
data_,
size * sizeof(T),
cudaMemcpyDeviceToHost,
context_->DefaultStream()),
cudaSuccess);
CHECK_EQ(cudaStreamSynchronize(context_->DefaultStream()), cudaSuccess);
}
// Copy N items from another GPU memory array to the GPU memory managed by
// this CudaBuffer instance, growing this buffer's size if needed. This copy
// is asynchronous, and operates on the context's default stream.
void CopyNItemsFrom(int n, const CudaBuffer<T>& other) {
Reserve(n);
CHECK(other.data_ != nullptr);
CHECK(data_ != nullptr);
CHECK_EQ(cudaMemcpyAsync(data_,
other.data_,
n * sizeof(T),
cudaMemcpyDeviceToDevice,
context_->DefaultStream()),
cudaSuccess);
}
void CopyToGpu(const std::vector<T>& data) {
CopyToGpu(data.data(), data.size());
}
// Return a pointer to the GPU memory managed by this CudaBuffer instance.
T* data() { return data_; }
const T* data() const { return data_; }
// Return the number of items of type T that can fit in the GPU memory
// allocated so far by this CudaBuffer instance.
size_t size() const { return size_; }
private:
T* data_ = nullptr;
size_t size_ = 0;
ContextImpl* context_ = nullptr;
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif  // CERES_INTERNAL_CUDA_BUFFER_H_
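
Taken together, the copy helpers support a simple round trip. The following is
an editorial sketch, not upstream code, and assumes an already-initialized
ContextImpl named `context`:

std::vector<double> host = {1.0, 2.0, 3.0};
CudaBuffer<double> buffer(&context);
buffer.CopyFromCpuVector(host);  // asynchronous H2D on the default stream
std::vector<double> back(buffer.size());
// CopyToCpu synchronizes the default stream before returning, so `back` is
// safe to read immediately afterwards.
buffer.CopyToCpu(back.data(), back.size());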

View File

@@ -0,0 +1,332 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include <string>
#include "ceres/dense_cholesky.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
namespace ceres::internal {
#ifndef CERES_NO_CUDA
TEST(CUDADenseCholesky, InvalidOptionOnCreate) {
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
EXPECT_EQ(dense_cuda_solver, nullptr);
}
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
TEST(CUDADenseCholesky, Cholesky4x4Matrix) {
Eigen::Matrix4d A;
// clang-format off
A << 4, 12, -16, 0,
12, 37, -43, 0,
-16, -43, 98, 0,
0, 0, 0, 1;
// clang-format on
Vector b = Eigen::Vector4d::Ones();
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
Eigen::Vector4d x = Eigen::Vector4d::Zero();
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 10;
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
}
TEST(CUDADenseCholesky, SingularMatrix) {
Eigen::Matrix3d A;
// clang-format off
A << 1, 0, 0,
0, 1, 0,
0, 0, 0;
// clang-format on
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
LinearSolverTerminationType::FAILURE);
}
TEST(CUDADenseCholesky, NegativeMatrix) {
Eigen::Matrix3d A;
// clang-format off
A << 1, 0, 0,
0, 1, 0,
0, 0, -1;
// clang-format on
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
LinearSolverTerminationType::FAILURE);
}
TEST(CUDADenseCholesky, MustFactorizeBeforeSolve) {
const Eigen::Vector3d b = Eigen::Vector3d::Ones();
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), nullptr, &error_string),
LinearSolverTerminationType::FATAL_ERROR);
}
TEST(CUDADenseCholesky, Randomized1600x1600Tests) {
const int kNumCols = 1600;
using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
std::unique_ptr<DenseCholesky> dense_cholesky =
CUDADenseCholesky::Create(options);
const int kNumTrials = 20;
for (int i = 0; i < kNumTrials; ++i) {
LhsType lhs = LhsType::Random(kNumCols, kNumCols);
lhs = lhs.transpose() * lhs;
lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
SolutionType x_expected = SolutionType::Random(kNumCols);
RhsType rhs = lhs * x_expected;
SolutionType x_computed = SolutionType::Zero(kNumCols);
// Sanity check the random matrix sizes.
EXPECT_EQ(lhs.rows(), kNumCols);
EXPECT_EQ(lhs.cols(), kNumCols);
EXPECT_EQ(rhs.rows(), kNumCols);
EXPECT_EQ(rhs.cols(), 1);
EXPECT_EQ(x_expected.rows(), kNumCols);
EXPECT_EQ(x_expected.cols(), 1);
EXPECT_EQ(x_computed.rows(), kNumCols);
EXPECT_EQ(x_computed.cols(), 1);
LinearSolver::Summary summary;
summary.termination_type = dense_cholesky->FactorAndSolve(
kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 3e5;
ASSERT_NEAR(
(x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
}
}
TEST(CUDADenseCholeskyMixedPrecision, InvalidOptionsOnCreate) {
{
// Did not ask for CUDA, and did not ask for mixed precision.
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_EQ(solver, nullptr);
}
{
// Asked for CUDA, but did not ask for mixed precision.
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_EQ(solver, nullptr);
}
}
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix1Step) {
Eigen::Matrix4d A;
// clang-format off
// A common Cholesky decomposition test matrix, see:
// https://en.wikipedia.org/w/index.php?title=Cholesky_decomposition&oldid=1080607368#Example
A << 4, 12, -16, 0,
12, 37, -43, 0,
-16, -43, 98, 0,
0, 0, 0, 1;
// clang-format on
const Eigen::Vector4d b = Eigen::Vector4d::Ones();
LinearSolver::Options options;
options.max_num_refinement_iterations = 0;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
options.use_mixed_precision_solves = true;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_NE(solver, nullptr);
std::string error_string;
ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
Eigen::Vector4d x = Eigen::Vector4d::Zero();
ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
// A single step of the mixed precision solver will be equivalent to solving
// in low precision (FP32). Hence the tolerance is defined w.r.t. FP32 epsilon
// instead of FP64 epsilon.
static const double kEpsilon = std::numeric_limits<float>::epsilon() * 10;
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
}
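// Editorial note: mixed-precision refinement follows the classic scheme
// (sketch only, not Ceres-specific code):
//   r = b - A * x;   // residual computed in FP64
//   solve A * dx = r in FP32, reusing the cached factorization;
//   x += dx;         // correction accumulated in FP64
// Each pass shrinks the error by roughly eps_fp32 * cond(A), which is why the
// 4-step test below can use a tolerance near FP64 epsilon while the 1-step
// test above is limited to FP32 epsilon.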
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix4Steps) {
Eigen::Matrix4d A;
// clang-format off
A << 4, 12, -16, 0,
12, 37, -43, 0,
-16, -43, 98, 0,
0, 0, 0, 1;
// clang-format on
const Eigen::Vector4d b = Eigen::Vector4d::Ones();
LinearSolver::Options options;
options.max_num_refinement_iterations = 3;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
options.use_mixed_precision_solves = true;
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
ASSERT_NE(solver, nullptr);
std::string error_string;
ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
Eigen::Vector4d x = Eigen::Vector4d::Zero();
ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
// The error does not reduce beyond four iterations, and stagnates at this
// level of precision.
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 100;
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
}
TEST(CUDADenseCholeskyMixedPrecision, Randomized1600x1600Tests) {
const int kNumCols = 1600;
using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
options.use_mixed_precision_solves = true;
options.max_num_refinement_iterations = 20;
std::unique_ptr<CUDADenseCholeskyMixedPrecision> dense_cholesky =
CUDADenseCholeskyMixedPrecision::Create(options);
const int kNumTrials = 20;
for (int i = 0; i < kNumTrials; ++i) {
LhsType lhs = LhsType::Random(kNumCols, kNumCols);
lhs = lhs.transpose() * lhs;
lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
SolutionType x_expected = SolutionType::Random(kNumCols);
RhsType rhs = lhs * x_expected;
SolutionType x_computed = SolutionType::Zero(kNumCols);
// Sanity check the random matrix sizes.
EXPECT_EQ(lhs.rows(), kNumCols);
EXPECT_EQ(lhs.cols(), kNumCols);
EXPECT_EQ(rhs.rows(), kNumCols);
EXPECT_EQ(rhs.cols(), 1);
EXPECT_EQ(x_expected.rows(), kNumCols);
EXPECT_EQ(x_expected.cols(), 1);
EXPECT_EQ(x_computed.rows(), kNumCols);
EXPECT_EQ(x_computed.cols(), 1);
LinearSolver::Summary summary;
summary.termination_type = dense_cholesky->FactorAndSolve(
kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 1e6;
ASSERT_NEAR(
(x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
}
}
#endif // CERES_NO_CUDA
} // namespace ceres::internal

View File

@@ -0,0 +1,177 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include <string>
#include "ceres/dense_qr.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
namespace ceres::internal {
#ifndef CERES_NO_CUDA
TEST(CUDADenseQR, InvalidOptionOnCreate) {
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
auto dense_cuda_solver = CUDADenseQR::Create(options);
EXPECT_EQ(dense_cuda_solver, nullptr);
}
// Tests the CUDA QR solver with a simple 4x4 matrix.
TEST(CUDADenseQR, QR4x4Matrix) {
Eigen::Matrix4d A;
// clang-format off
A << 4, 12, -16, 0,
12, 37, -43, 0,
-16, -43, 98, 0,
0, 0, 0, 1;
// clang-format on
const Eigen::Vector4d b = Eigen::Vector4d::Ones();
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(
dense_cuda_solver->Factorize(A.rows(), A.cols(), A.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
Eigen::Vector4d x = Eigen::Vector4d::Zero();
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
// Empirically observed accuracy of cuSolverDN's QR solver.
const double kEpsilon = std::numeric_limits<double>::epsilon() * 1500;
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
EXPECT_NEAR((x - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
}
// Tests the CUDA QR solver with a simple 4x2 matrix.
TEST(CUDADenseQR, QR4x2Matrix) {
Eigen::Matrix<double, 4, 2> A;
// clang-format off
A << 4, 12,
12, 37,
-16, -43,
0, 0;
// clang-format on
const std::vector<double> b(4, 1.0);
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(
dense_cuda_solver->Factorize(A.rows(), A.cols(), A.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
std::vector<double> x(2, 0);
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string),
LinearSolverTerminationType::SUCCESS);
// Empirically observed accuracy of cuSolverDN's QR solver.
const double kEpsilon = std::numeric_limits<double>::epsilon() * 10;
// Solution values computed with Octave.
const Eigen::Vector2d x_expected(-1.143410852713177, 0.4031007751937981);
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
}
TEST(CUDADenseQR, MustFactorizeBeforeSolve) {
const Eigen::Vector3d b = Eigen::Vector3d::Ones();
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = CUDA;
auto dense_cuda_solver = CUDADenseQR::Create(options);
ASSERT_NE(dense_cuda_solver, nullptr);
std::string error_string;
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), nullptr, &error_string),
LinearSolverTerminationType::FATAL_ERROR);
}
TEST(CUDADenseQR, Randomized1600x100Tests) {
const int kNumRows = 1600;
const int kNumCols = 100;
using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
LinearSolver::Options options;
ContextImpl context;
options.context = &context;
std::string error;
EXPECT_TRUE(context.InitCuda(&error)) << error;
options.dense_linear_algebra_library_type = ceres::CUDA;
std::unique_ptr<DenseQR> dense_qr = CUDADenseQR::Create(options);
const int kNumTrials = 20;
for (int i = 0; i < kNumTrials; ++i) {
LhsType lhs = LhsType::Random(kNumRows, kNumCols);
SolutionType x_expected = SolutionType::Random(kNumCols);
RhsType rhs = lhs * x_expected;
SolutionType x_computed = SolutionType::Zero(kNumCols);
// Sanity check the random matrix sizes.
EXPECT_EQ(lhs.rows(), kNumRows);
EXPECT_EQ(lhs.cols(), kNumCols);
EXPECT_EQ(rhs.rows(), kNumRows);
EXPECT_EQ(rhs.cols(), 1);
EXPECT_EQ(x_expected.rows(), kNumCols);
EXPECT_EQ(x_expected.cols(), 1);
EXPECT_EQ(x_computed.rows(), kNumCols);
EXPECT_EQ(x_computed.cols(), 1);
LinearSolver::Summary summary;
summary.termination_type = dense_qr->FactorAndSolve(kNumRows,
kNumCols,
lhs.data(),
rhs.data(),
x_computed.data(),
&summary.message);
ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
ASSERT_NEAR((x_computed - x_expected).norm() / x_expected.norm(),
0.0,
std::numeric_limits<double>::epsilon() * 400);
}
}
#endif // CERES_NO_CUDA
} // namespace ceres::internal

View File

@@ -0,0 +1,477 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_kernels_bsm_to_crs.h"
#include <cuda_runtime.h>
#include <thrust/execution_policy.h>
#include <thrust/scan.h>
#include "ceres/block_structure.h"
#include "ceres/cuda_kernels_utils.h"
namespace ceres {
namespace internal {
namespace {
inline auto ThrustCudaStreamExecutionPolicy(cudaStream_t stream) {
// par_nosync execution policy was added in Thrust 1.16
// https://github.com/NVIDIA/thrust/blob/main/CHANGELOG.md#thrust-1160
#if THRUST_VERSION < 101700
return thrust::cuda::par.on(stream);
#else
return thrust::cuda::par_nosync.on(stream);
#endif
}
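// Editorial note: THRUST_VERSION encodes major * 100000 + minor * 100 +
// subminor, so the guard above keeps the synchronous policy for anything
// older than Thrust 1.17.0 (101700).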
void* CudaMalloc(size_t size,
cudaStream_t stream,
bool memory_pools_supported) {
void* data = nullptr;
// Stream-ordered allocation API is available since CUDA 11.2, but might not
// be implemented by a particular device
#if CUDART_VERSION < 11020
#warning \
"Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+"
cudaMalloc(&data, size);
#else
if (memory_pools_supported) {
cudaMallocAsync(&data, size, stream);
} else {
cudaMalloc(&data, size);
}
#endif
return data;
}
void CudaFree(void* data, cudaStream_t stream, bool memory_pools_supported) {
// Stream-ordered allocation API is available since CUDA 11.2, but might not
// be implemented by a particular device
#if CUDART_VERSION < 11020
#warning \
"Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+"
cudaFree(data);
#else
if (memory_pools_supported) {
cudaFreeAsync(data, stream);
} else {
cudaFree(data);
}
#endif
}
template <typename T>
T* CudaAllocate(size_t num_elements,
cudaStream_t stream,
bool memory_pools_supported) {
T* data = static_cast<T*>(
CudaMalloc(num_elements * sizeof(T), stream, memory_pools_supported));
return data;
}
} // namespace
// Fill row block id and nnz for each row using block-sparse structure
// represented by a set of flat arrays.
// Inputs:
// - num_row_blocks: number of row-blocks in block-sparse structure
// - first_cell_in_row_block: index of the first cell of the row-block; size:
// num_row_blocks + 1
// - cells: cells of block-sparse structure as a continuous array
// - row_blocks: row blocks of block-sparse structure stored sequentially
// - col_blocks: column blocks of block-sparse structure stored sequentially
// Outputs:
// - rows: rows[i + 1] will contain the number of non-zeros in the i-th row,
// rows[0] will be set to 0; rows are filled with a shift by one element so
// that an inclusive scan afterwards yields the row-index array of the CRS
// matrix
// - row_block_ids: row_block_ids[i] will be set to index of row-block that
// contains i-th row.
// Computation is performed row-block-wise
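// Illustrative example (editorial, not part of the upstream sources): two
// row-blocks covering rows {0} and {1, 2} with 3 and 2 non-zeros per row
// produce rows = [0, 3, 2, 2]; the inclusive scan performed afterwards turns
// this into the CRS row offsets [0, 3, 5, 7].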
template <bool partitioned = false>
__global__ void RowBlockIdAndNNZ(
const int num_row_blocks,
const int num_col_blocks_e,
const int num_row_blocks_e,
const int* __restrict__ first_cell_in_row_block,
const Cell* __restrict__ cells,
const Block* __restrict__ row_blocks,
const Block* __restrict__ col_blocks,
int* __restrict__ rows_e,
int* __restrict__ rows_f,
int* __restrict__ row_block_ids) {
const int row_block_id = blockIdx.x * blockDim.x + threadIdx.x;
if (row_block_id > num_row_blocks) {
// No synchronization is performed in this kernel, thus it is safe to return
return;
}
if (row_block_id == num_row_blocks) {
// one extra thread sets the first element
rows_f[0] = 0;
if constexpr (partitioned) {
rows_e[0] = 0;
}
return;
}
const auto& row_block = row_blocks[row_block_id];
auto first_cell = cells + first_cell_in_row_block[row_block_id];
const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
int row_nnz_e = 0;
if (partitioned && row_block_id < num_row_blocks_e) {
// First cell is a cell from E
row_nnz_e = col_blocks[first_cell->block_id].size;
++first_cell;
}
int row_nnz_f = 0;
for (auto cell = first_cell; cell < last_cell; ++cell) {
row_nnz_f += col_blocks[cell->block_id].size;
}
const int first_row = row_block.position;
const int last_row = first_row + row_block.size;
for (int i = first_row; i < last_row; ++i) {
if constexpr (partitioned) {
rows_e[i + 1] = row_nnz_e;
}
rows_f[i + 1] = row_nnz_f;
row_block_ids[i] = row_block_id;
}
}
// Row-wise creation of CRS structure
// Inputs:
// - num_rows: number of rows in matrix
// - first_cell_in_row_block: index of the first cell of the row-block; size:
// num_row_blocks + 1
// - cells: cells of block-sparse structure as a continuous array
// - row_blocks: row blocks of block-sparse structure stored sequentially
// - col_blocks: column blocks of block-sparse structure stored sequentially
// - row_block_ids: index of row-block that corresponds to row
// - rows: row-index array of CRS structure
// Outputs:
// - cols: column-index array of CRS structure
// Computation is performed row-wise
template <bool partitioned>
__global__ void ComputeColumns(const int num_rows,
const int num_row_blocks_e,
const int num_col_blocks_e,
const int* __restrict__ first_cell_in_row_block,
const Cell* __restrict__ cells,
const Block* __restrict__ row_blocks,
const Block* __restrict__ col_blocks,
const int* __restrict__ row_block_ids,
const int* __restrict__ rows_e,
int* __restrict__ cols_e,
const int* __restrict__ rows_f,
int* __restrict__ cols_f) {
const int row = blockIdx.x * blockDim.x + threadIdx.x;
if (row >= num_rows) {
// No synchronization is performed in this kernel, thus it is safe to return
return;
}
const int row_block_id = row_block_ids[row];
// position in crs matrix
auto first_cell = cells + first_cell_in_row_block[row_block_id];
const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
const int num_cols_e = col_blocks[num_col_blocks_e].position;
// For each cell of the row-block only the current row is filled
if (partitioned && row_block_id < num_row_blocks_e) {
// The first cell is cell from E
const auto& col_block = col_blocks[first_cell->block_id];
const int col_block_size = col_block.size;
int column_idx = col_block.position;
int crs_position_e = rows_e[row];
// Fill column indices for the current row's elements within this cell
for (int i = 0; i < col_block_size; ++i, ++crs_position_e) {
cols_e[crs_position_e] = column_idx++;
}
++first_cell;
}
int crs_position_f = rows_f[row];
for (auto cell = first_cell; cell < last_cell; ++cell) {
const auto& col_block = col_blocks[cell->block_id];
const int col_block_size = col_block.size;
int column_idx = col_block.position - num_cols_e;
// Fill column indices for the current row's elements within this cell
for (int i = 0; i < col_block_size; ++i, ++crs_position_f) {
cols_f[crs_position_f] = column_idx++;
}
}
}
void FillCRSStructure(const int num_row_blocks,
const int num_rows,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
int* rows,
int* cols,
cudaStream_t stream,
bool memory_pools_supported) {
// Set number of non-zeros per row in rows array and row to row-block map in
// row_block_ids array
int* row_block_ids =
CudaAllocate<int>(num_rows, stream, memory_pools_supported);
const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1);
RowBlockIdAndNNZ<false><<<num_blocks_blockwise, kCudaBlockSize, 0, stream>>>(
num_row_blocks,
0,
0,
first_cell_in_row_block,
cells,
row_blocks,
col_blocks,
nullptr,
rows,
row_block_ids);
// Finalize the row-index array of the CRS structure by computing a prefix sum
thrust::inclusive_scan(
ThrustCudaStreamExecutionPolicy(stream), rows, rows + num_rows + 1, rows);
// Fill cols array of CRS structure
const int num_blocks_rowwise = NumBlocksInGrid(num_rows);
ComputeColumns<false><<<num_blocks_rowwise, kCudaBlockSize, 0, stream>>>(
num_rows,
0,
0,
first_cell_in_row_block,
cells,
row_blocks,
col_blocks,
row_block_ids,
nullptr,
nullptr,
rows,
cols);
CudaFree(row_block_ids, stream, memory_pools_supported);
}
void FillCRSStructurePartitioned(const int num_row_blocks,
const int num_rows,
const int num_row_blocks_e,
const int num_col_blocks_e,
const int num_nonzeros_e,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
int* rows_e,
int* cols_e,
int* rows_f,
int* cols_f,
cudaStream_t stream,
bool memory_pools_supported) {
// Set number of non-zeros per row in rows array and row to row-block map in
// row_block_ids array
int* row_block_ids =
CudaAllocate<int>(num_rows, stream, memory_pools_supported);
const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1);
RowBlockIdAndNNZ<true><<<num_blocks_blockwise, kCudaBlockSize, 0, stream>>>(
num_row_blocks,
num_col_blocks_e,
num_row_blocks_e,
first_cell_in_row_block,
cells,
row_blocks,
col_blocks,
rows_e,
rows_f,
row_block_ids);
// Finalize the row-index array of the CRS structure by computing a prefix sum
thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream),
rows_e,
rows_e + num_rows + 1,
rows_e);
thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream),
rows_f,
rows_f + num_rows + 1,
rows_f);
// Fill cols array of CRS structure
const int num_blocks_rowwise = NumBlocksInGrid(num_rows);
ComputeColumns<true><<<num_blocks_rowwise, kCudaBlockSize, 0, stream>>>(
num_rows,
num_row_blocks_e,
num_col_blocks_e,
first_cell_in_row_block,
cells,
row_blocks,
col_blocks,
row_block_ids,
rows_e,
cols_e,
rows_f,
cols_f);
CudaFree(row_block_ids, stream, memory_pools_supported);
}
template <typename T, typename Predicate>
__device__ int PartitionPoint(const T* data,
int first,
int last,
Predicate&& predicate) {
if (!predicate(data[first])) {
return first;
}
while (last - first > 1) {
const auto midpoint = first + (last - first) / 2;
if (predicate(data[midpoint])) {
first = midpoint;
} else {
last = midpoint;
}
}
return last;
}
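// Illustrative example (editorial, not part of the upstream sources): with
// data = {0, 2, 5, 9} and the predicate "value <= 4", PartitionPoint returns
// index 2, the first element failing the predicate; the caller below then
// subtracts one to obtain the row-block whose value range contains the query.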
// Element-wise reordering of block-sparse values
// - first_cell_in_row_block - position of the first cell of row-block
// - block_sparse_values - segment of block-sparse values starting from
// block_sparse_offset, containing num_values
template <bool partitioned>
__global__ void PermuteToCrsKernel(
const int block_sparse_offset,
const int num_values,
const int num_row_blocks,
const int num_row_blocks_e,
const int* __restrict__ first_cell_in_row_block,
const int* __restrict__ value_offset_row_block_f,
const Cell* __restrict__ cells,
const Block* __restrict__ row_blocks,
const Block* __restrict__ col_blocks,
const int* __restrict__ crs_rows,
const double* __restrict__ block_sparse_values,
double* __restrict__ crs_values) {
const int value_id = blockIdx.x * blockDim.x + threadIdx.x;
if (value_id >= num_values) {
return;
}
const int block_sparse_value_id = value_id + block_sparse_offset;
// Find the corresponding row-block with a binary search
const int row_block_id =
(partitioned
? PartitionPoint(value_offset_row_block_f,
0,
num_row_blocks,
[block_sparse_value_id] __device__(
const int row_block_offset) {
return row_block_offset <= block_sparse_value_id;
})
: PartitionPoint(first_cell_in_row_block,
0,
num_row_blocks,
[cells, block_sparse_value_id] __device__(
const int row_block_offset) {
return cells[row_block_offset].position <=
block_sparse_value_id;
})) -
1;
// Find cell and calculate offset within the row with a linear scan
const auto& row_block = row_blocks[row_block_id];
auto first_cell = cells + first_cell_in_row_block[row_block_id];
const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
const int row_block_size = row_block.size;
int num_cols_before = 0;
if (partitioned && row_block_id < num_row_blocks_e) {
++first_cell;
}
for (const Cell* cell = first_cell; cell < last_cell; ++cell) {
const auto& col_block = col_blocks[cell->block_id];
const int col_block_size = col_block.size;
const int cell_size = row_block_size * col_block_size;
if (cell->position + cell_size > block_sparse_value_id) {
const int pos_in_cell = block_sparse_value_id - cell->position;
const int row_in_cell = pos_in_cell / col_block_size;
const int col_in_cell = pos_in_cell % col_block_size;
const int row = row_in_cell + row_block.position;
crs_values[crs_rows[row] + num_cols_before + col_in_cell] =
block_sparse_values[value_id];
break;
}
num_cols_before += col_block_size;
}
}
void PermuteToCRS(const int block_sparse_offset,
const int num_values,
const int num_row_blocks,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
const int* crs_rows,
const double* block_sparse_values,
double* crs_values,
cudaStream_t stream) {
const int num_blocks_valuewise = NumBlocksInGrid(num_values);
PermuteToCrsKernel<false>
<<<num_blocks_valuewise, kCudaBlockSize, 0, stream>>>(
block_sparse_offset,
num_values,
num_row_blocks,
0,
first_cell_in_row_block,
nullptr,
cells,
row_blocks,
col_blocks,
crs_rows,
block_sparse_values,
crs_values);
}
void PermuteToCRSPartitionedF(const int block_sparse_offset,
const int num_values,
const int num_row_blocks,
const int num_row_blocks_e,
const int* first_cell_in_row_block,
const int* value_offset_row_block_f,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
const int* crs_rows,
const double* block_sparse_values,
double* crs_values,
cudaStream_t stream) {
const int num_blocks_valuewise = NumBlocksInGrid(num_values);
PermuteToCrsKernel<true><<<num_blocks_valuewise, kCudaBlockSize, 0, stream>>>(
block_sparse_offset,
num_values,
num_row_blocks,
num_row_blocks_e,
first_cell_in_row_block,
value_offset_row_block_f,
cells,
row_blocks,
col_blocks,
crs_rows,
block_sparse_values,
crs_values);
}
} // namespace internal
} // namespace ceres

View File

@@ -0,0 +1,113 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
#define CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
namespace ceres {
namespace internal {
struct Block;
struct Cell;
// Compute structure of CRS matrix using block-sparse structure.
// Arrays corresponding to CRS matrix are to be allocated by caller
void FillCRSStructure(const int num_row_blocks,
const int num_rows,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
int* rows,
int* cols,
cudaStream_t stream,
bool memory_pools_supported);
// Compute structure of partitioned CRS matrix using block-sparse structure.
// Arrays corresponding to CRS matrices are to be allocated by caller
void FillCRSStructurePartitioned(const int num_row_blocks,
const int num_rows,
const int num_row_blocks_e,
const int num_col_blocks_e,
const int num_nonzeros_e,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
int* rows_e,
int* cols_e,
int* rows_f,
int* cols_f,
cudaStream_t stream,
bool memory_pools_supported);
// Permute segment of values from block-sparse matrix with sequential layout to
// CRS order. Segment starts at block_sparse_offset and has length of num_values
void PermuteToCRS(const int block_sparse_offset,
const int num_values,
const int num_row_blocks,
const int* first_cell_in_row_block,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
const int* crs_rows,
const double* block_sparse_values,
double* crs_values,
cudaStream_t stream);
// Permute segment of values from F sub-matrix of block-sparse partitioned
// matrix with sequential layout to CRS order. Segment starts at
// block_sparse_offset (including the offset induced by values of E submatrix)
// and has length of num_values
void PermuteToCRSPartitionedF(const int block_sparse_offset,
const int num_values,
const int num_row_blocks,
const int num_row_blocks_e,
const int* first_cell_in_row_block,
const int* value_offset_row_block_f,
const Cell* cells,
const Block* row_blocks,
const Block* col_blocks,
const int* crs_rows,
const double* block_sparse_values,
double* crs_values,
cudaStream_t stream);
} // namespace internal
} // namespace ceres
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
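
For orientation, the two non-partitioned entry points compose as follows. This
is an editorial sketch rather than a verbatim call site; all pointers are
assumed to be device memory prepared by CudaBlockSparseStructure:

FillCRSStructure(num_row_blocks, num_rows, first_cell_in_row_block, cells,
                 row_blocks, col_blocks, crs_rows, crs_cols, stream,
                 /*memory_pools_supported=*/true);
PermuteToCRS(/*block_sparse_offset=*/0, num_nonzeros, num_row_blocks,
             first_cell_in_row_block, cells, row_blocks, col_blocks, crs_rows,
             block_sparse_values, crs_values, stream);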

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -26,53 +26,31 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: alexs.mac@gmail.com (Alex Stewart)
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifdef CERES_NO_THREADS
#include "ceres/parallel_for.h"
#include "glog/logging.h"
#ifndef CERES_INTERNAL_CUDA_KERNELS_UTILS_H_
#define CERES_INTERNAL_CUDA_KERNELS_UTILS_H_
namespace ceres {
namespace internal {
int MaxNumThreadsAvailable() { return 1; }
// Parallel execution on CUDA device requires splitting job into blocks of a
// fixed size. We use block-size of kCudaBlockSize for all kernels that do not
// require any specific block size. As the CUDA Toolkit documentation says,
// "although arbitrary in this case, is a common choice". This is determined by
// the warp size, max block size, and multiprocessor sizes of recent GPUs. For
// complex kernels with significant register usage and unusual memory patterns,
// the occupancy calculator API might provide better performance. See "Occupancy
// Calculator" under the CUDA toolkit documentation.
constexpr int kCudaBlockSize = 256;
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int)>& function) {
CHECK_GT(num_threads, 0);
CHECK(context != nullptr);
if (end <= start) {
return;
}
for (int i = start; i < end; ++i) {
function(i);
}
// Compute number of blocks of kCudaBlockSize that span over 1-d grid with
// dimension size. Note that 1-d grid dimension is limited by 2^31-1 in CUDA,
// thus a signed int is used as an argument.
inline int NumBlocksInGrid(int size) {
return (size + kCudaBlockSize - 1) / kCudaBlockSize;
}
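// Illustrative example (editorial, not part of the upstream sources): with
// kCudaBlockSize = 256, NumBlocksInGrid(1000) returns 4 and the launched grid
// covers indices [0, 1024); kernels guard against the overshoot with an
// explicit bounds check on the computed thread index.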
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int thread_id, int i)>& function) {
CHECK_GT(num_threads, 0);
CHECK(context != nullptr);
if (end <= start) {
return;
}
const int thread_id = 0;
for (int i = start; i < end; ++i) {
function(thread_id, i);
}
}
} // namespace internal
} // namespace ceres
#endif // CERES_NO_THREADS
#endif // CERES_INTERNAL_CUDA_KERNELS_UTILS_H_

View File

@@ -0,0 +1,123 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_kernels_vector_ops.h"
#include <cuda_runtime.h>
#include "ceres/cuda_kernels_utils.h"
namespace ceres {
namespace internal {
template <typename SrcType, typename DstType>
__global__ void TypeConversionKernel(const SrcType* __restrict__ input,
DstType* __restrict__ output,
const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
output[i] = static_cast<DstType>(input[i]);
}
}
void CudaFP64ToFP32(const double* input,
float* output,
const int size,
cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
TypeConversionKernel<double, float>
<<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size);
}
void CudaFP32ToFP64(const float* input,
double* output,
const int size,
cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
TypeConversionKernel<float, double>
<<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size);
}
template <typename T>
__global__ void SetZeroKernel(T* __restrict__ output, const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
output[i] = T(0.0);
}
}
void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
SetZeroKernel<float><<<num_blocks, kCudaBlockSize, 0, stream>>>(output, size);
}
void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
SetZeroKernel<double>
<<<num_blocks, kCudaBlockSize, 0, stream>>>(output, size);
}
template <typename SrcType, typename DstType>
__global__ void XPlusEqualsYKernel(DstType* __restrict__ x,
const SrcType* __restrict__ y,
const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
x[i] = x[i] + DstType(y[i]);
}
}
void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
XPlusEqualsYKernel<float, double>
<<<num_blocks, kCudaBlockSize, 0, stream>>>(x, y, size);
}
__global__ void CudaDtDxpyKernel(double* __restrict__ y,
const double* D,
const double* __restrict__ x,
const int size) {
const int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < size) {
y[i] = y[i] + D[i] * D[i] * x[i];
}
}
void CudaDtDxpy(double* y,
const double* D,
const double* x,
const int size,
cudaStream_t stream) {
const int num_blocks = NumBlocksInGrid(size);
CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>(y, D, x, size);
}
} // namespace internal
} // namespace ceres
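A minimal host-side sketch of how these conversion wrappers compose into an FP64 -> FP32 -> FP64 round trip (illustrative only, not part of the patch; assumes a CUDA build of Ceres and elides error checking):

#include <cstdio>
#include <vector>

#include <cuda_runtime.h>

#include "ceres/cuda_kernels_vector_ops.h"

int main() {
  const int size = 1024;
  std::vector<double> host(size, 1.5);
  double* fp64 = nullptr;
  float* fp32 = nullptr;
  cudaMalloc(&fp64, size * sizeof(double));
  cudaMalloc(&fp32, size * sizeof(float));
  cudaMemcpy(fp64, host.data(), size * sizeof(double),
             cudaMemcpyHostToDevice);
  // Demote to FP32 and promote back, both on the default (null) stream.
  ceres::internal::CudaFP64ToFP32(fp64, fp32, size, /*stream=*/nullptr);
  ceres::internal::CudaFP32ToFP64(fp32, fp64, size, /*stream=*/nullptr);
  cudaMemcpy(host.data(), fp64, size * sizeof(double),
             cudaMemcpyDeviceToHost);
  std::printf("host[0] after round trip = %f\n", host[0]);  // expect 1.5
  cudaFree(fp32);
  cudaFree(fp64);
  return 0;
}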

View File

@@ -0,0 +1,83 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#ifndef CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_
#define CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
namespace ceres {
namespace internal {
class Block;
class Cell;
// Convert an array of double (FP64) values to float (FP32). Both arrays must
// already be in GPU memory.
void CudaFP64ToFP32(const double* input,
float* output,
const int size,
cudaStream_t stream);
// Convert an array of float (FP32) values to double (FP64). Both arrays must
// already be in GPU memory.
void CudaFP32ToFP64(const float* input,
double* output,
const int size,
cudaStream_t stream);
// Set all elements of the array to the FP32 value 0. The array must be in GPU
// memory.
void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream);
// Set all elements of the array to the FP64 value 0. The array must be in GPU
// memory.
void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream);
// Compute x = x + double(y). The input array is float (FP32), the output
// array is double (FP64). Both arrays must already be in GPU memory.
void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream);
// Compute y[i] = y[i] + D[i]^2 * x[i]. All arrays must already be in GPU
// memory.
void CudaDtDxpy(double* y,
const double* D,
const double* x,
const int size,
cudaStream_t stream);
} // namespace internal
} // namespace ceres
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_

View File

@@ -0,0 +1,198 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_kernels_vector_ops.h"
#include <float.h>
#include <math.h>
#include <limits>
#include <string>
#include <vector>
#include "ceres/context_impl.h"
#include "ceres/cuda_buffer.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
namespace ceres {
namespace internal {
#ifndef CERES_NO_CUDA
TEST(CudaFP64ToFP32, SimpleConversions) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.CopyFromCpuVector(fp64_cpu);
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.Reserve(fp64_cpu.size());
CudaFP64ToFP32(fp64_gpu.data(),
fp32_gpu.data(),
fp64_cpu.size(),
context.DefaultStream());
std::vector<float> fp32_cpu(fp64_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
for (int i = 0; i < fp32_cpu.size(); ++i) {
EXPECT_EQ(fp32_cpu[i], static_cast<float>(fp64_cpu[i]));
}
}
TEST(CudaFP64ToFP32, NumericallyExtremeValues) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<double> fp64_cpu = {
DBL_MIN, 10.0 * DBL_MIN, DBL_MAX, 0.1 * DBL_MAX};
// First just make sure that the compiler has represented these values
// accurately as fp64.
EXPECT_GT(fp64_cpu[0], 0.0);
EXPECT_GT(fp64_cpu[1], 0.0);
EXPECT_TRUE(std::isfinite(fp64_cpu[2]));
EXPECT_TRUE(std::isfinite(fp64_cpu[3]));
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.CopyFromCpuVector(fp64_cpu);
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.Reserve(fp64_cpu.size());
CudaFP64ToFP32(fp64_gpu.data(),
fp32_gpu.data(),
fp64_cpu.size(),
context.DefaultStream());
std::vector<float> fp32_cpu(fp64_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
EXPECT_EQ(fp32_cpu[0], 0.0f);
EXPECT_EQ(fp32_cpu[1], 0.0f);
EXPECT_EQ(fp32_cpu[2], std::numeric_limits<float>::infinity());
EXPECT_EQ(fp32_cpu[3], std::numeric_limits<float>::infinity());
}
TEST(CudaFP32ToFP64, SimpleConversions) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.CopyFromCpuVector(fp32_cpu);
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.Reserve(fp32_cpu.size());
CudaFP32ToFP64(fp32_gpu.data(),
fp64_gpu.data(),
fp32_cpu.size(),
context.DefaultStream());
std::vector<double> fp64_cpu(fp32_cpu.size());
fp64_gpu.CopyToCpu(fp64_cpu.data(), fp64_cpu.size());
for (int i = 0; i < fp64_cpu.size(); ++i) {
EXPECT_EQ(fp64_cpu[i], static_cast<double>(fp32_cpu[i]));
}
}
TEST(CudaSetZeroFP32, NonZeroInput) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<float> fp32_gpu(&context);
fp32_gpu.CopyFromCpuVector(fp32_cpu);
CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), context.DefaultStream());
std::vector<float> fp32_cpu_zero(fp32_cpu.size());
fp32_gpu.CopyToCpu(fp32_cpu_zero.data(), fp32_cpu_zero.size());
for (int i = 0; i < fp32_cpu_zero.size(); ++i) {
EXPECT_EQ(fp32_cpu_zero[i], 0.0f);
}
}
TEST(CudaSetZeroFP64, NonZeroInput) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<double> fp64_gpu(&context);
fp64_gpu.CopyFromCpuVector(fp64_cpu);
CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), context.DefaultStream());
std::vector<double> fp64_cpu_zero(fp64_cpu.size());
fp64_gpu.CopyToCpu(fp64_cpu_zero.data(), fp64_cpu_zero.size());
for (int i = 0; i < fp64_cpu_zero.size(); ++i) {
EXPECT_EQ(fp64_cpu_zero[i], 0.0);
}
}
TEST(CudaDsxpy, DoubleValues) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<float> fp32_cpu_a = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
std::vector<double> fp64_cpu_b = {
1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
CudaBuffer<float> fp32_gpu_a(&context);
fp32_gpu_a.CopyFromCpuVector(fp32_cpu_a);
CudaBuffer<double> fp64_gpu_b(&context);
fp64_gpu_b.CopyFromCpuVector(fp64_cpu_b);
CudaDsxpy(fp64_gpu_b.data(),
fp32_gpu_a.data(),
fp32_gpu_a.size(),
context.DefaultStream());
fp64_gpu_b.CopyToCpu(fp64_cpu_b.data(), fp64_cpu_b.size());
for (int i = 0; i < fp64_cpu_b.size(); ++i) {
EXPECT_DOUBLE_EQ(fp64_cpu_b[i], 2.0 * fp32_cpu_a[i]);
}
}
TEST(CudaDtDxpy, ComputeFourItems) {
ContextImpl context;
std::string cuda_error;
EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
std::vector<double> x_cpu = {1, 2, 3, 4};
std::vector<double> y_cpu = {4, 3, 2, 1};
std::vector<double> d_cpu = {10, 20, 30, 40};
CudaBuffer<double> x_gpu(&context);
x_gpu.CopyFromCpuVector(x_cpu);
CudaBuffer<double> y_gpu(&context);
y_gpu.CopyFromCpuVector(y_cpu);
CudaBuffer<double> d_gpu(&context);
d_gpu.CopyFromCpuVector(d_cpu);
CudaDtDxpy(y_gpu.data(),
d_gpu.data(),
x_gpu.data(),
y_gpu.size(),
context.DefaultStream());
y_gpu.CopyToCpu(y_cpu.data(), y_cpu.size());
EXPECT_DOUBLE_EQ(y_cpu[0], 4.0 + 10.0 * 10.0 * 1.0);
EXPECT_DOUBLE_EQ(y_cpu[1], 3.0 + 20.0 * 20.0 * 2.0);
EXPECT_DOUBLE_EQ(y_cpu[2], 2.0 + 30.0 * 30.0 * 3.0);
EXPECT_DOUBLE_EQ(y_cpu[3], 1.0 + 40.0 * 40.0 * 4.0);
}
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres

View File

@@ -0,0 +1,152 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_partitioned_block_sparse_crs_view.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_block_structure.h"
#include "ceres/cuda_kernels_bsm_to_crs.h"
namespace ceres::internal {
CudaPartitionedBlockSparseCRSView::CudaPartitionedBlockSparseCRSView(
const BlockSparseMatrix& bsm,
const int num_col_blocks_e,
ContextImpl* context)
    : context_(context) {
const auto& bs = *bsm.block_structure();
block_structure_ =
std::make_unique<CudaBlockSparseStructure>(bs, num_col_blocks_e, context);
// Determine the number of non-zeros in the left submatrix.
// Row-blocks are at least one row high, so we can use a temporary array of
// num_rows elements for ComputeNonZerosInColumnBlockSubMatrix and later
// reuse it for FillCRSStructurePartitioned.
const int num_rows = bsm.num_rows();
const int num_nonzeros_e = block_structure_->num_nonzeros_e();
const int num_nonzeros_f = bsm.num_nonzeros() - num_nonzeros_e;
const int num_cols_e = num_col_blocks_e < bs.cols.size()
? bs.cols[num_col_blocks_e].position
: bsm.num_cols();
const int num_cols_f = bsm.num_cols() - num_cols_e;
CudaBuffer<int32_t> rows_e(context, num_rows + 1);
CudaBuffer<int32_t> cols_e(context, num_nonzeros_e);
CudaBuffer<int32_t> rows_f(context, num_rows + 1);
CudaBuffer<int32_t> cols_f(context, num_nonzeros_f);
num_row_blocks_e_ = block_structure_->num_row_blocks_e();
FillCRSStructurePartitioned(block_structure_->num_row_blocks(),
num_rows,
num_row_blocks_e_,
num_col_blocks_e,
num_nonzeros_e,
block_structure_->first_cell_in_row_block(),
block_structure_->cells(),
block_structure_->row_blocks(),
block_structure_->col_blocks(),
rows_e.data(),
cols_e.data(),
rows_f.data(),
cols_f.data(),
context->DefaultStream(),
context->is_cuda_memory_pools_supported_);
f_is_crs_compatible_ = block_structure_->IsCrsCompatible();
if (f_is_crs_compatible_) {
block_structure_ = nullptr;
} else {
streamed_buffer_ = std::make_unique<CudaStreamedBuffer<double>>(
context, kMaxTemporaryArraySize);
}
matrix_e_ = std::make_unique<CudaSparseMatrix>(
num_cols_e, std::move(rows_e), std::move(cols_e), context);
matrix_f_ = std::make_unique<CudaSparseMatrix>(
num_cols_f, std::move(rows_f), std::move(cols_f), context);
CHECK_EQ(bsm.num_nonzeros(),
matrix_e_->num_nonzeros() + matrix_f_->num_nonzeros());
UpdateValues(bsm);
}
void CudaPartitionedBlockSparseCRSView::UpdateValues(
const BlockSparseMatrix& bsm) {
if (f_is_crs_compatible_) {
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(matrix_e_->mutable_values(),
bsm.values(),
matrix_e_->num_nonzeros() * sizeof(double),
cudaMemcpyHostToDevice,
context_->DefaultStream()));
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(matrix_f_->mutable_values(),
bsm.values() + matrix_e_->num_nonzeros(),
matrix_f_->num_nonzeros() * sizeof(double),
cudaMemcpyHostToDevice,
context_->DefaultStream()));
return;
}
streamed_buffer_->CopyToGpu(
bsm.values(),
bsm.num_nonzeros(),
[block_structure = block_structure_.get(),
num_nonzeros_e = matrix_e_->num_nonzeros(),
num_row_blocks_e = num_row_blocks_e_,
values_f = matrix_f_->mutable_values(),
rows_f = matrix_f_->rows()](
const double* values, int num_values, int offset, auto stream) {
PermuteToCRSPartitionedF(num_nonzeros_e + offset,
num_values,
block_structure->num_row_blocks(),
num_row_blocks_e,
block_structure->first_cell_in_row_block(),
block_structure->value_offset_row_block_f(),
block_structure->cells(),
block_structure->row_blocks(),
block_structure->col_blocks(),
rows_f,
values,
values_f,
stream);
});
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(matrix_e_->mutable_values(),
bsm.values(),
matrix_e_->num_nonzeros() * sizeof(double),
cudaMemcpyHostToDevice,
context_->DefaultStream()));
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,111 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
//
#ifndef CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
#define CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include <memory>
#include "ceres/block_sparse_matrix.h"
#include "ceres/cuda_block_structure.h"
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_streamed_buffer.h"
namespace ceres::internal {
// We use the cuSPARSE library for SpMV operations. However, it supports
// neither the block-sparse format with varying block sizes nor
// submatrix-vector products. Thus, we perform the following operations in
// order to compute products of partitioned block-sparse matrices and dense
// vectors on gpu:
// - Once per block-sparse structure update:
// - Compute CRS structures of left and right submatrices from block-sparse
// structure
// - Check if values of the F sub-matrix can be copied without permutation
// - Once per block-sparse values update:
// - Copy values of E sub-matrix
// - Permute or copy values of F sub-matrix
//
// It is assumed that cells of the block-sparse matrix are laid out
// sequentially in both sub-matrices, that there is exactly one E sub-matrix
// cell per row-block in the first num_row_blocks_e_ row blocks, and that the
// E sub-matrix has no cells below the first num_row_blocks_e_ row blocks.
//
// This class avoids storing both CRS and block-sparse values in GPU memory.
// Instead, block-sparse values are transferred to gpu memory as a disjoint
// set of small contiguous segments, with simultaneous permutation of the
// values into the correct order using the block structure.
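//
// A tiny illustrative example (not from the patch): with column blocks
// [c0 | c1 c2] and num_col_blocks_e = 1, a block-sparse matrix with cells
//   row-block 0: (c0), (c1)
//   row-block 1: (c0), (c2)
//   row-block 2: (c2)
// is partitioned into E (the single c0 cell in each of the first two
// row-blocks, so num_row_blocks_e_ = 2) and F (the c1/c2 cells, including
// all of row-block 2).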
class CERES_NO_EXPORT CudaPartitionedBlockSparseCRSView {
public:
// Initializes internal CRS matrix and block-sparse structure on GPU side
// values. The following objects are stored in gpu memory for the whole
// lifetime of the object
// - matrix_e_: left CRS submatrix
// - matrix_f_: right CRS submatrix
// - block_structure_: copy of block-sparse structure on GPU
// - streamed_buffer_: helper for value updating
CudaPartitionedBlockSparseCRSView(const BlockSparseMatrix& bsm,
const int num_col_blocks_e,
ContextImpl* context);
// Update values of CRS submatrices using values of block-sparse matrix.
// Assumes that bsm has the same block-sparse structure as matrix that was
// used for construction.
void UpdateValues(const BlockSparseMatrix& bsm);
const CudaSparseMatrix* matrix_e() const { return matrix_e_.get(); }
const CudaSparseMatrix* matrix_f() const { return matrix_f_.get(); }
CudaSparseMatrix* mutable_matrix_e() { return matrix_e_.get(); }
CudaSparseMatrix* mutable_matrix_f() { return matrix_f_.get(); }
private:
// The value permutation kernel performs a single element-wise operation per
// thread, so performing the permutation in blocks of 8 megabytes of
// block-sparse values seems reasonable.
static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024;
std::unique_ptr<CudaSparseMatrix> matrix_e_;
std::unique_ptr<CudaSparseMatrix> matrix_f_;
std::unique_ptr<CudaStreamedBuffer<double>> streamed_buffer_;
std::unique_ptr<CudaBlockSparseStructure> block_structure_;
bool f_is_crs_compatible_;
int num_row_blocks_e_;
ContextImpl* context_;
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
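For orientation, the two submatrices compose back into the full product as follows: a sketch (not part of the patch) using only the const accessors declared above, with x assumed to be already split into its E and F parts on the GPU:

#include "ceres/cuda_partitioned_block_sparse_crs_view.h"
#include "ceres/cuda_vector.h"

namespace ceres::internal {

// y += A x, where x = [x_e; x_f] matches the E/F column partition.
void PartitionedRightMultiplyAndAccumulate(
    const CudaPartitionedBlockSparseCRSView& view,
    const CudaVector& x_e,
    const CudaVector& x_f,
    CudaVector* y) {
  view.matrix_e()->RightMultiplyAndAccumulate(x_e, y);
  view.matrix_f()->RightMultiplyAndAccumulate(x_f, y);
}

}  // namespace ceres::internal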

View File

@@ -0,0 +1,279 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/cuda_partitioned_block_sparse_crs_view.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#ifndef CERES_NO_CUDA
namespace ceres::internal {
namespace {
struct RandomPartitionedMatrixOptions {
int num_row_blocks_e;
int num_row_blocks_f;
int num_col_blocks_e;
int num_col_blocks_f;
int min_row_block_size;
int max_row_block_size;
int min_col_block_size;
int max_col_block_size;
double empty_f_probability;
double cell_probability_f;
int max_cells_f;
};
std::unique_ptr<BlockSparseMatrix> CreateRandomPartitionedMatrix(
const RandomPartitionedMatrixOptions& options, std::mt19937& rng) {
const int num_row_blocks =
std::max(options.num_row_blocks_e, options.num_row_blocks_f);
const int num_col_blocks =
options.num_col_blocks_e + options.num_col_blocks_f;
CompressedRowBlockStructure* block_structure =
new CompressedRowBlockStructure;
block_structure->cols.reserve(num_col_blocks);
block_structure->rows.reserve(num_row_blocks);
// Create column blocks
std::uniform_int_distribution<int> col_size(options.min_col_block_size,
options.max_col_block_size);
int num_cols = 0;
for (int i = 0; i < num_col_blocks; ++i) {
const int size = col_size(rng);
block_structure->cols.emplace_back(size, num_cols);
num_cols += size;
}
// Prepare column-block indices of E cells
std::vector<int> e_col_block_idx;
e_col_block_idx.reserve(options.num_row_blocks_e);
std::uniform_int_distribution<int> col_e(0, options.num_col_blocks_e - 1);
for (int i = 0; i < options.num_row_blocks_e; ++i) {
e_col_block_idx.emplace_back(col_e(rng));
}
std::sort(e_col_block_idx.begin(), e_col_block_idx.end());
// Prepare cell structure
std::uniform_int_distribution<int> row_size(options.min_row_block_size,
options.max_row_block_size);
std::uniform_real_distribution<double> uniform;
int num_rows = 0;
for (int i = 0; i < num_row_blocks; ++i) {
const int size = row_size(rng);
block_structure->rows.emplace_back();
auto& row = block_structure->rows.back();
row.block.size = size;
row.block.position = num_rows;
num_rows += size;
if (i < options.num_row_blocks_e) {
row.cells.emplace_back(e_col_block_idx[i], -1);
if (uniform(rng) < options.empty_f_probability) {
continue;
}
}
if (i >= options.num_row_blocks_f) continue;
const int cells_before = row.cells.size();
for (int j = options.num_col_blocks_e; j < num_col_blocks; ++j) {
if (uniform(rng) > options.cell_probability_f) {
continue;
}
row.cells.emplace_back(j, -1);
}
if (row.cells.size() > cells_before + options.max_cells_f) {
std::shuffle(row.cells.begin() + cells_before, row.cells.end(), rng);
row.cells.resize(cells_before + options.max_cells_f);
std::sort(
row.cells.begin(), row.cells.end(), [](const auto& a, const auto& b) {
return a.block_id < b.block_id;
});
}
}
// Fill positions in E sub-matrix
int num_nonzeros = 0;
for (int i = 0; i < options.num_row_blocks_e; ++i) {
CHECK_GE(block_structure->rows[i].cells.size(), 1);
block_structure->rows[i].cells[0].position = num_nonzeros;
const int col_block_size =
block_structure->cols[block_structure->rows[i].cells[0].block_id].size;
const int row_block_size = block_structure->rows[i].block.size;
num_nonzeros += row_block_size * col_block_size;
CHECK_GE(num_nonzeros, 0);
}
// Fill positions in F sub-matrix
for (int i = 0; i < options.num_row_blocks_f; ++i) {
const int row_block_size = block_structure->rows[i].block.size;
for (auto& cell : block_structure->rows[i].cells) {
if (cell.position >= 0) continue;
cell.position = num_nonzeros;
const int col_block_size = block_structure->cols[cell.block_id].size;
num_nonzeros += row_block_size * col_block_size;
CHECK_GE(num_nonzeros, 0);
}
}
// Populate values
auto bsm = std::make_unique<BlockSparseMatrix>(block_structure, true);
for (int i = 0; i < num_nonzeros; ++i) {
bsm->mutable_values()[i] = i + 1;
}
return bsm;
}
} // namespace
class CudaPartitionedBlockSparseCRSViewTest : public ::testing::Test {
static constexpr int kNumColBlocksE = 456;
protected:
void SetUp() final {
std::string message;
CHECK(context_.InitCuda(&message))
<< "InitCuda() failed because: " << message;
RandomPartitionedMatrixOptions options;
options.num_row_blocks_f = 123;
options.num_row_blocks_e = 456;
options.num_col_blocks_f = 123;
options.num_col_blocks_e = kNumColBlocksE;
options.min_row_block_size = 1;
options.max_row_block_size = 4;
options.min_col_block_size = 1;
options.max_col_block_size = 4;
options.empty_f_probability = .1;
options.cell_probability_f = .2;
options.max_cells_f = options.num_col_blocks_f;
std::mt19937 rng;
short_f_ = CreateRandomPartitionedMatrix(options, rng);
options.num_row_blocks_e = 123;
options.num_row_blocks_f = 456;
short_e_ = CreateRandomPartitionedMatrix(options, rng);
options.max_cells_f = 1;
options.num_row_blocks_e = options.num_row_blocks_f;
f_crs_compatible_ = CreateRandomPartitionedMatrix(options, rng);
}
void TestMatrix(const BlockSparseMatrix& A_) {
CudaPartitionedBlockSparseCRSView view(A_, kNumColBlocksE, &context_);
const int num_rows = A_.num_rows();
const int num_cols = A_.num_cols();
const auto& bs = *A_.block_structure();
const int num_cols_e = bs.cols[kNumColBlocksE].position;
const int num_cols_f = num_cols - num_cols_e;
auto matrix_e = view.matrix_e();
auto matrix_f = view.matrix_f();
ASSERT_EQ(matrix_e->num_cols(), num_cols_e);
ASSERT_EQ(matrix_e->num_rows(), num_rows);
ASSERT_EQ(matrix_f->num_cols(), num_cols_f);
ASSERT_EQ(matrix_f->num_rows(), num_rows);
Vector x(num_cols);
Vector x_left(num_cols_e);
Vector x_right(num_cols_f);
Vector y(num_rows);
CudaVector x_cuda(&context_, num_cols);
CudaVector x_left_cuda(&context_, num_cols_e);
CudaVector x_right_cuda(&context_, num_cols_f);
CudaVector y_cuda(&context_, num_rows);
Vector y_cuda_host(num_rows);
for (int i = 0; i < num_cols_e; ++i) {
x.setZero();
x_left.setZero();
y.setZero();
y_cuda.SetZero();
x[i] = 1.;
x_left[i] = 1.;
x_left_cuda.CopyFromCpu(x_left);
A_.RightMultiplyAndAccumulate(
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
matrix_e->RightMultiplyAndAccumulate(x_left_cuda, &y_cuda);
y_cuda.CopyTo(&y_cuda_host);
// There will be up to 1 non-zero product per row, thus we expect an exact
// match on 32-bit integer indices
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
}
for (int i = num_cols_e; i < num_cols; ++i) {
x.setZero();
x_right.setZero();
y.setZero();
y_cuda.SetZero();
x[i] = 1.;
x_right[i - num_cols_e] = 1.;
x_right_cuda.CopyFromCpu(x_right);
A_.RightMultiplyAndAccumulate(
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
matrix_f->RightMultiplyAndAccumulate(x_right_cuda, &y_cuda);
y_cuda.CopyTo(&y_cuda_host);
// There will be up to 1 non-zero product per row, thus we expect an exact
// match on 32-bit integer indices
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
}
}
// The E sub-matrix might have fewer row-blocks with cells than the F
// sub-matrix. This test matrix checks that this case is handled properly.
std::unique_ptr<BlockSparseMatrix> short_e_;
// In the case of a non-CRS-compatible F matrix, permuting values from block
// order to CRS order involves a binary search over the row-blocks of F.
// Having many row-blocks with no F cells is an edge case for this algorithm.
std::unique_ptr<BlockSparseMatrix> short_f_;
// With the F matrix being CRS-compatible, updating the values of the
// partitioned matrix view reduces to two host->device memcpys and uses a
// separate code path.
std::unique_ptr<BlockSparseMatrix> f_crs_compatible_;
ContextImpl context_;
};
TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortE) {
TestMatrix(*short_e_);
}
TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortF) {
TestMatrix(*short_f_);
}
TEST_F(CudaPartitionedBlockSparseCRSViewTest,
CreateUpdateValuesCrsCompatibleF) {
TestMatrix(*f_crs_compatible_);
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,226 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
//
// A CUDA sparse matrix linear operator.
// This include must come before any #ifndef check on Ceres compile options.
// clang-format off
#include "ceres/internal/config.h"
// clang-format on
#include "ceres/cuda_sparse_matrix.h"
#include <math.h>
#include <memory>
#include "ceres/block_sparse_matrix.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_kernels_vector_ops.h"
#include "ceres/cuda_vector.h"
#include "cuda_runtime_api.h"
#include "cusparse.h"
namespace ceres::internal {
namespace {
// Starting in CUDA 11.2.1, CUSPARSE_MV_ALG_DEFAULT was deprecated in favor of
// CUSPARSE_SPMV_ALG_DEFAULT.
#if CUDART_VERSION >= 11021
const auto kSpMVAlgorithm = CUSPARSE_SPMV_ALG_DEFAULT;
#else // CUDART_VERSION >= 11021
const auto kSpMVAlgorithm = CUSPARSE_MV_ALG_DEFAULT;
#endif // CUDART_VERSION >= 11021
size_t GetTempBufferSizeForOp(const cusparseHandle_t& handle,
const cusparseOperation_t op,
const cusparseDnVecDescr_t& x,
const cusparseDnVecDescr_t& y,
const cusparseSpMatDescr_t& A) {
size_t buffer_size;
const double alpha = 1.0;
const double beta = 1.0;
CHECK_NE(A, nullptr);
CHECK_EQ(cusparseSpMV_bufferSize(handle,
op,
&alpha,
A,
x,
&beta,
y,
CUDA_R_64F,
kSpMVAlgorithm,
&buffer_size),
CUSPARSE_STATUS_SUCCESS);
return buffer_size;
}
size_t GetTempBufferSize(const cusparseHandle_t& handle,
const cusparseDnVecDescr_t& left,
const cusparseDnVecDescr_t& right,
const cusparseSpMatDescr_t& A) {
CHECK_NE(A, nullptr);
return std::max(GetTempBufferSizeForOp(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE, right, left, A),
GetTempBufferSizeForOp(
handle, CUSPARSE_OPERATION_TRANSPOSE, left, right, A));
}
} // namespace
CudaSparseMatrix::CudaSparseMatrix(int num_cols,
CudaBuffer<int32_t>&& rows,
CudaBuffer<int32_t>&& cols,
ContextImpl* context)
: num_rows_(rows.size() - 1),
num_cols_(num_cols),
num_nonzeros_(cols.size()),
context_(context),
rows_(std::move(rows)),
cols_(std::move(cols)),
values_(context, num_nonzeros_),
spmv_buffer_(context) {
Initialize();
}
CudaSparseMatrix::CudaSparseMatrix(ContextImpl* context,
const CompressedRowSparseMatrix& crs_matrix)
: num_rows_(crs_matrix.num_rows()),
num_cols_(crs_matrix.num_cols()),
num_nonzeros_(crs_matrix.num_nonzeros()),
context_(context),
rows_(context, num_rows_ + 1),
cols_(context, num_nonzeros_),
values_(context, num_nonzeros_),
spmv_buffer_(context) {
rows_.CopyFromCpu(crs_matrix.rows(), num_rows_ + 1);
cols_.CopyFromCpu(crs_matrix.cols(), num_nonzeros_);
values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_);
Initialize();
}
CudaSparseMatrix::~CudaSparseMatrix() {
CHECK_EQ(cusparseDestroySpMat(descr_), CUSPARSE_STATUS_SUCCESS);
descr_ = nullptr;
CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_left_));
CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_right_));
}
void CudaSparseMatrix::CopyValuesFromCpu(
const CompressedRowSparseMatrix& crs_matrix) {
// There is no quick and easy way to verify that the structure is unchanged,
// but at least we can check that the size of the matrix and the number of
// nonzeros is unchanged.
CHECK_EQ(num_rows_, crs_matrix.num_rows());
CHECK_EQ(num_cols_, crs_matrix.num_cols());
CHECK_EQ(num_nonzeros_, crs_matrix.num_nonzeros());
values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_);
}
void CudaSparseMatrix::Initialize() {
CHECK(context_->IsCudaInitialized());
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
cusparseCreateCsr(&descr_,
num_rows_,
num_cols_,
num_nonzeros_,
rows_.data(),
cols_.data(),
values_.data(),
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ZERO,
CUDA_R_64F));
// Note: values_.data() is used as a non-null pointer to device memory.
// When there are no non-zero values, the data pointer of the values_ array
// will be nullptr; but in this case left/right products are trivial and the
// temporary buffer (and vector descriptors) are not required.
if (!num_nonzeros_) return;
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
cusparseCreateDnVec(
&descr_vec_left_, num_rows_, values_.data(), CUDA_R_64F));
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
cusparseCreateDnVec(
&descr_vec_right_, num_cols_, values_.data(), CUDA_R_64F));
size_t buffer_size = GetTempBufferSize(
context_->cusparse_handle_, descr_vec_left_, descr_vec_right_, descr_);
spmv_buffer_.Reserve(buffer_size);
}
void CudaSparseMatrix::SpMv(cusparseOperation_t op,
const cusparseDnVecDescr_t& x,
const cusparseDnVecDescr_t& y) const {
const double alpha = 1.0;
const double beta = 1.0;
CHECK_EQ(cusparseSpMV(context_->cusparse_handle_,
op,
&alpha,
descr_,
x,
&beta,
y,
CUDA_R_64F,
kSpMVAlgorithm,
spmv_buffer_.data()),
CUSPARSE_STATUS_SUCCESS);
}
void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x,
CudaVector* y) const {
DCHECK(GetTempBufferSize(
context_->cusparse_handle_, y->descr(), x.descr(), descr_) <=
spmv_buffer_.size());
SpMv(CUSPARSE_OPERATION_NON_TRANSPOSE, x.descr(), y->descr());
}
void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x,
CudaVector* y) const {
// TODO(Joydeep Biswas): We should consider storing a transposed copy of the
// matrix by converting CSR to CSC. From the cuSPARSE documentation:
// "In general, opA == CUSPARSE_OPERATION_NON_TRANSPOSE is 3x faster than opA
// != CUSPARSE_OPERATION_NON_TRANSPOSE"
DCHECK(GetTempBufferSize(
context_->cusparse_handle_, x.descr(), y->descr(), descr_) <=
spmv_buffer_.size());
SpMv(CUSPARSE_OPERATION_TRANSPOSE, x.descr(), y->descr());
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA

View File

@@ -0,0 +1,143 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
//
// A CUDA sparse matrix linear operator.
#ifndef CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
#define CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
// This include must come before any #ifndef check on Ceres compile options.
// clang-format off
#include "ceres/internal/config.h"
// clang-format on
#include <cstdint>
#include <memory>
#include <string>
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_vector.h"
#include "cusparse.h"
namespace ceres::internal {
// A sparse matrix hosted on the GPU in compressed row sparse format, with
// CUDA-accelerated operations.
// The user of the class must ensure that ContextImpl::InitCuda() has already
// been successfully called before using this class.
class CERES_NO_EXPORT CudaSparseMatrix {
public:
// Create a GPU copy of the matrix provided.
CudaSparseMatrix(ContextImpl* context,
const CompressedRowSparseMatrix& crs_matrix);
// Create matrix from existing row and column index buffers.
// Values are left uninitialized.
CudaSparseMatrix(int num_cols,
CudaBuffer<int32_t>&& rows,
CudaBuffer<int32_t>&& cols,
ContextImpl* context);
~CudaSparseMatrix();
// Left/right products use an internal buffer and are not thread-safe.
// y = y + Ax;
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const;
// y = y + A'x;
void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const;
int num_rows() const { return num_rows_; }
int num_cols() const { return num_cols_; }
int num_nonzeros() const { return num_nonzeros_; }
const int32_t* rows() const { return rows_.data(); }
const int32_t* cols() const { return cols_.data(); }
const double* values() const { return values_.data(); }
int32_t* mutable_rows() { return rows_.data(); }
int32_t* mutable_cols() { return cols_.data(); }
double* mutable_values() { return values_.data(); }
// If subsequent uses of this matrix involve only numerical changes and no
// structural changes, then this method can be used to copy the updated
// non-zero values -- the row and column index arrays are kept the same. It
// is the caller's responsibility to ensure that the sparsity structure of the
// matrix is unchanged.
void CopyValuesFromCpu(const CompressedRowSparseMatrix& crs_matrix);
const cusparseSpMatDescr_t& descr() const { return descr_; }
private:
// Disable copy and assignment.
CudaSparseMatrix(const CudaSparseMatrix&) = delete;
CudaSparseMatrix& operator=(const CudaSparseMatrix&) = delete;
// Allocate temporary buffer for left/right products, create cuSPARSE
// descriptors
void Initialize();
// y = y + op(M)x. op must be either CUSPARSE_OPERATION_NON_TRANSPOSE or
// CUSPARSE_OPERATION_TRANSPOSE.
void SpMv(cusparseOperation_t op,
const cusparseDnVecDescr_t& x,
const cusparseDnVecDescr_t& y) const;
int num_rows_ = 0;
int num_cols_ = 0;
int num_nonzeros_ = 0;
ContextImpl* context_ = nullptr;
// CSR row indices.
CudaBuffer<int32_t> rows_;
// CSR column indices.
CudaBuffer<int32_t> cols_;
// CSR values.
CudaBuffer<double> values_;
// CuSparse object that describes this matrix.
cusparseSpMatDescr_t descr_ = nullptr;
// Dense vector descriptors for pointer interface
cusparseDnVecDescr_t descr_vec_left_ = nullptr;
cusparseDnVecDescr_t descr_vec_right_ = nullptr;
mutable CudaBuffer<uint8_t> spmv_buffer_;
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
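To make the interface above concrete, a minimal usage sketch (illustrative, not part of the patch): it assumes a ContextImpl on which InitCuda() has already succeeded and uses only methods declared in this header and in cuda_vector.h:

#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/context_impl.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/eigen.h"

namespace ceres::internal {

// Computes y = A x on the GPU for a CPU-resident CRS matrix.
Vector GpuSpMv(const CompressedRowSparseMatrix& A_crs,
               const Vector& x_cpu,
               ContextImpl* context) {
  CudaSparseMatrix A_gpu(context, A_crs);  // copies structure and values
  CudaVector x_gpu(context, A_gpu.num_cols());
  CudaVector y_gpu(context, A_gpu.num_rows());
  x_gpu.CopyFromCpu(x_cpu);
  y_gpu.SetZero();
  // Accumulating product: y += A x. Since y was zeroed above, y == A x.
  A_gpu.RightMultiplyAndAccumulate(x_gpu, &y_gpu);
  Vector y_cpu;
  y_gpu.CopyTo(&y_cpu);
  return y_cpu;
}

}  // namespace ceres::internal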

View File

@@ -0,0 +1,286 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_sparse_matrix.h"
#include <string>
#include "ceres/block_sparse_matrix.h"
#include "ceres/casts.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_least_squares_problems.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
namespace ceres {
namespace internal {
#ifndef CERES_NO_CUDA
class CudaSparseMatrixTest : public ::testing::Test {
protected:
void SetUp() final {
std::string message;
CHECK(context_.InitCuda(&message))
<< "InitCuda() failed because: " << message;
std::unique_ptr<LinearLeastSquaresProblem> problem =
CreateLinearLeastSquaresProblemFromId(2);
CHECK(problem != nullptr);
A_.reset(down_cast<BlockSparseMatrix*>(problem->A.release()));
CHECK(A_ != nullptr);
CHECK(problem->b != nullptr);
CHECK(problem->x != nullptr);
b_.resize(A_->num_rows());
for (int i = 0; i < A_->num_rows(); ++i) {
b_[i] = problem->b[i];
}
x_.resize(A_->num_cols());
for (int i = 0; i < A_->num_cols(); ++i) {
x_[i] = problem->x[i];
}
CHECK_EQ(A_->num_rows(), b_.rows());
CHECK_EQ(A_->num_cols(), x_.rows());
}
std::unique_ptr<BlockSparseMatrix> A_;
Vector x_;
Vector b_;
ContextImpl context_;
};
TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) {
std::string message;
auto A_crs = A_->ToCompressedRowSparseMatrix();
CudaSparseMatrix A_gpu(&context_, *A_crs);
CudaVector x_gpu(&context_, A_gpu.num_cols());
CudaVector res_gpu(&context_, A_gpu.num_rows());
x_gpu.CopyFromCpu(x_);
const Vector minus_b = -b_;
// res = -b
res_gpu.CopyFromCpu(minus_b);
// res += A * x
A_gpu.RightMultiplyAndAccumulate(x_gpu, &res_gpu);
Vector res;
res_gpu.CopyTo(&res);
Vector res_expected = minus_b;
A_->RightMultiplyAndAccumulate(x_.data(), res_expected.data());
EXPECT_LE((res - res_expected).norm(),
std::numeric_limits<double>::epsilon() * 1e3);
}
TEST(CudaSparseMatrix, CopyValuesFromCpu) {
// A1:
// [ 1 1 0 0
// 0 1 1 0]
// A2:
// [ 1 2 0 0
// 0 3 4 0]
// b: [1 2 3 4]'
// A1 * b = [3 5]'
// A2 * b = [5 18]'
TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1});
TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
Vector b(4);
b << 1, 2, 3, 4;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
CudaSparseMatrix A_gpu(&context, *A1_crs);
CudaVector b_gpu(&context, A1.num_cols());
CudaVector x_gpu(&context, A1.num_rows());
b_gpu.CopyFromCpu(b);
x_gpu.SetZero();
Vector x_expected(2);
x_expected << 3, 5;
A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
Vector x_computed;
x_gpu.CopyTo(&x_computed);
EXPECT_EQ(x_computed, x_expected);
auto A2_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A2);
A_gpu.CopyValuesFromCpu(*A2_crs);
x_gpu.SetZero();
x_expected << 5, 18;
A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
x_gpu.CopyTo(&x_computed);
EXPECT_EQ(x_computed, x_expected);
}
TEST(CudaSparseMatrix, RightMultiplyAndAccumulate) {
// A:
// [ 1 2 0 0
// 0 3 4 0]
// b: [1 2 3 4]'
// A * b = [5 18]'
TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
Vector b(4);
b << 1, 2, 3, 4;
Vector x_expected(2);
x_expected << 5, 18;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, A.num_cols());
CudaVector x_gpu(&context, A.num_rows());
b_gpu.CopyFromCpu(b);
x_gpu.SetZero();
A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
Vector x_computed;
x_gpu.CopyTo(&x_computed);
EXPECT_EQ(x_computed, x_expected);
}
TEST(CudaSparseMatrix, LeftMultiplyAndAccumulate) {
// A:
// [ 1 2 0 0
// 0 3 4 0]
// b: [1 2]'
// A'* b = [1 8 8 0]'
TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
Vector b(2);
b << 1, 2;
Vector x_expected(4);
x_expected << 1, 8, 8, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, A.num_rows());
CudaVector x_gpu(&context, A.num_cols());
b_gpu.CopyFromCpu(b);
x_gpu.SetZero();
A_gpu.LeftMultiplyAndAccumulate(b_gpu, &x_gpu);
Vector x_computed;
x_gpu.CopyTo(&x_computed);
EXPECT_EQ(x_computed, x_expected);
}
// If there are numerical errors due to synchronization issues, they will
// show up when testing with large matrices, since each operation will take
// significant time, hopefully revealing any such problems.
TEST(CudaSparseMatrix, LargeMultiplyAndAccumulate) {
// Create a large NxN matrix A that has the following structure:
// In row i, only columns i and i+1 are non-zero.
// A_{i, i} = A_{i, i+1} = 1.
// There will be 2 * N - 1 non-zero elements in A.
// X = [1:N]
// Right multiply test:
// b = A * X
// Left multiply test:
// b = A' * X
const int N = 10 * 1000 * 1000;
const int num_non_zeros = 2 * N - 1;
std::vector<int> row_indices(num_non_zeros);
std::vector<int> col_indices(num_non_zeros);
std::vector<double> values(num_non_zeros);
for (int i = 0; i < N; ++i) {
row_indices[2 * i] = i;
col_indices[2 * i] = i;
values[2 * i] = 1.0;
if (i + 1 < N) {
col_indices[2 * i + 1] = i + 1;
row_indices[2 * i + 1] = i;
values[2 * i + 1] = 1;
}
}
TripletSparseMatrix A(N, N, row_indices, col_indices, values);
Vector x(N);
for (int i = 0; i < N; ++i) {
x[i] = i + 1;
}
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
CudaSparseMatrix A_gpu(&context, *A_crs);
CudaVector b_gpu(&context, N);
CudaVector x_gpu(&context, N);
x_gpu.CopyFromCpu(x);
// First check RightMultiply.
{
b_gpu.SetZero();
A_gpu.RightMultiplyAndAccumulate(x_gpu, &b_gpu);
Vector b_computed;
b_gpu.CopyTo(&b_computed);
for (int i = 0; i < N; ++i) {
if (i + 1 < N) {
EXPECT_EQ(b_computed[i], 2 * (i + 1) + 1);
} else {
EXPECT_EQ(b_computed[i], i + 1);
}
}
}
// Next check LeftMultiply.
{
b_gpu.SetZero();
A_gpu.LeftMultiplyAndAccumulate(x_gpu, &b_gpu);
Vector b_computed;
b_gpu.CopyTo(&b_computed);
for (int i = 0; i < N; ++i) {
if (i > 0) {
EXPECT_EQ(b_computed[i], 2 * (i + 1) - 1);
} else {
EXPECT_EQ(b_computed[i], i + 1);
}
}
}
}
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres

View File

@@ -0,0 +1,335 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_
#define CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_buffer.h"
namespace ceres::internal {
// Most contemporary CUDA devices are capable of simultaneous code execution and
// host-to-device transfer. This class copies batches of data to GPU memory and
// executes processing of copied data in parallel (asynchronously).
// Data is copied to a fixed-size buffer on the GPU (holding at most
// max_buffer_size values), and this memory is re-used once the previous
// batch of values has been processed by the user-provided callback.
// The host-to-device copy uses a temporary buffer if required. Each batch of
// values has a size of kValuesPerBatch, except possibly the last one.
template <typename T>
class CERES_NO_EXPORT CudaStreamedBuffer {
public:
// If the hardware supports only one concurrent host-to-device copy, or if a
// single host-to-device copy is able to reach peak bandwidth, two streams
// are sufficient to reach maximum efficiency:
// - If transferring a batch of values takes more time than processing it on
// the gpu, then at every moment one of the streams will be transferring
// data and the other stream will be either processing data or idle; the
// whole process will be bounded by the host-to-device copy.
// - If transferring a batch of values takes less time than processing it on
// the gpu, then at every moment one of the streams will be processing data
// and the other stream will be either performing computations or
// transferring data, and the whole process will be bounded by computations.
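// For intuition, with illustrative (not measured) numbers: if copying one
// batch takes 10 ms and processing it takes 4 ms, the two-stream pipeline
// settles into a copy-bound steady state and N batches take roughly
// N * 10 ms + 4 ms, versus N * 14 ms for a fully serial copy-then-process
// loop.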
static constexpr int kNumBatches = 2;
// max_buffer_size is the maximal size (in elements of type T) of the array
// to be pre-allocated in gpu memory. The size of the array determines the
// size of the batch of values for simultaneous copying and processing. It
// should be large enough to allow highly-parallel execution of user
// kernels; making it too large increases latency.
CudaStreamedBuffer(ContextImpl* context, const int max_buffer_size)
: kValuesPerBatch(max_buffer_size / kNumBatches),
context_(context),
values_gpu_(context, kValuesPerBatch * kNumBatches) {
static_assert(ContextImpl::kNumCudaStreams >= kNumBatches);
CHECK_GE(max_buffer_size, kNumBatches);
// Pre-allocate a buffer of page-locked memory for transfers from regular
// cpu memory. Because we will only be writing into that buffer from the
// cpu, the memory is allocated with the cudaHostAllocWriteCombined flag.
CHECK_EQ(cudaSuccess,
cudaHostAlloc(&values_cpu_pinned_,
sizeof(T) * kValuesPerBatch * kNumBatches,
cudaHostAllocWriteCombined));
for (auto& e : copy_finished_) {
CHECK_EQ(cudaSuccess,
cudaEventCreateWithFlags(&e, cudaEventDisableTiming));
}
}
CudaStreamedBuffer(const CudaStreamedBuffer&) = delete;
~CudaStreamedBuffer() {
CHECK_EQ(cudaSuccess, cudaFreeHost(values_cpu_pinned_));
for (auto& e : copy_finished_) {
CHECK_EQ(cudaSuccess, cudaEventDestroy(e));
}
}
// Transfers num_values from the host-memory pointer `from`, calling
// callback(device_pointer, size_of_batch, offset_of_batch, stream_to_use)
// after scheduling the transfer of each batch of data. The user-provided
// callback should process the data at device_pointer only in the
// stream_to_use stream (device_pointer will be re-used in the next
// callback invocation with the same stream).
//
// The two diagrams below describe operation in two possible scenarios,
// depending on whether the input data is stored in page-locked memory. In
// this example we will have max_buffer_size = 2 * K, num_values = N * K and
// a callback
// scheduling a single asynchronous launch of
// Kernel<<..., stream_to_use>>(device_pointer,
// size_of_batch,
// offset_of_batch)
//
// a. Copying from page-locked memory
// In this case no copy on the host-side is necessary, and this method just
// schedules a bunch of interleaved memory copies and callback invocations:
//
// cudaStreamSynchronize(context->DefaultStream());
// - Iteration #0:
// - cudaMemcpyAsync(values_gpu_, from, K * sizeof(T), H->D, stream_0)
// - callback(values_gpu_, K, 0, stream_0)
// - Iteration #1:
// - cudaMemcpyAsync(values_gpu_ + K, from + K, K * sizeof(T), H->D,
// stream_1)
// - callback(values_gpu_ + K, K, K, stream_1)
// - Iteration #2:
// - cudaMemcpyAsync(values_gpu_, from + 2 * K, K * sizeof(T), H->D,
// stream_0)
// - callback(values_gpu_, K, 2 * K, stream_0)
// - Iteration #3:
// - cudaMemcpyAsync(values_gpu_ + K, from + 3 * K, K * sizeof(T), H->D,
// stream_1)
// - callback(values_gpu_ + K, K, 3 * K, stream_1)
// ...
// - Iteration #i:
// - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, from + i * K, K *
// sizeof(T), H->D, stream_(i % 2))
// - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2))
// ...
// cudaStreamSynchronize(stream_0)
// cudaStreamSynchronize(stream_1)
//
// This sequence of calls results in the following activity on the GPU
// (assuming that the kernel invoked by the callback takes less time than
// the host-to-device copy):
// +-------------------+-------------------+
// | Stream #0         | Stream #1         |
// +-------------------+-------------------+
// | Copy host->device |                   |
// |                   |                   |
// |                   |                   |
// +-------------------+-------------------+
// | Kernel            | Copy host->device |
// +-------------------+                   |
// |                   |                   |
// +-------------------+-------------------+
// | Copy host->device | Kernel            |
// |                   +-------------------+
// |                   |                   |
// +-------------------+-------------------+
// | Kernel            | Copy host->device |
// |                  ...                  |
// +---------------------------------------+
//
// b. Copying from regular memory
// In this case a copy from regular memory to page-locked memory is required
// in order to get asynchronous operation. Because the pinned memory on the
// host side is re-used, additional synchronization is required. On each
// iteration the following actions are performed:
// - Wait until the previous copy operation in the stream has completed
// - Copy a batch of values from the input array into pinned memory
// - Asynchronously launch the host-to-device copy
// - Set up an event for synchronization on copy completion
// - Invoke the callback (which launches a kernel asynchronously)
//
// Invocations are performed with the following arguments:
// cudaStreamSynchronize(context->DefaultStream());
// - Iteration #0:
// - cudaEventSynchronize(copy_finished_0)
// - std::copy_n(from, K, values_cpu_pinned_)
// - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D,
// stream_0)
// - cudaEventRecord(copy_finished_0, stream_0)
// - callback(values_gpu_, K, 0, stream_0)
// - Iteration #1:
// - cudaEventSynchronize(copy_finished_1)
// - std::copy_n(from + K, K, values_cpu_pinned_ + K)
// - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K *
// sizeof(T), H->D, stream_1)
// - cudaEventRecord(copy_finished_1, stream_1)
// - callback(values_gpu_ + K, K, K, stream_1)
// - Iteration #2:
// - cudaEventSynchronize(copy_finished_0)
// - std::copy_n(from + 2 * K, K, values_cpu_pinned_)
// - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D,
// stream_0)
// - cudaEventRecord(copy_finished_0, stream_0)
// - callback(values_gpu_, K, 2 * K, stream_0)
// - Iteration #3:
// - cudaEventSynchronize(copy_finished_1)
// - std::copy_n(from + 3 * K, K, values_cpu_pinned_ + K)
// - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K *
// sizeof(T), H->D, stream_1)
// - cudaEventRecord(copy_finished_1, stream_1)
// - callback(values_gpu_ + K, K, 3 * K, stream_1)
// ...
// - Iteration #i:
// - cudaEventSynchronize(copy_finished_(i % 2))
// - std::copy_n(from + i * K, K, values_cpu_pinned_ + (i % 2) * K)
// - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, values_cpu_pinned_ + (i %
// 2) * K, K * sizeof(T), H->D, stream_(i % 2))
// - cudaEventRecord(copy_finished_(i % 2), stream_(i % 2))
// - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2))
// ...
// cudaStreamSynchronize(stream_0)
// cudaStreamSynchronize(stream_1)
//
// This sequence of calls results in the following activity on the CPU and
// GPU (assuming that the kernel invoked by the callback takes less time
// than the host-to-device copy and the copy within CPU memory, and that the
// copy within CPU memory is faster than the host-to-device copy):
// +----------------------------+-------------------+-------------------+
// | CPU                        | Stream #0         | Stream #1         |
// +----------------------------+-------------------+-------------------+
// | Copy to pinned memory      |                   |                   |
// |                            |                   |                   |
// +----------------------------+-------------------|                   |
// | Copy to pinned memory      | Copy host->device |                   |
// |                            |                   |                   |
// +----------------------------+                   |                   |
// | Waiting previous h->d copy |                   |                   |
// +----------------------------+-------------------+-------------------+
// | Copy to pinned memory      | Kernel            | Copy host->device |
// |                            +-------------------+                   |
// +----------------------------+                   |                   |
// | Waiting previous h->d copy |                   |                   |
// +----------------------------+-------------------+-------------------+
// | Copy to pinned memory      | Copy host->device | Kernel            |
// |                            |                   +-------------------+
// |                            ...                 ...                 |
// +----------------------------+---------------------------------------+
//
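// A minimal usage sketch (illustrative only; SquareKernel and its launch
// configuration are assumptions, not part of this class):
//
//   CudaStreamedBuffer<double> buffer(&context, 1024);
//   buffer.CopyToGpu(host_values,
//                    num_values,
//                    [](double* d, int size, int offset,
//                       cudaStream_t stream) {
//                      SquareKernel<<<(size + 255) / 256, 256, 0, stream>>>(
//                          d, size, offset);
//                    });
//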
template <typename Fun>
void CopyToGpu(const T* from, const int num_values, Fun&& callback) {
// This synchronization is not required in some cases, but we perform it in
// order to avoid the situation where the user callback depends on data that
// is still being computed in the default stream.
CHECK_EQ(cudaSuccess, cudaStreamSynchronize(context_->DefaultStream()));
// If the pointer to the input data does not correspond to page-locked
// memory, the host-to-device memory copy might be executed synchronously
// (with a copy to pinned memory happening inside the driver). In that case
// we copy via a pre-allocated array of page-locked memory.
const bool copy_to_pinned_memory = MemoryTypeResultsInSynchronousCopy(from);
T* batch_values_gpu[kNumBatches];
T* batch_values_cpu[kNumBatches];
auto streams = context_->streams_;
for (int i = 0; i < kNumBatches; ++i) {
batch_values_gpu[i] = values_gpu_.data() + kValuesPerBatch * i;
batch_values_cpu[i] = values_cpu_pinned_ + kValuesPerBatch * i;
}
int batch_id = 0;
for (int offset = 0; offset < num_values; offset += kValuesPerBatch) {
const int num_values_batch =
std::min(num_values - offset, kValuesPerBatch);
const T* batch_from = from + offset;
T* batch_to = batch_values_gpu[batch_id];
auto stream = streams[batch_id];
auto copy_finished = copy_finished_[batch_id];
if (copy_to_pinned_memory) {
// Copying values to the temporary buffer may start only after the previous
// copy from the temporary buffer to the device has completed.
CHECK_EQ(cudaSuccess, cudaEventSynchronize(copy_finished));
std::copy_n(batch_from, num_values_batch, batch_values_cpu[batch_id]);
batch_from = batch_values_cpu[batch_id];
}
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(batch_to,
batch_from,
sizeof(T) * num_values_batch,
cudaMemcpyHostToDevice,
stream));
if (copy_to_pinned_memory) {
// The next copy to the temporary buffer can start as soon as the
// asynchronous copy has completed (and might start before the kernels
// launched in the stream by the user-supplied callback have completed).
// No explicit synchronization is required when copying data from
// page-locked memory, because the memory copy and the user kernel
// execution on the corresponding part of the values_gpu_ array are
// serialized by the stream.
CHECK_EQ(cudaSuccess, cudaEventRecord(copy_finished, stream));
}
callback(batch_to, num_values_batch, offset, stream);
batch_id = (batch_id + 1) % kNumBatches;
}
// Explicitly synchronize on all CUDA streams that were utilized.
for (int i = 0; i < kNumBatches; ++i) {
CHECK_EQ(cudaSuccess, cudaStreamSynchronize(streams[i]));
}
}
private:
// All host-to-device copies need to be completely asynchronous, which
// requires the source memory to be allocated in page-locked memory.
static bool MemoryTypeResultsInSynchronousCopy(const void* ptr) {
cudaPointerAttributes attributes;
auto status = cudaPointerGetAttributes(&attributes, ptr);
#if CUDART_VERSION < 11000
// In CUDA versions prior to 11, a call to cudaPointerGetAttributes with a
// host pointer returns cudaErrorInvalidValue.
if (status == cudaErrorInvalidValue) {
return true;
}
#endif
CHECK_EQ(status, cudaSuccess);
// This class only supports CPU memory as a source.
CHECK_NE(attributes.type, cudaMemoryTypeDevice);
// If the host memory was allocated (or registered) with the CUDA API, or is
// managed memory, then the call to cudaMemcpyAsync will be asynchronous. In
// the case of managed memory it might be slightly better to perform a
// single call of the user-provided callback (and hope that page migration
// will provide a similar throughput with zero effort on our side).
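// For example (illustrative): memory obtained from malloc() or operator
// new is cudaMemoryTypeUnregistered and takes the staging path, while
// memory from cudaHostAlloc() or cudaMallocManaged() can be copied
// asynchronously as-is.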
return attributes.type == cudaMemoryTypeUnregistered;
}
const int kValuesPerBatch;
ContextImpl* context_ = nullptr;
CudaBuffer<T> values_gpu_;
T* values_cpu_pinned_ = nullptr;
cudaEvent_t copy_finished_[kNumBatches] = {nullptr};
};
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_


@@ -0,0 +1,169 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#include "ceres/internal/config.h"
#ifndef CERES_NO_CUDA
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <numeric>
#include "ceres/cuda_streamed_buffer.h"
namespace ceres::internal {
TEST(CudaStreamedBufferTest, IntegerCopy) {
// Offsets and sizes of batches supplied to callback
std::vector<std::pair<int, int>> batches;
const int kMaxTemporaryArraySize = 16;
const int kInputSize = kMaxTemporaryArraySize * 7 + 3;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
std::vector<int> inputs(kInputSize);
std::vector<int> outputs(kInputSize, -1);
std::iota(inputs.begin(), inputs.end(), 0);
CudaStreamedBuffer<int> streamed_buffer(&context, kMaxTemporaryArraySize);
streamed_buffer.CopyToGpu(inputs.data(),
kInputSize,
[&outputs, &batches](const int* device_pointer,
int size,
int offset,
cudaStream_t stream) {
batches.emplace_back(offset, size);
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(outputs.data() + offset,
device_pointer,
sizeof(int) * size,
cudaMemcpyDeviceToHost,
stream));
});
// All operations in all streams should be completed when CopyToGpu returns
// control to the caller.
for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) {
CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i]));
}
// Check if every element was visited
for (int i = 0; i < kInputSize; ++i) {
CHECK_EQ(outputs[i], i);
}
// Check that there is no overlap between batches
std::sort(batches.begin(), batches.end());
const int num_batches = batches.size();
for (int i = 0; i < num_batches; ++i) {
const auto [begin, size] = batches[i];
const int end = begin + size;
CHECK_GE(begin, 0);
CHECK_LT(begin, kInputSize);
CHECK_GT(size, 0);
CHECK_LE(end, kInputSize);
if (i + 1 == num_batches) continue;
CHECK_EQ(end, batches[i + 1].first);
}
}
TEST(CudaStreamedBufferTest, IntegerNoCopy) {
// Offsets and sizes of batches supplied to callback
std::vector<std::pair<int, int>> batches;
const int kMaxTemporaryArraySize = 16;
const int kInputSize = kMaxTemporaryArraySize * 7 + 3;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
int* inputs;
int* outputs;
CHECK_EQ(cudaSuccess,
cudaHostAlloc(
&inputs, sizeof(int) * kInputSize, cudaHostAllocWriteCombined));
CHECK_EQ(
cudaSuccess,
cudaHostAlloc(&outputs, sizeof(int) * kInputSize, cudaHostAllocDefault));
std::fill(outputs, outputs + kInputSize, -1);
std::iota(inputs, inputs + kInputSize, 0);
CudaStreamedBuffer<int> streamed_buffer(&context, kMaxTemporaryArraySize);
streamed_buffer.CopyToGpu(inputs,
kInputSize,
[outputs, &batches](const int* device_pointer,
int size,
int offset,
cudaStream_t stream) {
batches.emplace_back(offset, size);
CHECK_EQ(cudaSuccess,
cudaMemcpyAsync(outputs + offset,
device_pointer,
sizeof(int) * size,
cudaMemcpyDeviceToHost,
stream));
});
// All operations in all streams should be completed when CopyToGpu returns
// control to the caller.
for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) {
CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i]));
}
// Check if every element was visited
for (int i = 0; i < kInputSize; ++i) {
CHECK_EQ(outputs[i], i);
}
// Check that there is no overlap between batches
std::sort(batches.begin(), batches.end());
const int num_batches = batches.size();
for (int i = 0; i < num_batches; ++i) {
const auto [begin, size] = batches[i];
const int end = begin + size;
CHECK_GE(begin, 0);
CHECK_LT(begin, kInputSize);
CHECK_GT(size, 0);
CHECK_LE(end, kInputSize);
if (i + 1 == num_batches) continue;
CHECK_EQ(end, batches[i + 1].first);
}
CHECK_EQ(cudaSuccess, cudaFreeHost(inputs));
CHECK_EQ(cudaSuccess, cudaFreeHost(outputs));
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA


@@ -0,0 +1,185 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
//
// A simple CUDA vector class.
// This include must come before any #ifndef check on Ceres compile options.
// clang-format off
#include "ceres/internal/config.h"
// clang-format on
#include <math.h>
#include "ceres/context_impl.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_kernels_vector_ops.h"
#include "ceres/cuda_vector.h"
#include "cublas_v2.h"
namespace ceres::internal {
CudaVector::CudaVector(ContextImpl* context, int size)
: context_(context), data_(context, size) {
DCHECK_NE(context, nullptr);
DCHECK(context->IsCudaInitialized());
Resize(size);
}
CudaVector::CudaVector(CudaVector&& other)
: num_rows_(other.num_rows_),
context_(other.context_),
data_(std::move(other.data_)),
descr_(other.descr_) {
other.num_rows_ = 0;
other.descr_ = nullptr;
}
CudaVector& CudaVector::operator=(const CudaVector& other) {
if (this != &other) {
Resize(other.num_rows());
data_.CopyFromGPUArray(other.data_.data(), num_rows_);
}
return *this;
}
void CudaVector::DestroyDescriptor() {
if (descr_ != nullptr) {
CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS);
descr_ = nullptr;
}
}
CudaVector::~CudaVector() { DestroyDescriptor(); }
void CudaVector::Resize(int size) {
data_.Reserve(size);
num_rows_ = size;
DestroyDescriptor();
CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F),
CUSPARSE_STATUS_SUCCESS);
}
double CudaVector::Dot(const CudaVector& x) const {
double result = 0;
CHECK_EQ(cublasDdot(context_->cublas_handle_,
num_rows_,
data_.data(),
1,
x.data(),
1,
&result),
CUBLAS_STATUS_SUCCESS)
<< "CuBLAS cublasDdot failed.";
return result;
}
double CudaVector::Norm() const {
double result = 0;
CHECK_EQ(cublasDnrm2(
context_->cublas_handle_, num_rows_, data_.data(), 1, &result),
CUBLAS_STATUS_SUCCESS)
<< "CuBLAS cublasDnrm2 failed.";
return result;
}
void CudaVector::CopyFromCpu(const double* x) {
data_.CopyFromCpu(x, num_rows_);
}
void CudaVector::CopyFromCpu(const Vector& x) {
if (x.rows() != num_rows_) {
Resize(x.rows());
}
CopyFromCpu(x.data());
}
void CudaVector::CopyTo(Vector* x) const {
CHECK(x != nullptr);
x->resize(num_rows_);
data_.CopyToCpu(x->data(), num_rows_);
}
void CudaVector::CopyTo(double* x) const {
CHECK(x != nullptr);
data_.CopyToCpu(x, num_rows_);
}
void CudaVector::SetZero() {
// Allow empty vector to be zeroed
if (num_rows_ == 0) return;
CHECK(data_.data() != nullptr);
CudaSetZeroFP64(data_.data(), num_rows_, context_->DefaultStream());
}
void CudaVector::Axpby(double a, const CudaVector& x, double b) {
if (&x == this) {
Scale(a + b);
return;
}
CHECK_EQ(num_rows_, x.num_rows_);
if (b != 1.0) {
// First scale y by b.
CHECK_EQ(
cublasDscal(context_->cublas_handle_, num_rows_, &b, data_.data(), 1),
CUBLAS_STATUS_SUCCESS)
<< "CuBLAS cublasDscal failed.";
}
// Then add a * x to y.
CHECK_EQ(cublasDaxpy(context_->cublas_handle_,
num_rows_,
&a,
x.data(),
1,
data_.data(),
1),
CUBLAS_STATUS_SUCCESS)
<< "CuBLAS cublasDaxpy failed.";
}
void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) {
CudaDtDxpy(
data_.data(), D.data(), x.data(), num_rows_, context_->DefaultStream());
}
void CudaVector::Scale(double s) {
CHECK_EQ(
cublasDscal(context_->cublas_handle_, num_rows_, &s, data_.data(), 1),
CUBLAS_STATUS_SUCCESS)
<< "CuBLAS cublasDscal failed.";
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA


@@ -0,0 +1,193 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
//
// A simple CUDA vector class.
#ifndef CERES_INTERNAL_CUDA_VECTOR_H_
#define CERES_INTERNAL_CUDA_VECTOR_H_
// This include must come before any #ifndef check on Ceres compile options.
// clang-format off
#include "ceres/internal/config.h"
// clang-format on
#include <math.h>
#include <memory>
#include <string>
#include "ceres/context_impl.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
#ifndef CERES_NO_CUDA
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_kernels_vector_ops.h"
#include "ceres/internal/eigen.h"
#include "cublas_v2.h"
#include "cusparse.h"
namespace ceres::internal {
// An Nx1 vector, denoted y, hosted on the GPU, with CUDA-accelerated
// operations.
class CERES_NO_EXPORT CudaVector {
public:
// Creates a pre-allocated vector of size N. The caller must ensure that
// InitCuda() has already been successfully called on context before
// invoking this constructor.
CudaVector(ContextImpl* context, int size);
CudaVector(CudaVector&& other);
~CudaVector();
void Resize(int size);
// Perform a deep copy of the vector.
CudaVector& operator=(const CudaVector&);
// Return the inner product x' * y.
double Dot(const CudaVector& x) const;
// Return the L2 norm of the vector (||y||_2).
double Norm() const;
// Set all elements to zero.
void SetZero();
// Copy from Eigen vector.
void CopyFromCpu(const Vector& x);
// Copy from CPU memory array.
void CopyFromCpu(const double* x);
// Copy to Eigen vector.
void CopyTo(Vector* x) const;
// Copy to CPU memory array. It is the caller's responsibility to ensure
// that the array is large enough.
void CopyTo(double* x) const;
// y = a * x + b * y.
void Axpby(double a, const CudaVector& x, double b);
// y = diag(d)' * diag(d) * x + y.
void DtDxpy(const CudaVector& D, const CudaVector& x);
// y = s * y.
void Scale(double s);
int num_rows() const { return num_rows_; }
int num_cols() const { return 1; }
const double* data() const { return data_.data(); }
double* mutable_data() { return data_.data(); }
const cusparseDnVecDescr_t& descr() const { return descr_; }
private:
CudaVector(const CudaVector&) = delete;
void DestroyDescriptor();
int num_rows_ = 0;
ContextImpl* context_ = nullptr;
CudaBuffer<double> data_;
// CuSparse object that describes this dense vector.
cusparseDnVecDescr_t descr_ = nullptr;
};
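// A minimal usage sketch (illustrative only; mirrors the unit tests):
//
//   ContextImpl context;
//   std::string message;
//   CHECK(context.InitCuda(&message)) << message;
//   CudaVector x(&context, 3);
//   Vector x_cpu(3);
//   x_cpu << 1, 2, 3;
//   x.CopyFromCpu(x_cpu);
//   const double norm = x.Norm();  // sqrt(1 + 4 + 9)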
// BLAS level-1 operations on CUDA vectors. These functions are needed as an
// abstraction layer so that we can use different versions of a vector-style
// object in the conjugate gradients linear solver.
// The context and num_threads arguments are not used by the CUDA
// implementation; the context embedded in the CudaVector is used instead.
inline double Norm(const CudaVector& x,
ContextImpl* context = nullptr,
int num_threads = 1) {
(void)context;
(void)num_threads;
return x.Norm();
}
inline void SetZero(CudaVector& x,
ContextImpl* context = nullptr,
int num_threads = 1) {
(void)context;
(void)num_threads;
x.SetZero();
}
inline void Axpby(double a,
const CudaVector& x,
double b,
const CudaVector& y,
CudaVector& z,
ContextImpl* context = nullptr,
int num_threads = 1) {
(void)context;
(void)num_threads;
if (&x == &y && &y == &z) {
// z = (a + b) * z;
z.Scale(a + b);
} else if (&x == &z) {
// x is aliased to z.
// z = x
// = b * y + a * x;
z.Axpby(b, y, a);
} else if (&y == &z) {
// y is aliased to z.
// z = y = a * x + b * y;
z.Axpby(a, x, b);
} else {
// General case: all inputs and outputs are distinct.
z = y;
z.Axpby(a, x, b);
}
}
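// For example (names illustrative), Axpby(2.0, x, 3.0, y, z) computes
// z = 2 * x + 3 * y, dispatching on the aliasing of x, y and z as above.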
inline double Dot(const CudaVector& x,
const CudaVector& y,
ContextImpl* context = nullptr,
int num_threads = 1) {
(void)context;
(void)num_threads;
return x.Dot(y);
}
inline void Copy(const CudaVector& from,
CudaVector& to,
ContextImpl* context = nullptr,
int num_threads = 1) {
(void)context;
(void)num_threads;
to = from;
}
} // namespace ceres::internal
#endif // CERES_NO_CUDA
#endif // CERES_INTERNAL_CUDA_VECTOR_H_


@@ -0,0 +1,423 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_vector.h"
#include <string>
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
namespace ceres {
namespace internal {
#ifndef CERES_NO_CUDA
TEST(CudaVector, Creation) {
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x(&context, 1000);
EXPECT_EQ(x.num_rows(), 1000);
EXPECT_NE(x.data(), nullptr);
}
TEST(CudaVector, CopyVector) {
Vector x(3);
x << 1, 2, 3;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector y(&context, 10);
y.CopyFromCpu(x);
EXPECT_EQ(y.num_rows(), 3);
Vector z(3);
z << 0, 0, 0;
y.CopyTo(&z);
EXPECT_EQ(x, z);
}
TEST(CudaVector, Move) {
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector y(&context, 10);
const auto y_data = y.data();
const auto y_descr = y.descr();
EXPECT_EQ(y.num_rows(), 10);
CudaVector z(std::move(y));
EXPECT_EQ(y.data(), nullptr);
EXPECT_EQ(y.descr(), nullptr);
EXPECT_EQ(y.num_rows(), 0);
EXPECT_EQ(z.data(), y_data);
EXPECT_EQ(z.descr(), y_descr);
}
TEST(CudaVector, DeepCopy) {
Vector x(3);
x << 1, 2, 3;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 3);
x_gpu.CopyFromCpu(x);
CudaVector y_gpu(&context, 3);
y_gpu.SetZero();
EXPECT_EQ(y_gpu.Norm(), 0.0);
y_gpu = x_gpu;
Vector y(3);
y << 0, 0, 0;
y_gpu.CopyTo(&y);
EXPECT_EQ(x, y);
}
TEST(CudaVector, Dot) {
Vector x(3);
Vector y(3);
x << 1, 2, 3;
y << 100, 10, 1;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
CudaVector y_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
EXPECT_EQ(x_gpu.Dot(y_gpu), 123.0);
EXPECT_EQ(Dot(x_gpu, y_gpu), 123.0);
}
TEST(CudaVector, Norm) {
Vector x(3);
x << 1, 2, 3;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
EXPECT_NEAR(x_gpu.Norm(),
sqrt(1.0 + 4.0 + 9.0),
std::numeric_limits<double>::epsilon());
EXPECT_NEAR(Norm(x_gpu),
sqrt(1.0 + 4.0 + 9.0),
std::numeric_limits<double>::epsilon());
}
TEST(CudaVector, SetZero) {
Vector x(4);
x << 1, 1, 1, 1;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits<double>::epsilon());
x_gpu.SetZero();
EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits<double>::epsilon());
x_gpu.CopyFromCpu(x);
EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits<double>::epsilon());
SetZero(x_gpu);
EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits<double>::epsilon());
}
TEST(CudaVector, Resize) {
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
EXPECT_EQ(x_gpu.num_rows(), 10);
x_gpu.Resize(4);
EXPECT_EQ(x_gpu.num_rows(), 4);
}
TEST(CudaVector, Axpy) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
x_gpu.Axpby(2.0, y_gpu, 1.0);
Vector result;
Vector expected(4);
expected << 201, 21, 3, 1;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyBEquals1) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
x_gpu.Axpby(2.0, y_gpu, 1.0);
Vector result;
Vector expected(4);
expected << 201, 21, 3, 1;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyMemberFunctionBNotEqual1) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
x_gpu.Axpby(2.0, y_gpu, 3.0);
Vector result;
Vector expected(4);
expected << 203, 23, 5, 3;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyMemberFunctionBEqual1) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
x_gpu.Axpby(2.0, y_gpu, 1.0);
Vector result;
Vector expected(4);
expected << 201, 21, 3, 1;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyMemberXAliasesY) {
Vector x(4);
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.SetZero();
x_gpu.Axpby(2.0, x_gpu, 1.0);
Vector result;
Vector expected(4);
expected << 300, 30, 3, 0;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyNonMemberMethodNoAliases) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
CudaVector z_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
z_gpu.Resize(4);
z_gpu.SetZero();
Axpby(2.0, x_gpu, 3.0, y_gpu, z_gpu);
Vector result;
Vector expected(4);
expected << 302, 32, 5, 2;
z_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyNonMemberMethodXAliasesY) {
Vector x(4);
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector z_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
z_gpu.SetZero();
Axpby(2.0, x_gpu, 3.0, x_gpu, z_gpu);
Vector result;
Vector expected(4);
expected << 500, 50, 5, 0;
z_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyNonMemberMethodXAliasesZ) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
CudaVector y_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
Axpby(2.0, x_gpu, 3.0, y_gpu, x_gpu);
Vector result;
Vector expected(4);
expected << 302, 32, 5, 2;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyNonMemberMethodYAliasesZ) {
Vector x(4);
Vector y(4);
x << 1, 1, 1, 1;
y << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
Axpby(2.0, x_gpu, 3.0, y_gpu, y_gpu);
Vector result;
Vector expected(4);
expected << 302, 32, 5, 2;
y_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, AxpbyNonMemberMethodXAliasesYAliasesZ) {
Vector x(4);
x << 100, 10, 1, 0;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 10);
x_gpu.CopyFromCpu(x);
Axpby(2.0, x_gpu, 3.0, x_gpu, x_gpu);
Vector result;
Vector expected(4);
expected << 500, 50, 5, 0;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, DtDxpy) {
Vector x(4);
Vector y(4);
Vector D(4);
x << 1, 2, 3, 4;
y << 100, 10, 1, 0;
D << 4, 3, 2, 1;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
CudaVector y_gpu(&context, 4);
CudaVector D_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
y_gpu.CopyFromCpu(y);
D_gpu.CopyFromCpu(D);
y_gpu.DtDxpy(D_gpu, x_gpu);
Vector result;
Vector expected(4);
expected << 116, 28, 13, 4;
y_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
TEST(CudaVector, Scale) {
Vector x(4);
x << 1, 2, 3, 4;
ContextImpl context;
std::string message;
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
CudaVector x_gpu(&context, 4);
x_gpu.CopyFromCpu(x);
x_gpu.Scale(-3.0);
Vector result;
Vector expected(4);
expected << -3.0, -6.0, -9.0, -12.0;
x_gpu.CopyTo(&result);
EXPECT_EQ(result, expected);
}
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres


@@ -1,284 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: strandmark@google.com (Petter Strandmark)
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifndef CERES_NO_CXSPARSE
#include <memory>
#include <string>
#include <vector>
#include "ceres/compressed_col_sparse_matrix_utils.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/cxsparse.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::vector;
CXSparse::CXSparse() : scratch_(nullptr), scratch_size_(0) {}
CXSparse::~CXSparse() {
if (scratch_size_ > 0) {
cs_di_free(scratch_);
}
}
csn* CXSparse::Cholesky(cs_di* A, cs_dis* symbolic_factor) {
return cs_di_chol(A, symbolic_factor);
}
void CXSparse::Solve(cs_dis* symbolic_factor, csn* numeric_factor, double* b) {
// Make sure we have enough scratch space available.
const int num_cols = numeric_factor->L->n;
if (scratch_size_ < num_cols) {
if (scratch_size_ > 0) {
cs_di_free(scratch_);
}
scratch_ =
reinterpret_cast<CS_ENTRY*>(cs_di_malloc(num_cols, sizeof(CS_ENTRY)));
scratch_size_ = num_cols;
}
// When the Cholesky factorization succeeded, these methods are
// guaranteed to succeed as well. In the comments below, "x"
// refers to the scratch space.
//
// Set x = P * b.
CHECK(cs_di_ipvec(symbolic_factor->pinv, b, scratch_, num_cols));
// Set x = L \ x.
CHECK(cs_di_lsolve(numeric_factor->L, scratch_));
// Set x = L' \ x.
CHECK(cs_di_ltsolve(numeric_factor->L, scratch_));
// Set b = P' * x.
CHECK(cs_di_pvec(symbolic_factor->pinv, scratch_, b, num_cols));
}
bool CXSparse::SolveCholesky(cs_di* lhs, double* rhs_and_solution) {
return cs_cholsol(1, lhs, rhs_and_solution);
}
cs_dis* CXSparse::AnalyzeCholesky(cs_di* A) {
// order = 1 for Cholesky factor.
return cs_schol(1, A);
}
cs_dis* CXSparse::AnalyzeCholeskyWithNaturalOrdering(cs_di* A) {
// order = 0 for Natural ordering.
return cs_schol(0, A);
}
cs_dis* CXSparse::BlockAnalyzeCholesky(cs_di* A,
const vector<int>& row_blocks,
const vector<int>& col_blocks) {
const int num_row_blocks = row_blocks.size();
const int num_col_blocks = col_blocks.size();
vector<int> block_rows;
vector<int> block_cols;
CompressedColumnScalarMatrixToBlockMatrix(
A->i, A->p, row_blocks, col_blocks, &block_rows, &block_cols);
cs_di block_matrix;
block_matrix.m = num_row_blocks;
block_matrix.n = num_col_blocks;
block_matrix.nz = -1;
block_matrix.nzmax = block_rows.size();
block_matrix.p = &block_cols[0];
block_matrix.i = &block_rows[0];
block_matrix.x = nullptr;
int* ordering = cs_amd(1, &block_matrix);
vector<int> block_ordering(num_row_blocks, -1);
std::copy(ordering, ordering + num_row_blocks, &block_ordering[0]);
cs_free(ordering);
vector<int> scalar_ordering;
BlockOrderingToScalarOrdering(row_blocks, block_ordering, &scalar_ordering);
auto* symbolic_factor =
reinterpret_cast<cs_dis*>(cs_calloc(1, sizeof(cs_dis)));
symbolic_factor->pinv = cs_pinv(&scalar_ordering[0], A->n);
cs* permuted_A = cs_symperm(A, symbolic_factor->pinv, 0);
symbolic_factor->parent = cs_etree(permuted_A, 0);
int* postordering = cs_post(symbolic_factor->parent, A->n);
int* column_counts =
cs_counts(permuted_A, symbolic_factor->parent, postordering, 0);
cs_free(postordering);
cs_spfree(permuted_A);
symbolic_factor->cp = static_cast<int*>(cs_malloc(A->n + 1, sizeof(int)));
symbolic_factor->lnz = cs_cumsum(symbolic_factor->cp, column_counts, A->n);
symbolic_factor->unz = symbolic_factor->lnz;
cs_free(column_counts);
if (symbolic_factor->lnz < 0) {
cs_sfree(symbolic_factor);
symbolic_factor = nullptr;
}
return symbolic_factor;
}
cs_di CXSparse::CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A) {
cs_di At;
At.m = A->num_cols();
At.n = A->num_rows();
At.nz = -1;
At.nzmax = A->num_nonzeros();
At.p = A->mutable_rows();
At.i = A->mutable_cols();
At.x = A->mutable_values();
return At;
}
cs_di* CXSparse::CreateSparseMatrix(TripletSparseMatrix* tsm) {
cs_di_sparse tsm_wrapper;
tsm_wrapper.nzmax = tsm->num_nonzeros();
tsm_wrapper.nz = tsm->num_nonzeros();
tsm_wrapper.m = tsm->num_rows();
tsm_wrapper.n = tsm->num_cols();
tsm_wrapper.p = tsm->mutable_cols();
tsm_wrapper.i = tsm->mutable_rows();
tsm_wrapper.x = tsm->mutable_values();
return cs_compress(&tsm_wrapper);
}
void CXSparse::ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering) {
int* cs_ordering = cs_amd(1, A);
std::copy(cs_ordering, cs_ordering + A->m, ordering);
cs_free(cs_ordering);
}
cs_di* CXSparse::TransposeMatrix(cs_di* A) { return cs_di_transpose(A, 1); }
cs_di* CXSparse::MatrixMatrixMultiply(cs_di* A, cs_di* B) {
return cs_di_multiply(A, B);
}
void CXSparse::Free(cs_di* sparse_matrix) { cs_di_spfree(sparse_matrix); }
void CXSparse::Free(cs_dis* symbolic_factor) { cs_di_sfree(symbolic_factor); }
void CXSparse::Free(csn* numeric_factor) { cs_di_nfree(numeric_factor); }
std::unique_ptr<SparseCholesky> CXSparseCholesky::Create(
const OrderingType ordering_type) {
return std::unique_ptr<SparseCholesky>(new CXSparseCholesky(ordering_type));
}
CompressedRowSparseMatrix::StorageType CXSparseCholesky::StorageType() const {
return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
}
CXSparseCholesky::CXSparseCholesky(const OrderingType ordering_type)
: ordering_type_(ordering_type),
symbolic_factor_(nullptr),
numeric_factor_(nullptr) {}
CXSparseCholesky::~CXSparseCholesky() {
FreeSymbolicFactorization();
FreeNumericFactorization();
}
LinearSolverTerminationType CXSparseCholesky::Factorize(
CompressedRowSparseMatrix* lhs, std::string* message) {
CHECK_EQ(lhs->storage_type(), StorageType());
if (lhs == nullptr) {
*message = "Failure: Input lhs is nullptr.";
return LINEAR_SOLVER_FATAL_ERROR;
}
cs_di cs_lhs = cs_.CreateSparseMatrixTransposeView(lhs);
if (symbolic_factor_ == nullptr) {
if (ordering_type_ == NATURAL) {
symbolic_factor_ = cs_.AnalyzeCholeskyWithNaturalOrdering(&cs_lhs);
} else {
if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) {
symbolic_factor_ = cs_.BlockAnalyzeCholesky(
&cs_lhs, lhs->col_blocks(), lhs->row_blocks());
} else {
symbolic_factor_ = cs_.AnalyzeCholesky(&cs_lhs);
}
}
if (symbolic_factor_ == nullptr) {
*message = "CXSparse Failure : Symbolic factorization failed.";
return LINEAR_SOLVER_FATAL_ERROR;
}
}
FreeNumericFactorization();
numeric_factor_ = cs_.Cholesky(&cs_lhs, symbolic_factor_);
if (numeric_factor_ == nullptr) {
*message = "CXSparse Failure : Numeric factorization failed.";
return LINEAR_SOLVER_FAILURE;
}
return LINEAR_SOLVER_SUCCESS;
}
LinearSolverTerminationType CXSparseCholesky::Solve(const double* rhs,
double* solution,
std::string* message) {
CHECK(numeric_factor_ != nullptr)
<< "Solve called without a call to Factorize first.";
const int num_cols = numeric_factor_->L->n;
memcpy(solution, rhs, num_cols * sizeof(*solution));
cs_.Solve(symbolic_factor_, numeric_factor_, solution);
return LINEAR_SOLVER_SUCCESS;
}
void CXSparseCholesky::FreeSymbolicFactorization() {
if (symbolic_factor_ != nullptr) {
cs_.Free(symbolic_factor_);
symbolic_factor_ = nullptr;
}
}
void CXSparseCholesky::FreeNumericFactorization() {
if (numeric_factor_ != nullptr) {
cs_.Free(numeric_factor_);
numeric_factor_ = nullptr;
}
}
} // namespace internal
} // namespace ceres
#endif // CERES_NO_CXSPARSE


@@ -1,182 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: strandmark@google.com (Petter Strandmark)
#ifndef CERES_INTERNAL_CXSPARSE_H_
#define CERES_INTERNAL_CXSPARSE_H_
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifndef CERES_NO_CXSPARSE
#include <memory>
#include <string>
#include <vector>
#include "ceres/internal/disable_warnings.h"
#include "ceres/linear_solver.h"
#include "ceres/sparse_cholesky.h"
#include "cs.h"
namespace ceres {
namespace internal {
class CompressedRowSparseMatrix;
class TripletSparseMatrix;
// This object provides access to solving linear systems using Cholesky
// factorization with a known symbolic factorization. This feature does not
// explicitly exist in CXSparse. The methods in the class are non-static
// because the class manages internal scratch space.
class CERES_NO_EXPORT CXSparse {
public:
CXSparse();
~CXSparse();
// Solve the system lhs * solution = rhs in place by using an
// approximate minimum degree fill reducing ordering.
bool SolveCholesky(cs_di* lhs, double* rhs_and_solution);
// Solves a linear system given its symbolic and numeric factorization.
void Solve(cs_dis* symbolic_factor,
csn* numeric_factor,
double* rhs_and_solution);
// Compute the numeric Cholesky factorization of A, given its
// symbolic factorization.
//
// Caller owns the result.
csn* Cholesky(cs_di* A, cs_dis* symbolic_factor);
// Creates a compressed-column (cs_di) view of the transpose of A. No memory
// is allocated or copied; the returned structure points into the argument's
// storage.
cs_di CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A);
// Creates a new matrix from a triplet form. Deallocate the returned matrix
// with Free. May return nullptr if the compression or allocation fails.
cs_di* CreateSparseMatrix(TripletSparseMatrix* A);
// B = A'
//
// The returned matrix should be deallocated with Free when not used
// anymore.
cs_di* TransposeMatrix(cs_di* A);
// C = A * B
//
// The returned matrix should be deallocated with Free when not used
// anymore.
cs_di* MatrixMatrixMultiply(cs_di* A, cs_di* B);
// Computes a symbolic factorization of A that can be used in SolveCholesky.
//
// The returned matrix should be deallocated with Free when not used anymore.
cs_dis* AnalyzeCholesky(cs_di* A);
// Computes a symbolic factorization of A that can be used in
// SolveCholesky, but does not compute a fill-reducing ordering.
//
// The returned matrix should be deallocated with Free when not used anymore.
cs_dis* AnalyzeCholeskyWithNaturalOrdering(cs_di* A);
// Computes a symbolic factorization of A that can be used in
// SolveCholesky. The difference from AnalyzeCholesky is that this
// function first detects the block sparsity of the matrix using
// information about the row and column blocks and uses this block
// sparse matrix to find a fill-reducing ordering. This ordering is
// then used to find a symbolic factorization. This can result in a
// significant performance improvement over AnalyzeCholesky on block
// sparse matrices.
//
// The returned matrix should be deallocated with Free when not used
// anymore.
cs_dis* BlockAnalyzeCholesky(cs_di* A,
const std::vector<int>& row_blocks,
const std::vector<int>& col_blocks);
// Computes a fill-reducing approximate minimum degree ordering of
// the matrix A. ordering should be non-nullptr and should point to
// enough memory to hold the ordering for the rows of A.
void ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering);
void Free(cs_di* sparse_matrix);
void Free(cs_dis* symbolic_factorization);
void Free(csn* numeric_factorization);
private:
// Cached scratch space
CS_ENTRY* scratch_;
int scratch_size_;
};
// An implementation of SparseCholesky interface using the CXSparse
// library.
class CERES_NO_EXPORT CXSparseCholesky final : public SparseCholesky {
public:
// Factory
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
// SparseCholesky interface.
~CXSparseCholesky() override;
CompressedRowSparseMatrix::StorageType StorageType() const final;
LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs,
std::string* message) final;
LinearSolverTerminationType Solve(const double* rhs,
double* solution,
std::string* message) final;
private:
explicit CXSparseCholesky(const OrderingType ordering_type);
void FreeSymbolicFactorization();
void FreeNumericFactorization();
const OrderingType ordering_type_;
CXSparse cs_;
cs_dis* symbolic_factor_;
csn* numeric_factor_;
};
} // namespace internal
} // namespace ceres
#include "ceres/internal/reenable_warnings.h"
#else
typedef void cs_dis;
class CXSparse {
public:
void Free(void* arg) {}
};
#endif // CERES_NO_CXSPARSE
#endif // CERES_INTERNAL_CXSPARSE_H_


@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,12 +33,15 @@
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "ceres/internal/config.h"
#include "ceres/iterative_refiner.h"
#ifndef CERES_NO_CUDA
#include "ceres/context_impl.h"
#include "ceres/cuda_kernels_vector_ops.h"
#include "cuda_runtime.h"
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
@@ -57,10 +60,21 @@ extern "C" void dpotrs_(const char* uplo,
double* b,
const int* ldb,
int* info);
extern "C" void spotrf_(
const char* uplo, const int* n, float* a, const int* lda, int* info);
extern "C" void spotrs_(const char* uplo,
const int* n,
const int* nrhs,
const float* a,
const int* lda,
float* b,
const int* ldb,
int* info);
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
DenseCholesky::~DenseCholesky() = default;
@@ -70,12 +84,22 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
switch (options.dense_linear_algebra_library_type) {
case EIGEN:
dense_cholesky = std::make_unique<EigenDenseCholesky>();
// Eigen mixed precision solver not yet implemented.
if (options.use_mixed_precision_solves) {
dense_cholesky = std::make_unique<FloatEigenDenseCholesky>();
} else {
dense_cholesky = std::make_unique<EigenDenseCholesky>();
}
break;
case LAPACK:
#ifndef CERES_NO_LAPACK
dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
// LAPACK mixed precision solver not yet implemented.
if (options.use_mixed_precision_solves) {
dense_cholesky = std::make_unique<FloatLAPACKDenseCholesky>();
} else {
dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
}
break;
#else
LOG(FATAL) << "Ceres was compiled without support for LAPACK.";
@@ -83,7 +107,11 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
case CUDA:
#ifndef CERES_NO_CUDA
dense_cholesky = CUDADenseCholesky::Create(options);
if (options.use_mixed_precision_solves) {
dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options);
} else {
dense_cholesky = CUDADenseCholesky::Create(options);
}
break;
#else
LOG(FATAL) << "Ceres was compiled without support for CUDA.";
@@ -94,6 +122,14 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
<< DenseLinearAlgebraLibraryTypeToString(
options.dense_linear_algebra_library_type);
}
if (options.max_num_refinement_iterations > 0) {
auto refiner = std::make_unique<DenseIterativeRefiner>(
options.max_num_refinement_iterations);
dense_cholesky = std::make_unique<RefinedDenseCholesky>(
std::move(dense_cholesky), std::move(refiner));
}
return dense_cholesky;
}
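// A minimal sketch of driving this factory (assuming, as in upstream Ceres,
// that it takes a LinearSolver::Options; values illustrative):
//
//   LinearSolver::Options options;
//   options.dense_linear_algebra_library_type = EIGEN;
//   options.use_mixed_precision_solves = true;
//   options.max_num_refinement_iterations = 3;
//   auto cholesky = DenseCholesky::Create(options);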
@@ -105,7 +141,7 @@ LinearSolverTerminationType DenseCholesky::FactorAndSolve(
std::string* message) {
LinearSolverTerminationType termination_type =
Factorize(num_cols, lhs, message);
if (termination_type == LINEAR_SOLVER_SUCCESS) {
if (termination_type == LinearSolverTerminationType::SUCCESS) {
termination_type = Solve(rhs, solution, message);
}
return termination_type;
@@ -117,11 +153,11 @@ LinearSolverTerminationType EigenDenseCholesky::Factorize(
llt_ = std::make_unique<LLTType>(m);
if (llt_->info() != Eigen::Success) {
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
*message = "Success.";
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
@@ -129,13 +165,41 @@ LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
std::string* message) {
if (llt_->info() != Eigen::Success) {
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
VectorRef(solution, llt_->cols()) =
llt_->solve(ConstVectorRef(rhs, llt_->cols()));
*message = "Success.";
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType FloatEigenDenseCholesky::Factorize(
int num_cols, double* lhs, std::string* message) {
// TODO(sameeragarwal): Check if this causes a double allocation.
lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();
llt_ = std::make_unique<LLTType>(lhs_);
if (llt_->info() != Eigen::Success) {
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
return LinearSolverTerminationType::FAILURE;
}
*message = "Success.";
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType FloatEigenDenseCholesky::Solve(
const double* rhs, double* solution, std::string* message) {
if (llt_->info() != Eigen::Success) {
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
return LinearSolverTerminationType::FAILURE;
}
rhs_ = ConstVectorRef(rhs, llt_->cols()).cast<float>();
solution_ = llt_->solve(rhs_);
VectorRef(solution, llt_->cols()) = solution_.cast<double>();
*message = "Success.";
return LinearSolverTerminationType::SUCCESS;
}
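// Summary of the mixed-precision flow above: the lhs is cast to float once
// and factorized in single precision; each solve casts the rhs to float and
// the solution back to double. Lost accuracy can be recovered by the
// DenseIterativeRefiner wrapper installed in Create() when
// max_num_refinement_iterations > 0.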
#ifndef CERES_NO_LAPACK
@@ -149,19 +213,19 @@ LinearSolverTerminationType LAPACKDenseCholesky::Factorize(
dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info);
if (info < 0) {
termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it. "
<< "LAPACK::dpotrf fatal error. "
<< "Argument: " << -info << " is invalid.";
} else if (info > 0) {
termination_type_ = LINEAR_SOLVER_FAILURE;
termination_type_ = LinearSolverTerminationType::FAILURE;
*message = StringPrintf(
"LAPACK::dpotrf numerical failure. "
"The leading minor of order %d is not positive definite.",
info);
} else {
termination_type_ = LINEAR_SOLVER_SUCCESS;
termination_type_ = LinearSolverTerminationType::SUCCESS;
*message = "Success.";
}
return termination_type_;
@@ -174,12 +238,12 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
const int nrhs = 1;
int info = 0;
std::copy_n(rhs, num_cols_, solution);
VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_);
dpotrs_(
&uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info);
if (info < 0) {
termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it. "
<< "LAPACK::dpotrs fatal error. "
@@ -187,35 +251,118 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
}
*message = "Success";
termination_type_ = LINEAR_SOLVER_SUCCESS;
termination_type_ = LinearSolverTerminationType::SUCCESS;
return termination_type_;
}
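
The LAPACK backend is the textbook two-call SPD solve: dpotrf_ factorizes in place and dpotrs_ back-substitutes, with info < 0 flagging a bad argument and info > 0 a non-positive-definite leading minor. A bare-bones sketch, assuming a Fortran-convention LAPACK (trailing-underscore symbols) is linked:

#include <iostream>

// Fortran LAPACK entry points (column-major, in-place).
extern "C" void dpotrf_(const char* uplo, const int* n, double* a,
                        const int* lda, int* info);
extern "C" void dpotrs_(const char* uplo, const int* n, const int* nrhs,
                        const double* a, const int* lda, double* b,
                        const int* ldb, int* info);

int main() {
  const char uplo = 'L';
  int n = 2, nrhs = 1, info = 0;
  // Column-major SPD matrix {{4, 1}, {1, 3}} and rhs {1, 2}.
  double a[4] = {4.0, 1.0, 1.0, 3.0};
  double b[2] = {1.0, 2.0};

  dpotrf_(&uplo, &n, a, &n, &info);  // a now holds the Cholesky factor L.
  if (info != 0) {                   // > 0: not SPD; < 0: invalid argument.
    std::cerr << "dpotrf failed: " << info << "\n";
    return 1;
  }
  dpotrs_(&uplo, &n, &nrhs, a, &n, b, &n, &info);  // b now holds the solution.
  std::cout << b[0] << " " << b[1] << "\n";        // Expect 1/11 and 7/11.
  return 0;
}
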
LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize(
int num_cols, double* lhs, std::string* message) {
num_cols_ = num_cols;
lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();
const char uplo = 'L';
int info = 0;
spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info);
if (info < 0) {
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it. "
<< "LAPACK::spotrf fatal error. "
<< "Argument: " << -info << " is invalid.";
} else if (info > 0) {
termination_type_ = LinearSolverTerminationType::FAILURE;
*message = StringPrintf(
"LAPACK::spotrf numerical failure. "
"The leading minor of order %d is not positive definite.",
info);
} else {
termination_type_ = LinearSolverTerminationType::SUCCESS;
*message = "Success.";
}
return termination_type_;
}
LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve(
const double* rhs, double* solution, std::string* message) {
const char uplo = 'L';
const int nrhs = 1;
int info = 0;
rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast<float>();
spotrs_(&uplo,
&num_cols_,
&nrhs,
lhs_.data(),
&num_cols_,
rhs_and_solution_.data(),
&num_cols_,
&info);
if (info < 0) {
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it. "
<< "LAPACK::dpotrs fatal error. "
<< "Argument: " << -info << " is invalid.";
}
*message = "Success";
termination_type_ = LinearSolverTerminationType::SUCCESS;
VectorRef(solution, num_cols_) =
rhs_and_solution_.head(num_cols_).cast<double>();
return termination_type_;
}
#endif // CERES_NO_LAPACK
RefinedDenseCholesky::RefinedDenseCholesky(
std::unique_ptr<DenseCholesky> dense_cholesky,
std::unique_ptr<DenseIterativeRefiner> iterative_refiner)
: dense_cholesky_(std::move(dense_cholesky)),
iterative_refiner_(std::move(iterative_refiner)) {}
RefinedDenseCholesky::~RefinedDenseCholesky() = default;
LinearSolverTerminationType RefinedDenseCholesky::Factorize(
const int num_cols, double* lhs, std::string* message) {
lhs_ = lhs;
num_cols_ = num_cols;
return dense_cholesky_->Factorize(num_cols, lhs, message);
}
LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs,
double* solution,
std::string* message) {
CHECK(lhs_ != nullptr);
auto termination_type = dense_cholesky_->Solve(rhs, solution, message);
if (termination_type != LinearSolverTerminationType::SUCCESS) {
return termination_type;
}
iterative_refiner_->Refine(
num_cols_, lhs_, rhs, dense_cholesky_.get(), solution);
return LinearSolverTerminationType::SUCCESS;
}
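
RefinedDenseCholesky::Solve is classical iterative refinement: compute x once, then repeatedly form r = b - A*x and add the correction A^-1 * r obtained from the cached factorization. A compact Eigen sketch of the same loop (a stand-in, not the actual DenseIterativeRefiner):

#include <iostream>
#include "Eigen/Dense"

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 4.0, 1.0,
       1.0, 3.0;
  Eigen::VectorXd b(2);
  b << 1.0, 2.0;

  Eigen::LLT<Eigen::MatrixXd> llt(A);  // Factorize once, reuse below.
  Eigen::VectorXd x = llt.solve(b);    // Initial solution.

  const int max_num_refinement_iterations = 3;
  for (int i = 0; i < max_num_refinement_iterations; ++i) {
    Eigen::VectorXd r = b - A * x;  // Residual in working precision.
    x += llt.solve(r);              // Correction via the cached factor.
  }
  std::cout << "residual norm: " << (b - A * x).norm() << "\n";
  return 0;
}
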
#ifndef CERES_NO_CUDA
bool CUDADenseCholesky::Init(ContextImpl* context, std::string* message) {
if (!context->InitCUDA(message)) {
return false;
}
cusolver_handle_ = context->cusolver_handle_;
stream_ = context->stream_;
error_.Reserve(1);
*message = "CUDADenseCholesky::Init Success.";
return true;
}
CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context)
: context_(context),
lhs_{context},
rhs_{context},
device_workspace_{context},
error_(context, 1) {}
LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
double* lhs,
std::string* message) {
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
lhs_.Reserve(num_cols * num_cols);
num_cols_ = num_cols;
lhs_.CopyToGpuAsync(lhs, num_cols * num_cols, stream_);
lhs_.CopyFromCpu(lhs, num_cols * num_cols);
int device_workspace_size = 0;
if (cusolverDnDpotrf_bufferSize(cusolver_handle_,
if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols,
lhs_.data(),
@@ -223,10 +370,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
&device_workspace_size) !=
CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
device_workspace_.Reserve(device_workspace_size);
if (cusolverDnDpotrf(cusolver_handle_,
if (cusolverDnDpotrf(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols,
lhs_.data(),
@@ -235,15 +382,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
device_workspace_.size(),
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDpotrf failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
}
if (cudaDeviceSynchronize() != cudaSuccess ||
cudaStreamSynchronize(stream_) != cudaSuccess) {
*message = "Cuda device synchronization failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToHost(&error, 1);
error_.CopyToCpu(&error, 1);
if (error < 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
<< "please report it. "
@@ -251,29 +393,29 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
<< "Argument: " << -error << " is invalid.";
// The following line is unreachable, but return failure just to be
// pedantic, since the compiler does not know that.
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
} else if (error > 0) {
*message = StringPrintf(
"cuSolverDN::cusolverDnDpotrf numerical failure. "
"The leading minor of order %d is not positive definite.",
error);
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::LINEAR_SOLVER_FAILURE;
factorize_result_ = LinearSolverTerminationType::FAILURE;
return LinearSolverTerminationType::FAILURE;
}
*message = "Success";
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
factorize_result_ = LinearSolverTerminationType::SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
double* solution,
std::string* message) {
if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) {
*message = "Factorize did not complete succesfully previously.";
if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
*message = "Factorize did not complete successfully previously.";
return factorize_result_;
}
rhs_.CopyToGpuAsync(rhs, num_cols_, stream_);
if (cusolverDnDpotrs(cusolver_handle_,
rhs_.CopyFromCpu(rhs, num_cols_);
if (cusolverDnDpotrs(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols_,
1,
@@ -283,45 +425,221 @@ LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
num_cols_,
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDpotrs failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
}
if (cudaDeviceSynchronize() != cudaSuccess ||
cudaStreamSynchronize(stream_) != cudaSuccess) {
*message = "Cuda device synchronization failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToHost(&error, 1);
error_.CopyToCpu(&error, 1);
if (error != 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it."
<< "cuSolverDN::cusolverDnDpotrs fatal error. "
<< "Argument: " << -error << " is invalid.";
}
rhs_.CopyToHost(solution, num_cols_);
rhs_.CopyToCpu(solution, num_cols_);
*message = "Success";
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
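
The CUDA backend follows the standard cuSOLVER recipe: query the workspace size, factorize with cusolverDnDpotrf, then back-substitute with cusolverDnDpotrs, checking the device-side info value in between. A trimmed host-side sketch, assuming CUDA and cuSOLVER are available; the SolveSPD helper is illustrative only, not Ceres API:

#include <cstdio>
#include <cuda_runtime.h>
#include <cusolverDn.h>

// Solves the SPD system a_host * x = b_host (n x n, column-major) on the GPU.
// Error handling is trimmed down to the essential devInfo check.
void SolveSPD(const double* a_host, const double* b_host, double* x_host,
              int n) {
  cusolverDnHandle_t handle = nullptr;
  cusolverDnCreate(&handle);

  double *a = nullptr, *b = nullptr, *workspace = nullptr;
  int* dev_info = nullptr;
  cudaMalloc(&a, sizeof(double) * n * n);
  cudaMalloc(&b, sizeof(double) * n);
  cudaMalloc(&dev_info, sizeof(int));
  cudaMemcpy(a, a_host, sizeof(double) * n * n, cudaMemcpyHostToDevice);
  cudaMemcpy(b, b_host, sizeof(double) * n, cudaMemcpyHostToDevice);

  // 1. Query the workspace size needed by the factorization.
  int lwork = 0;
  cusolverDnDpotrf_bufferSize(handle, CUBLAS_FILL_MODE_LOWER, n, a, n, &lwork);
  cudaMalloc(&workspace, sizeof(double) * lwork);

  // 2. In-place Cholesky factorization; the status lands in dev_info.
  cusolverDnDpotrf(handle, CUBLAS_FILL_MODE_LOWER, n, a, n, workspace, lwork,
                   dev_info);
  int info = 0;
  cudaMemcpy(&info, dev_info, sizeof(int), cudaMemcpyDeviceToHost);
  if (info != 0) {  // > 0: not positive definite; < 0: invalid argument.
    std::fprintf(stderr, "dpotrf failed: %d\n", info);
  } else {
    // 3. Back-substitution with one right-hand side; b is overwritten by x.
    cusolverDnDpotrs(handle, CUBLAS_FILL_MODE_LOWER, n, 1, a, n, b, n,
                     dev_info);
    cudaMemcpy(x_host, b, sizeof(double) * n, cudaMemcpyDeviceToHost);
  }

  cudaFree(a);
  cudaFree(b);
  cudaFree(workspace);
  cudaFree(dev_info);
  cusolverDnDestroy(handle);
}
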
std::unique_ptr<CUDADenseCholesky> CUDADenseCholesky::Create(
const LinearSolver::Options& options) {
if (options.dense_linear_algebra_library_type != CUDA) {
// The user called the wrong factory method.
if (options.dense_linear_algebra_library_type != CUDA ||
options.context == nullptr || !options.context->IsCudaInitialized()) {
return nullptr;
}
auto cuda_dense_cholesky =
std::unique_ptr<CUDADenseCholesky>(new CUDADenseCholesky());
std::string cuda_error;
if (cuda_dense_cholesky->Init(options.context, &cuda_error)) {
return cuda_dense_cholesky;
return std::unique_ptr<CUDADenseCholesky>(
new CUDADenseCholesky(options.context));
}
std::unique_ptr<CUDADenseCholeskyMixedPrecision>
CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) {
if (options.dense_linear_algebra_library_type != CUDA ||
!options.use_mixed_precision_solves || options.context == nullptr ||
!options.context->IsCudaInitialized()) {
return nullptr;
}
// Initialization failed, destroy the object (done automatically) and return a
// nullptr.
LOG(ERROR) << "CUDADenseCholesky::Init failed: " << cuda_error;
return nullptr;
return std::unique_ptr<CUDADenseCholeskyMixedPrecision>(
new CUDADenseCholeskyMixedPrecision(
options.context, options.max_num_refinement_iterations));
}
LinearSolverTerminationType
CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) {
int device_workspace_size = 0;
if (cusolverDnSpotrf_bufferSize(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols_,
lhs_fp32_.data(),
num_cols_,
&device_workspace_size) !=
CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed.";
return LinearSolverTerminationType::FATAL_ERROR;
}
device_workspace_.Reserve(device_workspace_size);
if (cusolverDnSpotrf(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols_,
lhs_fp32_.data(),
num_cols_,
device_workspace_.data(),
device_workspace_.size(),
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnSpotrf failed.";
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToCpu(&error, 1);
if (error < 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
<< "please report it. "
<< "cuSolverDN::cusolverDnSpotrf fatal error. "
<< "Argument: " << -error << " is invalid.";
// The following line is unreachable, but return failure just to be
// pedantic, since the compiler does not know that.
return LinearSolverTerminationType::FATAL_ERROR;
}
if (error > 0) {
*message = StringPrintf(
"cuSolverDN::cusolverDnSpotrf numerical failure. "
"The leading minor of order %d is not positive definite.",
error);
factorize_result_ = LinearSolverTerminationType::FAILURE;
return LinearSolverTerminationType::FAILURE;
}
*message = "Success";
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve(
std::string* message) {
CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(),
residual_fp32_.data(),
num_cols_ * sizeof(float),
cudaMemcpyDeviceToDevice,
context_->DefaultStream()),
cudaSuccess);
if (cusolverDnSpotrs(context_->cusolver_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols_,
1,
lhs_fp32_.data(),
num_cols_,
correction_fp32_.data(),
num_cols_,
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDpotrs failed.";
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToCpu(&error, 1);
if (error != 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it."
<< "cuSolverDN::cusolverDnDpotrs fatal error. "
<< "Argument: " << -error << " is invalid.";
}
*message = "Success";
return LinearSolverTerminationType::SUCCESS;
}
CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision(
ContextImpl* context, int max_num_refinement_iterations)
: context_(context),
lhs_fp64_{context},
rhs_fp64_{context},
lhs_fp32_{context},
device_workspace_{context},
error_(context, 1),
x_fp64_{context},
correction_fp32_{context},
residual_fp32_{context},
residual_fp64_{context},
max_num_refinement_iterations_(max_num_refinement_iterations) {}
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize(
int num_cols, double* lhs, std::string* message) {
num_cols_ = num_cols;
// Copy fp64 version of lhs to GPU.
lhs_fp64_.Reserve(num_cols * num_cols);
lhs_fp64_.CopyFromCpu(lhs, num_cols * num_cols);
// Create an fp32 copy of lhs, lhs_fp32.
lhs_fp32_.Reserve(num_cols * num_cols);
CudaFP64ToFP32(lhs_fp64_.data(),
lhs_fp32_.data(),
num_cols * num_cols,
context_->DefaultStream());
// Factorize lhs_fp32.
factorize_result_ = CudaCholeskyFactorize(message);
return factorize_result_;
}
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve(
const double* rhs, double* solution, std::string* message) {
// If factorization failed, return failure.
if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
*message = "Factorize did not complete successfully previously.";
return factorize_result_;
}
// Reserve memory for all arrays.
rhs_fp64_.Reserve(num_cols_);
x_fp64_.Reserve(num_cols_);
correction_fp32_.Reserve(num_cols_);
residual_fp32_.Reserve(num_cols_);
residual_fp64_.Reserve(num_cols_);
// Initialize x = 0.
CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream());
// Initialize residual = rhs.
rhs_fp64_.CopyFromCpu(rhs, num_cols_);
residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
for (int i = 0; i <= max_num_refinement_iterations_; ++i) {
// Cast residual from fp64 to fp32.
CudaFP64ToFP32(residual_fp64_.data(),
residual_fp32_.data(),
num_cols_,
context_->DefaultStream());
// [fp32] c = lhs^-1 * residual.
auto result = CudaCholeskySolve(message);
if (result != LinearSolverTerminationType::SUCCESS) {
return result;
}
// [fp64] x += c.
CudaDsxpy(x_fp64_.data(),
correction_fp32_.data(),
num_cols_,
context_->DefaultStream());
if (i < max_num_refinement_iterations_) {
// [fp64] residual = rhs - lhs * x
// This is done in two steps:
// 1. [fp64] residual = rhs
residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
// 2. [fp64] residual = residual - lhs * x
double alpha = -1.0;
double beta = 1.0;
cublasDsymv(context_->cublas_handle_,
CUBLAS_FILL_MODE_LOWER,
num_cols_,
&alpha,
lhs_fp64_.data(),
num_cols_,
x_fp64_.data(),
1,
&beta,
residual_fp64_.data(),
1);
}
}
x_fp64_.CopyToCpu(solution, num_cols_);
*message = "Success.";
return LinearSolverTerminationType::SUCCESS;
}
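
Stripped of the CUDA plumbing, the mixed-precision scheme above is: factorize a fp32 copy of A once, then iterate r = b - A*x in fp64, solve A*c = r in fp32, and accumulate x += c in fp64. An Eigen sketch of the same loop as a CPU stand-in (the real implementation runs the casts and solves on the GPU):

#include <iostream>
#include "Eigen/Dense"

int main() {
  Eigen::MatrixXd A(2, 2);
  A << 4.0, 1.0,
       1.0, 3.0;
  Eigen::VectorXd b(2);
  b << 1.0, 2.0;

  // Factorize a single-precision copy of A once.
  Eigen::MatrixXf A32 = A.cast<float>();
  Eigen::LLT<Eigen::MatrixXf> llt32(A32);

  Eigen::VectorXd x = Eigen::VectorXd::Zero(2);  // x = 0.
  Eigen::VectorXd r = b;                         // residual = rhs.
  const int max_num_refinement_iterations = 5;
  for (int i = 0; i <= max_num_refinement_iterations; ++i) {
    // [fp32] c = A^-1 * r, then [fp64] x += c.
    Eigen::VectorXf c = llt32.solve(r.cast<float>());
    x += c.cast<double>();
    // [fp64] residual = rhs - A * x.
    r = b - A * x;
  }
  std::cout << "residual norm: " << (b - A * x).norm() << "\n";
  return 0;
}
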
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@
#include <vector>
#include "Eigen/Dense"
#include "ceres/context_impl.h"
#include "ceres/cuda_buffer.h"
#include "ceres/linear_solver.h"
#include "glog/logging.h"
@@ -49,8 +50,7 @@
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
namespace ceres {
namespace internal {
namespace ceres::internal {
// An interface that abstracts away the internal details of various dense linear
// algebra libraries and offers a simple API for solving dense symmetric
@@ -88,7 +88,7 @@ class CERES_NO_EXPORT DenseCholesky {
std::string* message) = 0;
// Convenience method which combines a call to Factorize and Solve. Solve is
// only called if Factorize returns LINEAR_SOLVER_SUCCESS.
// only called if Factorize returns LinearSolverTerminationType::SUCCESS.
//
// The input matrix lhs may be modified by the implementation to store the
// factorization, irrespective of whether the method succeeds or not. It is
@@ -115,6 +115,23 @@ class CERES_NO_EXPORT EigenDenseCholesky final : public DenseCholesky {
std::unique_ptr<LLTType> llt_;
};
class CERES_NO_EXPORT FloatEigenDenseCholesky final : public DenseCholesky {
public:
LinearSolverTerminationType Factorize(int num_cols,
double* lhs,
std::string* message) override;
LinearSolverTerminationType Solve(const double* rhs,
double* solution,
std::string* message) override;
private:
Eigen::MatrixXf lhs_;
Eigen::VectorXf rhs_;
Eigen::VectorXf solution_;
using LLTType = Eigen::LLT<Eigen::MatrixXf, Eigen::Lower>;
std::unique_ptr<LLTType> llt_;
};
#ifndef CERES_NO_LAPACK
class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky {
public:
@@ -128,10 +145,53 @@ class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky {
private:
double* lhs_ = nullptr;
int num_cols_ = -1;
LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
LinearSolverTerminationType termination_type_ =
LinearSolverTerminationType::FATAL_ERROR;
};
class CERES_NO_EXPORT FloatLAPACKDenseCholesky final : public DenseCholesky {
public:
LinearSolverTerminationType Factorize(int num_cols,
double* lhs,
std::string* message) override;
LinearSolverTerminationType Solve(const double* rhs,
double* solution,
std::string* message) override;
private:
Eigen::MatrixXf lhs_;
Eigen::VectorXf rhs_and_solution_;
int num_cols_ = -1;
LinearSolverTerminationType termination_type_ =
LinearSolverTerminationType::FATAL_ERROR;
};
#endif // CERES_NO_LAPACK
class DenseIterativeRefiner;
// Computes an initial solution using the given instance of
// DenseCholesky, and then refines it using the DenseIterativeRefiner.
class CERES_NO_EXPORT RefinedDenseCholesky final : public DenseCholesky {
public:
RefinedDenseCholesky(
std::unique_ptr<DenseCholesky> dense_cholesky,
std::unique_ptr<DenseIterativeRefiner> iterative_refiner);
~RefinedDenseCholesky() override;
LinearSolverTerminationType Factorize(int num_cols,
double* lhs,
std::string* message) override;
LinearSolverTerminationType Solve(const double* rhs,
double* solution,
std::string* message) override;
private:
std::unique_ptr<DenseCholesky> dense_cholesky_;
std::unique_ptr<DenseIterativeRefiner> iterative_refiner_;
double* lhs_ = nullptr;
int num_cols_;
};
#ifndef CERES_NO_CUDA
// CUDA implementation of DenseCholesky using the cuSolverDN library using the
// 32-bit legacy interface for maximum compatibility.
@@ -149,16 +209,9 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky {
std::string* message) override;
private:
CUDADenseCholesky() = default;
// Picks up the cuSolverDN and cuStream handles from the context. If
// the context is unable to initialize CUDA, returns false with a
// human-readable message indicating the reason.
bool Init(ContextImpl* context, std::string* message);
explicit CUDADenseCholesky(ContextImpl* context);
// Handle to the cuSOLVER context.
cusolverDnHandle_t cusolver_handle_ = nullptr;
// CUDA device stream.
cudaStream_t stream_ = nullptr;
ContextImpl* context_ = nullptr;
// Number of columns in the A matrix, to be cached between calls to *Factorize
// and *Solve.
size_t num_cols_ = 0;
@@ -171,13 +224,85 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky {
// Required for error handling with cuSOLVER.
CudaBuffer<int> error_;
// Cache the result of Factorize to ensure that when Solve is called, the
// factiorization of lhs is valid.
LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR;
// factorization of lhs is valid.
LinearSolverTerminationType factorize_result_ =
LinearSolverTerminationType::FATAL_ERROR;
};
// A mixed-precision iterative refinement dense Cholesky solver using FP32 CUDA
// Dense Cholesky for inner iterations, and FP64 outer refinements.
// This class implements a modified version of the "Classical iterative
// refinement" (Algorithm 4.1) from the following paper:
// Haidar, Azzam, Harun Bayraktar, Stanimire Tomov, Jack Dongarra, and Nicholas
// J. Higham. "Mixed-precision iterative refinement using tensor cores on GPUs
// to accelerate solution of linear systems." Proceedings of the Royal Society A
// 476, no. 2243 (2020): 20200110.
//
// The three key modifications from Algorithm 4.1 in the paper are:
// 1. We use Cholesky factorization instead of LU factorization since our A is
// symmetric positive definite.
// 2. During the solution update, the up-cast and accumulation is performed in
// one step with a custom kernel.
class CERES_NO_EXPORT CUDADenseCholeskyMixedPrecision final
: public DenseCholesky {
public:
static std::unique_ptr<CUDADenseCholeskyMixedPrecision> Create(
const LinearSolver::Options& options);
CUDADenseCholeskyMixedPrecision(const CUDADenseCholeskyMixedPrecision&) =
delete;
CUDADenseCholeskyMixedPrecision& operator=(
const CUDADenseCholeskyMixedPrecision&) = delete;
LinearSolverTerminationType Factorize(int num_cols,
double* lhs,
std::string* message) override;
LinearSolverTerminationType Solve(const double* rhs,
double* solution,
std::string* message) override;
private:
CUDADenseCholeskyMixedPrecision(ContextImpl* context,
int max_num_refinement_iterations);
// Helper function to wrap Cuda boilerplate needed to call Spotrf.
LinearSolverTerminationType CudaCholeskyFactorize(std::string* message);
// Helper function to wrap Cuda boilerplate needed to call Spotrs.
LinearSolverTerminationType CudaCholeskySolve(std::string* message);
// Picks up the cuSolverDN and cuStream handles from the context in the
// options, and the number of refinement iterations from the options. If
// the context is unable to initialize CUDA, returns false with a
// human-readable message indicating the reason.
bool Init(const LinearSolver::Options& options, std::string* message);
ContextImpl* context_ = nullptr;
// Number of columns in the A matrix, to be cached between calls to *Factorize
// and *Solve.
size_t num_cols_ = 0;
CudaBuffer<double> lhs_fp64_;
CudaBuffer<double> rhs_fp64_;
CudaBuffer<float> lhs_fp32_;
// Scratch space for cuSOLVER on the GPU.
CudaBuffer<float> device_workspace_;
// Required for error handling with cuSOLVER.
CudaBuffer<int> error_;
// Solution to lhs * x = rhs.
CudaBuffer<double> x_fp64_;
// Incremental correction to x.
CudaBuffer<float> correction_fp32_;
// Residual for the iterative refinement.
CudaBuffer<float> residual_fp32_;
CudaBuffer<double> residual_fp64_;
// Number of inner refinement iterations to perform.
int max_num_refinement_iterations_ = 0;
// Cache the result of Factorize to ensure that when Solve is called, the
// factorization of lhs is valid.
LinearSolverTerminationType factorize_result_ =
LinearSolverTerminationType::FATAL_ERROR;
};
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_DENSE_CHOLESKY_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -75,8 +75,8 @@ class CERES_NO_EXPORT DenseJacobianWriter {
DenseSparseMatrix* dense_jacobian = down_cast<DenseSparseMatrix*>(jacobian);
const ResidualBlock* residual_block =
program_->residual_blocks()[residual_id];
int num_parameter_blocks = residual_block->NumParameterBlocks();
int num_residuals = residual_block->NumResiduals();
const int num_parameter_blocks = residual_block->NumParameterBlocks();
const int num_residuals = residual_block->NumResiduals();
// Now copy the jacobians for each parameter into the dense jacobian matrix.
for (int j = 0; j < num_parameter_blocks; ++j) {

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/types.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
DenseNormalCholeskySolver::DenseNormalCholeskySolver(
LinearSolver::Options options)
@@ -87,5 +86,4 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl(
return summary;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -41,8 +41,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class DenseSparseMatrix;
@@ -94,8 +93,7 @@ class CERES_NO_EXPORT DenseNormalCholeskySolver
std::unique_ptr<DenseCholesky> cholesky_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
#include <algorithm>
#include <memory>
#include <string>
#ifndef CERES_NO_CUDA
#include "ceres/context_impl.h"
#include "cublas_v2.h"
@@ -98,7 +99,7 @@ extern "C" void dormqr_(const char* side, const char* trans, const int* m,
// a is a column major lda x n.
// b is a column major matrix of ldb x nrhs
//
// info = 0 succesful.
// info = 0 successful.
// = -i < 0 i^th argument is an illegal value.
// = i > 0, i^th diagonal element of A is zero.
extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag,
@@ -108,8 +109,7 @@ extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag,
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
DenseQR::~DenseQR() = default;
@@ -153,7 +153,7 @@ LinearSolverTerminationType DenseQR::FactorAndSolve(int num_rows,
std::string* message) {
LinearSolverTerminationType termination_type =
Factorize(num_rows, num_cols, lhs, message);
if (termination_type == LINEAR_SOLVER_SUCCESS) {
if (termination_type == LinearSolverTerminationType::SUCCESS) {
termination_type = Solve(rhs, solution, message);
}
return termination_type;
@@ -166,7 +166,7 @@ LinearSolverTerminationType EigenDenseQR::Factorize(int num_rows,
Eigen::Map<ColMajorMatrix> m(lhs, num_rows, num_cols);
qr_ = std::make_unique<QRType>(m);
*message = "Success.";
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs,
@@ -175,7 +175,7 @@ LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs,
VectorRef(solution, qr_->cols()) =
qr_->solve(ConstVectorRef(rhs, qr_->rows()));
*message = "Success.";
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
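
EigenDenseQR wraps Eigen's Householder QR: for the least-squares problem min ||Ax - b||, factor A = QR and take x = R^-1 Q^T b, which qr.solve() performs in one call. A self-contained sketch, assuming Eigen is available:

#include <iostream>
#include "Eigen/Dense"

int main() {
  // Overdetermined system: 3 equations, 2 unknowns.
  Eigen::MatrixXd A(3, 2);
  A << 1.0, 1.0,
       1.0, 2.0,
       1.0, 3.0;
  Eigen::VectorXd b(3);
  b << 6.0, 0.0, 0.0;

  // Householder QR least-squares solve: x minimizes ||A x - b||.
  Eigen::HouseholderQR<Eigen::MatrixXd> qr(A);
  Eigen::VectorXd x = qr.solve(b);
  std::cout << x.transpose() << "\n";  // Expect 8 -3.
  return 0;
}
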
#ifndef CERES_NO_LAPACK
@@ -237,7 +237,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows,
<< "Argument: " << -info << " is invalid.";
}
termination_type_ = LINEAR_SOLVER_SUCCESS;
termination_type_ = LinearSolverTerminationType::SUCCESS;
*message = "Success.";
return termination_type_;
}
@@ -245,7 +245,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows,
LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
double* solution,
std::string* message) {
if (termination_type_ != LINEAR_SOLVER_SUCCESS) {
if (termination_type_ != LinearSolverTerminationType::SUCCESS) {
*message = "QR factorization failed and solve called.";
return termination_type_;
}
@@ -298,10 +298,10 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
*message =
"QR factorization failure. The factorization is not full rank. R has "
"zeros on the diagonal.";
termination_type_ = LINEAR_SOLVER_FAILURE;
termination_type_ = LinearSolverTerminationType::FAILURE;
} else {
std::copy_n(q_transpose_rhs_.data(), num_cols_, solution);
termination_type_ = LINEAR_SOLVER_SUCCESS;
termination_type_ = LinearSolverTerminationType::SUCCESS;
}
return termination_type_;
@@ -311,30 +311,26 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
#ifndef CERES_NO_CUDA
bool CUDADenseQR::Init(ContextImpl* context, std::string* message) {
if (!context->InitCUDA(message)) {
return false;
}
cublas_handle_ = context->cublas_handle_;
cusolver_handle_ = context->cusolver_handle_;
stream_ = context->stream_;
error_.Reserve(1);
*message = "CUDADenseQR::Init Success.";
return true;
}
CUDADenseQR::CUDADenseQR(ContextImpl* context)
: context_(context),
lhs_{context},
rhs_{context},
tau_{context},
device_workspace_{context},
error_(context, 1) {}
LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
int num_cols,
double* lhs,
std::string* message) {
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
lhs_.Reserve(num_rows * num_cols);
tau_.Reserve(std::min(num_rows, num_cols));
num_rows_ = num_rows;
num_cols_ = num_cols;
lhs_.CopyToGpuAsync(lhs, num_rows * num_cols, stream_);
lhs_.CopyFromCpu(lhs, num_rows * num_cols);
int device_workspace_size = 0;
if (cusolverDnDgeqrf_bufferSize(cusolver_handle_,
if (cusolverDnDgeqrf_bufferSize(context_->cusolver_handle_,
num_rows,
num_cols,
lhs_.data(),
@@ -342,10 +338,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
&device_workspace_size) !=
CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDgeqrf_bufferSize failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
device_workspace_.Reserve(device_workspace_size);
if (cusolverDnDgeqrf(cusolver_handle_,
if (cusolverDnDgeqrf(context_->cusolver_handle_,
num_rows,
num_cols,
lhs_.data(),
@@ -355,15 +351,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
device_workspace_.size(),
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDgeqrf failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
}
if (cudaDeviceSynchronize() != cudaSuccess ||
cudaStreamSynchronize(stream_) != cudaSuccess) {
*message = "Cuda device synchronization failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToHost(&error, 1);
error_.CopyToCpu(&error, 1);
if (error < 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
<< "please report it. "
@@ -371,24 +362,24 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
<< "Argument: " << -error << " is invalid.";
// The following line is unreachable, but return failure just to be
// pedantic, since the compiler does not know that.
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
*message = "Success";
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
factorize_result_ = LinearSolverTerminationType::SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
double* solution,
std::string* message) {
if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) {
*message = "Factorize did not complete succesfully previously.";
if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
*message = "Factorize did not complete successfully previously.";
return factorize_result_;
}
rhs_.CopyToGpuAsync(rhs, num_rows_, stream_);
rhs_.CopyFromCpu(rhs, num_rows_);
int device_workspace_size = 0;
if (cusolverDnDormqr_bufferSize(cusolver_handle_,
if (cusolverDnDormqr_bufferSize(context_->cusolver_handle_,
CUBLAS_SIDE_LEFT,
CUBLAS_OP_T,
num_rows_,
@@ -402,12 +393,12 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
&device_workspace_size) !=
CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDormqr_bufferSize failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
device_workspace_.Reserve(device_workspace_size);
// Compute rhs = Q^T * rhs, assuming that lhs has already been factorized.
// The result of factorization would have stored Q in a packed form in lhs_.
if (cusolverDnDormqr(cusolver_handle_,
if (cusolverDnDormqr(context_->cusolver_handle_,
CUBLAS_SIDE_LEFT,
CUBLAS_OP_T,
num_rows_,
@@ -422,10 +413,10 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
device_workspace_.size(),
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
*message = "cuSolverDN::cusolverDnDormqr failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
int error = 0;
error_.CopyToHost(&error, 1);
error_.CopyToCpu(&error, 1);
if (error < 0) {
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
<< "Please report it."
@@ -434,7 +425,7 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
}
// Compute the solution vector as x = R \ (Q^T * rhs). Since the previous step
// replaced rhs by (Q^T * rhs), this is just x = R \ rhs.
if (cublasDtrsv(cublas_handle_,
if (cublasDtrsv(context_->cublas_handle_,
CUBLAS_FILL_MODE_UPPER,
CUBLAS_OP_N,
CUBLAS_DIAG_NON_UNIT,
@@ -444,38 +435,22 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
rhs_.data(),
1) != CUBLAS_STATUS_SUCCESS) {
*message = "cuBLAS::cublasDtrsv failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
if (cudaDeviceSynchronize() != cudaSuccess ||
cudaStreamSynchronize(stream_) != cudaSuccess) {
*message = "Cuda device synchronization failed.";
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
}
rhs_.CopyToHost(solution, num_cols_);
rhs_.CopyToCpu(solution, num_cols_);
*message = "Success";
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
std::unique_ptr<CUDADenseQR> CUDADenseQR::Create(
const LinearSolver::Options& options) {
if (options.dense_linear_algebra_library_type != CUDA) {
// The user called the wrong factory method.
if (options.dense_linear_algebra_library_type != CUDA ||
options.context == nullptr || !options.context->IsCudaInitialized()) {
return nullptr;
}
auto cuda_dense_qr = std::unique_ptr<CUDADenseQR>(new CUDADenseQR());
std::string cuda_error;
if (cuda_dense_qr->Init(options.context, &cuda_error)) {
return cuda_dense_qr;
}
// Initialization failed, destroy the object (done automatically) and return a
// nullptr.
LOG(ERROR) << "CUDADenseQR::Init failed: " << cuda_error;
return nullptr;
return std::unique_ptr<CUDADenseQR>(new CUDADenseQR(options.context));
}
CUDADenseQR::CUDADenseQR() = default;
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@
#include <vector>
#include "Eigen/Dense"
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
@@ -54,8 +55,7 @@
#include "cusolverDn.h"
#endif // CERES_NO_CUDA
namespace ceres {
namespace internal {
namespace ceres::internal {
// An interface that abstracts away the internal details of various dense linear
// algebra libraries and offers a simple API for solving dense linear systems
@@ -92,7 +92,7 @@ class CERES_NO_EXPORT DenseQR {
std::string* message) = 0;
// Convenience method which combines a call to Factorize and Solve. Solve is
// only called if Factorize returns LINEAR_SOLVER_SUCCESS.
// only called if Factorize returns LinearSolverTerminationType::SUCCESS.
//
// The input matrix lhs may be modified by the implementation to store the
// factorization, irrespective of whether the method succeeds or not. It is
@@ -136,7 +136,8 @@ class CERES_NO_EXPORT LAPACKDenseQR final : public DenseQR {
double* lhs_ = nullptr;
int num_rows_;
int num_cols_;
LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
LinearSolverTerminationType termination_type_ =
LinearSolverTerminationType::FATAL_ERROR;
Vector work_;
Vector tau_;
Vector q_transpose_rhs_;
@@ -164,18 +165,9 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR {
std::string* message) override;
private:
CUDADenseQR();
// Picks up the cuSolverDN, cuBLAS, and cuStream handles from the context. If
// the context is unable to initialize CUDA, returns false with a
// human-readable message indicating the reason.
bool Init(ContextImpl* context, std::string* message);
explicit CUDADenseQR(ContextImpl* context);
// Handle to the cuSOLVER context.
cusolverDnHandle_t cusolver_handle_ = nullptr;
// Handle to cuBLAS context.
cublasHandle_t cublas_handle_ = nullptr;
// CUDA device stream.
cudaStream_t stream_ = nullptr;
ContextImpl* context_ = nullptr;
// Number of rows in the A matrix, to be cached between calls to *Factorize
// and *Solve.
size_t num_rows_ = 0;
@@ -194,13 +186,13 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR {
CudaBuffer<int> error_;
// Cache the result of Factorize to ensure that when Solve is called, the
// factorization of lhs is valid.
LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR;
LinearSolverTerminationType factorize_result_ =
LinearSolverTerminationType::FATAL_ERROR;
};
#endif // CERES_NO_CUDA
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/types.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options)
: options_(options), dense_qr_(DenseQR::Create(options)) {}
@@ -81,5 +80,4 @@ LinearSolver::Summary DenseQRSolver::SolveImpl(
return summary;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class DenseSparseMatrix;
@@ -112,8 +111,7 @@ class CERES_NO_EXPORT DenseQRSolver final : public DenseSparseMatrixSolver {
std::unique_ptr<DenseQR> dense_qr_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols)
: m_(Matrix(num_rows, num_cols)) {}
@@ -60,17 +59,31 @@ DenseSparseMatrix::DenseSparseMatrix(Matrix m) : m_(std::move(m)) {}
void DenseSparseMatrix::SetZero() { m_.setZero(); }
void DenseSparseMatrix::RightMultiply(const double* x, double* y) const {
VectorRef(y, num_rows()) += matrix() * ConstVectorRef(x, num_cols());
void DenseSparseMatrix::RightMultiplyAndAccumulate(const double* x,
double* y) const {
VectorRef(y, num_rows()).noalias() += m_ * ConstVectorRef(x, num_cols());
}
void DenseSparseMatrix::LeftMultiply(const double* x, double* y) const {
VectorRef(y, num_cols()) +=
matrix().transpose() * ConstVectorRef(x, num_rows());
void DenseSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
double* y) const {
VectorRef(y, num_cols()).noalias() +=
m_.transpose() * ConstVectorRef(x, num_rows());
}
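
The renamed multiply routines accumulate into y instead of overwriting it, and .noalias() promises Eigen that the destination does not alias the operands, so the product can be evaluated straight into y without a temporary. A tiny sketch of the same pattern:

#include <iostream>
#include "Eigen/Dense"

int main() {
  Eigen::MatrixXd m(2, 2);
  m << 1.0, 2.0,
       3.0, 4.0;
  Eigen::VectorXd x(2), y(2);
  x << 1.0, 1.0;
  y << 10.0, 10.0;

  // y += m * x without a temporary: safe because y does not appear
  // on the right-hand side of the product.
  y.noalias() += m * x;
  std::cout << y.transpose() << "\n";  // 13 17
  return 0;
}
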
void DenseSparseMatrix::SquaredColumnNorm(double* x) const {
VectorRef(x, num_cols()) = m_.colwise().squaredNorm();
// This implementation is 3x faster than the naive version
// x = m_.colwise().square().sum(), likely because m_
// is a row major matrix.
const int num_rows = m_.rows();
const int num_cols = m_.cols();
std::fill_n(x, num_cols, 0.0);
const double* m = m_.data();
for (int i = 0; i < num_rows; ++i) {
for (int j = 0; j < num_cols; ++j, ++m) {
x[j] += (*m) * (*m);
}
}
}
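
The hand-rolled loop wins because Ceres stores m_ row-major: advancing a single data pointer touches memory strictly sequentially while still accumulating per-column sums. A standalone sketch of the access pattern, assuming Eigen (the 3x figure above is workload-dependent, so treat it as indicative):

#include <iostream>
#include <vector>
#include "Eigen/Dense"

using RowMajorMatrix =
    Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

int main() {
  RowMajorMatrix m = RowMajorMatrix::Random(1000, 50);

  // Sequential traversal of row-major storage, accumulating column sums.
  std::vector<double> x(m.cols(), 0.0);
  const double* p = m.data();
  for (int i = 0; i < m.rows(); ++i) {
    for (int j = 0; j < m.cols(); ++j, ++p) {
      x[j] += (*p) * (*p);
    }
  }

  // Reference: Eigen's columnwise squared norms should match exactly.
  Eigen::RowVectorXd ref = m.colwise().squaredNorm();
  std::cout << (Eigen::Map<Eigen::RowVectorXd>(x.data(), m.cols()) - ref).norm()
            << "\n";
  return 0;
}
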
void DenseSparseMatrix::ScaleColumns(const double* scale) {
@@ -100,5 +113,4 @@ void DenseSparseMatrix::ToTextFile(FILE* file) const {
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/sparse_matrix.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class TripletSparseMatrix;
@@ -54,8 +53,8 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix {
// SparseMatrix interface.
void SetZero() final;
void RightMultiply(const double* x, double* y) const final;
void LeftMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
void SquaredColumnNorm(double* x) const final;
void ScaleColumns(const double* scale) final;
void ToDenseMatrix(Matrix* dense_matrix) const final;
@@ -73,8 +72,7 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix {
Matrix m_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,7 @@
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
void DetectStructure(const CompressedRowBlockStructure& bs,
const int num_eliminate_blocks,
@@ -119,5 +118,4 @@ void DetectStructure(const CompressedRowBlockStructure& bs,
// clang-format on
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Detect static blocks in the problem sparsity. For rows containing
// e_blocks, we are interested in detecting if the size of the row
@@ -63,8 +62,7 @@ void CERES_NO_EXPORT DetectStructure(const CompressedRowBlockStructure& bs,
int* e_block_size,
int* f_block_size);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
const double kMaxMu = 1.0;
const double kMinMu = 1e-8;
@@ -101,7 +100,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
}
TrustRegionStrategy::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
return summary;
}
@@ -138,11 +137,13 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
summary.num_iterations = linear_solver_summary.num_iterations;
summary.termination_type = linear_solver_summary.termination_type;
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
if (linear_solver_summary.termination_type ==
LinearSolverTerminationType::FATAL_ERROR) {
return summary;
}
if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) {
if (linear_solver_summary.termination_type !=
LinearSolverTerminationType::FAILURE) {
switch (dogleg_type_) {
// Interpolate the Cauchy point and the Gauss-Newton step.
case TRADITIONAL_DOGLEG:
@@ -153,7 +154,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
// Cauchy point and the (Gauss-)Newton step.
case SUBSPACE_DOGLEG:
if (!ComputeSubspaceModel(jacobian)) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.termination_type = LinearSolverTerminationType::FAILURE;
break;
}
ComputeSubspaceDoglegStep(step);
@@ -174,7 +175,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
void DoglegStrategy::ComputeGradient(SparseMatrix* jacobian,
const double* residuals) {
gradient_.setZero();
jacobian->LeftMultiply(residuals, gradient_.data());
jacobian->LeftMultiplyAndAccumulate(residuals, gradient_.data());
gradient_.array() /= diagonal_.array();
}
@@ -187,7 +188,7 @@ void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
// The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
// instead of (J * D^-1) * (D^-1 * g).
Vector scaled_gradient = (gradient_.array() / diagonal_.array()).matrix();
jacobian->RightMultiply(scaled_gradient.data(), Jg.data());
jacobian->RightMultiplyAndAccumulate(scaled_gradient.data(), Jg.data());
alpha_ = gradient_.squaredNorm() / Jg.squaredNorm();
}
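
For reference, the alpha_ computed here is the exact minimizer of the Gauss-Newton model along the steepest-descent direction g = J^T r, up to the diagonal scaling the code folds into g and J; a one-line derivation in LaTeX:

\[
m(-\alpha g) \;=\; \tfrac{1}{2}\,\lVert r - \alpha J g\rVert^{2}
\;=\; \tfrac{1}{2}\,\alpha^{2}\lVert J g\rVert^{2} - \alpha\,\lVert g\rVert^{2}
      + \tfrac{1}{2}\,\lVert r\rVert^{2},
\qquad \text{since } r^{\top} J g = \lVert g\rVert^{2},
\]
\[
\frac{dm}{d\alpha} = 0
\;\Longrightarrow\;
\alpha^{*} = \frac{\lVert g\rVert^{2}}{\lVert J g\rVert^{2}}.
\]
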
@@ -518,7 +519,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
const double* residuals) {
const int n = jacobian->num_cols();
LinearSolver::Summary linear_solver_summary;
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
linear_solver_summary.termination_type = LinearSolverTerminationType::FAILURE;
// The Jacobian matrix is often quite poorly conditioned. Thus it is
// necessary to add a diagonal matrix at the bottom to prevent the
@@ -531,7 +532,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
// If the solve fails, the multiplier to the diagonal is increased
// up to max_mu_ by a factor of mu_increase_factor_ every time. If
// the linear solver is still not successful, the strategy returns
// with LINEAR_SOLVER_FAILURE.
// with LinearSolverTerminationType::FAILURE.
//
// Next time when a new Gauss-Newton step is requested, the
// multiplier starts out from the last successful solve.
@@ -582,21 +583,25 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
}
}
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
if (linear_solver_summary.termination_type ==
LinearSolverTerminationType::FATAL_ERROR) {
return linear_solver_summary;
}
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE ||
if (linear_solver_summary.termination_type ==
LinearSolverTerminationType::FAILURE ||
!IsArrayValid(n, gauss_newton_step_.data())) {
mu_ *= mu_increase_factor_;
VLOG(2) << "Increasing mu " << mu_;
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
linear_solver_summary.termination_type =
LinearSolverTerminationType::FAILURE;
continue;
}
break;
}
if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) {
if (linear_solver_summary.termination_type !=
LinearSolverTerminationType::FAILURE) {
// The scaled Gauss-Newton step is D * GN:
//
// - (D^-1 J^T J D^-1)^-1 (D^-1 g)
@@ -627,7 +632,7 @@ void DoglegStrategy::StepAccepted(double step_quality) {
reuse_ = false;
}
void DoglegStrategy::StepRejected(double step_quality) {
void DoglegStrategy::StepRejected(double /*step_quality*/) {
radius_ *= 0.5;
reuse_ = true;
}
@@ -701,14 +706,13 @@ bool DoglegStrategy::ComputeSubspaceModel(SparseMatrix* jacobian) {
Vector tmp;
tmp = (subspace_basis_.col(0).array() / diagonal_.array()).matrix();
jacobian->RightMultiply(tmp.data(), Jb.row(0).data());
jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(0).data());
tmp = (subspace_basis_.col(1).array() / diagonal_.array()).matrix();
jacobian->RightMultiply(tmp.data(), Jb.row(1).data());
jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(1).data());
subspace_B_ = Jb * Jb.transpose();
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,7 @@
#include "ceres/linear_solver.h"
#include "ceres/trust_region_strategy.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Dogleg step computation and trust region sizing strategy based on
// on "Methods for Nonlinear Least Squares" by K. Madsen, H.B. Nielsen
@@ -159,8 +158,7 @@ class CERES_NO_EXPORT DoglegStrategy final : public TrustRegionStrategy {
Matrix2d subspace_B_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -28,15 +28,14 @@
//
// Author: richie.stebbing@gmail.com (Richard Stebbing)
#ifndef CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_
#define CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_
#ifndef CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_
#define CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_
#include "ceres/casts.h"
#include "ceres/dynamic_compressed_row_sparse_matrix.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer {
void operator()(SparseMatrix* base_jacobian, int num_parameters) {
@@ -46,7 +45,6 @@ struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer {
}
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALISER_H_
#endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,8 @@
#include "ceres/dynamic_compressed_row_jacobian_writer.h"
#include <memory>
#include <utility>
#include <vector>
#include "ceres/casts.h"
#include "ceres/compressed_row_jacobian_writer.h"
@@ -39,11 +41,7 @@
#include "ceres/program.h"
#include "ceres/residual_block.h"
namespace ceres {
namespace internal {
using std::pair;
using std::vector;
namespace ceres::internal {
std::unique_ptr<ScratchEvaluatePreparer[]>
DynamicCompressedRowJacobianWriter::CreateEvaluatePreparers(int num_threads) {
@@ -69,7 +67,7 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id,
program_->residual_blocks()[residual_id];
const int num_residuals = residual_block->NumResiduals();
vector<pair<int, int>> evaluated_jacobian_blocks;
std::vector<std::pair<int, int>> evaluated_jacobian_blocks;
CompressedRowJacobianWriter::GetOrderedParameterBlocks(
program_, residual_id, &evaluated_jacobian_blocks);
@@ -100,5 +98,4 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/internal/export.h"
#include "ceres/scratch_evaluate_preparer.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
class SparseMatrix;
@@ -68,7 +67,7 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter {
// Write only the non-zero jacobian entries for a residual block
// (specified by `residual_id`) into `base_jacobian`, starting at the row
// specifed by `residual_offset`.
// specified by `residual_offset`.
//
// This method is thread-safe over residual blocks (each `residual_id`).
void Write(int residual_id,
@@ -80,7 +79,6 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter {
Program* program_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_JACOBIAN_WRITER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,7 @@
#include <cstring>
namespace ceres {
namespace internal {
namespace ceres::internal {
DynamicCompressedRowSparseMatrix::DynamicCompressedRowSparseMatrix(
int num_rows, int num_cols, int initial_max_num_nonzeros)
@@ -99,5 +98,4 @@ void DynamicCompressedRowSparseMatrix::Finalize(int num_additional_elements) {
<< "the number of jacobian nonzeros. Please contact the developers!";
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,13 +47,12 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
: public CompressedRowSparseMatrix {
public:
// Set the number of rows and columns for the underlyig
// Set the number of rows and columns for the underlying
// `CompressedRowSparseMatrix` and set the initial number of maximum non-zero
// entries. Note that following the insertion of entries, when `Finalize`
// is called the number of non-zeros is determined and all internal
@@ -74,7 +73,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
// Insert an entry at a given row and column position. This method is
// thread-safe across rows i.e. different threads can insert values
// simultaneously into different rows. It should be emphasised that this
// simultaneously into different rows. It should be emphasized that this
// method always inserts a new entry and does not check for existing
// entries at the specified row and column position. Duplicate entries
// for a given row and column position will result in undefined
@@ -98,8 +97,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
std::vector<std::vector<double>> dynamic_values_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,7 +39,6 @@
#include "Eigen/SparseCore"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/cxsparse.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
@@ -52,8 +51,7 @@
#include "Eigen/SparseCholesky"
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
DynamicSparseNormalCholeskySolver::DynamicSparseNormalCholeskySolver(
LinearSolver::Options options)
@@ -66,7 +64,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl(
double* x) {
const int num_cols = A->num_cols();
VectorRef(x, num_cols).setZero();
A->LeftMultiply(b, x);
A->LeftMultiplyAndAccumulate(b, x);
if (per_solve_options.D != nullptr) {
// Temporarily append a diagonal block to the A matrix, but undo
@@ -87,9 +85,6 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl(
case SUITE_SPARSE:
summary = SolveImplUsingSuiteSparse(A, x);
break;
case CX_SPARSE:
summary = SolveImplUsingCXSparse(A, x);
break;
case EIGEN_SPARSE:
summary = SolveImplUsingEigen(A, x);
break;
@@ -113,7 +108,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
summary.message =
"SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE "
"because Ceres was not built with support for "
@@ -138,7 +133,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Success.";
solver.analyzePattern(lhs);
@@ -150,7 +145,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
event_logger.AddEvent("Analyze");
if (solver.info() != Eigen::Success) {
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
summary.message = "Eigen failure. Unable to find symbolic factorization.";
return summary;
}
@@ -158,7 +153,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
solver.factorize(lhs);
event_logger.AddEvent("Factorize");
if (solver.info() != Eigen::Success) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = "Eigen failure. Unable to find numeric factorization.";
return summary;
}
@@ -167,7 +162,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
VectorRef(rhs_and_solution, lhs.cols()) = solver.solve(rhs);
event_logger.AddEvent("Solve");
if (solver.info() != Eigen::Success) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = "Eigen failure. Unable to do triangular solve.";
return summary;
}
@@ -176,66 +171,16 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
#endif // CERES_USE_EIGEN_SPARSE
}
LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingCXSparse(
CompressedRowSparseMatrix* A, double* rhs_and_solution) {
#ifdef CERES_NO_CXSPARSE
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
summary.message =
"SPARSE_NORMAL_CHOLESKY cannot be used with CX_SPARSE "
"because Ceres was not built with support for CXSparse. "
"This requires enabling building with -DCXSPARSE=ON.";
return summary;
#else
EventLogger event_logger(
"DynamicSparseNormalCholeskySolver::CXSparse::Solve");
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.message = "Success.";
CXSparse cxsparse;
// Wrap the augmented Jacobian in a compressed sparse column matrix.
cs_di a_transpose = cxsparse.CreateSparseMatrixTransposeView(A);
// Compute the normal equations. J'J delta = J'f and solve them
// using a sparse Cholesky factorization. Notice that when compared
// to SuiteSparse we have to explicitly compute the transpose of Jt,
// and then the normal equations before they can be
// factorized. CHOLMOD/SuiteSparse on the other hand can just work
// off of Jt to compute the Cholesky factorization of the normal
// equations.
cs_di* a = cxsparse.TransposeMatrix(&a_transpose);
cs_di* lhs = cxsparse.MatrixMatrixMultiply(&a_transpose, a);
cxsparse.Free(a);
event_logger.AddEvent("NormalEquations");
if (!cxsparse.SolveCholesky(lhs, rhs_and_solution)) {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.message = "CXSparse::SolveCholesky failed";
}
event_logger.AddEvent("Solve");
cxsparse.Free(lhs);
event_logger.AddEvent("TearDown");
return summary;
#endif
}
LinearSolver::Summary
DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
CompressedRowSparseMatrix* A, double* rhs_and_solution) {
#ifdef CERES_NO_SUITESPARSE
(void)A;
(void)rhs_and_solution;
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
summary.message =
"SPARSE_NORMAL_CHOLESKY cannot be used with SUITE_SPARSE "
"because Ceres was not built with support for SuiteSparse. "
@@ -247,7 +192,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
EventLogger event_logger(
"DynamicSparseNormalCholeskySolver::SuiteSparse::Solve");
LinearSolver::Summary summary;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.num_iterations = 1;
summary.message = "Success.";
@@ -255,16 +200,17 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
const int num_cols = A->num_cols();
cholmod_sparse lhs = ss.CreateSparseMatrixTransposeView(A);
event_logger.AddEvent("Setup");
cholmod_factor* factor = ss.AnalyzeCholesky(&lhs, &summary.message);
cholmod_factor* factor =
ss.AnalyzeCholesky(&lhs, options_.ordering_type, &summary.message);
event_logger.AddEvent("Analysis");
if (factor == nullptr) {
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
return summary;
}
summary.termination_type = ss.Cholesky(&lhs, factor, &summary.message);
if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
if (summary.termination_type == LinearSolverTerminationType::SUCCESS) {
cholmod_dense cholmod_rhs =
ss.CreateDenseVectorView(rhs_and_solution, num_cols);
cholmod_dense* solution = ss.Solve(factor, &cholmod_rhs, &summary.message);
@@ -274,7 +220,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
rhs_and_solution, solution->x, num_cols * sizeof(*rhs_and_solution));
ss.Free(solution);
} else {
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.termination_type = LinearSolverTerminationType::FAILURE;
}
}
@@ -285,5 +231,4 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
#endif
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
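
For reference, a short sketch of the system the solver above assembles and factorizes, read directly off the code (J is the Jacobian A, f the right-hand side b, and D the optional per-solve regularization diagonal):

    (J^\top J + D^\top D) \, \Delta x = J^\top f

The initial LeftMultiplyAndAccumulate call computes the right-hand side J^\top f, and temporarily appending D to A before the Cholesky factorization supplies the D^\top D term.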

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CompressedRowSparseMatrix;
@@ -77,7 +76,6 @@ class CERES_NO_EXPORT DynamicSparseNormalCholeskySolver
const LinearSolver::Options options_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_DYNAMIC_SPARSE_NORMAL_CHOLESKY_SOLVER_H_

View File

@@ -0,0 +1,105 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
#define CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
#include <numeric>
#include "ceres/internal/eigen.h"
#include "ceres/internal/fixed_array.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
namespace ceres::internal {
// Blas1 operations on Eigen vectors. These functions are needed as an
// abstraction layer so that we can use different versions of a vector-style
// object in the conjugate gradients linear solver.
template <typename Derived>
inline double Norm(const Eigen::DenseBase<Derived>& x,
ContextImpl* context,
int num_threads) {
FixedArray<double> norms(num_threads, 0.);
ParallelFor(
context,
0,
x.rows(),
num_threads,
[&x, &norms](int thread_id, std::tuple<int, int> range) {
auto [start, end] = range;
norms[thread_id] += x.segment(start, end - start).squaredNorm();
},
kMinBlockSizeParallelVectorOps);
return std::sqrt(std::accumulate(norms.begin(), norms.end(), 0.));
}
inline void SetZero(Vector& x, ContextImpl* context, int num_threads) {
ParallelSetZero(context, num_threads, x);
}
inline void Axpby(double a,
const Vector& x,
double b,
const Vector& y,
Vector& z,
ContextImpl* context,
int num_threads) {
ParallelAssign(context, num_threads, z, a * x + b * y);
}
template <typename VectorLikeX, typename VectorLikeY>
inline double Dot(const VectorLikeX& x,
const VectorLikeY& y,
ContextImpl* context,
int num_threads) {
FixedArray<double> dots(num_threads, 0.);
ParallelFor(
context,
0,
x.rows(),
num_threads,
[&x, &y, &dots](int thread_id, std::tuple<int, int> range) {
auto [start, end] = range;
const int block_size = end - start;
const auto& x_block = x.segment(start, block_size);
const auto& y_block = y.segment(start, block_size);
dots[thread_id] += x_block.dot(y_block);
},
kMinBlockSizeParallelVectorOps);
return std::accumulate(dots.begin(), dots.end(), 0.);
}
inline void Copy(const Vector& from,
Vector& to,
ContextImpl* context,
int num_threads) {
ParallelAssign(context, num_threads, to, from);
}
} // namespace ceres::internal
#endif // CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
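
A standalone sketch of the blocked-reduction pattern that Dot() and Norm() above implement: each worker accumulates a partial result over its own contiguous range, and the per-thread partials are combined at the end. Plain std::thread stands in for ceres::internal::ParallelFor here, so this illustrates the idea rather than the library code itself.

#include <numeric>
#include <thread>
#include <vector>

double BlockedDot(const std::vector<double>& x,
                  const std::vector<double>& y,
                  int num_threads) {
  const int n = static_cast<int>(x.size());
  std::vector<double> partials(num_threads, 0.0);  // one slot per thread
  std::vector<std::thread> workers;
  for (int t = 0; t < num_threads; ++t) {
    workers.emplace_back([&, t] {
      // Each worker owns the contiguous slice [start, end) of the vectors.
      const int start = t * n / num_threads;
      const int end = (t + 1) * n / num_threads;
      for (int i = start; i < end; ++i) partials[t] += x[i] * y[i];
    });
  }
  for (auto& w : workers) w.join();
  // Norm(x) is then simply sqrt(BlockedDot(x, x, num_threads)).
  return std::accumulate(partials.begin(), partials.end(), 0.0);
}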

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,22 +36,25 @@
#include <sstream>
#ifndef CERES_NO_EIGEN_METIS
#include <iostream> // This is needed because MetisSupport depends on iostream.
#include "Eigen/MetisSupport"
#endif
#include "Eigen/SparseCholesky"
#include "Eigen/SparseCore"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// TODO(sameeragarwal): Use enable_if to clean up the implementations
// for when Scalar == double.
template <typename Solver>
class EigenSparseCholeskyTemplate final : public SparseCholesky {
public:
EigenSparseCholeskyTemplate() = default;
CompressedRowSparseMatrix::StorageType StorageType() const final {
return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR;
}
LinearSolverTerminationType Factorize(
@@ -68,7 +71,7 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
if (solver_.info() != Eigen::Success) {
*message = "Eigen failure. Unable to find symbolic factorization.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
analyzed_ = true;
@@ -77,9 +80,9 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
solver_.factorize(lhs);
if (solver_.info() != Eigen::Success) {
*message = "Eigen failure. Unable to find numeric factorization.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType Solve(const double* rhs_ptr,
@@ -87,23 +90,23 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
std::string* message) override {
CHECK(analyzed_) << "Solve called without a call to Factorize first.";
scalar_rhs_ = ConstVectorRef(rhs_ptr, solver_.cols())
.template cast<typename Solver::Scalar>();
// The two casts are needed if the Scalar in this class is not
// double. For code simplicity we are going to assume that Eigen
// is smart enough to figure out that casting a double Vector to a
// double Vector is a straight copy. If this turns into a
// performance bottleneck (unlikely), we can revisit this.
scalar_solution_ = solver_.solve(scalar_rhs_);
VectorRef(solution_ptr, solver_.cols()) =
scalar_solution_.template cast<double>();
// Avoid copying when the scalar type is double
if constexpr (std::is_same_v<typename Solver::Scalar, double>) {
ConstVectorRef scalar_rhs(rhs_ptr, solver_.cols());
VectorRef(solution_ptr, solver_.cols()) = solver_.solve(scalar_rhs);
} else {
auto scalar_rhs = ConstVectorRef(rhs_ptr, solver_.cols())
.template cast<typename Solver::Scalar>();
auto scalar_solution = solver_.solve(scalar_rhs);
VectorRef(solution_ptr, solver_.cols()) =
scalar_solution.template cast<double>();
}
if (solver_.info() != Eigen::Success) {
*message = "Eigen failure. Unable to do triangular solve.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs,
@@ -111,9 +114,8 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
CHECK_EQ(lhs->storage_type(), StorageType());
typename Solver::Scalar* values_ptr = nullptr;
if (std::is_same<typename Solver::Scalar, double>::value) {
values_ptr =
reinterpret_cast<typename Solver::Scalar*>(lhs->mutable_values());
if constexpr (std::is_same_v<typename Solver::Scalar, double>) {
values_ptr = lhs->mutable_values();
} else {
// In the case where the scalar used in this class is not
// double. In that case, make a copy of the values array in the
@@ -123,19 +125,20 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
values_ptr = values_.data();
}
Eigen::Map<Eigen::SparseMatrix<typename Solver::Scalar, Eigen::ColMajor>>
Eigen::Map<
const Eigen::SparseMatrix<typename Solver::Scalar, Eigen::ColMajor>>
eigen_lhs(lhs->num_rows(),
lhs->num_rows(),
lhs->num_nonzeros(),
lhs->mutable_rows(),
lhs->mutable_cols(),
lhs->rows(),
lhs->cols(),
values_ptr);
return Factorize(eigen_lhs, message);
}
private:
Eigen::Matrix<typename Solver::Scalar, Eigen::Dynamic, 1> values_,
scalar_rhs_, scalar_solution_;
Eigen::Matrix<typename Solver::Scalar, Eigen::Dynamic, 1> values_;
bool analyzed_{false};
Solver solver_;
};
@@ -150,11 +153,22 @@ std::unique_ptr<SparseCholesky> EigenSparseCholesky::Create(
Eigen::Upper,
Eigen::NaturalOrdering<int>>;
if (ordering_type == AMD) {
if (ordering_type == OrderingType::AMD) {
return std::make_unique<EigenSparseCholeskyTemplate<WithAMDOrdering>>();
} else {
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
} else if (ordering_type == OrderingType::NESDIS) {
#ifndef CERES_NO_EIGEN_METIS
using WithMetisOrdering = Eigen::SimplicialLDLT<Eigen::SparseMatrix<double>,
Eigen::Upper,
Eigen::MetisOrdering<int>>;
return std::make_unique<EigenSparseCholeskyTemplate<WithMetisOrdering>>();
#else
LOG(FATAL)
<< "Congratulations you have found a bug in Ceres Solver. Please "
"report it to the Ceres Solver developers.";
return nullptr;
#endif // CERES_NO_EIGEN_METIS
}
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
}
EigenSparseCholesky::~EigenSparseCholesky() = default;
@@ -168,16 +182,26 @@ std::unique_ptr<SparseCholesky> FloatEigenSparseCholesky::Create(
Eigen::SimplicialLDLT<Eigen::SparseMatrix<float>,
Eigen::Upper,
Eigen::NaturalOrdering<int>>;
if (ordering_type == AMD) {
if (ordering_type == OrderingType::AMD) {
return std::make_unique<EigenSparseCholeskyTemplate<WithAMDOrdering>>();
} else {
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
} else if (ordering_type == OrderingType::NESDIS) {
#ifndef CERES_NO_EIGEN_METIS
using WithMetisOrdering = Eigen::SimplicialLDLT<Eigen::SparseMatrix<float>,
Eigen::Upper,
Eigen::MetisOrdering<int>>;
return std::make_unique<EigenSparseCholeskyTemplate<WithMetisOrdering>>();
#else
LOG(FATAL)
<< "Congratulations you have found a bug in Ceres Solver. Please "
"report it to the Ceres Solver developers.";
return nullptr;
#endif // CERES_NO_EIGEN_METIS
}
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
}
FloatEigenSparseCholesky::~FloatEigenSparseCholesky() = default;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_USE_EIGEN_SPARSE
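
A self-contained illustration of the Eigen pattern the template above wraps: a SimplicialLDLT parameterized on the ordering, with one symbolic analyzePattern() reused across numeric factorize()/solve() calls. The matrix values are made up for the example.

#include <iostream>
#include <Eigen/OrderingMethods>
#include <Eigen/SparseCholesky>
#include <Eigen/SparseCore>

int main() {
  using SpMat = Eigen::SparseMatrix<double>;
  SpMat A(2, 2);
  A.insert(0, 0) = 4.0;  // a trivially SPD (diagonal) matrix
  A.insert(1, 1) = 9.0;
  A.makeCompressed();
  Eigen::SimplicialLDLT<SpMat, Eigen::Upper, Eigen::AMDOrdering<int>> ldlt;
  ldlt.analyzePattern(A);  // symbolic factorization, done once
  ldlt.factorize(A);       // numeric factorization, repeatable
  const Eigen::Vector2d b(8.0, 18.0);
  const Eigen::Vector2d x = ldlt.solve(b);
  std::cout << x.transpose() << "\n";  // expect: 2 2
  return 0;
}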

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,8 +46,18 @@
#include "ceres/linear_solver.h"
#include "ceres/sparse_cholesky.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class EigenSparse {
public:
static constexpr bool IsNestedDissectionAvailable() noexcept {
#ifdef CERES_NO_EIGEN_METIS
return false;
#else
return true;
#endif
}
};
class CERES_NO_EXPORT EigenSparseCholesky : public SparseCholesky {
public:
@@ -83,8 +93,18 @@ class CERES_NO_EXPORT FloatEigenSparseCholesky : public SparseCholesky {
std::string* message) override = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#else
namespace ceres::internal {
class EigenSparse {
public:
static constexpr bool IsNestedDissectionAvailable() noexcept { return false; }
};
} // namespace ceres::internal
#endif // CERES_USE_EIGEN_SPARSE

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,8 +46,7 @@
#include "ceres/scratch_evaluate_preparer.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
Evaluator::~Evaluator() = default;
@@ -65,10 +64,17 @@ std::unique_ptr<Evaluator> Evaluator::Create(const Evaluator::Options& options,
case DENSE_SCHUR:
case SPARSE_SCHUR:
case ITERATIVE_SCHUR:
case CGNR:
return std::make_unique<
ProgramEvaluator<BlockEvaluatePreparer, BlockJacobianWriter>>(
options, program);
case CGNR: {
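// With CUDA_SPARSE the CGNR path consumes the Jacobian in compressed-row
// form, so it needs the CRS writer; every other backend keeps the
// block-sparse writer.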
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
return std::make_unique<ProgramEvaluator<ScratchEvaluatePreparer,
CompressedRowJacobianWriter>>(
options, program);
} else {
return std::make_unique<
ProgramEvaluator<BlockEvaluatePreparer, BlockJacobianWriter>>(
options, program);
}
}
case SPARSE_NORMAL_CHOLESKY:
if (options.dynamic_sparsity) {
return std::make_unique<
@@ -88,5 +94,4 @@ std::unique_ptr<Evaluator> Evaluator::Create(const Evaluator::Options& options,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -65,6 +65,8 @@ class CERES_NO_EXPORT Evaluator {
int num_threads = 1;
int num_eliminate_blocks = -1;
LinearSolverType linear_solver_type = DENSE_QR;
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
NO_SPARSE;
bool dynamic_sparsity = false;
ContextImpl* context = nullptr;
EvaluationCallback* evaluation_callback = nullptr;

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/internal/export.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct CallStatistics {
CallStatistics() = default;
@@ -85,7 +84,6 @@ class ScopedExecutionTimer {
ExecutionSummary* summary_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_EXECUTION_SUMMARY_H_

View File

@@ -0,0 +1,120 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/fake_bundle_adjustment_jacobian.h"
#include <memory>
#include <random>
#include <string>
#include <utility>
#include "Eigen/Dense"
#include "ceres/block_sparse_matrix.h"
#include "ceres/internal/eigen.h"
namespace ceres::internal {
std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
int num_cameras,
int num_points,
int camera_size,
int point_size,
double visibility,
std::mt19937& prng) {
constexpr int kResidualSize = 2;
CompressedRowBlockStructure* bs = new CompressedRowBlockStructure;
int c = 0;
// Add column blocks for each point
for (int i = 0; i < num_points; ++i) {
bs->cols.push_back(Block(point_size, c));
c += point_size;
}
// Add column blocks for each camera.
for (int i = 0; i < num_cameras; ++i) {
bs->cols.push_back(Block(camera_size, c));
c += camera_size;
}
std::bernoulli_distribution visibility_distribution(visibility);
int row_pos = 0;
int cell_pos = 0;
for (int i = 0; i < num_points; ++i) {
for (int j = 0; j < num_cameras; ++j) {
if (!visibility_distribution(prng)) {
continue;
}
bs->rows.emplace_back();
auto& row = bs->rows.back();
row.block.position = row_pos;
row.block.size = kResidualSize;
auto& cells = row.cells;
cells.resize(2);
cells[0].block_id = i;
cells[0].position = cell_pos;
cell_pos += kResidualSize * point_size;
cells[1].block_id = num_points + j;
cells[1].position = cell_pos;
cell_pos += kResidualSize * camera_size;
row_pos += kResidualSize;
}
}
auto jacobian = std::make_unique<BlockSparseMatrix>(bs);
VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom();
return jacobian;
}
std::pair<
std::unique_ptr<PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>>,
std::unique_ptr<BlockSparseMatrix>>
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
int num_points,
int camera_size,
int landmark_size,
double visibility,
std::mt19937& rng) {
using PartitionedView =
PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>;
auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian(
num_cameras, num_points, camera_size, landmark_size, visibility, rng);
LinearSolver::Options options;
options.elimination_groups.push_back(num_points);
auto partitioned_view =
std::make_unique<PartitionedView>(options, *block_sparse_matrix);
return std::make_pair(std::move(partitioned_view),
std::move(block_sparse_matrix));
}
} // namespace ceres::internal
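
For intuition, a tiny worked example of the block structure the factory above builds; the numbers follow directly from the loops in CreateFakeBundleAdjustmentJacobian, assuming every point is visible (visibility = 1.0):

// num_points = 2 (point_size = 3), num_cameras = 1 (camera_size = 9):
//   cols:  [3 @ 0] [3 @ 3] [9 @ 6]   (point blocks first, then cameras)
//   row 0 (position 0): cells {point 0 @ values 0, camera 0 @ values 6}
//   row 1 (position 2): cells {point 1 @ values 24, camera 0 @ values 30}
// Each residual row is 2 rows tall and contributes 2*3 + 2*9 = 24 values.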

View File

@@ -0,0 +1,78 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
#define CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
#include <memory>
#include <random>
#include "ceres/block_sparse_matrix.h"
#include "ceres/partitioned_matrix_view.h"
namespace ceres::internal {
std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
int num_cameras,
int num_points,
int camera_size,
int point_size,
double visibility,
std::mt19937& prng);
template <int kEBlockSize = 3, int kFBlockSize = 6>
std::pair<std::unique_ptr<PartitionedMatrixView<2, kEBlockSize, kFBlockSize>>,
std::unique_ptr<BlockSparseMatrix>>
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
int num_points,
double visibility,
std::mt19937& rng) {
using PartitionedView = PartitionedMatrixView<2, kEBlockSize, kFBlockSize>;
auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian(
num_cameras, num_points, kFBlockSize, kEBlockSize, visibility, rng);
auto partitioned_view =
std::make_unique<PartitionedView>(*block_sparse_matrix, num_points);
return std::make_pair(std::move(partitioned_view),
std::move(block_sparse_matrix));
}
std::pair<
std::unique_ptr<PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>>,
std::unique_ptr<BlockSparseMatrix>>
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
int num_points,
int camera_size,
int landmark_size,
double visibility,
std::mt19937& rng);
} // namespace ceres::internal
#endif // CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
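
A hypothetical benchmark-style call of the templated overload above; all parameter values here are illustrative only:

std::mt19937 rng(42);
// 3-parameter points, 6-parameter cameras; each of the 1000 points is
// observed by roughly half of the 100 cameras.
auto [partitioned_view, jacobian] =
    CreateFakeBundleAdjustmentPartitionedJacobian<3, 6>(
        /*num_cameras=*/100, /*num_points=*/1000, /*visibility=*/0.5, rng);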

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,15 +33,14 @@
#include "ceres/file.h"
#include <cstdio>
#include <string>
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
using std::string;
void WriteStringToFileOrDie(const string& data, const string& filename) {
void WriteStringToFileOrDie(const std::string& data,
const std::string& filename) {
FILE* file_descriptor = fopen(filename.c_str(), "wb");
if (!file_descriptor) {
LOG(FATAL) << "Couldn't write to file: " << filename;
@@ -50,7 +49,7 @@ void WriteStringToFileOrDie(const string& data, const string& filename) {
fclose(file_descriptor);
}
void ReadFileToStringOrDie(const string& filename, string* data) {
void ReadFileToStringOrDie(const std::string& filename, std::string* data) {
FILE* file_descriptor = fopen(filename.c_str(), "r");
if (!file_descriptor) {
@@ -59,12 +58,12 @@ void ReadFileToStringOrDie(const string& filename, string* data) {
// Resize the input buffer appropriately.
fseek(file_descriptor, 0L, SEEK_END);
int num_bytes = ftell(file_descriptor);
int64_t num_bytes = ftell(file_descriptor);
data->resize(num_bytes);
// Read the data.
fseek(file_descriptor, 0L, SEEK_SET);
int num_read =
int64_t num_read =
fread(&((*data)[0]), sizeof((*data)[0]), num_bytes, file_descriptor);
if (num_read != num_bytes) {
LOG(FATAL) << "Couldn't read all of " << filename
@@ -74,7 +73,7 @@ void ReadFileToStringOrDie(const string& filename, string* data) {
fclose(file_descriptor);
}
string JoinPath(const string& dirname, const string& basename) {
std::string JoinPath(const std::string& dirname, const std::string& basename) {
#ifdef _WIN32
static const char separator = '\\';
#else
@@ -86,9 +85,8 @@ string JoinPath(const string& dirname, const string& basename) {
} else if (dirname[dirname.size() - 1] == separator) {
return dirname + basename;
} else {
return dirname + string(&separator, 1) + basename;
return dirname + std::string(&separator, 1) + basename;
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
CERES_NO_EXPORT
void WriteStringToFileOrDie(const std::string& data,
@@ -52,8 +51,7 @@ void ReadFileToStringOrDie(const std::string& filename, std::string* data);
CERES_NO_EXPORT
std::string JoinPath(const std::string& dirname, const std::string& basename);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,8 +34,7 @@
#if !defined(CERES_NO_SUITESPARSE)
namespace ceres {
namespace internal {
namespace ceres::internal {
std::unique_ptr<SparseCholesky> FloatSuiteSparseCholesky::Create(
OrderingType ordering_type) {
@@ -43,7 +42,6 @@ std::unique_ptr<SparseCholesky> FloatSuiteSparseCholesky::Create(
return {};
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // !defined(CERES_NO_SUITESPARSE)

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#if !defined(CERES_NO_SUITESPARSE)
namespace ceres {
namespace internal {
namespace ceres::internal {
// Fake implementation of a single precision Sparse Cholesky using
// SuiteSparse.
@@ -53,8 +52,7 @@ class CERES_NO_EXPORT FloatSuiteSparseCholesky : public SparseCholesky {
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // !defined(CERES_NO_SUITESPARSE)

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,7 @@
#include "ceres/stringprintf.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
FunctionSample::FunctionSample()
: x(0.0),
@@ -75,5 +74,4 @@ std::string FunctionSample::ToDebugString() const {
gradient_is_valid);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// FunctionSample is used by the line search routines to store and
// communicate the value and (optionally) the gradient of the function
@@ -83,13 +82,12 @@ struct CERES_NO_EXPORT FunctionSample {
//
// where d is the search direction.
double gradient;
// True if the evaluation of the gradient was sucessful and the
// True if the evaluation of the gradient was successful and the
// value is a finite number.
bool gradient_is_valid;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -0,0 +1,305 @@
# Ceres Solver - A fast non-linear least squares minimizer
# Copyright 2023 Google Inc. All rights reserved.
# http://ceres-solver.org/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Google Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: keir@google.com (Keir Mierle)
#
# Generate bundle adjustment tests as separate binaries. Since the bundle
# adjustment tests are fairly processing intensive, serializing them makes the
# tests take forever to run. Splitting them into separate binaries makes it
# easier to parallelize in continuous integration systems, and makes local
# processing on multi-core workstations much faster.
# The product of ORDERINGS, THREAD_CONFIGS, and SOLVER_CONFIGS is the full set
# tests to generate.
ORDERINGS = ["kAutomaticOrdering", "kUserOrdering"]
SINGLE_THREADED = "1"
MULTI_THREADED = "4"
THREAD_CONFIGS = [SINGLE_THREADED, MULTI_THREADED]
DENSE_SOLVER_CONFIGS = [
# Linear solver Dense backend
('DENSE_SCHUR', 'EIGEN'),
('DENSE_SCHUR', 'LAPACK'),
('DENSE_SCHUR', 'CUDA'),
]
SPARSE_SOLVER_CONFIGS = [
# Linear solver Sparse backend
('SPARSE_NORMAL_CHOLESKY', 'SUITE_SPARSE'),
('SPARSE_NORMAL_CHOLESKY', 'EIGEN_SPARSE'),
('SPARSE_NORMAL_CHOLESKY', 'ACCELERATE_SPARSE'),
('SPARSE_SCHUR', 'SUITE_SPARSE'),
('SPARSE_SCHUR', 'EIGEN_SPARSE'),
('SPARSE_SCHUR', 'ACCELERATE_SPARSE'),
]
ITERATIVE_SOLVER_CONFIGS = [
# Linear solver Sparse backend Preconditioner
('ITERATIVE_SCHUR', 'NO_SPARSE', 'JACOBI'),
('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_JACOBI'),
('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_POWER_SERIES_EXPANSION'),
('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_JACOBI'),
('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_JACOBI'),
('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_JACOBI'),
('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_TRIDIAGONAL'),
('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_TRIDIAGONAL'),
('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_TRIDIAGONAL'),
]
FILENAME_SHORTENING_MAP = dict(
DENSE_SCHUR='denseschur',
ITERATIVE_SCHUR='iterschur',
SPARSE_NORMAL_CHOLESKY='sparsecholesky',
SPARSE_SCHUR='sparseschur',
EIGEN='eigen',
LAPACK='lapack',
CUDA='cuda',
NO_SPARSE='', # Omit sparse reference entirely for dense tests.
SUITE_SPARSE='suitesparse',
EIGEN_SPARSE='eigensparse',
ACCELERATE_SPARSE='acceleratesparse',
IDENTITY='identity',
JACOBI='jacobi',
SCHUR_JACOBI='schurjacobi',
CLUSTER_JACOBI='clustjacobi',
CLUSTER_TRIDIAGONAL='clusttri',
SCHUR_POWER_SERIES_EXPANSION='spse',
kAutomaticOrdering='auto',
kUserOrdering='user',
)
COPYRIGHT_HEADER = (
"""// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// ========================================
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// ========================================
//
// This file is generated using generate_bundle_adjustment_tests.py.""")
BUNDLE_ADJUSTMENT_TEST_TEMPLATE = (COPYRIGHT_HEADER + """
#include "ceres/bundle_adjustment_test_util.h"
#include "ceres/internal/config.h"
#include "gtest/gtest.h"
%(preprocessor_conditions_begin)s
namespace ceres::internal {
TEST_F(BundleAdjustmentTest,
%(test_class_name)s) { // NOLINT
BundleAdjustmentProblem bundle_adjustment_problem;
Solver::Options* options = bundle_adjustment_problem.mutable_solver_options();
options->eta = 0.01;
options->num_threads = %(num_threads)s;
options->linear_solver_type = %(linear_solver)s;
options->dense_linear_algebra_library_type = %(dense_backend)s;
options->sparse_linear_algebra_library_type = %(sparse_backend)s;
options->preconditioner_type = %(preconditioner)s;
if (%(ordering)s) {
options->linear_solver_ordering = nullptr;
}
Problem* problem = bundle_adjustment_problem.mutable_problem();
RunSolverForConfigAndExpectResidualsMatch(*options, problem);
}
} // namespace ceres::internal
%(preprocessor_conditions_end)s""")
def camelcasify(token):
"""Convert capitalized underscore tokens to camel case"""
return ''.join([x.lower().capitalize() for x in token.split('_')])
def generate_bundle_test(linear_solver,
dense_backend,
sparse_backend,
preconditioner,
ordering,
thread_config):
"""Generate a bundle adjustment test executable configured appropriately"""
# Preconditioner only makes sense for iterative schur; drop it otherwise.
preconditioner_tag = preconditioner
if linear_solver != 'ITERATIVE_SCHUR':
preconditioner_tag = ''
dense_backend_tag = dense_backend
if linear_solver != 'DENSE_SCHUR':
dense_backend_tag = ''
# Omit references to the sparse backend when one is not in use.
sparse_backend_tag = sparse_backend
if sparse_backend == 'NO_SPARSE':
sparse_backend_tag = ''
# Use a double underscore; otherwise the names are harder to understand.
test_class_name = '_'.join(filter(lambda x: x, [
camelcasify(linear_solver),
camelcasify(dense_backend_tag),
camelcasify(sparse_backend_tag),
camelcasify(preconditioner_tag),
ordering[1:], # Strip 'k'
'Threads' if thread_config == MULTI_THREADED else '']))
# Initial template parameters (augmented more below).
template_parameters = dict(
linear_solver=linear_solver,
dense_backend=dense_backend,
sparse_backend=sparse_backend,
preconditioner=preconditioner,
ordering=ordering,
num_threads=thread_config,
test_class_name=test_class_name)
# Accumulate appropriate #ifdef/#ifndefs for the solver's sparse backend.
preprocessor_conditions_begin = []
preprocessor_conditions_end = []
if sparse_backend == 'SUITE_SPARSE':
preprocessor_conditions_begin.append('#ifndef CERES_NO_SUITESPARSE')
preprocessor_conditions_end.insert(0, '#endif // CERES_NO_SUITESPARSE')
elif sparse_backend == 'ACCELERATE_SPARSE':
preprocessor_conditions_begin.append('#ifndef CERES_NO_ACCELERATE_SPARSE')
preprocessor_conditions_end.insert(0, '#endif // CERES_NO_ACCELERATE_SPARSE')
elif sparse_backend == 'EIGEN_SPARSE':
preprocessor_conditions_begin.append('#ifdef CERES_USE_EIGEN_SPARSE')
preprocessor_conditions_end.insert(0, '#endif // CERES_USE_EIGEN_SPARSE')
if dense_backend == "LAPACK":
preprocessor_conditions_begin.append('#ifndef CERES_NO_LAPACK')
preprocessor_conditions_end.insert(0, '#endif // CERES_NO_LAPACK')
elif dense_backend == "CUDA":
preprocessor_conditions_begin.append('#ifndef CERES_NO_CUDA')
preprocessor_conditions_end.insert(0, '#endif // CERES_NO_CUDA')
# If there are #ifdefs, put newlines around them.
if preprocessor_conditions_begin:
preprocessor_conditions_begin.insert(0, '')
preprocessor_conditions_begin.append('')
preprocessor_conditions_end.insert(0, '')
preprocessor_conditions_end.append('')
# Put #ifdef/#ifndef stacks into the template parameters.
template_parameters['preprocessor_conditions_begin'] = '\n'.join(
preprocessor_conditions_begin)
template_parameters['preprocessor_conditions_end'] = '\n'.join(
preprocessor_conditions_end)
# Substitute variables into the test template, and write the result to a file.
filename_tag = '_'.join(FILENAME_SHORTENING_MAP.get(x) for x in [
linear_solver,
dense_backend_tag,
sparse_backend_tag,
preconditioner_tag,
ordering]
if FILENAME_SHORTENING_MAP.get(x))
if (thread_config == MULTI_THREADED):
filename_tag += '_threads'
filename = ('generated_bundle_adjustment_tests/ba_%s_test.cc' %
filename_tag.lower())
with open(filename, 'w') as fd:
fd.write(BUNDLE_ADJUSTMENT_TEST_TEMPLATE % template_parameters)
# All done.
print('Generated', filename)
return filename
if __name__ == '__main__':
# Iterate over all the possible configurations and generate the tests.
generated_files = []
for ordering in ORDERINGS:
for thread_config in THREAD_CONFIGS:
for linear_solver, dense_backend in DENSE_SOLVER_CONFIGS:
generated_files.append(
generate_bundle_test(linear_solver,
dense_backend,
'NO_SPARSE',
'IDENTITY',
ordering,
thread_config))
for linear_solver, sparse_backend, in SPARSE_SOLVER_CONFIGS:
generated_files.append(
generate_bundle_test(linear_solver,
'EIGEN',
sparse_backend,
'IDENTITY',
ordering,
thread_config))
for linear_solver, sparse_backend, preconditioner, in ITERATIVE_SOLVER_CONFIGS:
generated_files.append(
generate_bundle_test(linear_solver,
'EIGEN',
sparse_backend,
preconditioner,
ordering,
thread_config))
# Generate the CMakeLists.txt as well.
with open('generated_bundle_adjustment_tests/CMakeLists.txt', 'w') as fd:
fd.write(COPYRIGHT_HEADER.replace('//', '#').replace('http:#', 'http://'))
fd.write('\n')
fd.write('\n')
for generated_file in generated_files:
fd.write('ceres_test(%s)\n' %
generated_file.split('/')[1].replace('_test.cc', ''))
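
For concreteness, a sketch of what one generated file looks like after substitution into BUNDLE_ADJUSTMENT_TEST_TEMPLATE above: here the dense Eigen configuration, single threaded, automatic ordering, which needs no preprocessor guards (ba_denseschur_eigen_auto_test.cc; kAutomaticOrdering is assumed to be a constant provided by bundle_adjustment_test_util.h):

#include "ceres/bundle_adjustment_test_util.h"
#include "ceres/internal/config.h"
#include "gtest/gtest.h"

namespace ceres::internal {

TEST_F(BundleAdjustmentTest,
       DenseSchur_Eigen_AutomaticOrdering) {  // NOLINT
  BundleAdjustmentProblem bundle_adjustment_problem;
  Solver::Options* options = bundle_adjustment_problem.mutable_solver_options();
  options->eta = 0.01;
  options->num_threads = 1;
  options->linear_solver_type = DENSE_SCHUR;
  options->dense_linear_algebra_library_type = EIGEN;
  options->sparse_linear_algebra_library_type = NO_SPARSE;
  options->preconditioner_type = IDENTITY;
  if (kAutomaticOrdering) {
    options->linear_solver_ordering = nullptr;
  }
  Problem* problem = bundle_adjustment_problem.mutable_problem();
  RunSolverForConfigAndExpectResidualsMatch(*options, problem);
}

}  // namespace ceres::internal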

View File

@@ -0,0 +1,246 @@
# Ceres Solver - A fast non-linear least squares minimizer
# Copyright 2023 Google Inc. All rights reserved.
# http://ceres-solver.org/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Google Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: sameeragarwal@google.com (Sameer Agarwal)
#
# Script for explicitly generating template specialization of the
# SchurEliminator class. It is a rather large class
# and the number of explicit instantiations is also large. Explicitly
# generating these instantiations in separate .cc files breaks the
# compilation into separate compilation units rather than one large cc
# file which takes 2+GB of RAM to compile.
#
# This script creates three sets of files.
#
# 1. schur_eliminator_x_x_x.cc and partitioned_matrix_view_x_x_x.cc
# where, the x indicates the template parameters and
#
# 2. schur_eliminator.cc & partitioned_matrix_view.cc
#
# that contains a factory function for instantiating these classes
# based on runtime parameters.
#
# 3. schur_templates.cc
#
# that contains a function which can be queried to determine what
# template specializations are available.
#
# The following list of tuples, SPECIALIZATIONS, indicates the set of
# specializations that is generated.
SPECIALIZATIONS = [(2, 2, 2),
(2, 2, 3),
(2, 2, 4),
(2, 2, "Eigen::Dynamic"),
(2, 3, 3),
(2, 3, 4),
(2, 3, 6),
(2, 3, 9),
(2, 3, "Eigen::Dynamic"),
(2, 4, 3),
(2, 4, 4),
(2, 4, 6),
(2, 4, 8),
(2, 4, 9),
(2, 4, "Eigen::Dynamic"),
(2, "Eigen::Dynamic", "Eigen::Dynamic"),
(3, 3, 3),
(4, 4, 2),
(4, 4, 3),
(4, 4, 4),
(4, 4, "Eigen::Dynamic")]
import schur_eliminator_template
import partitioned_matrix_view_template
import os
import glob
def SuffixForSize(size):
if size == "Eigen::Dynamic":
return "d"
return str(size)
def SpecializationFilename(prefix, row_block_size, e_block_size, f_block_size):
return "_".join([prefix] + list(map(SuffixForSize, (row_block_size,
e_block_size,
f_block_size))))
def GenerateFactoryConditional(row_block_size, e_block_size, f_block_size):
conditionals = []
if (row_block_size != "Eigen::Dynamic"):
conditionals.append("(options.row_block_size == %s)" % row_block_size)
if (e_block_size != "Eigen::Dynamic"):
conditionals.append("(options.e_block_size == %s)" % e_block_size)
if (f_block_size != "Eigen::Dynamic"):
conditionals.append("(options.f_block_size == %s)" % f_block_size)
if (len(conditionals) == 0):
return "%s"
if (len(conditionals) == 1):
return " if " + conditionals[0] + " {\n %s\n }\n"
return " if (" + " &&\n ".join(conditionals) + ") {\n %s\n }\n"
def Specialize(name, data):
"""
Generate specialization code and the conditionals to instantiate it.
"""
# Specialization files
for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS:
output = SpecializationFilename("generated/" + name,
row_block_size,
e_block_size,
f_block_size) + ".cc"
with open(output, "w") as f:
f.write(data["HEADER"])
f.write(data["SPECIALIZATION_FILE"] %
(row_block_size, e_block_size, f_block_size))
# Generate the _d_d_d specialization.
output = SpecializationFilename("generated/" + name,
"Eigen::Dynamic",
"Eigen::Dynamic",
"Eigen::Dynamic") + ".cc"
with open(output, "w") as f:
f.write(data["HEADER"])
f.write(data["DYNAMIC_FILE"] %
("Eigen::Dynamic", "Eigen::Dynamic", "Eigen::Dynamic"))
# Factory
with open(name + ".cc", "w") as f:
f.write(data["HEADER"])
f.write(data["FACTORY_FILE_HEADER"])
for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS:
factory_conditional = GenerateFactoryConditional(
row_block_size, e_block_size, f_block_size)
factory = data["FACTORY"] % (row_block_size, e_block_size, f_block_size)
f.write(factory_conditional % factory)
f.write(data["FACTORY_FOOTER"])
QUERY_HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// What template specializations are available.
//
// ========================================
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
//=========================================
//
// This file is generated using generate_template_specializations.py.
"""
QUERY_FILE_HEADER = """
#include "ceres/internal/eigen.h"
#include "ceres/schur_templates.h"
namespace ceres {
namespace internal {
void GetBestSchurTemplateSpecialization(int* row_block_size,
int* e_block_size,
int* f_block_size) {
LinearSolver::Options options;
options.row_block_size = *row_block_size;
options.e_block_size = *e_block_size;
options.f_block_size = *f_block_size;
*row_block_size = Eigen::Dynamic;
*e_block_size = Eigen::Dynamic;
*f_block_size = Eigen::Dynamic;
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
"""
QUERY_FOOTER = """
#endif
return;
}
} // namespace internal
} // namespace ceres
"""
QUERY_ACTION = """ *row_block_size = %s;
*e_block_size = %s;
*f_block_size = %s;
return;"""
def GenerateQueryFile():
"""
Generate file that allows querying for available template specializations.
"""
with open("schur_templates.cc", "w") as f:
f.write(QUERY_HEADER)
f.write(QUERY_FILE_HEADER)
for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS:
factory_conditional = GenerateFactoryConditional(
row_block_size, e_block_size, f_block_size)
action = QUERY_ACTION % (row_block_size, e_block_size, f_block_size)
f.write(factory_conditional % action)
f.write(QUERY_FOOTER)
if __name__ == "__main__":
for f in glob.glob("generated/*"):
os.remove(f)
Specialize("schur_eliminator",
schur_eliminator_template.__dict__)
Specialize("partitioned_matrix_view",
partitioned_matrix_view_template.__dict__)
GenerateQueryFile()
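
As an example of the generated output, the (2, 3, 9) entry of SPECIALIZATIONS expands, via GenerateFactoryConditional and QUERY_ACTION, into roughly this fragment of schur_templates.cc:

if ((options.row_block_size == 2) &&
    (options.e_block_size == 3) &&
    (options.f_block_size == 9)) {
  *row_block_size = 2;
  *e_block_size = 3;
  *f_block_size = 9;
  return;
}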

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 2, 2>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
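Aside from the copyright-year bump, the only change repeated across these generated files is collapsing the namespace blocks into a C++17 nested namespace definition. Both spellings open the same scope, as this standalone sketch shows:

// C++17 nested namespace definition: one declaration opens the same scope
// that previously required two nested blocks.
namespace ceres::internal {
inline constexpr int kAnswer = 42;
}  // namespace ceres::internal

// Pre-C++17 spelling of the identical scope; both forms name the same
// entity, so only one definition may appear in a real translation unit.
namespace ceres {
namespace internal {
// inline constexpr int kAnswer = 42;
}  // namespace internal
}  // namespace ceres

Because these files are regenerated rather than hand-edited, the generator's templates were updated once and every specialization file below picks up the same two-line change.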

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 2, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 2, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 2, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 3, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 3, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 3, 6>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 3, 9>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 3, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, 6>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, 8>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, 9>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, 4, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<3, 3, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<4, 4, 2>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<4, 4, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<4, 4, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<4, 4, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -41,12 +41,10 @@
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class PartitionedMatrixView<Eigen::Dynamic,
Eigen::Dynamic,
Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 2, 2>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
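These explicit instantiations exist so the eliminator's hot loops see compile-time block sizes; the <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic> build further down is the runtime fallback. A self-contained sketch of why the fixed sizes pay off with Eigen (toy code, not the actual SchurEliminator internals):

#include <Eigen/Dense>

// With kEBlockSize known at compile time, Eigen stack-allocates the
// matrix and can fully unroll the reduction; with Eigen::Dynamic the
// same template degrades to heap-backed, runtime-sized loops.
template <int kEBlockSize>
double EtEDiagonalSum(
    const Eigen::Matrix<double, kEBlockSize, kEBlockSize>& ete) {
  return ete.diagonal().sum();
}

int main() {
  Eigen::Matrix<double, 3, 3> fixed = Eigen::Matrix<double, 3, 3>::Identity();
  Eigen::MatrixXd dynamic = Eigen::MatrixXd::Identity(3, 3);
  const double a = EtEDiagonalSum<3>(fixed);                // fixed-size path
  const double b = EtEDiagonalSum<Eigen::Dynamic>(dynamic); // dynamic fallback
  return (a == b) ? 0 : 1;
}

Avoiding the dynamic fallback for common bundle-adjustment block sizes is exactly what GetBestSchurTemplateSpecialization, generated above, is for.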

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 2, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 2, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 2, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 3, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 3, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 3, 6>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 3, 9>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 3, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, 6>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, 8>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, 9>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, 4, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<2, Eigen::Dynamic, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<3, 3, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<4, 4, 2>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<4, 4, 3>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<4, 4, 4>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,12 +46,10 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<4, 4, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,8 @@
#include "ceres/schur_eliminator_impl.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template class SchurEliminator<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic>;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,6 @@
#include <vector>
#include "ceres/is_close.h"
#include "ceres/manifold_adapter.h"
#include "ceres/stringprintf.h"
#include "ceres/types.h"
@@ -49,8 +48,6 @@ namespace ceres {
using internal::IsClose;
using internal::StringAppendF;
using internal::StringPrintf;
using std::string;
using std::vector;
namespace {
// Evaluate the cost function and transform the returned Jacobians to
@@ -65,12 +62,12 @@ bool EvaluateCostFunction(const CostFunction* function,
CHECK(jacobians != nullptr);
CHECK(local_jacobians != nullptr);
const vector<int32_t>& block_sizes = function->parameter_block_sizes();
const std::vector<int32_t>& block_sizes = function->parameter_block_sizes();
const int num_parameter_blocks = block_sizes.size();
// Allocate Jacobian matrices in tangent space.
local_jacobians->resize(num_parameter_blocks);
vector<double*> local_jacobian_data(num_parameter_blocks);
std::vector<double*> local_jacobian_data(num_parameter_blocks);
for (int i = 0; i < num_parameter_blocks; ++i) {
int block_size = block_sizes.at(i);
if (manifolds.at(i) != nullptr) {
@@ -83,7 +80,7 @@ bool EvaluateCostFunction(const CostFunction* function,
// Allocate Jacobian matrices in ambient space.
jacobians->resize(num_parameter_blocks);
vector<double*> jacobian_data(num_parameter_blocks);
std::vector<double*> jacobian_data(num_parameter_blocks);
for (int i = 0; i < num_parameter_blocks; ++i) {
jacobians->at(i).resize(function->num_residuals(), block_sizes.at(i));
jacobians->at(i).setZero();
@@ -116,39 +113,8 @@ bool EvaluateCostFunction(const CostFunction* function,
}
} // namespace
GradientChecker::GradientChecker(
const CostFunction* function,
const vector<const LocalParameterization*>* local_parameterizations,
const NumericDiffOptions& options)
: delete_manifolds_(true), function_(function) {
CHECK(function != nullptr);
manifolds_.resize(function->parameter_block_sizes().size(), nullptr);
// Wrap the local parameterization into manifold objects using
// ManifoldAdapter.
for (int i = 0; i < manifolds_.size(); ++i) {
const LocalParameterization* local_param = local_parameterizations->at(i);
if (local_param == nullptr) {
continue;
}
manifolds_[i] = new internal::ManifoldAdapter(local_param);
}
auto finite_diff_cost_function =
std::make_unique<DynamicNumericDiffCostFunction<CostFunction, RIDDERS>>(
function, DO_NOT_TAKE_OWNERSHIP, options);
const vector<int32_t>& parameter_block_sizes =
function->parameter_block_sizes();
for (int32_t parameter_block_size : parameter_block_sizes) {
finite_diff_cost_function->AddParameterBlock(parameter_block_size);
}
finite_diff_cost_function->SetNumResiduals(function->num_residuals());
finite_diff_cost_function_ = std::move(finite_diff_cost_function);
}
GradientChecker::GradientChecker(const CostFunction* function,
const vector<const Manifold*>* manifolds,
const std::vector<const Manifold*>* manifolds,
const NumericDiffOptions& options)
: function_(function) {
CHECK(function != nullptr);
@@ -161,7 +127,7 @@ GradientChecker::GradientChecker(const CostFunction* function,
auto finite_diff_cost_function =
std::make_unique<DynamicNumericDiffCostFunction<CostFunction, RIDDERS>>(
function, DO_NOT_TAKE_OWNERSHIP, options);
const vector<int32_t>& parameter_block_sizes =
const std::vector<int32_t>& parameter_block_sizes =
function->parameter_block_sizes();
const int num_parameter_blocks = parameter_block_sizes.size();
for (int i = 0; i < num_parameter_blocks; ++i) {
@@ -172,14 +138,6 @@ GradientChecker::GradientChecker(const CostFunction* function,
finite_diff_cost_function_ = std::move(finite_diff_cost_function);
}
GradientChecker::~GradientChecker() {
if (delete_manifolds_) {
for (const auto m : manifolds_) {
delete m;
}
}
}
bool GradientChecker::Probe(double const* const* parameters,
double relative_precision,
ProbeResults* results_param) const {
@@ -204,8 +162,8 @@ bool GradientChecker::Probe(double const* const* parameters,
results->return_value = true;
// Evaluate the derivative using the user supplied code.
vector<Matrix>& jacobians = results->jacobians;
vector<Matrix>& local_jacobians = results->local_jacobians;
std::vector<Matrix>& jacobians = results->jacobians;
std::vector<Matrix>& local_jacobians = results->local_jacobians;
if (!EvaluateCostFunction(function_,
parameters,
manifolds_,
@@ -217,8 +175,9 @@ bool GradientChecker::Probe(double const* const* parameters,
}
// Evaluate the derivative using numeric derivatives.
vector<Matrix>& numeric_jacobians = results->numeric_jacobians;
vector<Matrix>& local_numeric_jacobians = results->local_numeric_jacobians;
std::vector<Matrix>& numeric_jacobians = results->numeric_jacobians;
std::vector<Matrix>& local_numeric_jacobians =
results->local_numeric_jacobians;
Vector finite_diff_residuals;
if (!EvaluateCostFunction(finite_diff_cost_function_.get(),
parameters,
@@ -258,7 +217,7 @@ bool GradientChecker::Probe(double const* const* parameters,
// Accumulate the error message for all the jacobians, since it won't get
// output if there are no bad jacobian components.
string error_log;
std::string error_log;
for (int k = 0; k < function_->parameter_block_sizes().size(); k++) {
StringAppendF(&error_log,
"========== "
@@ -312,7 +271,7 @@ bool GradientChecker::Probe(double const* const* parameters,
// Since there were some bad errors, dump comprehensive debug info.
if (num_bad_jacobian_components) {
string header = StringPrintf(
std::string header = StringPrintf(
"\nDetected %d bad Jacobian component(s). "
"Worst relative error was %g.\n",
num_bad_jacobian_components,
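For context, a hedged usage sketch of the Manifold-based GradientChecker that survives this cleanup; the constructor and Probe signatures follow the code above, while the single 3-dimensional parameter block and the nullptr manifold entry (meaning plain Euclidean space) are illustrative assumptions:

#include <vector>
#include "ceres/gradient_checker.h"
#include "glog/logging.h"

// Sketch only: "cost_function" is any user CostFunction with a single
// 3-dimensional parameter block.
bool ProbeJacobians(const ceres::CostFunction* cost_function,
                    const double* x3) {
  std::vector<const ceres::Manifold*> manifolds = {nullptr};
  ceres::NumericDiffOptions numeric_diff_options;
  ceres::GradientChecker checker(cost_function, &manifolds,
                                 numeric_diff_options);
  ceres::GradientChecker::ProbeResults results;
  const double* parameters[] = {x3};
  if (!checker.Probe(parameters, /*relative_precision=*/1e-9, &results)) {
    LOG(ERROR) << "Jacobian check failed:\n" << results.error_log;
    return false;
  }
  return true;
}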

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -52,13 +52,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::abs;
using std::max;
using std::string;
using std::vector;
namespace ceres::internal {
namespace {
@@ -68,7 +62,7 @@ class GradientCheckingCostFunction final : public CostFunction {
const std::vector<const Manifold*>* manifolds,
const NumericDiffOptions& options,
double relative_precision,
string extra_info,
std::string extra_info,
GradientCheckingIterationCallback* callback)
: function_(function),
gradient_checker_(function, manifolds, options),
@@ -76,7 +70,7 @@ class GradientCheckingCostFunction final : public CostFunction {
extra_info_(std::move(extra_info)),
callback_(callback) {
CHECK(callback_ != nullptr);
const vector<int32_t>& parameter_block_sizes =
const std::vector<int32_t>& parameter_block_sizes =
function->parameter_block_sizes();
*mutable_parameter_block_sizes() = parameter_block_sizes;
set_num_residuals(function->num_residuals());
@@ -105,7 +99,8 @@ class GradientCheckingCostFunction final : public CostFunction {
MatrixRef(residuals, num_residuals, 1) = results.residuals;
// Copy the original jacobian blocks into the jacobians array.
const vector<int32_t>& block_sizes = function_->parameter_block_sizes();
const std::vector<int32_t>& block_sizes =
function_->parameter_block_sizes();
for (int k = 0; k < block_sizes.size(); k++) {
if (jacobians[k] != nullptr) {
MatrixRef(jacobians[k],
@@ -127,7 +122,7 @@ class GradientCheckingCostFunction final : public CostFunction {
const CostFunction* function_;
GradientChecker gradient_checker_;
double relative_precision_;
string extra_info_;
std::string extra_info_;
GradientCheckingIterationCallback* callback_;
};
@@ -137,7 +132,7 @@ GradientCheckingIterationCallback::GradientCheckingIterationCallback()
: gradient_error_detected_(false) {}
CallbackReturnType GradientCheckingIterationCallback::operator()(
const IterationSummary& summary) {
const IterationSummary& /*summary*/) {
if (gradient_error_detected_) {
LOG(ERROR) << "Gradient error detected. Terminating solver.";
return SOLVER_ABORT;
@@ -198,7 +193,8 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
// For every ParameterBlock in problem_impl, create a new parameter block with
// the same manifold and constancy.
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program->parameter_blocks();
for (auto* parameter_block : parameter_blocks) {
gradient_checking_problem_impl->AddParameterBlock(
parameter_block->mutable_user_state(),
@@ -225,17 +221,18 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
// For every ResidualBlock in problem_impl, create a new
// ResidualBlock by wrapping its CostFunction inside a
// GradientCheckingCostFunction.
const vector<ResidualBlock*>& residual_blocks = program->residual_blocks();
const std::vector<ResidualBlock*>& residual_blocks =
program->residual_blocks();
for (int i = 0; i < residual_blocks.size(); ++i) {
ResidualBlock* residual_block = residual_blocks[i];
// Build a human readable string which identifies the
// ResidualBlock. This is used by the GradientCheckingCostFunction
// when logging debugging information.
string extra_info =
std::string extra_info =
StringPrintf("Residual block id %d; depends on parameters [", i);
vector<double*> parameter_blocks;
vector<const Manifold*> manifolds;
std::vector<double*> parameter_blocks;
std::vector<const Manifold*> manifolds;
parameter_blocks.reserve(residual_block->NumParameterBlocks());
manifolds.reserve(residual_block->NumParameterBlocks());
for (int j = 0; j < residual_block->NumParameterBlocks(); ++j) {
@@ -280,5 +277,4 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
return gradient_checking_problem_impl;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
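Users normally reach this wrapper through the solver options rather than by constructing it directly. A short sketch of enabling it (the option names are the documented Solver::Options fields; "problem" is assumed to be an already-populated ceres::Problem):

#include "ceres/ceres.h"

// Sketch: with check_gradients set, Ceres builds the gradient-checking
// problem above, and the iteration callback aborts the solve if any
// Jacobian disagrees with its numeric estimate.
void SolveWithGradientChecking(ceres::Problem& problem) {
  ceres::Solver::Options options;
  options.check_gradients = true;                    // wrap all cost functions
  options.gradient_check_relative_precision = 1e-8;  // comparison tolerance
  ceres::Solver::Summary summary;
  ceres::Solve(options, &problem, &summary);
}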

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,7 @@
#include "ceres/iteration_callback.h"
#include "ceres/manifold.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ProblemImpl;
@@ -109,8 +108,7 @@ CERES_NO_EXPORT std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
double relative_precision,
GradientCheckingIterationCallback* callback);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,6 @@
#include <memory>
#include "ceres/local_parameterization.h"
#include "ceres/manifold_adapter.h"
#include "glog/logging.h"
namespace ceres {
@@ -46,22 +44,6 @@ GradientProblem::GradientProblem(FirstOrderFunction* function)
CHECK(function != nullptr);
}
GradientProblem::GradientProblem(FirstOrderFunction* function,
LocalParameterization* parameterization)
: function_(function),
parameterization_(parameterization),
scratch_(new double[function_->NumParameters()]) {
CHECK(function != nullptr);
if (parameterization != nullptr) {
manifold_ =
std::make_unique<internal::ManifoldAdapter>(parameterization_.get());
} else {
manifold_ = std::make_unique<EuclideanManifold<DYNAMIC>>(
function_->NumParameters());
}
CHECK_EQ(function_->NumParameters(), manifold_->AmbientSize());
}
GradientProblem::GradientProblem(FirstOrderFunction* function,
Manifold* manifold)
: function_(function), scratch_(new double[function_->NumParameters()]) {
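With the LocalParameterization-based constructor deleted (the hunk above ends inside the surviving Manifold-taking overload), a GradientProblem is now built either bare or with a Manifold. A hedged end-to-end sketch; Rosenbrock2D is a placeholder FirstOrderFunction written for this note, not part of the diff:

#include "ceres/gradient_problem.h"
#include "ceres/gradient_problem_solver.h"

// Placeholder objective f(x, y) = (1 - x)^2 + 100 (y - x^2)^2.
class Rosenbrock2D final : public ceres::FirstOrderFunction {
 public:
  bool Evaluate(const double* p, double* cost,
                double* gradient) const override {
    const double x = p[0], y = p[1];
    *cost = (1.0 - x) * (1.0 - x) + 100.0 * (y - x * x) * (y - x * x);
    if (gradient != nullptr) {
      gradient[0] = -2.0 * (1.0 - x) - 400.0 * x * (y - x * x);
      gradient[1] = 200.0 * (y - x * x);
    }
    return true;
  }
  int NumParameters() const override { return 2; }
};

int main() {
  double parameters[2] = {-1.2, 1.0};
  // The problem takes ownership of the function, per the constructor above.
  ceres::GradientProblem problem(new Rosenbrock2D());
  ceres::GradientProblemSolver::Options options;
  ceres::GradientProblemSolver::Summary summary;
  ceres::Solve(options, problem, parameters, &summary);
  return 0;
}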

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/sparse_matrix.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
public:
@@ -53,10 +52,10 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
std::unique_ptr<SparseMatrix> CreateJacobian() const final { return nullptr; }
bool Evaluate(const EvaluateOptions& evaluate_options,
bool Evaluate(const EvaluateOptions& /*evaluate_options*/,
const double* state,
double* cost,
double* residuals,
double* /*residuals*/,
double* gradient,
SparseMatrix* jacobian) final {
CHECK(jacobian == nullptr);
@@ -83,7 +82,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
int NumParameters() const final { return problem_.NumParameters(); }
int NumEffectiveParameters() const final {
return problem_.NumLocalParameters();
return problem_.NumTangentParameters();
}
int NumResiduals() const final { return 1; }
@@ -97,8 +96,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
::ceres::internal::ExecutionSummary execution_summary_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,9 @@
#include "ceres/gradient_problem_solver.h"
#include <map>
#include <memory>
#include <string>
#include "ceres/callbacks.h"
#include "ceres/gradient_problem.h"
@@ -48,7 +50,6 @@
namespace ceres {
using internal::StringAppendF;
using internal::StringPrintf;
using std::string;
namespace {
@@ -112,7 +113,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options,
*summary = Summary();
// clang-format off
summary->num_parameters = problem.NumParameters();
summary->num_local_parameters = problem.NumLocalParameters();
summary->num_tangent_parameters = problem.NumTangentParameters();
summary->line_search_direction_type = options.line_search_direction_type; // NOLINT
summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT
summary->line_search_type = options.line_search_type;
@@ -180,7 +181,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options,
SetSummaryFinalCost(summary);
}
const std::map<string, CallStatistics>& evaluator_statistics =
const std::map<std::string, CallStatistics>& evaluator_statistics =
minimizer_options.evaluator->Statistics();
{
const CallStatistics& call_stats = FindWithDefault(
@@ -203,7 +204,7 @@ bool GradientProblemSolver::Summary::IsSolutionUsable() const {
return internal::IsSolutionUsable(*this);
}
string GradientProblemSolver::Summary::BriefReport() const {
std::string GradientProblemSolver::Summary::BriefReport() const {
return StringPrintf(
"Ceres GradientProblemSolver Report: "
"Iterations: %d, "
@@ -216,17 +217,20 @@ string GradientProblemSolver::Summary::BriefReport() const {
TerminationTypeToString(termination_type));
}
string GradientProblemSolver::Summary::FullReport() const {
std::string GradientProblemSolver::Summary::FullReport() const {
using internal::VersionString;
string report = string("\nSolver Summary (v " + VersionString() + ")\n\n");
// NOTE operator+ is not usable for concatenating a string and a string_view.
std::string report =
std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n";
StringAppendF(&report, "Parameters % 25d\n", num_parameters);
if (num_local_parameters != num_parameters) {
StringAppendF(&report, "Local parameters % 25d\n", num_local_parameters);
if (num_tangent_parameters != num_parameters) {
StringAppendF(
&report, "Tangent parameters % 25d\n", num_tangent_parameters);
}
string line_search_direction_string;
std::string line_search_direction_string;
if (line_search_direction_type == LBFGS) {
line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank);
} else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) {
@@ -241,7 +245,7 @@ string GradientProblemSolver::Summary::FullReport() const {
"Line search direction %19s\n",
line_search_direction_string.c_str());
const string line_search_type_string = StringPrintf(
const std::string line_search_type_string = StringPrintf(
"%s %s",
LineSearchInterpolationTypeToString(line_search_interpolation_type),
LineSearchTypeToString(line_search_type));
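The new NOTE above is worth unpacking: through C++23 the standard library provides no operator+ overload that mixes std::string (or const char*) with std::string_view, which is why the report header is built with append instead. A standalone sketch:

#include <string>
#include <string_view>

// Sketch of the concatenation workaround used in FullReport() above.
std::string MakeSummaryHeader(std::string_view version) {
  // std::string bad = "\nSolver Summary (v " + version;  // ill-formed:
  // no operator+ taking (const char*, string_view) through C++23.
  std::string report = std::string{"\nSolver Summary (v "}.append(version);
  report += ")\n\n";
  return report;
}

This matters here because VersionString() now returns a string_view-like value rather than a std::string.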

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// An unweighted undirected graph templated over the vertex ids. Vertex
// should be hashable.
@@ -206,7 +205,6 @@ class WeightedGraph {
edge_weights_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_GRAPH_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,7 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Compare two vertices of a graph by their degrees, if the degrees
// are equal then order them by their ids.
@@ -340,7 +339,6 @@ std::unique_ptr<WeightedGraph<Vertex>> Degree2MaximumSpanningForest(
return forest;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_GRAPH_ALGORITHMS_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,15 +35,16 @@
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
ImplicitSchurComplement::ImplicitSchurComplement(
const LinearSolver::Options& options)
: options_(options), D_(nullptr), b_(nullptr) {}
: options_(options) {}
void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
const double* D,
@@ -57,11 +58,16 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
D_ = D;
b_ = b;
compute_ftf_inverse_ =
options_.use_spse_initialization ||
options_.preconditioner_type == JACOBI ||
options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION;
// Initialize temporary storage and compute the block diagonals of
// E'E and F'E.
if (block_diagonal_EtE_inverse_ == nullptr) {
block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE();
if (options_.preconditioner_type == JACOBI) {
if (compute_ftf_inverse_) {
block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF();
}
rhs_.resize(A_->num_cols_f());
@@ -72,7 +78,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
tmp_f_cols_.resize(A_->num_cols_f());
} else {
A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get());
if (options_.preconditioner_type == JACOBI) {
if (compute_ftf_inverse_) {
A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get());
}
}
@@ -81,7 +87,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
// contributions from the diagonal D if it is non-null. Add that to
// the block diagonals and invert them.
AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get());
if (options_.preconditioner_type == JACOBI) {
if (compute_ftf_inverse_) {
AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(),
block_diagonal_FtF_inverse_.get());
}
@@ -97,36 +103,74 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
// By breaking it down into individual matrix vector products
// involving the matrices E and F. This is implemented using a
// PartitionedMatrixView of the input matrix A.
void ImplicitSchurComplement::RightMultiply(const double* x, double* y) const {
void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x,
double* y) const {
// y1 = F x
tmp_rows_.setZero();
A_->RightMultiplyF(x, tmp_rows_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
// y2 = E' y1
tmp_e_cols_.setZero();
A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
// y3 = -(E'E)^-1 y2
tmp_e_cols_2_.setZero();
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(),
tmp_e_cols_2_.data());
tmp_e_cols_2_ *= -1.0;
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
tmp_e_cols_2_.data(),
options_.context,
options_.num_threads);
ParallelAssign(
options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_);
// y1 = y1 + E y3
A_->RightMultiplyE(tmp_e_cols_2_.data(), tmp_rows_.data());
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
// y5 = D * x
if (D_ != nullptr) {
ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols());
VectorRef(y, num_cols()) =
(Dref.array().square() * ConstVectorRef(x, num_cols()).array())
.matrix();
VectorRef y_cols(y, num_cols());
ParallelAssign(
options_.context,
options_.num_threads,
y_cols,
(Dref.array().square() * ConstVectorRef(x, num_cols()).array()));
} else {
VectorRef(y, num_cols()).setZero();
ParallelSetZero(options_.context, options_.num_threads, y, num_cols());
}
// y = y5 + F' y1
A_->LeftMultiplyF(tmp_rows_.data(), y);
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y);
}
void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate(
const double* x, double* y) const {
CHECK(compute_ftf_inverse_);
// y1 = F x
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
// y2 = E' y1
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
// y3 = (E'E)^-1 y2
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
tmp_e_cols_2_.data(),
options_.context,
options_.num_threads);
// y1 = E y3
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
// y4 = F' y1
ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_);
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data());
// y += (F'F)^-1 y4
block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate(
tmp_f_cols_.data(), y, options_.context, options_.num_threads);
}
// Given a block diagonal matrix and an optional array of diagonal
@@ -136,26 +180,31 @@ void ImplicitSchurComplement::AddDiagonalAndInvert(
const double* D, BlockSparseMatrix* block_diagonal) {
const CompressedRowBlockStructure* block_diagonal_structure =
block_diagonal->block_structure();
for (const auto& row : block_diagonal_structure->rows) {
const int row_block_pos = row.block.position;
const int row_block_size = row.block.size;
const Cell& cell = row.cells[0];
MatrixRef m(block_diagonal->mutable_values() + cell.position,
row_block_size,
row_block_size);
ParallelFor(options_.context,
0,
block_diagonal_structure->rows.size(),
options_.num_threads,
[block_diagonal_structure, D, block_diagonal](int row_block_id) {
auto& row = block_diagonal_structure->rows[row_block_id];
const int row_block_pos = row.block.position;
const int row_block_size = row.block.size;
const Cell& cell = row.cells[0];
MatrixRef m(block_diagonal->mutable_values() + cell.position,
row_block_size,
row_block_size);
if (D != nullptr) {
ConstVectorRef d(D + row_block_pos, row_block_size);
m += d.array().square().matrix().asDiagonal();
}
if (D != nullptr) {
ConstVectorRef d(D + row_block_pos, row_block_size);
m += d.array().square().matrix().asDiagonal();
}
m = m.selfadjointView<Eigen::Upper>().llt().solve(
Matrix::Identity(row_block_size, row_block_size));
}
m = m.selfadjointView<Eigen::Upper>().llt().solve(
Matrix::Identity(row_block_size, row_block_size));
});
}
// Similar to RightMultiply, use the block structure of the matrix A
// to compute y = (E'E)^-1 (E'b - E'F x).
// Similar to RightMultiplyAndAccumulate, use the block structure of the matrix
// A to compute y = (E'E)^-1 (E'b - E'F x).
void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
const int num_cols_e = A_->num_cols_e();
const int num_cols_f = A_->num_cols_f();
@@ -163,26 +212,34 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
const int num_rows = A_->num_rows();
// y1 = F x
tmp_rows_.setZero();
A_->RightMultiplyF(x, tmp_rows_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
// y2 = b - y1
tmp_rows_ = ConstVectorRef(b_, num_rows) - tmp_rows_;
ParallelAssign(options_.context,
options_.num_threads,
tmp_rows_,
ConstVectorRef(b_, num_rows) - tmp_rows_);
// y3 = E' y2
tmp_e_cols_.setZero();
A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
// y = (E'E)^-1 y3
VectorRef(y, num_cols).setZero();
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y);
ParallelSetZero(options_.context, options_.num_threads, y, num_cols);
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(
tmp_e_cols_.data(), y, options_.context, options_.num_threads);
// The full solution vector y has two blocks. The first block of
// variables corresponds to the eliminated variables, which we just
// computed via back substitution. The second block of variables
// corresponds to the Schur complement system, so we just copy those
// values from the solution to the Schur complement.
VectorRef(y + num_cols_e, num_cols_f) = ConstVectorRef(x, num_cols_f);
VectorRef y_cols_f(y + num_cols_e, num_cols_f);
ParallelAssign(options_.context,
options_.num_threads,
y_cols_f,
ConstVectorRef(x, num_cols_f));
}
// Compute the RHS of the Schur complement system.
@@ -193,24 +250,29 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
// this using a series of matrix vector products.
void ImplicitSchurComplement::UpdateRhs() {
// y1 = E'b
tmp_e_cols_.setZero();
A_->LeftMultiplyE(b_, tmp_e_cols_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data());
// y2 = (E'E)^-1 y1
Vector y2 = Vector::Zero(A_->num_cols_e());
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y2.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
tmp_e_cols_2_.data(),
options_.context,
options_.num_threads);
// y3 = E y2
tmp_rows_.setZero();
A_->RightMultiplyE(y2.data(), tmp_rows_.data());
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
// y3 = b - y3
tmp_rows_ = ConstVectorRef(b_, A_->num_rows()) - tmp_rows_;
ParallelAssign(options_.context,
options_.num_threads,
tmp_rows_,
ConstVectorRef(b_, A_->num_rows()) - tmp_rows_);
// rhs = F' y3
rhs_.setZero();
A_->LeftMultiplyF(tmp_rows_.data(), rhs_.data());
ParallelSetZero(options_.context, options_.num_threads, rhs_);
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data());
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
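Taken together, the refactored file still never materializes the Schur complement S. In the E/F notation of the comments above (with the squared diagonal D folded into the block diagonals where present), the implicit operations are:

  S   = F'F - F'E (E'E)^-1 E'F    // RightMultiplyAndAccumulate: y += S x
  rhs = F' (b - E (E'E)^-1 E'b)   // UpdateRhs
  y_e = (E'E)^-1 (E'b - E'F x)    // BackSubstitute
  Z   = (F'F)^-1 F'E (E'E)^-1 E'F // InversePowerSeriesOperatorRightMultiplyAccumulate: y += Z x

What changed in this hunk is only how those products are evaluated: every setZero and dense assignment now goes through ParallelSetZero/ParallelAssign, the block-diagonal multiplies take the context and thread count, and AddDiagonalAndInvert inverts its diagonal blocks inside a ParallelFor.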

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,7 @@
#include "ceres/partitioned_matrix_view.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
@@ -82,13 +81,13 @@ class BlockSparseMatrix;
// (which for our purposes is an easily inverted block diagonal
// matrix), it can be done in terms of matrix vector products with E,
// F and (E'E)^-1. This class implements this functionality and other
// auxilliary bits needed to implement a CG solver on the Schur
// auxiliary bits needed to implement a CG solver on the Schur
// complement using the PartitionedMatrixView object.
//
// THREAD SAFETY: This class is nqot thread safe. In particular, the
// RightMultiply (and the LeftMultiply) methods are not thread safe as
// they depend on mutable arrays used for the temporaries needed to
// compute the product y += Sx;
// THREAD SAFETY: This class is not thread safe. In particular, the
// RightMultiplyAndAccumulate (and the LeftMultiplyAndAccumulate) methods are
// not thread safe as they depend on mutable arrays used for the temporaries
// needed to compute the product y += Sx;
class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
public:
// num_eliminate_blocks is the number of E blocks in the matrix
@@ -115,14 +114,20 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
void Init(const BlockSparseMatrix& A, const double* D, const double* b);
// y += Sx, where S is the Schur complement.
void RightMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
// The Schur complement is a symmetric positive definite matrix,
// thus the left and right multiply operators are the same.
void LeftMultiply(const double* x, double* y) const final {
RightMultiply(x, y);
void LeftMultiplyAndAccumulate(const double* x, double* y) const final {
RightMultiplyAndAccumulate(x, y);
}
// Following is useful for approximation of S^-1 via power series expansion.
// Z = (F'F)^-1 F'E (E'E)^-1 E'F
// y += Zx
void InversePowerSeriesOperatorRightMultiplyAccumulate(const double* x,
double* y) const;
// y = (E'E)^-1 (E'b - E'F x). Given an estimate of the solution to
// the Schur complement system, this method computes the value of
// the e_block variables that were eliminated to form the Schur
@@ -138,6 +143,7 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
}
const BlockSparseMatrix* block_diagonal_FtF_inverse() const {
CHECK(compute_ftf_inverse_);
return block_diagonal_FtF_inverse_.get();
}
@@ -146,25 +152,24 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
void UpdateRhs();
const LinearSolver::Options& options_;
bool compute_ftf_inverse_ = false;
std::unique_ptr<PartitionedMatrixViewBase> A_;
const double* D_;
const double* b_;
const double* D_ = nullptr;
const double* b_ = nullptr;
std::unique_ptr<BlockSparseMatrix> block_diagonal_EtE_inverse_;
std::unique_ptr<BlockSparseMatrix> block_diagonal_FtF_inverse_;
Vector rhs_;
// Temporary storage vectors used to implement RightMultiply.
// Temporary storage vectors used to implement RightMultiplyAndAccumulate.
mutable Vector tmp_rows_;
mutable Vector tmp_e_cols_;
mutable Vector tmp_e_cols_2_;
mutable Vector tmp_f_cols_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
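The comment block above defines the implicit product y += Sx that RightMultiplyAndAccumulate() provides without ever forming S = F'F - F'E (E'E)^-1 E'F. A dense Eigen sketch of that product under illustrative names (the temporaries mirror tmp_rows_ / tmp_e_cols_ in the implementation):

#include <Eigen/Dense>

// y += S x, computed purely through products with E and F.
void SchurProductAccumulateSketch(const Eigen::MatrixXd& E,
                                  const Eigen::MatrixXd& F,
                                  const Eigen::VectorXd& x,
                                  Eigen::VectorXd& y) {
  const Eigen::VectorXd Fx = F * x;                 // tmp_rows_
  const Eigen::VectorXd EtFx = E.transpose() * Fx;  // tmp_e_cols_
  // w = (E'E)^-1 E'F x; block diagonal and cheap to invert in the real code.
  const Eigen::VectorXd w = (E.transpose() * E).llt().solve(EtFx);
  // y += F'(Fx - E w) == (F'F - F'E (E'E)^-1 E'F) x
  y += F.transpose() * (Fx - E * w);
}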

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "ceres/small_blas.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Create the CompressedRowSparseMatrix matrix that will contain the
// inner product.
@@ -52,16 +51,9 @@ InnerProductComputer::CreateResultMatrix(
auto matrix = std::make_unique<CompressedRowSparseMatrix>(
m_.num_cols(), m_.num_cols(), num_nonzeros);
matrix->set_storage_type(storage_type);
const CompressedRowBlockStructure* bs = m_.block_structure();
const std::vector<Block>& blocks = bs->cols;
matrix->mutable_row_blocks()->resize(blocks.size());
matrix->mutable_col_blocks()->resize(blocks.size());
for (int i = 0; i < blocks.size(); ++i) {
(*(matrix->mutable_row_blocks()))[i] = blocks[i].size;
(*(matrix->mutable_col_blocks()))[i] = blocks[i].size;
}
*matrix->mutable_row_blocks() = bs->cols;
*matrix->mutable_col_blocks() = bs->cols;
return matrix;
}
@@ -78,6 +70,10 @@ int InnerProductComputer::ComputeNonzeros(
row_nnz->resize(blocks.size());
std::fill(row_nnz->begin(), row_nnz->end(), 0);
if (product_terms.empty()) {
return 0;
}
// First product term.
(*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size;
int num_nonzeros =
@@ -130,8 +126,10 @@ std::unique_ptr<InnerProductComputer> InnerProductComputer::Create(
const int start_row_block,
const int end_row_block,
CompressedRowSparseMatrix::StorageType product_storage_type) {
CHECK(product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR ||
product_storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR);
CHECK(product_storage_type ==
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR ||
product_storage_type ==
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
CHECK_GT(m.num_nonzeros(), 0)
<< "Congratulations, you found a bug in Ceres. Please report it.";
std::unique_ptr<InnerProductComputer> inner_product_computer(
@@ -157,7 +155,8 @@ void InnerProductComputer::Init(
for (int c1 = 0; c1 < row.cells.size(); ++c1) {
const Cell& cell1 = row.cells[c1];
int c2_begin, c2_end;
if (product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
if (product_storage_type ==
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
c2_begin = 0;
c2_end = c1 + 1;
} else {
@@ -195,6 +194,10 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
*(crsm_rows + 1) = *crsm_rows + row_block_nnz[i];
}
}
result_offsets_.resize(product_terms.size());
if (num_nonzeros == 0) {
return;
}
// The following macro FILL_CRSM_COL_BLOCK is key to understanding
// how this class works.
@@ -241,12 +244,11 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
} \
}
result_offsets_.resize(product_terms.size());
int col_nnz = 0;
int nnz = 0;
// Process the first term.
const InnerProductComputer::ProductTerm* current = &product_terms[0];
const InnerProductComputer::ProductTerm* current = product_terms.data();
FILL_CRSM_COL_BLOCK;
// Process the rest of the terms.
@@ -264,7 +266,7 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
if (previous->row == current->row) {
// if the current and previous terms are in the same row block,
// then they differ in the column block, in which case advance
// col_nnz by the column size of the prevous term.
// col_nnz by the column size of the previous term.
col_nnz += col_blocks[previous->col].size;
} else {
// If we have moved to a new row-block, then col_nnz is zero,
@@ -302,7 +304,8 @@ void InnerProductComputer::Compute() {
rows[bs->cols[cell1.block_id].position];
int c2_begin, c2_end;
if (storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
if (storage_type ==
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
c2_begin = 0;
c2_end = c1 + 1;
} else {
@@ -330,5 +333,4 @@ void InnerProductComputer::Compute() {
CHECK_EQ(cursor, result_offsets_.size());
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
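InnerProductComputer assembles m'm directly in compressed-row form, storing a single triangle of the symmetric result. A dense Eigen sketch of the quantity it targets (illustrative only; the real computation runs block-wise over the product terms and reuses precomputed offsets):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd m(3, 2);
  m << 1, 2,
       3, 4,
       5, 6;
  // The full inner product is symmetric ...
  const Eigen::MatrixXd mtm = m.transpose() * m;
  // ... so only one triangle needs to be stored, mirroring
  // StorageType::LOWER_TRIANGULAR above.
  const Eigen::MatrixXd lower = mtm.triangularView<Eigen::Lower>();
  std::cout << lower << "\n";
  return 0;
}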

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// This class is used to repeatedly compute the inner product
//
@@ -153,8 +152,7 @@ class CERES_NO_EXPORT InnerProductComputer {
std::vector<int> result_offsets_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Helper routine to compute the inverse or pseudo-inverse of a
// symmetric positive semi-definite matrix.
@@ -73,7 +72,6 @@ typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix(
return svd.solve(MType::Identity(size, size));
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_INVERT_PSD_MATRIX_H_
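A dynamic-size sketch of the strategy this helper implements: attempt a Cholesky factorization, and fall back to an SVD-based pseudo-inverse when the matrix is only positive semi-definite. This is a simplification under stated assumptions; the real template also handles fixed sizes and a compile-time full-rank hint:

#include <Eigen/Dense>

Eigen::MatrixXd InvertPSDSketch(const Eigen::MatrixXd& m) {
  const Eigen::Index size = m.rows();
  const Eigen::MatrixXd identity = Eigen::MatrixXd::Identity(size, size);
  // Fast path: full-rank matrices admit a Cholesky solve.
  Eigen::LLT<Eigen::MatrixXd> llt(m);
  if (llt.info() == Eigen::Success) {
    return llt.solve(identity);
  }
  // Rank-deficient case: solve via SVD, yielding the pseudo-inverse.
  Eigen::JacobiSVD<Eigen::MatrixXd> svd(
      m, Eigen::ComputeThinU | Eigen::ComputeThinV);
  return svd.solve(identity);
}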

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,7 @@
#include <algorithm>
#include <cmath>
namespace ceres {
namespace internal {
namespace ceres::internal {
bool IsClose(double x,
double y,
double relative_precision,
@@ -57,5 +56,4 @@ bool IsClose(double x,
}
return *relative_error < std::fabs(relative_precision);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Returns true if x and y have a relative (unsigned) difference less than
// relative_precision and false otherwise. Stores the relative and absolute
// difference in relative/absolute_error if non-nullptr. If one of the two
@@ -48,8 +47,7 @@ CERES_NO_EXPORT bool IsClose(double x,
double relative_precision,
double* relative_error,
double* absolute_error);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
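A self-contained sketch matching the documented contract of IsClose(). The zero-operand behavior is an assumption inferred from the truncated comment above: when either value is exactly zero, the absolute difference stands in for the relative one:

#include <algorithm>
#include <cmath>

bool IsCloseSketch(double x, double y, double relative_precision,
                   double* relative_error, double* absolute_error) {
  double local_relative, local_absolute;
  if (relative_error == nullptr) relative_error = &local_relative;
  if (absolute_error == nullptr) absolute_error = &local_absolute;
  *absolute_error = std::fabs(x - y);
  *relative_error = *absolute_error / std::max(std::fabs(x), std::fabs(y));
  if (x == 0.0 || y == 0.0) {
    // A relative difference is meaningless against a zero operand.
    *relative_error = *absolute_error;
  }
  return *relative_error < std::fabs(relative_precision);
}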

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,43 +33,69 @@
#include <string>
#include "Eigen/Core"
#include "ceres/dense_cholesky.h"
#include "ceres/sparse_cholesky.h"
#include "ceres/sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
IterativeRefiner::IterativeRefiner(const int max_num_iterations)
SparseIterativeRefiner::SparseIterativeRefiner(const int max_num_iterations)
: max_num_iterations_(max_num_iterations) {}
IterativeRefiner::~IterativeRefiner() = default;
SparseIterativeRefiner::~SparseIterativeRefiner() = default;
void IterativeRefiner::Allocate(int num_cols) {
void SparseIterativeRefiner::Allocate(int num_cols) {
residual_.resize(num_cols);
correction_.resize(num_cols);
lhs_x_solution_.resize(num_cols);
}
void IterativeRefiner::Refine(const SparseMatrix& lhs,
const double* rhs_ptr,
SparseCholesky* sparse_cholesky,
double* solution_ptr) {
void SparseIterativeRefiner::Refine(const SparseMatrix& lhs,
const double* rhs_ptr,
SparseCholesky* cholesky,
double* solution_ptr) {
const int num_cols = lhs.num_cols();
Allocate(num_cols);
ConstVectorRef rhs(rhs_ptr, num_cols);
VectorRef solution(solution_ptr, num_cols);
std::string ignored_message;
for (int i = 0; i < max_num_iterations_; ++i) {
// residual = rhs - lhs * solution
lhs_x_solution_.setZero();
lhs.RightMultiply(solution_ptr, lhs_x_solution_.data());
lhs.RightMultiplyAndAccumulate(solution_ptr, lhs_x_solution_.data());
residual_ = rhs - lhs_x_solution_;
// solution += lhs^-1 residual
std::string ignored_message;
sparse_cholesky->Solve(
residual_.data(), correction_.data(), &ignored_message);
cholesky->Solve(residual_.data(), correction_.data(), &ignored_message);
solution += correction_;
}
};
} // namespace internal
} // namespace ceres
DenseIterativeRefiner::DenseIterativeRefiner(const int max_num_iterations)
: max_num_iterations_(max_num_iterations) {}
DenseIterativeRefiner::~DenseIterativeRefiner() = default;
void DenseIterativeRefiner::Allocate(int num_cols) {
residual_.resize(num_cols);
correction_.resize(num_cols);
}
void DenseIterativeRefiner::Refine(const int num_cols,
const double* lhs_ptr,
const double* rhs_ptr,
DenseCholesky* cholesky,
double* solution_ptr) {
Allocate(num_cols);
ConstMatrixRef lhs(lhs_ptr, num_cols, num_cols);
ConstVectorRef rhs(rhs_ptr, num_cols);
VectorRef solution(solution_ptr, num_cols);
std::string ignored_message;
for (int i = 0; i < max_num_iterations_; ++i) {
residual_ = rhs - lhs * solution;
// solution += lhs^-1 residual
cholesky->Solve(residual_.data(), correction_.data(), &ignored_message);
solution += correction_;
}
};
} // namespace ceres::internal
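Both refiners above share the same loop: with a (possibly approximate) factorization of lhs in hand, solve against the current residual and apply the correction. A dense Eigen sketch with LLT standing in for DenseCholesky:

#include <Eigen/Dense>

void RefineSketch(const Eigen::MatrixXd& lhs,
                  const Eigen::VectorXd& rhs,
                  const Eigen::LLT<Eigen::MatrixXd>& cholesky,
                  int max_num_iterations,
                  Eigen::VectorXd& solution) {
  for (int i = 0; i < max_num_iterations; ++i) {
    // residual = rhs - lhs * solution
    const Eigen::VectorXd residual = rhs - lhs * solution;
    // solution += lhs^-1 residual, reusing the precomputed factorization.
    solution += cholesky.solve(residual);
  }
}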

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,9 +39,9 @@
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class DenseCholesky;
class SparseCholesky;
class SparseMatrix;
@@ -58,20 +58,20 @@ class SparseMatrix;
// Definite linear systems.
//
// The above iterative loop is run until max_num_iterations is reached.
class CERES_NO_EXPORT IterativeRefiner {
class CERES_NO_EXPORT SparseIterativeRefiner {
public:
// max_num_iterations is the number of refinement iterations to
// perform.
explicit IterativeRefiner(int max_num_iterations);
explicit SparseIterativeRefiner(int max_num_iterations);
// Needed for mocking.
virtual ~IterativeRefiner();
virtual ~SparseIterativeRefiner();
// Given an initial estimate of the solution of lhs * x = rhs, use
// max_num_iterations rounds of iterative refinement to improve it.
//
// sparse_cholesky is assumed to contain an already computed
// factorization (or approximation thereof) of lhs.
// cholesky is assumed to contain an already computed factorization (or
// an approximation thereof) of lhs.
//
// solution is expected to contain an approximation to the solution
// to lhs * x = rhs. It can be zero.
@@ -79,7 +79,7 @@ class CERES_NO_EXPORT IterativeRefiner {
// This method is virtual to facilitate mocking.
virtual void Refine(const SparseMatrix& lhs,
const double* rhs,
SparseCholesky* sparse_cholesky,
SparseCholesky* cholesky,
double* solution);
private:
@@ -91,7 +91,39 @@ class CERES_NO_EXPORT IterativeRefiner {
Vector lhs_x_solution_;
};
} // namespace internal
} // namespace ceres
class CERES_NO_EXPORT DenseIterativeRefiner {
public:
// max_num_iterations is the number of refinement iterations to
// perform.
explicit DenseIterativeRefiner(int max_num_iterations);
// Needed for mocking.
virtual ~DenseIterativeRefiner();
// Given an initial estimate of the solution of lhs * x = rhs, use
// max_num_iterations rounds of iterative refinement to improve it.
//
// cholesky is assumed to contain an already computed factorization (or
// an approximation thereof) of lhs.
//
// solution is expected to contain an approximation to the solution
// to lhs * x = rhs. It can be zero.
//
// This method is virtual to facilitate mocking.
virtual void Refine(int num_cols,
const double* lhs,
const double* rhs,
DenseCholesky* cholesky,
double* solution);
private:
void Allocate(int num_cols);
int max_num_iterations_;
Vector residual_;
Vector correction_;
};
} // namespace ceres::internal
#endif // CERES_INTERNAL_ITERATIVE_REFINER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@
#include "ceres/implicit_schur_complement.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_solver.h"
#include "ceres/power_series_expansion_preconditioner.h"
#include "ceres/preconditioner.h"
#include "ceres/schur_jacobi_preconditioner.h"
#include "ceres/triplet_sparse_matrix.h"
@@ -51,8 +52,7 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
IterativeSchurComplementSolver::IterativeSchurComplementSolver(
LinearSolver::Options options)
@@ -68,6 +68,8 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
EventLogger event_logger("IterativeSchurComplementSolver::Solve");
CHECK(A->block_structure() != nullptr);
CHECK(A->transpose_block_structure() != nullptr);
const int num_eliminate_blocks = options_.elimination_groups[0];
// Initialize a ImplicitSchurComplement object.
if (schur_complement_ == nullptr) {
@@ -86,45 +88,66 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
VLOG(2) << "No parameter blocks left in the schur complement.";
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
schur_complement_->BackSubstitute(nullptr, x);
return summary;
}
// Initialize the solution to the Schur complement system to zero.
// Initialize the solution to the Schur complement system.
reduced_linear_system_solution_.resize(schur_complement_->num_rows());
reduced_linear_system_solution_.setZero();
LinearSolver::Options cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
ConjugateGradientsSolver cg_solver(cg_options);
LinearSolver::PerSolveOptions cg_per_solve_options;
cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
if (options_.use_spse_initialization) {
Preconditioner::Options preconditioner_options(options_);
preconditioner_options.type = SCHUR_POWER_SERIES_EXPANSION;
PowerSeriesExpansionPreconditioner pse_solver(
schur_complement_.get(),
options_.max_num_spse_iterations,
options_.spse_tolerance,
preconditioner_options);
pse_solver.RightMultiplyAndAccumulate(
schur_complement_->rhs().data(),
reduced_linear_system_solution_.data());
}
CreatePreconditioner(A);
if (preconditioner_.get() != nullptr) {
if (preconditioner_ != nullptr) {
if (!preconditioner_->Update(*A, per_solve_options.D)) {
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_FAILURE;
summary.termination_type = LinearSolverTerminationType::FAILURE;
summary.message = "Preconditioner update failed.";
return summary;
}
cg_per_solve_options.preconditioner = preconditioner_.get();
}
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options_.min_num_iterations;
cg_options.max_num_iterations = options_.max_num_iterations;
cg_options.residual_reset_period = options_.residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
LinearOperatorAdapter lhs(*schur_complement_);
LinearOperatorAdapter preconditioner(*preconditioner_);
Vector scratch[4];
for (int i = 0; i < 4; ++i) {
scratch[i].resize(schur_complement_->num_cols());
}
Vector* scratch_ptr[4] = {&scratch[0], &scratch[1], &scratch[2], &scratch[3]};
event_logger.AddEvent("Setup");
LinearSolver::Summary summary =
cg_solver.Solve(schur_complement_.get(),
schur_complement_->rhs().data(),
cg_per_solve_options,
reduced_linear_system_solution_.data());
if (summary.termination_type != LINEAR_SOLVER_FAILURE &&
summary.termination_type != LINEAR_SOLVER_FATAL_ERROR) {
ConjugateGradientsSolver(cg_options,
lhs,
schur_complement_->rhs(),
preconditioner,
scratch_ptr,
reduced_linear_system_solution_);
if (summary.termination_type != LinearSolverTerminationType::FAILURE &&
summary.termination_type != LinearSolverTerminationType::FATAL_ERROR) {
schur_complement_->BackSubstitute(reduced_linear_system_solution_.data(),
x);
}
@@ -134,29 +157,31 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
void IterativeSchurComplementSolver::CreatePreconditioner(
BlockSparseMatrix* A) {
if (options_.preconditioner_type == IDENTITY ||
preconditioner_.get() != nullptr) {
if (preconditioner_ != nullptr) {
return;
}
Preconditioner::Options preconditioner_options;
preconditioner_options.type = options_.preconditioner_type;
preconditioner_options.visibility_clustering_type =
options_.visibility_clustering_type;
preconditioner_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
preconditioner_options.num_threads = options_.num_threads;
preconditioner_options.row_block_size = options_.row_block_size;
preconditioner_options.e_block_size = options_.e_block_size;
preconditioner_options.f_block_size = options_.f_block_size;
preconditioner_options.elimination_groups = options_.elimination_groups;
Preconditioner::Options preconditioner_options(options_);
CHECK(options_.context != nullptr);
preconditioner_options.context = options_.context;
switch (options_.preconditioner_type) {
case IDENTITY:
preconditioner_ = std::make_unique<IdentityPreconditioner>(
schur_complement_->num_cols());
break;
case JACOBI:
preconditioner_ = std::make_unique<SparseMatrixPreconditionerWrapper>(
schur_complement_->block_diagonal_FtF_inverse());
schur_complement_->block_diagonal_FtF_inverse(),
preconditioner_options);
break;
case SCHUR_POWER_SERIES_EXPANSION:
// Ignoring the value of spse_tolerance to ensure preconditioner stays
// fixed during the iterations of cg.
preconditioner_ = std::make_unique<PowerSeriesExpansionPreconditioner>(
schur_complement_.get(),
options_.max_num_spse_iterations,
0,
preconditioner_options);
break;
case SCHUR_JACOBI:
preconditioner_ = std::make_unique<SchurJacobiPreconditioner>(
@@ -172,5 +197,4 @@ void IterativeSchurComplementSolver::CreatePreconditioner(
}
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
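The solver above runs conjugate gradients on the implicit Schur complement, optionally warm-starting from a power-series approximation of S^-1. For orientation, a textbook preconditioned CG skeleton in dense Eigen (illustrative only: the real code never forms S, and it recycles four scratch vectors instead of allocating per iteration):

#include <Eigen/Dense>

Eigen::VectorXd PcgSketch(const Eigen::MatrixXd& S,
                          const Eigen::VectorXd& b,
                          const Eigen::MatrixXd& Minv,
                          int max_num_iterations,
                          double r_tolerance) {
  Eigen::VectorXd x = Eigen::VectorXd::Zero(b.size());
  Eigen::VectorXd r = b - S * x;
  Eigen::VectorXd z = Minv * r;  // Apply the preconditioner.
  Eigen::VectorXd p = z;
  double rz = r.dot(z);
  for (int i = 0; i < max_num_iterations; ++i) {
    if (r.norm() <= r_tolerance * b.norm()) break;
    const Eigen::VectorXd Sp = S * p;
    const double alpha = rz / p.dot(Sp);
    x += alpha * p;   // Advance the iterate.
    r -= alpha * Sp;  // Update the residual.
    z = Minv * r;
    const double rz_new = r.dot(z);
    p = z + (rz_new / rz) * p;  // New conjugate search direction.
    rz = rz_new;
  }
  return x;
}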

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/linear_solver.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
class ImplicitSchurComplement;
@@ -53,7 +52,7 @@ class Preconditioner;
// The algorithm used by this solver was developed in a series of
// papers - "Agarwal et al, Bundle Adjustment in the Large, ECCV 2010"
// and "Wu et al, Multicore Bundle Adjustment, submitted to CVPR
// 2011" at the Univeristy of Washington.
// 2011" at the University of Washington.
//
// The key idea is that one can run Conjugate Gradients on the Schur
// Complement system without explicitly forming the Schur Complement
@@ -94,8 +93,7 @@ class CERES_NO_EXPORT IterativeSchurComplementSolver final
Vector reduced_linear_system_solution_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,13 +38,13 @@
#include "ceres/internal/eigen.h"
#include "ceres/linear_least_squares_problems.h"
#include "ceres/linear_solver.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/sparse_matrix.h"
#include "ceres/trust_region_strategy.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
LevenbergMarquardtStrategy::LevenbergMarquardtStrategy(
const TrustRegionStrategy::Options& options)
@@ -54,7 +54,9 @@ LevenbergMarquardtStrategy::LevenbergMarquardtStrategy(
min_diagonal_(options.min_lm_diagonal),
max_diagonal_(options.max_lm_diagonal),
decrease_factor_(2.0),
reuse_diagonal_(false) {
reuse_diagonal_(false),
context_(options.context),
num_threads_(options.num_threads) {
CHECK(linear_solver_ != nullptr);
CHECK_GT(min_diagonal_, 0.0);
CHECK_LE(min_diagonal_, max_diagonal_);
@@ -78,14 +80,18 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
diagonal_.resize(num_parameters, 1);
}
jacobian->SquaredColumnNorm(diagonal_.data());
for (int i = 0; i < num_parameters; ++i) {
diagonal_[i] =
std::min(std::max(diagonal_[i], min_diagonal_), max_diagonal_);
}
jacobian->SquaredColumnNorm(diagonal_.data(), context_, num_threads_);
ParallelAssign(context_,
num_threads_,
diagonal_,
diagonal_.array().max(min_diagonal_).min(max_diagonal_));
}
lm_diagonal_ = (diagonal_ / radius_).array().sqrt();
if (lm_diagonal_.size() == 0) {
lm_diagonal_.resize(num_parameters);
}
ParallelAssign(
context_, num_threads_, lm_diagonal_, (diagonal_ / radius_).cwiseSqrt());
LinearSolver::PerSolveOptions solve_options;
solve_options.D = lm_diagonal_.data();
@@ -99,7 +105,7 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
// Invalidate the output array lm_step, so that we can detect if
// the linear solver generated numerical garbage. This is known
// to happen for the DENSE_QR and then DENSE_SCHUR solver when
// the Jacobin is severely rank deficient and mu is too small.
// the Jacobian is severely rank deficient and mu is too small.
InvalidateArray(num_parameters, step);
// Instead of solving Jx = -r, solve Jy = r.
@@ -108,17 +114,21 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
LinearSolver::Summary linear_solver_summary =
linear_solver_->Solve(jacobian, residuals, solve_options, step);
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
if (linear_solver_summary.termination_type ==
LinearSolverTerminationType::FATAL_ERROR) {
LOG(WARNING) << "Linear solver fatal error: "
<< linear_solver_summary.message;
} else if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE) {
} else if (linear_solver_summary.termination_type ==
LinearSolverTerminationType::FAILURE) {
LOG(WARNING) << "Linear solver failure. Failed to compute a step: "
<< linear_solver_summary.message;
} else if (!IsArrayValid(num_parameters, step)) {
LOG(WARNING) << "Linear solver failure. Failed to compute a finite step.";
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
linear_solver_summary.termination_type =
LinearSolverTerminationType::FAILURE;
} else {
VectorRef(step, num_parameters) *= -1.0;
VectorRef step_vec(step, num_parameters);
ParallelAssign(context_, num_threads_, step_vec, -step_vec);
}
reuse_diagonal_ = true;
@@ -153,7 +163,7 @@ void LevenbergMarquardtStrategy::StepAccepted(double step_quality) {
reuse_diagonal_ = false;
}
void LevenbergMarquardtStrategy::StepRejected(double step_quality) {
void LevenbergMarquardtStrategy::StepRejected(double /*step_quality*/) {
radius_ = radius_ / decrease_factor_;
decrease_factor_ *= 2.0;
reuse_diagonal_ = true;
@@ -161,5 +171,4 @@ void LevenbergMarquardtStrategy::StepRejected(double step_quality) {
double LevenbergMarquardtStrategy::Radius() const { return radius_; }
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
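The damping logic in this file reduces to a few lines: clamp the Jacobian's squared column norms, scale by the trust region radius, and shrink the radius geometrically when a step is rejected. A sketch with member names mirroring the hunk (the default values here are illustrative, not the solver's):

#include <Eigen/Dense>

struct LmDampingSketch {
  double radius = 1e4;
  double decrease_factor = 2.0;
  double min_diagonal = 1e-6;
  double max_diagonal = 1e32;

  // diagonal_[i] = clamp(diagonal_[i], min, max);
  // lm_diagonal_ = sqrt(diagonal_ / radius_)
  Eigen::VectorXd LmDiagonal(const Eigen::VectorXd& squared_col_norms) const {
    const Eigen::VectorXd clamped = squared_col_norms.array()
                                        .max(min_diagonal)
                                        .min(max_diagonal)
                                        .matrix();
    return (clamped / radius).cwiseSqrt();
  }

  // Mirrors StepRejected(): smaller radius, faster future shrinkage.
  void StepRejected() {
    radius = radius / decrease_factor;
    decrease_factor *= 2.0;
  }
};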

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,9 @@
#include "ceres/internal/export.h"
#include "ceres/trust_region_strategy.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ContextImpl;
// Levenberg-Marquardt step computation and trust region sizing
// strategy based on "Methods for Nonlinear Least Squares" by
@@ -82,10 +83,11 @@ class CERES_NO_EXPORT LevenbergMarquardtStrategy final
// allocations in every iteration and reuse when a step fails and
// ComputeStep is called again.
Vector lm_diagonal_; // lm_diagonal_ = sqrt(diagonal_ / radius_);
ContextImpl* context_;
int num_threads_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,11 @@
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream> // NOLINT
#include <map>
#include <memory>
#include <ostream> // NOLINT
#include <string>
#include <vector>
#include "ceres/evaluator.h"
#include "ceres/function_sample.h"
@@ -45,23 +48,17 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::map;
using std::ostream;
using std::string;
using std::vector;
namespace ceres::internal {
namespace {
// Precision used for floating point values in error message output.
const int kErrorMessageNumericPrecision = 8;
} // namespace
ostream& operator<<(ostream& os, const FunctionSample& sample);
std::ostream& operator<<(std::ostream& os, const FunctionSample& sample);
// Convenience stream operator for pushing FunctionSamples into log messages.
ostream& operator<<(ostream& os, const FunctionSample& sample) {
std::ostream& operator<<(std::ostream& os, const FunctionSample& sample) {
os << sample.ToDebugString();
return os;
}
@@ -74,16 +71,16 @@ LineSearch::LineSearch(const LineSearch::Options& options)
std::unique_ptr<LineSearch> LineSearch::Create(
const LineSearchType line_search_type,
const LineSearch::Options& options,
string* error) {
std::string* error) {
switch (line_search_type) {
case ceres::ARMIJO:
return std::make_unique<ArmijoLineSearch>(options);
case ceres::WOLFE:
return std::make_unique<WolfeLineSearch>(options);
default:
*error = string("Invalid line search algorithm type: ") +
*error = std::string("Invalid line search algorithm type: ") +
LineSearchTypeToString(line_search_type) +
string(", unable to create line search.");
std::string(", unable to create line search.");
}
return nullptr;
}
@@ -150,7 +147,7 @@ double LineSearchFunction::DirectionInfinityNorm() const {
}
void LineSearchFunction::ResetTimeStatistics() {
const map<string, CallStatistics> evaluator_statistics =
const std::map<std::string, CallStatistics> evaluator_statistics =
evaluator_->Statistics();
initial_evaluator_residual_time_in_seconds =
@@ -166,7 +163,7 @@ void LineSearchFunction::ResetTimeStatistics() {
void LineSearchFunction::TimeStatistics(
double* cost_evaluation_time_in_seconds,
double* gradient_evaluation_time_in_seconds) const {
const map<string, CallStatistics> evaluator_time_statistics =
const std::map<std::string, CallStatistics> evaluator_time_statistics =
evaluator_->Statistics();
*cost_evaluation_time_in_seconds =
FindWithDefault(
@@ -243,7 +240,7 @@ double LineSearch::InterpolatingPolynomialMinimizingStepSize(
// Select step size by interpolating the function and gradient values
// and minimizing the corresponding polynomial.
vector<FunctionSample> samples;
std::vector<FunctionSample> samples;
samples.push_back(lowerbound);
if (interpolation_type == QUADRATIC) {
@@ -427,7 +424,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
// shrank the bracket width until it was below our minimum tolerance.
// As these are 'artificial' constraints, and we would otherwise fail to
// produce a valid point when ArmijoLineSearch would succeed, we return the
// point with the lowest cost found thus far which satsifies the Armijo
// point with the lowest cost found thus far which satisfies the Armijo
// condition (but not the Wolfe conditions).
summary->optimal_point = bracket_low;
summary->success = true;
@@ -449,8 +446,8 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
// defined by bracket_low & bracket_high, which satisfy:
//
// 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x
// contains step sizes that satsify the strong Wolfe conditions.
// 2. bracket_low.x is of all the step sizes evaluated *which satisifed the
// contains step sizes that satisfy the strong Wolfe conditions.
// 2. bracket_low.x is of all the step sizes evaluated *which satisfied the
// Armijo sufficient decrease condition*, the one which generated the
// smallest function value, i.e. bracket_low.value <
// f(all other steps satisfying Armijo).
@@ -494,7 +491,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
// Or, searching was stopped due to an 'artificial' constraint, i.e. not
// a condition imposed / required by the underlying algorithm, but instead an
// engineering / implementation consideration. But a step which exceeds the
// minimum step size, and satsifies the Armijo condition was still found,
// minimum step size, and satisfies the Armijo condition was still found,
// and should thus be used [zoom not required].
//
// Returns false if no step size > minimum step size was found which
@@ -518,7 +515,7 @@ bool WolfeLineSearch::BracketingPhase(const FunctionSample& initial_position,
// As we require the gradient to evaluate the Wolfe condition, we always
// calculate it together with the value, irrespective of the interpolation
// type. As opposed to only calculating the gradient after the Armijo
// condition is satisifed, as the computational saving from this approach
// condition is satisfied, as the computational saving from this approach
// would be slight (perhaps even negative due to the extra call). Also,
// always calculating the value & gradient together protects against us
// reporting invalid solutions if the cost function returns slightly different
@@ -821,7 +818,7 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
// As we require the gradient to evaluate the Wolfe condition, we always
// calculate it together with the value, irrespective of the interpolation
// type. As opposed to only calculating the gradient after the Armijo
// condition is satisifed, as the computational saving from this approach
// condition is satisfied, as the computational saving from this approach
// would be slight (perhaps even negative due to the extra call). Also,
// always calculating the value & gradient together protects against us
// reporting invalid solutions if the cost function returns slightly
@@ -883,5 +880,4 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
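The Armijo sufficient-decrease condition referenced throughout the bracketing and zoom comments above accepts the first step t with f(t) <= f(0) + c1 * t * f'(0). A generic backtracking sketch (not ArmijoLineSearch itself, which interpolates a polynomial to pick the next trial step rather than shrinking by a fixed factor):

#include <functional>

double ArmijoBacktrackSketch(const std::function<double(double)>& f,
                             double f0,  // f at step size 0.
                             double g0,  // Directional derivative at 0 (< 0).
                             double initial_step,
                             double c1 = 1e-4,
                             double shrink = 0.5,
                             int max_iterations = 50) {
  double t = initial_step;
  for (int i = 0; i < max_iterations; ++i) {
    if (f(t) <= f0 + c1 * t * g0) {
      return t;  // Sufficient decrease achieved.
    }
    t *= shrink;
  }
  return 0.0;  // No acceptable step found.
}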

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,8 +42,7 @@
#include "ceres/internal/export.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Evaluator;
class LineSearchFunction;
@@ -302,7 +301,6 @@ class CERES_NO_EXPORT WolfeLineSearch final : public LineSearch {
Summary* summary) const final;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_LINE_SEARCH_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,11 @@
#include "ceres/low_rank_inverse_hessian.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT SteepestDescent final : public LineSearchDirection {
public:
bool NextDirection(const LineSearchMinimizer::State& previous,
bool NextDirection(const LineSearchMinimizer::State& /*previous*/,
const LineSearchMinimizer::State& current,
Vector* search_direction) override {
*search_direction = -current.gradient;
@@ -121,8 +120,8 @@ class CERES_NO_EXPORT LBFGS final : public LineSearchDirection {
current.gradient - previous.gradient);
search_direction->setZero();
low_rank_inverse_hessian_.RightMultiply(current.gradient.data(),
search_direction->data());
low_rank_inverse_hessian_.RightMultiplyAndAccumulate(
current.gradient.data(), search_direction->data());
*search_direction *= -1.0;
if (search_direction->dot(current.gradient) >= 0.0) {
@@ -242,7 +241,7 @@ class CERES_NO_EXPORT BFGS final : public LineSearchDirection {
//
// The original origin of this rescaling trick is somewhat unclear, the
// earliest reference appears to be Oren [1], however it is widely
// discussed without specific attributation in various texts including
// discussed without specific attribution in various texts including
// [2] (p143).
//
// [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms
@@ -367,5 +366,4 @@ std::unique_ptr<LineSearchDirection> LineSearchDirection::Create(
return nullptr;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
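The quasi-Newton hunks above share one safeguard: after applying the inverse Hessian approximation, check that the result still points downhill, and fall back to steepest descent otherwise. A sketch where apply_inverse_hessian is a hypothetical stand-in for low_rank_inverse_hessian_.RightMultiplyAndAccumulate():

#include <functional>
#include <Eigen/Dense>

Eigen::VectorXd SafeguardedDirectionSketch(
    const Eigen::VectorXd& gradient,
    const std::function<Eigen::VectorXd(const Eigen::VectorXd&)>&
        apply_inverse_hessian) {
  Eigen::VectorXd direction = -apply_inverse_hessian(gradient);
  if (direction.dot(gradient) >= 0.0) {
    // Numerical breakdown: the quasi-Newton step points uphill.
    direction = -gradient;
  }
  return direction;
}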

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/line_search_minimizer.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT LineSearchDirection {
public:
@@ -61,7 +60,6 @@ class CERES_NO_EXPORT LineSearchDirection {
Vector* search_direction) = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_LINE_SEARCH_DIRECTION_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,7 @@
//
// Generic loop for line search based optimization algorithms.
//
// This is primarily inpsired by the minFunc packaged written by Mark
// This is primarily inspired by the minFunc package written by Mark
// Schmidt.
//
// http://www.di.ens.fr/~mschmidt/Software/minFunc.html
@@ -59,8 +59,7 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
bool EvaluateGradientNorms(Evaluator* evaluator,
@@ -473,5 +472,4 @@ void LineSearchMinimizer::Minimize(const Minimizer::Options& options,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Generic line search minimization algorithm.
//
@@ -47,7 +46,7 @@ namespace internal {
class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer {
public:
struct State {
State(int num_parameters, int num_effective_parameters)
State(int /*num_parameters*/, int num_effective_parameters)
: cost(0.0),
gradient(num_effective_parameters),
gradient_squared_norm(0.0),
@@ -69,7 +68,6 @@ class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer {
Solver::Summary* summary) final;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_LINE_SEARCH_MINIMIZER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -41,8 +41,7 @@
#include "ceres/program.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
bool IsProgramValid(const Program& program, std::string* error) {
@@ -102,5 +101,4 @@ bool LineSearchPreprocessor::Preprocess(const Solver::Options& options,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "ceres/internal/export.h"
#include "ceres/preprocessor.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor {
public:
@@ -45,8 +44,7 @@ class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor {
PreprocessedProblem* preprocessed_problem) final;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,10 +44,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::string;
namespace ceres::internal {
std::unique_ptr<LinearLeastSquaresProblem>
CreateLinearLeastSquaresProblemFromId(int id) {
@@ -62,6 +59,10 @@ CreateLinearLeastSquaresProblemFromId(int id) {
return LinearLeastSquaresProblem3();
case 4:
return LinearLeastSquaresProblem4();
case 5:
return LinearLeastSquaresProblem5();
case 6:
return LinearLeastSquaresProblem6();
default:
LOG(FATAL) << "Unknown problem id requested " << id;
}
@@ -87,8 +88,7 @@ x_D = [1.78448275;
2.82327586;]
*/
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem0() {
std::unique_ptr<LinearLeastSquaresProblem> problem =
std::make_unique<LinearLeastSquaresProblem>();
auto problem = std::make_unique<LinearLeastSquaresProblem>();
auto A = std::make_unique<TripletSparseMatrix>(3, 2, 6);
problem->b = std::make_unique<double[]>(3);
@@ -161,13 +161,15 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem0() {
12 0 1 17 1
0 30 1 1 37]
cond(A'A) = 200.36
S = [ 42.3419 -1.4000 -11.5806
-1.4000 2.6000 1.0000
-11.5806 1.0000 31.1935]
r = [ 4.3032
5.4000
5.0323]
4.0323]
S\r = [ 0.2102
2.1367
@@ -187,14 +189,21 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem1() {
int num_rows = 6;
int num_cols = 5;
std::unique_ptr<LinearLeastSquaresProblem> problem =
std::make_unique<LinearLeastSquaresProblem>();
auto problem = std::make_unique<LinearLeastSquaresProblem>();
auto A = std::make_unique<TripletSparseMatrix>(
num_rows, num_cols, num_rows * num_cols);
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
problem->x = std::make_unique<double[]>(num_cols);
problem->x[0] = -2.3061;
problem->x[1] = 0.3172;
problem->x[2] = 0.2102;
problem->x[3] = 2.1367;
problem->x[4] = 0.1388;
int* rows = A->mutable_rows();
int* cols = A->mutable_cols();
double* values = A->mutable_values();
@@ -292,16 +301,21 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem2() {
int num_rows = 6;
int num_cols = 5;
std::unique_ptr<LinearLeastSquaresProblem> problem =
std::make_unique<LinearLeastSquaresProblem>();
auto problem = std::make_unique<LinearLeastSquaresProblem>();
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
problem->x = std::make_unique<double[]>(num_cols);
problem->x[0] = -2.3061;
problem->x[1] = 0.3172;
problem->x[2] = 0.2102;
problem->x[3] = 2.1367;
problem->x[4] = 0.1388;
auto* bs = new CompressedRowBlockStructure;
std::unique_ptr<double[]> values =
std::make_unique<double[]>(num_rows * num_cols);
auto values = std::make_unique<double[]>(num_rows * num_cols);
for (int c = 0; c < num_cols; ++c) {
bs->cols.emplace_back();
@@ -427,16 +441,14 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem3() {
int num_rows = 5;
int num_cols = 2;
std::unique_ptr<LinearLeastSquaresProblem> problem =
std::make_unique<LinearLeastSquaresProblem>();
auto problem = std::make_unique<LinearLeastSquaresProblem>();
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
auto* bs = new CompressedRowBlockStructure;
std::unique_ptr<double[]> values =
std::make_unique<double[]>(num_rows * num_cols);
auto values = std::make_unique<double[]>(num_rows * num_cols);
for (int c = 0; c < num_cols; ++c) {
bs->cols.emplace_back();
@@ -536,16 +548,14 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4() {
int num_rows = 3;
int num_cols = 7;
std::unique_ptr<LinearLeastSquaresProblem> problem =
std::make_unique<LinearLeastSquaresProblem>();
auto problem = std::make_unique<LinearLeastSquaresProblem>();
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 1;
auto* bs = new CompressedRowBlockStructure;
std::unique_ptr<double[]> values =
std::make_unique<double[]>(num_rows * num_cols);
auto values = std::make_unique<double[]>(num_rows * num_cols);
// Column block structure
bs->cols.emplace_back();
@@ -614,12 +624,313 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4() {
return problem;
}
/*
A problem with block-diagonal F'F.
A = [1 0 | 0 0 2
3 0 | 0 0 4
0 -1 | 0 1 0
0 -3 | 0 1 0
0 -1 | 3 0 0
0 -2 | 1 0 0]
b = [0
1
2
3
4
5]
c = A'* b = [ 22
-25
17
7
4]
A'A = [10 0 0 0 10
0 15 -5 -4 0
0 -5 10 0 0
0 -4 0 2 0
10 0 0 0 20]
cond(A'A) = 41.402
S = [ 8.3333 -1.3333 0
-1.3333 0.9333 0
0 0 10.0000]
r = [ 8.6667
-1.6667
1.0000]
S\r = [ 0.9778
-0.3889
0.1000]
A\b = [ 0.2
-1.4444
0.9777
-0.3888
0.1]
*/
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem5() {
int num_rows = 6;
int num_cols = 5;
auto problem = std::make_unique<LinearLeastSquaresProblem>();
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 2;
// TODO: add x
problem->x = std::make_unique<double[]>(num_cols);
problem->x[0] = 0.2;
problem->x[1] = -1.4444;
problem->x[2] = 0.9777;
problem->x[3] = -0.3888;
problem->x[4] = 0.1;
auto* bs = new CompressedRowBlockStructure;
auto values = std::make_unique<double[]>(num_rows * num_cols);
for (int c = 0; c < num_cols; ++c) {
bs->cols.emplace_back();
bs->cols.back().size = 1;
bs->cols.back().position = c;
}
int nnz = 0;
// Row 1
{
values[nnz++] = -1;
values[nnz++] = 2;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 0;
row.cells.emplace_back(0, 0);
row.cells.emplace_back(4, 1);
}
// Row 2
{
values[nnz++] = 3;
values[nnz++] = 4;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 1;
row.cells.emplace_back(0, 2);
row.cells.emplace_back(4, 3);
}
// Row 3
{
values[nnz++] = -1;
values[nnz++] = 1;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 2;
row.cells.emplace_back(1, 4);
row.cells.emplace_back(3, 5);
}
// Row 4
{
values[nnz++] = -3;
values[nnz++] = 1;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 3;
row.cells.emplace_back(1, 6);
row.cells.emplace_back(3, 7);
}
// Row 5
{
values[nnz++] = -1;
values[nnz++] = 3;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 4;
row.cells.emplace_back(1, 8);
row.cells.emplace_back(2, 9);
}
// Row 6
{
// values[nnz++] = 2;
values[nnz++] = -2;
values[nnz++] = 1;
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 5;
// row.cells.emplace_back(0, 10);
row.cells.emplace_back(1, 10);
row.cells.emplace_back(2, 11);
}
auto A = std::make_unique<BlockSparseMatrix>(bs);
memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values()));
for (int i = 0; i < num_cols; ++i) {
problem->D.get()[i] = 1;
}
for (int i = 0; i < num_rows; ++i) {
problem->b.get()[i] = i;
}
problem->A = std::move(A);
return problem;
}
/*
A = [1 2 0 0 0 1 1
1 4 0 0 0 5 6
3 4 0 0 0 7 8
5 6 0 0 0 9 0
0 0 9 0 0 3 1]
b = [0
1
2
3
4]
*/
// BlockSparseMatrix version
//
// This problem has the unique property that it has two different
// sized f-blocks, but only one of them occurs in the rows involving
// the one e-block. So performing Schur elimination on this problem
// tests the Schur Eliminator's ability to handle non-e-block rows
// correctly when their structure does not conform to the static
// structure determined by DetectStructure.
//
// Additionally, this problem has the first row of the last row block of E being
// larger than the number of row blocks in E.
//
// NOTE: This problem is too small and rank deficient to be solved without
// the diagonal regularization.
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem6() {
int num_rows = 5;
int num_cols = 7;
auto problem = std::make_unique<LinearLeastSquaresProblem>();
problem->b = std::make_unique<double[]>(num_rows);
problem->D = std::make_unique<double[]>(num_cols);
problem->num_eliminate_blocks = 1;
auto* bs = new CompressedRowBlockStructure;
auto values = std::make_unique<double[]>(num_rows * num_cols);
// Column block structure
bs->cols.emplace_back();
bs->cols.back().size = 2;
bs->cols.back().position = 0;
bs->cols.emplace_back();
bs->cols.back().size = 3;
bs->cols.back().position = 2;
bs->cols.emplace_back();
bs->cols.back().size = 2;
bs->cols.back().position = 5;
int nnz = 0;
// Row 1 & 2
{
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 2;
row.block.position = 0;
row.cells.emplace_back(0, nnz);
values[nnz++] = 1;
values[nnz++] = 2;
values[nnz++] = 1;
values[nnz++] = 4;
row.cells.emplace_back(2, nnz);
values[nnz++] = 1;
values[nnz++] = 1;
values[nnz++] = 5;
values[nnz++] = 6;
}
// Row 3 & 4
{
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 2;
row.block.position = 2;
row.cells.emplace_back(0, nnz);
values[nnz++] = 3;
values[nnz++] = 4;
values[nnz++] = 5;
values[nnz++] = 6;
row.cells.emplace_back(2, nnz);
values[nnz++] = 7;
values[nnz++] = 8;
values[nnz++] = 9;
values[nnz++] = 0;
}
// Row 5
{
bs->rows.emplace_back();
CompressedRow& row = bs->rows.back();
row.block.size = 1;
row.block.position = 4;
row.cells.emplace_back(1, nnz);
values[nnz++] = 9;
values[nnz++] = 0;
values[nnz++] = 0;
row.cells.emplace_back(2, nnz);
values[nnz++] = 3;
values[nnz++] = 1;
}
auto A = std::make_unique<BlockSparseMatrix>(bs);
memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values()));
for (int i = 0; i < num_cols; ++i) {
problem->D.get()[i] = (i + 1) * 100;
}
for (int i = 0; i < num_rows; ++i) {
problem->b.get()[i] = i;
}
problem->A = std::move(A);
return problem;
}
namespace {
bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A,
const double* D,
const double* b,
const double* x,
int num_eliminate_blocks) {
int /*num_eliminate_blocks*/) {
CHECK(A != nullptr);
Matrix AA;
A->ToDenseMatrix(&AA);
@@ -639,7 +950,7 @@ bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A,
return true;
}
void WriteArrayToFileOrDie(const string& filename,
void WriteArrayToFileOrDie(const std::string& filename,
const double* x,
const int size) {
CHECK(x != nullptr);
@@ -652,23 +963,23 @@ void WriteArrayToFileOrDie(const string& filename,
fclose(fptr);
}
bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base,
bool DumpLinearLeastSquaresProblemToTextFile(const std::string& filename_base,
const SparseMatrix* A,
const double* D,
const double* b,
const double* x,
int num_eliminate_blocks) {
int /*num_eliminate_blocks*/) {
CHECK(A != nullptr);
LOG(INFO) << "writing to: " << filename_base << "*";
string matlab_script;
std::string matlab_script;
StringAppendF(&matlab_script,
"function lsqp = load_trust_region_problem()\n");
StringAppendF(&matlab_script, "lsqp.num_rows = %d;\n", A->num_rows());
StringAppendF(&matlab_script, "lsqp.num_cols = %d;\n", A->num_cols());
{
string filename = filename_base + "_A.txt";
std::string filename = filename_base + "_A.txt";
FILE* fptr = fopen(filename.c_str(), "w");
CHECK(fptr != nullptr);
A->ToTextFile(fptr);
@@ -683,33 +994,33 @@ bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base,
}
if (D != nullptr) {
string filename = filename_base + "_D.txt";
std::string filename = filename_base + "_D.txt";
WriteArrayToFileOrDie(filename, D, A->num_cols());
StringAppendF(
&matlab_script, "lsqp.D = load('%s', '-ascii');\n", filename.c_str());
}
if (b != nullptr) {
string filename = filename_base + "_b.txt";
std::string filename = filename_base + "_b.txt";
WriteArrayToFileOrDie(filename, b, A->num_rows());
StringAppendF(
&matlab_script, "lsqp.b = load('%s', '-ascii');\n", filename.c_str());
}
if (x != nullptr) {
string filename = filename_base + "_x.txt";
std::string filename = filename_base + "_x.txt";
WriteArrayToFileOrDie(filename, x, A->num_cols());
StringAppendF(
&matlab_script, "lsqp.x = load('%s', '-ascii');\n", filename.c_str());
}
string matlab_filename = filename_base + ".m";
std::string matlab_filename = filename_base + ".m";
WriteStringToFileOrDie(matlab_script, matlab_filename);
return true;
}
} // namespace
bool DumpLinearLeastSquaresProblem(const string& filename_base,
bool DumpLinearLeastSquaresProblem(const std::string& filename_base,
DumpFormatType dump_format_type,
const SparseMatrix* A,
const double* D,
@@ -730,5 +1041,4 @@ bool DumpLinearLeastSquaresProblem(const string& filename_base,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/internal/export.h"
#include "ceres/sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Structure defining a linear least squares problem and if possible
// ground truth solutions. To be used by various LinearSolver tests.
@@ -74,6 +73,10 @@ CERES_NO_EXPORT
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem3();
CERES_NO_EXPORT
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4();
CERES_NO_EXPORT
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem5();
CERES_NO_EXPORT
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem6();
// Write the linear least squares problem to disk. The exact format
// depends on dump_format_type.
@@ -85,8 +88,7 @@ bool DumpLinearLeastSquaresProblem(const std::string& filename_base,
const double* b,
const double* x,
int num_eliminate_blocks);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,10 +30,34 @@
#include "ceres/linear_operator.h"
namespace ceres {
namespace internal {
#include <glog/logging.h>
namespace ceres::internal {
void LinearOperator::RightMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const {
(void)context;
if (num_threads != 1) {
VLOG(3) << "Parallel right product is not supported by linear operator "
"implementation";
}
RightMultiplyAndAccumulate(x, y);
}
void LinearOperator::LeftMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const {
(void)context;
if (num_threads != 1) {
VLOG(3) << "Parallel left product is not supported by linear operator "
"implementation";
}
LeftMultiplyAndAccumulate(x, y);
}
LinearOperator::~LinearOperator() = default;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,11 +33,13 @@
#ifndef CERES_INTERNAL_LINEAR_OPERATOR_H_
#define CERES_INTERNAL_LINEAR_OPERATOR_H_
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ContextImpl;
// This is an abstract base class for linear operators. It supports
// access to size information and left and right multiply operators.
@@ -46,15 +48,44 @@ class CERES_NO_EXPORT LinearOperator {
virtual ~LinearOperator();
// y = y + Ax;
virtual void RightMultiply(const double* x, double* y) const = 0;
virtual void RightMultiplyAndAccumulate(const double* x, double* y) const = 0;
virtual void RightMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const;
// y = y + A'x;
virtual void LeftMultiply(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulate(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulate(const double* x,
double* y,
ContextImpl* context,
int num_threads) const;
virtual void RightMultiplyAndAccumulate(const Vector& x, Vector& y) const {
RightMultiplyAndAccumulate(x.data(), y.data());
}
virtual void LeftMultiplyAndAccumulate(const Vector& x, Vector& y) const {
LeftMultiplyAndAccumulate(x.data(), y.data());
}
virtual void RightMultiplyAndAccumulate(const Vector& x,
Vector& y,
ContextImpl* context,
int num_threads) const {
RightMultiplyAndAccumulate(x.data(), y.data(), context, num_threads);
}
virtual void LeftMultiplyAndAccumulate(const Vector& x,
Vector& y,
ContextImpl* context,
int num_threads) const {
LeftMultiplyAndAccumulate(x.data(), y.data(), context, num_threads);
}
virtual int num_rows() const = 0;
virtual int num_cols() const = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_LINEAR_OPERATOR_H_
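
The new context/num_threads overloads fall back to the serial implementations, so existing operators keep working unchanged. Below is a minimal sketch (not part of the patch; DiagonalOperator is a hypothetical name) of a custom operator written against this interface:

#include <utility>
#include <vector>

// Hypothetical diagonal operator: y += D x. Only the two pure-virtual
// accumulate methods and the size queries must be overridden; the threaded
// overloads inherit the serial fallback declared above.
class DiagonalOperator final : public ceres::internal::LinearOperator {
 public:
  explicit DiagonalOperator(std::vector<double> d) : d_(std::move(d)) {}
  void RightMultiplyAndAccumulate(const double* x, double* y) const final {
    for (int i = 0; i < num_rows(); ++i) y[i] += d_[i] * x[i];
  }
  // D is symmetric, so the transposed product is the same.
  void LeftMultiplyAndAccumulate(const double* x, double* y) const final {
    RightMultiplyAndAccumulate(x, y);
  }
  int num_rows() const final { return static_cast<int>(d_.size()); }
  int num_cols() const final { return static_cast<int>(d_.size()); }

 private:
  std::vector<double> d_;
};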

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
LinearSolver::~LinearSolver() = default;
@@ -77,8 +76,15 @@ std::unique_ptr<LinearSolver> LinearSolver::Create(
CHECK(options.context != nullptr);
switch (options.type) {
case CGNR:
case CGNR: {
#ifndef CERES_NO_CUDA
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
std::string error;
return CudaCgnrSolver::Create(options, &error);
}
#endif
return std::make_unique<CgnrSolver>(options);
} break;
case SPARSE_NORMAL_CHOLESKY:
#if defined(CERES_NO_SPARSE)
@@ -120,5 +126,4 @@ std::unique_ptr<LinearSolver> LinearSolver::Create(
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
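
With this change, requesting CGNR together with the CUDA_SPARSE backend dispatches to CudaCgnrSolver. A hedged sketch of exercising the factory (internal API; MakeCgnrSolver is a hypothetical helper):

#include <memory>

// Hypothetical helper: picks the CUDA-backed CGNR solver when available.
std::unique_ptr<ceres::internal::LinearSolver> MakeCgnrSolver(
    ceres::internal::ContextImpl* context) {
  ceres::internal::LinearSolver::Options options;
  options.type = ceres::CGNR;
  options.sparse_linear_algebra_library_type = ceres::CUDA_SPARSE;
  options.context = context;  // Create() checks that a context is supplied.
  // In a CERES_NO_CUDA build the CUDA branch is compiled out and the plain
  // CgnrSolver is returned instead.
  return ceres::internal::LinearSolver::Create(options);
}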

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -52,39 +52,81 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
enum LinearSolverTerminationType {
enum class LinearSolverTerminationType {
// Termination criterion was met.
LINEAR_SOLVER_SUCCESS,
SUCCESS,
// Solver ran for max_num_iterations and terminated before the
// termination tolerance could be satisfied.
LINEAR_SOLVER_NO_CONVERGENCE,
NO_CONVERGENCE,
// Solver was terminated due to numerical problems, generally due to
// the linear system being poorly conditioned.
LINEAR_SOLVER_FAILURE,
FAILURE,
// Solver failed with a fatal error that cannot be recovered from,
// e.g. CHOLMOD ran out of memory when computing the symbolic or
// numeric factorization or an underlying library was called with
// the wrong arguments.
LINEAR_SOLVER_FATAL_ERROR
FATAL_ERROR
};
inline std::ostream& operator<<(std::ostream& s,
LinearSolverTerminationType type) {
switch (type) {
case LinearSolverTerminationType::SUCCESS:
s << "LINEAR_SOLVER_SUCCESS";
break;
case LinearSolverTerminationType::NO_CONVERGENCE:
s << "LINEAR_SOLVER_NO_CONVERGENCE";
break;
case LinearSolverTerminationType::FAILURE:
s << "LINEAR_SOLVER_FAILURE";
break;
case LinearSolverTerminationType::FATAL_ERROR:
s << "LINEAR_SOLVER_FATAL_ERROR";
break;
default:
s << "UNKNOWN LinearSolverTerminationType";
}
return s;
}
// This enum controls the fill-reducing ordering a sparse linear
// algebra library should use before computing a sparse factorization
// (usually Cholesky).
enum OrderingType {
//
// TODO(sameeragarwal): Add support for nested dissection
enum class OrderingType {
NATURAL, // Do not re-order the matrix. This is useful when the
// matrix has been ordered using a fill-reducing ordering
// already.
AMD // Use the Approximate Minimum Degree algorithm to re-order
// the matrix.
AMD, // Use the Approximate Minimum Degree algorithm to re-order
// the matrix.
NESDIS, // Use the Nested Dissection algorithm to re-order the matrix.
};
inline std::ostream& operator<<(std::ostream& s, OrderingType type) {
switch (type) {
case OrderingType::NATURAL:
s << "NATURAL";
break;
case OrderingType::AMD:
s << "AMD";
break;
case OrderingType::NESDIS:
s << "NESDIS";
break;
default:
s << "UNKNOWN OrderingType";
}
return s;
}
class LinearOperator;
// Abstract base class for objects that implement algorithms for
@@ -112,9 +154,9 @@ class CERES_NO_EXPORT LinearSolver {
DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN;
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
SUITE_SPARSE;
OrderingType ordering_type = OrderingType::NATURAL;
// See solver.h for information about these flags.
bool use_postordering = false;
bool dynamic_sparsity = false;
bool use_explicit_schur_complement = false;
@@ -123,6 +165,23 @@ class CERES_NO_EXPORT LinearSolver {
int min_num_iterations = 1;
int max_num_iterations = 1;
// Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION.
// This value controls the maximum number of iterations, whether it is used
// as a preconditioner or just to initialize the solution for
// ITERATIVE_SCHUR.
int max_num_spse_iterations = 5;
// Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for
// ITERATIVE_SCHUR. This option can be set true regardless of what
// preconditioner is being used.
bool use_spse_initialization = false;
// When use_spse_initialization is true, this parameter along with
// max_num_spse_iterations controls the number of
// SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It
// is not used to control the preconditioner.
double spse_tolerance = 0.1;
// If possible, how many threads can the solver use.
int num_threads = 1;
@@ -261,7 +320,8 @@ class CERES_NO_EXPORT LinearSolver {
struct Summary {
double residual_norm = -1.0;
int num_iterations = -1;
LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE;
LinearSolverTerminationType termination_type =
LinearSolverTerminationType::FAILURE;
std::string message;
};
@@ -329,17 +389,16 @@ class TypedLinearSolver : public LinearSolver {
ExecutionSummary execution_summary_;
};
// Linear solvers that depend on acccess to the low level structure of
// Linear solvers that depend on access to the low level structure of
// a SparseMatrix.
// clang-format off
typedef TypedLinearSolver<BlockSparseMatrix> BlockSparseMatrixSolver; // NOLINT
typedef TypedLinearSolver<CompressedRowSparseMatrix> CompressedRowSparseMatrixSolver; // NOLINT
typedef TypedLinearSolver<DenseSparseMatrix> DenseSparseMatrixSolver; // NOLINT
typedef TypedLinearSolver<TripletSparseMatrix> TripletSparseMatrixSolver; // NOLINT
using BlockSparseMatrixSolver = TypedLinearSolver<BlockSparseMatrix>; // NOLINT
using CompressedRowSparseMatrixSolver = TypedLinearSolver<CompressedRowSparseMatrix>; // NOLINT
using DenseSparseMatrixSolver = TypedLinearSolver<DenseSparseMatrix>; // NOLINT
using TripletSparseMatrixSolver = TypedLinearSolver<TripletSparseMatrix>; // NOLINT
// clang-format on
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
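
Since LinearSolverTerminationType is now a scoped enum, call sites must qualify the enumerators, while the streaming operator above keeps the historical names in log output. A hedged sketch of the call-site migration (SolveSucceeded is a hypothetical function):

// Hypothetical call site. Pre-2.2 code compared against the unscoped
// LINEAR_SOLVER_SUCCESS; the enumerator now requires qualification.
// Logging is unchanged: operator<< still prints "LINEAR_SOLVER_SUCCESS".
bool SolveSucceeded(const ceres::internal::LinearSolver::Summary& summary) {
  return summary.termination_type ==
         ceres::internal::LinearSolverTerminationType::SUCCESS;
}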

View File

@@ -1,349 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#include "ceres/local_parameterization.h"
#include <algorithm>
#include "Eigen/Geometry"
#include "ceres/internal/eigen.h"
#include "ceres/internal/fixed_array.h"
#include "ceres/internal/householder_vector.h"
#include "ceres/rotation.h"
#include "glog/logging.h"
namespace ceres {
using std::vector;
LocalParameterization::~LocalParameterization() = default;
bool LocalParameterization::MultiplyByJacobian(const double* x,
const int num_rows,
const double* global_matrix,
double* local_matrix) const {
if (LocalSize() == 0) {
return true;
}
Matrix jacobian(GlobalSize(), LocalSize());
if (!ComputeJacobian(x, jacobian.data())) {
return false;
}
MatrixRef(local_matrix, num_rows, LocalSize()) =
ConstMatrixRef(global_matrix, num_rows, GlobalSize()) * jacobian;
return true;
}
IdentityParameterization::IdentityParameterization(const int size)
: size_(size) {
CHECK_GT(size, 0);
}
bool IdentityParameterization::Plus(const double* x,
const double* delta,
double* x_plus_delta) const {
VectorRef(x_plus_delta, size_) =
ConstVectorRef(x, size_) + ConstVectorRef(delta, size_);
return true;
}
bool IdentityParameterization::ComputeJacobian(const double* x,
double* jacobian) const {
MatrixRef(jacobian, size_, size_).setIdentity();
return true;
}
bool IdentityParameterization::MultiplyByJacobian(const double* x,
const int num_cols,
const double* global_matrix,
double* local_matrix) const {
std::copy(
global_matrix, global_matrix + num_cols * GlobalSize(), local_matrix);
return true;
}
SubsetParameterization::SubsetParameterization(
int size, const vector<int>& constant_parameters)
: local_size_(size - constant_parameters.size()), constancy_mask_(size, 0) {
if (constant_parameters.empty()) {
return;
}
vector<int> constant = constant_parameters;
std::sort(constant.begin(), constant.end());
CHECK_GE(constant.front(), 0) << "Indices indicating constant parameter must "
"be greater than or equal to zero.";
CHECK_LT(constant.back(), size)
<< "Indices indicating constant parameter must be less than the size "
<< "of the parameter block.";
CHECK(std::adjacent_find(constant.begin(), constant.end()) == constant.end())
<< "The set of constant parameters cannot contain duplicates";
for (int parameter : constant_parameters) {
constancy_mask_[parameter] = 1;
}
}
bool SubsetParameterization::Plus(const double* x,
const double* delta,
double* x_plus_delta) const {
const int global_size = GlobalSize();
for (int i = 0, j = 0; i < global_size; ++i) {
if (constancy_mask_[i]) {
x_plus_delta[i] = x[i];
} else {
x_plus_delta[i] = x[i] + delta[j++];
}
}
return true;
}
bool SubsetParameterization::ComputeJacobian(const double* x,
double* jacobian) const {
if (local_size_ == 0) {
return true;
}
const int global_size = GlobalSize();
MatrixRef m(jacobian, global_size, local_size_);
m.setZero();
for (int i = 0, j = 0; i < global_size; ++i) {
if (!constancy_mask_[i]) {
m(i, j++) = 1.0;
}
}
return true;
}
bool SubsetParameterization::MultiplyByJacobian(const double* x,
const int num_cols,
const double* global_matrix,
double* local_matrix) const {
if (local_size_ == 0) {
return true;
}
const int global_size = GlobalSize();
for (int col = 0; col < num_cols; ++col) {
for (int i = 0, j = 0; i < global_size; ++i) {
if (!constancy_mask_[i]) {
local_matrix[col * local_size_ + j++] =
global_matrix[col * global_size + i];
}
}
}
return true;
}
bool QuaternionParameterization::Plus(const double* x,
const double* delta,
double* x_plus_delta) const {
const double norm_delta =
sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
if (norm_delta > 0.0) {
const double sin_delta_by_delta = (sin(norm_delta) / norm_delta);
double q_delta[4];
q_delta[0] = cos(norm_delta);
q_delta[1] = sin_delta_by_delta * delta[0];
q_delta[2] = sin_delta_by_delta * delta[1];
q_delta[3] = sin_delta_by_delta * delta[2];
QuaternionProduct(q_delta, x, x_plus_delta);
} else {
for (int i = 0; i < 4; ++i) {
x_plus_delta[i] = x[i];
}
}
return true;
}
bool QuaternionParameterization::ComputeJacobian(const double* x,
double* jacobian) const {
// clang-format off
jacobian[0] = -x[1]; jacobian[1] = -x[2]; jacobian[2] = -x[3];
jacobian[3] = x[0]; jacobian[4] = x[3]; jacobian[5] = -x[2];
jacobian[6] = -x[3]; jacobian[7] = x[0]; jacobian[8] = x[1];
jacobian[9] = x[2]; jacobian[10] = -x[1]; jacobian[11] = x[0];
// clang-format on
return true;
}
bool EigenQuaternionParameterization::Plus(const double* x_ptr,
const double* delta,
double* x_plus_delta_ptr) const {
Eigen::Map<Eigen::Quaterniond> x_plus_delta(x_plus_delta_ptr);
Eigen::Map<const Eigen::Quaterniond> x(x_ptr);
const double norm_delta =
sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
if (norm_delta > 0.0) {
const double sin_delta_by_delta = sin(norm_delta) / norm_delta;
// Note, in the constructor w is first.
Eigen::Quaterniond delta_q(cos(norm_delta),
sin_delta_by_delta * delta[0],
sin_delta_by_delta * delta[1],
sin_delta_by_delta * delta[2]);
x_plus_delta = delta_q * x;
} else {
x_plus_delta = x;
}
return true;
}
bool EigenQuaternionParameterization::ComputeJacobian(const double* x,
double* jacobian) const {
// clang-format off
jacobian[0] = x[3]; jacobian[1] = x[2]; jacobian[2] = -x[1];
jacobian[3] = -x[2]; jacobian[4] = x[3]; jacobian[5] = x[0];
jacobian[6] = x[1]; jacobian[7] = -x[0]; jacobian[8] = x[3];
jacobian[9] = -x[0]; jacobian[10] = -x[1]; jacobian[11] = -x[2];
// clang-format on
return true;
}
HomogeneousVectorParameterization::HomogeneousVectorParameterization(int size)
: size_(size) {
CHECK_GT(size_, 1) << "The size of the homogeneous vector needs to be "
<< "greater than 1.";
}
bool HomogeneousVectorParameterization::Plus(const double* x_ptr,
const double* delta_ptr,
double* x_plus_delta_ptr) const {
ConstVectorRef x(x_ptr, size_);
ConstVectorRef delta(delta_ptr, size_ - 1);
VectorRef x_plus_delta(x_plus_delta_ptr, size_);
const double norm_delta = delta.norm();
if (norm_delta == 0.0) {
x_plus_delta = x;
return true;
}
// Map the delta from the minimum representation to the over parameterized
// homogeneous vector. See section A6.9.2 on page 624 of Hartley & Zisserman
// (2nd Edition) for a detailed description. Note there is a typo on Page
// 625, line 4 so check the book errata.
const double norm_delta_div_2 = 0.5 * norm_delta;
const double sin_delta_by_delta =
std::sin(norm_delta_div_2) / norm_delta_div_2;
Vector y(size_);
y.head(size_ - 1) = 0.5 * sin_delta_by_delta * delta;
y(size_ - 1) = std::cos(norm_delta_div_2);
Vector v(size_);
double beta;
// NOTE: The explicit template arguments are needed here because
// ComputeHouseholderVector is templated and some versions of MSVC
// have trouble deducing the type of v automatically.
internal::ComputeHouseholderVector<ConstVectorRef, double, Eigen::Dynamic>(
x, &v, &beta);
// Apply the delta update to remain on the unit sphere. See section A6.9.3
// on page 625 of Hartley & Zisserman (2nd Edition) for a detailed
// description.
x_plus_delta = x.norm() * (y - v * (beta * (v.transpose() * y)));
return true;
}
bool HomogeneousVectorParameterization::ComputeJacobian(
const double* x_ptr, double* jacobian_ptr) const {
ConstVectorRef x(x_ptr, size_);
MatrixRef jacobian(jacobian_ptr, size_, size_ - 1);
Vector v(size_);
double beta;
// NOTE: The explicit template arguments are needed here because
// ComputeHouseholderVector is templated and some versions of MSVC
// have trouble deducing the type of v automatically.
internal::ComputeHouseholderVector<ConstVectorRef, double, Eigen::Dynamic>(
x, &v, &beta);
// The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the
// Householder matrix (H = I - beta * v * v').
for (int i = 0; i < size_ - 1; ++i) {
jacobian.col(i) = -0.5 * beta * v(i) * v;
jacobian.col(i)(i) += 0.5;
}
jacobian *= x.norm();
return true;
}
bool ProductParameterization::Plus(const double* x,
const double* delta,
double* x_plus_delta) const {
int x_cursor = 0;
int delta_cursor = 0;
for (const auto& param : local_params_) {
if (!param->Plus(
x + x_cursor, delta + delta_cursor, x_plus_delta + x_cursor)) {
return false;
}
delta_cursor += param->LocalSize();
x_cursor += param->GlobalSize();
}
return true;
}
bool ProductParameterization::ComputeJacobian(const double* x,
double* jacobian_ptr) const {
MatrixRef jacobian(jacobian_ptr, GlobalSize(), LocalSize());
jacobian.setZero();
internal::FixedArray<double> buffer(buffer_size_);
int x_cursor = 0;
int delta_cursor = 0;
for (const auto& param : local_params_) {
const int local_size = param->LocalSize();
const int global_size = param->GlobalSize();
if (!param->ComputeJacobian(x + x_cursor, buffer.data())) {
return false;
}
jacobian.block(x_cursor, delta_cursor, global_size, local_size) =
MatrixRef(buffer.data(), global_size, local_size);
delta_cursor += local_size;
x_cursor += global_size;
}
return true;
}
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,10 +35,7 @@
#include "ceres/internal/eigen.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::list;
namespace ceres::internal {
// The (L)BFGS algorithm explicitly requires that the secant equation:
//
@@ -117,8 +114,8 @@ bool LowRankInverseHessian::Update(const Vector& delta_x,
return true;
}
void LowRankInverseHessian::RightMultiply(const double* x_ptr,
double* y_ptr) const {
void LowRankInverseHessian::RightMultiplyAndAccumulate(const double* x_ptr,
double* y_ptr) const {
ConstVectorRef gradient(x_ptr, num_parameters_);
VectorRef search_direction(y_ptr, num_parameters_);
@@ -159,7 +156,7 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr,
//
// The original origin of this rescaling trick is somewhat unclear, the
// earliest reference appears to be Oren [1], however it is widely discussed
// without specific attributation in various texts including [2] (p143/178).
// without specific attribution in various texts including [2] (p143/178).
//
// [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms Part II:
// Implementation and experiments, Management Science,
@@ -179,5 +176,4 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_operator.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// LowRankInverseHessian is a positive definite approximation to the
// Hessian using the limited memory variant of the
@@ -65,7 +64,7 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
// num_parameters is the row/column size of the Hessian.
// max_num_corrections is the rank of the Hessian approximation.
// use_approximate_eigenvalue_scaling controls whether the initial
// inverse Hessian used during Right/LeftMultiply() is scaled by
// inverse Hessian used during Right/LeftMultiplyAndAccumulate() is scaled by
// the approximate eigenvalue of the true inverse Hessian at the
// current operating point.
// The approximation uses:
@@ -84,9 +83,9 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
bool Update(const Vector& delta_x, const Vector& delta_gradient);
// LinearOperator interface
void RightMultiply(const double* x, double* y) const final;
void LeftMultiply(const double* x, double* y) const final {
RightMultiply(x, y);
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
void LeftMultiplyAndAccumulate(const double* x, double* y) const final {
RightMultiplyAndAccumulate(x, y);
}
int num_rows() const final { return num_parameters_; }
int num_cols() const final { return num_parameters_; }
@@ -102,7 +101,6 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
std::list<int> indices_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_LOW_RANK_INVERSE_HESSIAN_H_

View File

@@ -30,13 +30,11 @@ inline void QuaternionPlusImpl(const double* x,
double* x_plus_delta) {
// x_plus_delta = QuaternionProduct(q_delta, x), where q_delta is the
// quaternion constructed from delta.
const double norm_delta = std::sqrt(
delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
const double norm_delta = std::hypot(delta[0], delta[1], delta[2]);
if (norm_delta == 0.0) {
for (int i = 0; i < 4; ++i) {
x_plus_delta[i] = x[i];
}
if (std::fpclassify(norm_delta) == FP_ZERO) {
// No change in rotation: return the quaternion as is.
std::copy_n(x, 4, x_plus_delta);
return;
}
@@ -100,19 +98,16 @@ inline void QuaternionMinusImpl(const double* y,
-y[Order::kW] * x[Order::kZ] - y[Order::kX] * x[Order::kY] +
y[Order::kY] * x[Order::kX] + y[Order::kZ] * x[Order::kW];
const double u_norm =
std::sqrt(ambient_y_minus_x[Order::kX] * ambient_y_minus_x[Order::kX] +
ambient_y_minus_x[Order::kY] * ambient_y_minus_x[Order::kY] +
ambient_y_minus_x[Order::kZ] * ambient_y_minus_x[Order::kZ]);
if (u_norm > 0.0) {
const double u_norm = std::hypot(ambient_y_minus_x[Order::kX],
ambient_y_minus_x[Order::kY],
ambient_y_minus_x[Order::kZ]);
if (std::fpclassify(u_norm) != FP_ZERO) {
const double theta = std::atan2(u_norm, ambient_y_minus_x[Order::kW]);
y_minus_x[0] = theta * ambient_y_minus_x[Order::kX] / u_norm;
y_minus_x[1] = theta * ambient_y_minus_x[Order::kY] / u_norm;
y_minus_x[2] = theta * ambient_y_minus_x[Order::kZ] / u_norm;
} else {
y_minus_x[0] = 0.0;
y_minus_x[1] = 0.0;
y_minus_x[2] = 0.0;
std::fill_n(y_minus_x, 3, 0.0);
}
}
@@ -201,7 +196,7 @@ bool SubsetManifold::Plus(const double* x,
return true;
}
bool SubsetManifold::PlusJacobian(const double* x,
bool SubsetManifold::PlusJacobian(const double* /*x*/,
double* plus_jacobian) const {
if (tangent_size_ == 0) {
return true;
@@ -218,7 +213,7 @@ bool SubsetManifold::PlusJacobian(const double* x,
return true;
}
bool SubsetManifold::RightMultiplyByPlusJacobian(const double* x,
bool SubsetManifold::RightMultiplyByPlusJacobian(const double* /*x*/,
const int num_rows,
const double* ambient_matrix,
double* tangent_matrix) const {
@@ -254,7 +249,7 @@ bool SubsetManifold::Minus(const double* y,
return true;
}
bool SubsetManifold::MinusJacobian(const double* x,
bool SubsetManifold::MinusJacobian(const double* /*x*/,
double* minus_jacobian) const {
const int ambient_size = AmbientSize();
MatrixRef m(minus_jacobian, tangent_size_, ambient_size);
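
The quaternion updates above replace a hand-rolled sqrt of squares with the three-argument std::hypot (C++17), which rescales internally and therefore cannot overflow or underflow on extreme components. A standalone illustration (not from the patch):

#include <cmath>
#include <cstdio>

int main() {
  const double d[3] = {1e200, 1e200, 1e200};
  // Squaring overflows to +inf before the square root is taken.
  const double naive = std::sqrt(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
  // std::hypot rescales internally and stays finite.
  const double safe = std::hypot(d[0], d[1], d[2]);
  std::printf("naive=%g safe=%g\n", naive, safe);  // naive=inf, safe~1.73e200
  return 0;
}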

View File

@@ -1,60 +0,0 @@
#include "ceres/internal/export.h"
#include "ceres/local_parameterization.h"
#include "ceres/manifold.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
// Adapter to wrap LocalParameterization and make them look like Manifolds.
//
// ManifoldAdapter NEVER takes ownership of local_parameterization.
class CERES_NO_EXPORT ManifoldAdapter final : public Manifold {
public:
explicit ManifoldAdapter(const LocalParameterization* local_parameterization)
: local_parameterization_(local_parameterization) {
CHECK(local_parameterization != nullptr);
}
bool Plus(const double* x,
const double* delta,
double* x_plus_delta) const override {
return local_parameterization_->Plus(x, delta, x_plus_delta);
}
bool PlusJacobian(const double* x, double* jacobian) const override {
return local_parameterization_->ComputeJacobian(x, jacobian);
}
bool RightMultiplyByPlusJacobian(const double* x,
const int num_rows,
const double* ambient_matrix,
double* tangent_matrix) const override {
return local_parameterization_->MultiplyByJacobian(
x, num_rows, ambient_matrix, tangent_matrix);
}
bool Minus(const double* y, const double* x, double* delta) const override {
LOG(FATAL) << "This should never be called.";
return false;
}
bool MinusJacobian(const double* x, double* jacobian) const override {
LOG(FATAL) << "This should never be called.";
return false;
}
int AmbientSize() const override {
return local_parameterization_->GlobalSize();
}
int TangentSize() const override {
return local_parameterization_->LocalSize();
}
private:
const LocalParameterization* local_parameterization_;
};
} // namespace internal
} // namespace ceres

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
std::unique_ptr<Minimizer> Minimizer::Create(MinimizerType minimizer_type) {
if (minimizer_type == TRUST_REGION) {
@@ -89,5 +88,4 @@ bool Minimizer::RunCallbacks(const Minimizer::Options& options,
return false;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,14 +40,14 @@
#include "ceres/iteration_callback.h"
#include "ceres/solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Evaluator;
class SparseMatrix;
class TrustRegionStrategy;
class CoordinateDescentMinimizer;
class LinearSolver;
class ContextImpl;
// Interface for non-linear least squares solvers.
class CERES_NO_EXPORT Minimizer {
@@ -114,6 +114,7 @@ class CERES_NO_EXPORT Minimizer {
int max_num_iterations;
double max_solver_time_in_seconds;
int num_threads;
ContextImpl* context = nullptr;
// Number of times the linear solver should be retried in case of
// numerical failure. The retries are done by exponentially scaling up
@@ -193,8 +194,7 @@ class CERES_NO_EXPORT Minimizer {
Solver::Summary* summary) = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
#include "ceres/normal_prior.h"
#include <cstddef>
#include <utility>
#include <vector>
#include "ceres/internal/eigen.h"
@@ -39,7 +40,7 @@
namespace ceres {
NormalPrior::NormalPrior(const Matrix& A, const Vector& b) : A_(A), b_(b) {
NormalPrior::NormalPrior(const Matrix& A, Vector b) : A_(A), b_(std::move(b)) {
CHECK_GT(b_.rows(), 0);
CHECK_GT(A_.rows(), 0);
CHECK_EQ(b_.rows(), A.cols());
@@ -54,7 +55,7 @@ bool NormalPrior::Evaluate(double const* const* parameters,
VectorRef r(residuals, num_residuals());
// The following line should read
// r = A_ * (p - b_);
// The extra eval is to get around a bug in the eigen library.
// The extra eval is to get around a bug in the Eigen library.
r = A_ * (p - b_).eval();
if ((jacobians != nullptr) && (jacobians[0] != nullptr)) {
MatrixRef(jacobians[0], num_residuals(), parameter_block_sizes()[0]) = A_;
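
Taking b by value and moving it into the member is the sink-parameter idiom: rvalue arguments are moved through with no copy, while lvalue arguments cost exactly one copy. A hedged sketch of the effect at a call site (MakePriors is a hypothetical function):

#include <utility>
#include "ceres/internal/eigen.h"
#include "ceres/normal_prior.h"

void MakePriors() {
  ceres::Matrix A = ceres::Matrix::Identity(3, 3);
  ceres::Vector b = ceres::Vector::Zero(3);
  ceres::NormalPrior copies_b(A, b);            // lvalue: one copy, then a move
  ceres::NormalPrior moves_b(A, std::move(b));  // rvalue: moved, never copied
}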

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
#if defined(_WIN32) && !defined(__MINGW64__) && !defined(__MINGW32__)
#define GG_LONGLONG(x) x##I64
@@ -112,7 +111,6 @@ struct pair_hash {
}
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_PAIR_HASH_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -26,48 +26,161 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)
// Authors: vitus@google.com (Michael Vitus),
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_PARALLEL_FOR_H_
#define CERES_INTERNAL_PARALLEL_FOR_H_
#include <functional>
#include <mutex>
#include <vector>
#include "ceres/context_impl.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/parallel_invoke.h"
#include "ceres/partition_range_for_parallel_for.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Returns the maximum number of threads supported by the threading backend
// Ceres was compiled with.
CERES_NO_EXPORT
int MaxNumThreadsAvailable();
// Use a dummy (no-op) lock if num_threads == 1.
inline decltype(auto) MakeConditionalLock(const int num_threads,
std::mutex& m) {
return (num_threads == 1) ? std::unique_lock<std::mutex>{}
: std::unique_lock<std::mutex>{m};
}
// Execute the function for every element in the range [start, end) with at most
// num_threads. It will execute all the work on the calling thread if
// num_threads is 1.
CERES_NO_EXPORT void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int)>& function);
// num_threads or (end - start) is equal to 1.
// Depending on the function signature, it will be supplied with either a loop
// index or a range of loop indices; the function can also be supplied with a
// thread_id. The following function signatures are supported:
// - Functions accepting a single loop index:
// - [](int index) { ... }
// - [](int thread_id, int index) { ... }
// - Functions accepting a range of loop indices:
// - [](std::tuple<int, int> index) { ... }
// - [](int thread_id, std::tuple<int, int> index) { ... }
//
// When distributing workload between threads, it is assumed that each loop
// iteration takes approximately equal time to complete.
template <typename F>
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
F&& function,
int min_block_size = 1) {
CHECK_GT(num_threads, 0);
if (start >= end) {
return;
}
// Execute the function for every element in the range [start, end) with at most
// num_threads. It will execute all the work on the calling thread if
// num_threads is 1. Each invocation of function() will be passed a thread_id
// in [0, num_threads) that is guaranteed to be distinct from the value passed
// to any concurrent execution of function().
CERES_NO_EXPORT void ParallelFor(
ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int thread_id, int i)>& function);
} // namespace internal
} // namespace ceres
if (num_threads == 1 || end - start < min_block_size * 2) {
InvokeOnSegment(0, std::make_tuple(start, end), std::forward<F>(function));
return;
}
#include "ceres/internal/disable_warnings.h"
CHECK(context != nullptr);
ParallelInvoke(context,
start,
end,
num_threads,
std::forward<F>(function),
min_block_size);
}
// Execute function for every element in the range [start, end) with at most
// num_threads, using a user-provided partitions array.
// When distributing workload between threads, it is assumed that each segment
// bounded by adjacent elements of the partitions array takes approximately equal
// time to process.
template <typename F>
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
F&& function,
const std::vector<int>& partitions) {
CHECK_GT(num_threads, 0);
if (start >= end) {
return;
}
CHECK_EQ(partitions.front(), start);
CHECK_EQ(partitions.back(), end);
if (num_threads == 1 || end - start <= num_threads) {
ParallelFor(context, start, end, num_threads, std::forward<F>(function));
return;
}
CHECK_GT(partitions.size(), 1);
const int num_partitions = partitions.size() - 1;
ParallelFor(context,
0,
num_partitions,
num_threads,
[&function, &partitions](int thread_id,
std::tuple<int, int> partition_ids) {
// partition_ids is a range of partition indices
const auto [partition_start, partition_end] = partition_ids;
// Execution over several adjacent segments is equivalent
// to execution over the union of those segments (which is
// also a contiguous segment)
const int range_start = partitions[partition_start];
const int range_end = partitions[partition_end];
// Range of original loop indices
const auto range = std::make_tuple(range_start, range_end);
InvokeOnSegment(thread_id, range, function);
});
}
// Execute function for every element in the range [start, end) with at most
// num_threads, taking into account user-provided integer cumulative costs of
// iterations. Cumulative costs of iterations for indices in the range [0, end)
// are stored in objects from cumulative_cost_data. The user-provided
// cumulative_cost_fun returns non-decreasing integer values corresponding to
// the inclusive cumulative cost of loop iterations when given a reference to a
// user-defined object. Only indices from [start, end) will be referenced. This
// routine assumes that cumulative_cost_fun is non-decreasing (in other words,
// all costs are non-negative).
// When distributing workload between threads, input range of loop indices will
// be partitioned into disjoint contiguous intervals, with the maximal cost
// being minimized.
// For example, with iteration costs of [1, 1, 5, 3, 1, 4] cumulative_cost_fun
// should return [1, 2, 7, 10, 11, 15], and with num_threads = 4 this range
// will be split into segments [0, 2) [2, 3) [3, 5) [5, 6) with costs
// [2, 5, 4, 4].
template <typename F, typename CumulativeCostData, typename CumulativeCostFun>
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
F&& function,
const CumulativeCostData* cumulative_cost_data,
CumulativeCostFun&& cumulative_cost_fun) {
CHECK_GT(num_threads, 0);
if (start >= end) {
return;
}
if (num_threads == 1 || end - start <= num_threads) {
ParallelFor(context, start, end, num_threads, std::forward<F>(function));
return;
}
// Creating several partitions allows us to tolerate imperfections of
// partitioning and of user-supplied iteration costs, up to a certain extent.
constexpr int kNumPartitionsPerThread = 4;
const int kMaxPartitions = num_threads * kNumPartitionsPerThread;
const auto& partitions = PartitionRangeForParallelFor(
start,
end,
kMaxPartitions,
cumulative_cost_data,
std::forward<CumulativeCostFun>(cumulative_cost_fun));
CHECK_GT(partitions.size(), 1);
ParallelFor(
context, start, end, num_threads, std::forward<F>(function), partitions);
}
} // namespace ceres::internal
#endif // CERES_INTERNAL_PARALLEL_FOR_H_
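
The templated ParallelFor accepts all four callable shapes listed above; the range form keeps a tight serial inner loop per block. A hedged usage sketch (Axpy is a hypothetical kernel; assumes an initialized ContextImpl):

#include <tuple>
#include <vector>

#include "ceres/parallel_for.h"

void Axpy(ceres::internal::ContextImpl* context, int num_threads,
          const std::vector<double>& x, std::vector<double>& y) {
  const int n = static_cast<int>(x.size());
  // Per-index form; [](int thread_id, int i) would also be accepted.
  ceres::internal::ParallelFor(context, 0, n, num_threads,
                               [&](int i) { y[i] += x[i]; });
  // Per-range form: invoked once per contiguous block of indices.
  ceres::internal::ParallelFor(
      context, 0, n, num_threads, [&](std::tuple<int, int> range) {
        const auto [start, end] = range;
        for (int i = start; i < end; ++i) y[i] += x[i];
      });
}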

View File

@@ -1,245 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifdef CERES_USE_CXX_THREADS
#include <cmath>
#include <condition_variable>
#include <memory>
#include <mutex>
#include "ceres/concurrent_queue.h"
#include "ceres/parallel_for.h"
#include "ceres/scoped_thread_token.h"
#include "ceres/thread_token_provider.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace {
// This class creates a thread safe barrier which will block until a
// pre-specified number of threads call Finished. This allows us to block the
// main thread until all the parallel threads are finished processing all the
// work.
class BlockUntilFinished {
public:
explicit BlockUntilFinished(int num_total)
: num_finished_(0), num_total_(num_total) {}
// Increment the number of jobs that have finished and signal the blocking
// thread if all jobs have finished.
void Finished() {
std::lock_guard<std::mutex> lock(mutex_);
++num_finished_;
CHECK_LE(num_finished_, num_total_);
if (num_finished_ == num_total_) {
condition_.notify_one();
}
}
// Block until all threads have signaled they are finished.
void Block() {
std::unique_lock<std::mutex> lock(mutex_);
condition_.wait(lock, [&]() { return num_finished_ == num_total_; });
}
private:
std::mutex mutex_;
std::condition_variable condition_;
// The current number of jobs finished.
int num_finished_;
// The total number of jobs.
int num_total_;
};
// Shared state between the parallel tasks. Each thread will use this
// information to get the next block of work to be performed.
struct SharedState {
SharedState(int start, int end, int num_work_items)
: start(start),
end(end),
num_work_items(num_work_items),
i(0),
thread_token_provider(num_work_items),
block_until_finished(num_work_items) {}
// The start and end index of the for loop.
const int start;
const int end;
// The number of blocks that need to be processed.
const int num_work_items;
// The next block of work to be assigned to a worker. The parallel for loop
// range is split into num_work_items blocks of work, i.e. a single block of
// work is:
// for (int j = start + i; j < end; j += num_work_items) { ... }.
int i;
std::mutex mutex_i;
// Provides a unique thread ID among all active threads working on the same
// group of tasks. Thread-safe.
ThreadTokenProvider thread_token_provider;
// Used to signal when all the work has been completed. Thread safe.
BlockUntilFinished block_until_finished;
};
} // namespace
int MaxNumThreadsAvailable() { return ThreadPool::MaxNumThreadsAvailable(); }
// See ParallelFor (below) for more details.
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int)>& function) {
CHECK_GT(num_threads, 0);
CHECK(context != nullptr);
if (end <= start) {
return;
}
// Fast path for when it is single threaded.
if (num_threads == 1) {
for (int i = start; i < end; ++i) {
function(i);
}
return;
}
ParallelFor(
context, start, end, num_threads, [&function](int /*thread_id*/, int i) {
function(i);
});
}
// This implementation uses a fixed size max worker pool with a shared task
// queue. The problem of executing the function for the interval of [start, end)
// is broken up into at most num_threads blocks and added to the thread pool. To
// avoid deadlocks, the calling thread is allowed to steal work from the worker
// pool. This is implemented via a shared state between the tasks. In order for
// the calling thread or thread pool to get a block of work, it will query the
// shared state for the next block of work to be done. If there is nothing left,
// it will return. We will exit the ParallelFor call when all of the work has
// been done, not when all of the tasks have been popped off the task queue.
//
// A unique thread ID among all active tasks will be acquired once for each
// block of work. This avoids the significant performance penalty for acquiring
// it on every iteration of the for loop. The thread ID is guaranteed to be in
// [0, num_threads).
//
// A performance analysis has shown this implementation is on par with OpenMP and
// TBB.
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int thread_id, int i)>& function) {
CHECK_GT(num_threads, 0);
CHECK(context != nullptr);
if (end <= start) {
return;
}
// Fast path for when it is single threaded.
if (num_threads == 1) {
// Even though we only have one thread, use the thread token provider to
// guarantee the exact same behavior when running with multiple threads.
ThreadTokenProvider thread_token_provider(num_threads);
const ScopedThreadToken scoped_thread_token(&thread_token_provider);
const int thread_id = scoped_thread_token.token();
for (int i = start; i < end; ++i) {
function(thread_id, i);
}
return;
}
// We use a std::shared_ptr because the main thread can finish all
// the work before the tasks have been popped off the queue. So the
// shared state needs to exist for the duration of all the tasks.
const int num_work_items = std::min((end - start), num_threads);
std::shared_ptr<SharedState> shared_state(
new SharedState(start, end, num_work_items));
// A function which tries to perform a chunk of work. This returns false if
// there is no work to be done.
auto task_function = [shared_state, &function]() {
int i = 0;
{
// Get the next available chunk of work to be performed. If there is no
// work, return false.
std::lock_guard<std::mutex> lock(shared_state->mutex_i);
if (shared_state->i >= shared_state->num_work_items) {
return false;
}
i = shared_state->i;
++shared_state->i;
}
const ScopedThreadToken scoped_thread_token(
&shared_state->thread_token_provider);
const int thread_id = scoped_thread_token.token();
// Perform each task.
for (int j = shared_state->start + i; j < shared_state->end;
j += shared_state->num_work_items) {
function(thread_id, j);
}
shared_state->block_until_finished.Finished();
return true;
};
// Add all the tasks to the thread pool.
for (int i = 0; i < num_work_items; ++i) {
// Note we are taking the task_function as value so the shared_state
// shared pointer is copied and the ref count is increased. This is to
// prevent it from being deleted when the main thread finishes all the
// work and exits before the threads finish.
context->thread_pool.AddTask([task_function]() { task_function(); });
}
// Try to do any available work on the main thread. This may steal work from
// the thread pool, but when there is no work left the thread pool tasks
// will be no-ops.
while (task_function()) {
}
// Wait until all tasks have finished.
shared_state->block_until_finished.Block();
}
} // namespace internal
} // namespace ceres
#endif // CERES_USE_CXX_THREADS

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -28,58 +28,50 @@
//
// Author: vitus@google.com (Michael Vitus)
// This include must come before any #ifndef check on Ceres compile options.
#include <algorithm>
#include <atomic>
#include <cmath>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <tuple>
#include "ceres/internal/config.h"
#if defined(CERES_USE_OPENMP)
#include "ceres/parallel_for.h"
#include "ceres/scoped_thread_token.h"
#include "ceres/thread_token_provider.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
#include "omp.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
int MaxNumThreadsAvailable() { return omp_get_max_threads(); }
BlockUntilFinished::BlockUntilFinished(int num_total_jobs)
: num_total_jobs_finished_(0), num_total_jobs_(num_total_jobs) {}
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int)>& function) {
CHECK_GT(num_threads, 0);
CHECK(context != nullptr);
if (end <= start) {
return;
}
#ifdef CERES_USE_OPENMP
#pragma omp parallel for num_threads(num_threads) \
schedule(dynamic) if (num_threads > 1)
#endif // CERES_USE_OPENMP
for (int i = start; i < end; ++i) {
function(i);
void BlockUntilFinished::Finished(int num_jobs_finished) {
if (num_jobs_finished == 0) return;
std::lock_guard<std::mutex> lock(mutex_);
num_total_jobs_finished_ += num_jobs_finished;
CHECK_LE(num_total_jobs_finished_, num_total_jobs_);
if (num_total_jobs_finished_ == num_total_jobs_) {
condition_.notify_one();
}
}
void ParallelFor(ContextImpl* context,
int start,
int end,
int num_threads,
const std::function<void(int thread_id, int i)>& function) {
CHECK(context != nullptr);
ThreadTokenProvider thread_token_provider(num_threads);
ParallelFor(context, start, end, num_threads, [&](int i) {
const ScopedThreadToken scoped_thread_token(&thread_token_provider);
const int thread_id = scoped_thread_token.token();
function(thread_id, i);
});
void BlockUntilFinished::Block() {
std::unique_lock<std::mutex> lock(mutex_);
condition_.wait(
lock, [this]() { return num_total_jobs_finished_ == num_total_jobs_; });
}
} // namespace internal
} // namespace ceres
ParallelInvokeState::ParallelInvokeState(int start,
int end,
int num_work_blocks)
: start(start),
end(end),
num_work_blocks(num_work_blocks),
base_block_size((end - start) / num_work_blocks),
num_base_p1_sized_blocks((end - start) % num_work_blocks),
block_id(0),
thread_id(0),
block_until_finished(num_work_blocks) {}
#endif // defined(CERES_USE_OPENMP)
} // namespace ceres::internal

View File

@@ -0,0 +1,272 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: vitus@google.com (Michael Vitus),
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_PARALLEL_INVOKE_H_
#define CERES_INTERNAL_PARALLEL_INVOKE_H_
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <tuple>
#include <type_traits>
namespace ceres::internal {
// InvokeWithThreadId handles passing thread_id to the function
template <typename F, typename... Args>
void InvokeWithThreadId(int thread_id, F&& function, Args&&... args) {
constexpr bool kPassThreadId = std::is_invocable_v<F, int, Args...>;
if constexpr (kPassThreadId) {
function(thread_id, std::forward<Args>(args)...);
} else {
function(std::forward<Args>(args)...);
}
}
// InvokeOnSegment either runs a loop over segment indices or passes it to the
// function
template <typename F>
void InvokeOnSegment(int thread_id, std::tuple<int, int> range, F&& function) {
constexpr bool kExplicitLoop =
std::is_invocable_v<F, int> || std::is_invocable_v<F, int, int>;
if constexpr (kExplicitLoop) {
const auto [start, end] = range;
for (int i = start; i != end; ++i) {
InvokeWithThreadId(thread_id, std::forward<F>(function), i);
}
} else {
InvokeWithThreadId(thread_id, std::forward<F>(function), range);
}
}
// This class creates a thread safe barrier which will block until a
// pre-specified number of threads call Finished. This allows us to block the
// main thread until all the parallel threads are finished processing all the
// work.
class BlockUntilFinished {
public:
explicit BlockUntilFinished(int num_total_jobs);
// Increment the number of jobs that have been processed by the number of
// jobs processed by the caller and signal the blocking thread if all jobs
// have finished.
void Finished(int num_jobs_finished);
// Block until receiving confirmation of all jobs being finished.
void Block();
private:
std::mutex mutex_;
std::condition_variable condition_;
int num_total_jobs_finished_;
const int num_total_jobs_;
};
// Shared state between the parallel tasks. Each thread will use this
// information to get the next block of work to be performed.
struct ParallelInvokeState {
// The entire range [start, end) is split into num_work_blocks contiguous
// disjoint intervals (blocks), which are as equal as possible given
// total index count and requested number of blocks.
//
// Those num_work_blocks blocks are then processed in parallel.
//
// Total number of integer indices in interval [start, end) is
// end - start, and when splitting them into num_work_blocks blocks
// we can either
// - Split into equal blocks when (end - start) is divisible by
// num_work_blocks
// - Split into blocks with size difference at most 1:
// - Size of the smallest block(s) is (end - start) / num_work_blocks
// - (end - start) % num_work_blocks of the blocks will be 1 index larger
//
// Note that this splitting is optimal in the sense of maximal difference
// between block sizes, since splitting into equal blocks is possible
// if and only if number of indices is divisible by number of blocks.
ParallelInvokeState(int start, int end, int num_work_blocks);
// The start and end index of the for loop.
const int start;
const int end;
// The number of blocks that need to be processed.
const int num_work_blocks;
// Size of the smallest block
const int base_block_size;
// Number of blocks of size base_block_size + 1
const int num_base_p1_sized_blocks;
// The next block of work to be assigned to a worker. The parallel for loop
// range is split into num_work_blocks blocks of work, with a single block of
// work being of size
// - base_block_size + 1 for the first num_base_p1_sized_blocks blocks
// - base_block_size for the rest of the blocks
// Blocks of indices are contiguous and disjoint.
std::atomic<int> block_id;
// Provides a unique thread ID among all active threads.
// We do not schedule more than num_threads threads via the thread pool,
// and the caller thread might steal one ID.
std::atomic<int> thread_id;
// Used to signal when all the work has been completed. Thread safe.
BlockUntilFinished block_until_finished;
};
// This implementation uses a fixed size max worker pool with a shared task
// queue. The problem of executing the function for the interval of [start, end)
// is broken up into at most num_threads * kWorkBlocksPerThread blocks (each of
// size at least min_block_size) and added to the thread pool. To avoid
// deadlocks, the calling thread is allowed to steal work from the worker pool.
// This is implemented via a shared state between the tasks. In order for
// the calling thread or thread pool to get a block of work, it will query the
// shared state for the next block of work to be done. If there is nothing left,
// it will return. We will exit the ParallelFor call when all of the work has
// been done, not when all of the tasks have been popped off the task queue.
//
// A unique thread ID among all active tasks will be acquired once for each
// block of work. This avoids the significant performance penalty for acquiring
// it on every iteration of the for loop. The thread ID is guaranteed to be in
// [0, num_threads).
//
// A performance analysis has shown this implementation is on par with OpenMP
// and TBB.
template <typename F>
void ParallelInvoke(ContextImpl* context,
int start,
int end,
int num_threads,
F&& function,
int min_block_size) {
CHECK(context != nullptr);
// Maximum number of work items scheduled for a single thread.
// - A lower number of work items results in larger runtimes on unequal tasks
// - A higher number of work items results in larger synchronization losses
constexpr int kWorkBlocksPerThread = 4;
// Interval [start, end) is being split into
// num_threads * kWorkBlocksPerThread contiguous disjoint blocks.
//
// In order to avoid creating empty blocks of work, we need to limit
// the number of work blocks by the total number of indices.
const int num_work_blocks = std::min((end - start) / min_block_size,
num_threads * kWorkBlocksPerThread);
// We use a std::shared_ptr because the main thread can finish all
// the work before the tasks have been popped off the queue. So the
// shared state needs to exist for the duration of all the tasks.
auto shared_state =
std::make_shared<ParallelInvokeState>(start, end, num_work_blocks);
// A function which tries to schedule another task in the thread pool and
// perform several chunks of work. The function expects itself as an argument
// in order to schedule the next task in the thread pool.
auto task = [context, shared_state, num_threads, &function](auto& task_copy) {
int num_jobs_finished = 0;
const int thread_id = shared_state->thread_id.fetch_add(1);
// In order to avoid dead-locks in nested parallel for loops, task() will be
// invoked num_threads + 1 times:
// - num_threads times via enqueueing task into thread pool
// - one more time in the main thread
// Tasks enqueued to the thread pool might take some time before execution,
// and the last task to start executing will terminate here in order to
// avoid having more than num_threads active threads.
if (thread_id >= num_threads) return;
const int num_work_blocks = shared_state->num_work_blocks;
if (thread_id + 1 < num_threads &&
shared_state->block_id < num_work_blocks) {
// Add another thread to the thread pool.
// Note we are taking the task by value, so the copy of the shared_state
// shared pointer (captured by value at the declaration of the task
// lambda-function) is copied and the ref count is increased. This is to
// prevent it from being deleted when the main thread finishes all the work
// and exits before the threads finish.
context->thread_pool.AddTask([task_copy]() { task_copy(task_copy); });
}
const int start = shared_state->start;
const int base_block_size = shared_state->base_block_size;
const int num_base_p1_sized_blocks = shared_state->num_base_p1_sized_blocks;
while (true) {
// Get the next available chunk of work to be performed. If there is no
// work, return.
int block_id = shared_state->block_id.fetch_add(1);
if (block_id >= num_work_blocks) {
break;
}
++num_jobs_finished;
// The for-loop interval [start, end) was split into num_work_blocks blocks,
// with num_base_p1_sized_blocks of size base_block_size + 1 and the
// remaining num_work_blocks - num_base_p1_sized_blocks of size
// base_block_size.
//
// Then the start index of block #block_id is given by the total
// length of the preceding blocks:
// * Total length of preceding blocks of size base_block_size + 1:
// min(block_id, num_base_p1_sized_blocks) * (base_block_size + 1)
//
// * Total length of preceding blocks of size base_block_size:
// (block_id - min(block_id, num_base_p1_sized_blocks)) *
// base_block_size
//
// Simplifying the sum of those quantities yields the following
// expression for the start index of block #block_id:
const int curr_start = start + block_id * base_block_size +
std::min(block_id, num_base_p1_sized_blocks);
// The first num_base_p1_sized_blocks blocks have size base_block_size + 1.
//
// Note that it is guaranteed that all blocks are within
// [start, end) interval
const int curr_end = curr_start + base_block_size +
(block_id < num_base_p1_sized_blocks ? 1 : 0);
// Perform each task in the current block.
const auto range = std::make_tuple(curr_start, curr_end);
InvokeOnSegment(thread_id, range, function);
}
shared_state->block_until_finished.Finished(num_jobs_finished);
};
// Start scheduling threads and doing work. We might end up with fewer
// threads scheduled than expected, if the scheduling overhead is larger
// than the amount of work to be done.
task(task);
// Wait until all tasks have finished.
shared_state->block_until_finished.Block();
}
} // namespace ceres::internal
#endif
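To make the block-splitting arithmetic above concrete, here is a minimal standalone sketch (not part of the diff); it derives base_block_size and num_base_p1_sized_blocks from the field comments on ParallelInvokeState and reproduces the per-block boundaries computed inside the worker loop.

// Standalone sketch of the ParallelInvoke block-splitting arithmetic.
#include <algorithm>
#include <cstdio>

int main() {
  const int start = 0, end = 10, num_work_blocks = 4;
  // 10 indices over 4 blocks: base size 2, with 10 % 4 = 2 blocks of size 3.
  const int base_block_size = (end - start) / num_work_blocks;
  const int num_base_p1_sized_blocks = (end - start) % num_work_blocks;
  for (int block_id = 0; block_id < num_work_blocks; ++block_id) {
    const int curr_start = start + block_id * base_block_size +
                           std::min(block_id, num_base_p1_sized_blocks);
    const int curr_end = curr_start + base_block_size +
                         (block_id < num_base_p1_sized_blocks ? 1 : 0);
    // Prints [0, 3) [3, 6) [6, 8) [8, 10): contiguous and disjoint.
    std::printf("block %d -> [%d, %d)\n", block_id, curr_start, curr_end);
  }
  return 0;
}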

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,7 @@
#include "ceres/parallel_utils.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) {
// This works by unfolding a rectangle into a triangle.
@@ -86,5 +85,4 @@ void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) {
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,7 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Converts a linear iteration order into a triangular iteration order.
// Suppose you have nested loops that look like
@@ -66,7 +65,6 @@ CERES_NO_EXPORT void LinearIndexToUpperTriangularIndex(int k,
int* i,
int* j);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_PARALLEL_UTILS_H_
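A minimal usage sketch of LinearIndexToUpperTriangularIndex, assuming (per the header comment) that the conventional nested iteration being linearized is i in [0, n), j in [i, n):

#include <cstdio>
#include "ceres/parallel_utils.h"

int main() {
  const int n = 4;
  // There are n * (n + 1) / 2 upper-triangular pairs (i, j) with i <= j.
  for (int k = 0; k < n * (n + 1) / 2; ++k) {
    int i, j;
    ceres::internal::LinearIndexToUpperTriangularIndex(k, n, &i, &j);
    std::printf("k=%d -> (%d, %d)\n", k, i, j);  // Visits each pair once.
  }
  return 0;
}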

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -25,35 +25,30 @@
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_INTERNAL_FLOAT_CXSPARSE_H_
#define CERES_INTERNAL_FLOAT_CXSPARSE_H_
#include "ceres/parallel_vector_ops.h"
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#include <algorithm>
#include <tuple>
#if !defined(CERES_NO_CXSPARSE)
#include "ceres/context_impl.h"
#include "ceres/parallel_for.h"
#include <memory>
namespace ceres::internal {
void ParallelSetZero(ContextImpl* context,
int num_threads,
double* values,
int num_values) {
ParallelFor(
context,
0,
num_values,
num_threads,
[values](std::tuple<int, int> range) {
auto [start, end] = range;
std::fill(values + start, values + end, 0.);
},
kMinBlockSizeParallelVectorOps);
}
#include "ceres/internal/export.h"
#include "ceres/sparse_cholesky.h"
namespace ceres {
namespace internal {
// Fake implementation of a single precision Sparse Cholesky using
// CXSparse.
class CERES_NO_EXPORT FloatCXSparseCholesky : public SparseCholesky {
public:
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
};
} // namespace internal
} // namespace ceres
#endif // !defined(CERES_NO_CXSPARSE)
#endif // CERES_INTERNAL_FLOAT_CXSPARSE_H_
} // namespace ceres::internal

View File

@@ -0,0 +1,90 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: vitus@google.com (Michael Vitus),
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
#define CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
#include <mutex>
#include <vector>
#include "ceres/context_impl.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/parallel_for.h"
namespace ceres::internal {
// Lower bound on block size for parallel vector operations.
// Operations on vectors with fewer than kMinBlockSizeParallelVectorOps
// elements
// will be executed in a single thread.
constexpr int kMinBlockSizeParallelVectorOps = 1 << 16;
// Evaluate vector expression in parallel
// Assuming LhsExpression and RhsExpression are some sort of column-vector
// expression, the assignment lhs = rhs is evaluated over a set of contiguous
// blocks in parallel. This is expected to work well in the case of
// vector-based expressions (since they typically do not result in
// temporaries). This method expects lhs to be size-compatible with rhs.
template <typename LhsExpression, typename RhsExpression>
void ParallelAssign(ContextImpl* context,
int num_threads,
LhsExpression& lhs,
const RhsExpression& rhs) {
static_assert(LhsExpression::ColsAtCompileTime == 1);
static_assert(RhsExpression::ColsAtCompileTime == 1);
CHECK_EQ(lhs.rows(), rhs.rows());
const int num_rows = lhs.rows();
ParallelFor(
context,
0,
num_rows,
num_threads,
[&lhs, &rhs](const std::tuple<int, int>& range) {
auto [start, end] = range;
lhs.segment(start, end - start) = rhs.segment(start, end - start);
},
kMinBlockSizeParallelVectorOps);
}
// Set the vector to zero using up to num_threads threads.
template <typename VectorType>
void ParallelSetZero(ContextImpl* context,
int num_threads,
VectorType& vector) {
ParallelSetZero(context, num_threads, vector.data(), vector.rows());
}
void ParallelSetZero(ContextImpl* context,
int num_threads,
double* values,
int num_values);
} // namespace ceres::internal
#endif // CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
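A short sketch of how these helpers compose, assuming a ContextImpl that the caller has already initialized for the requested number of threads:

#include "ceres/context_impl.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_vector_ops.h"

void ParallelVectorOpsExample(ceres::internal::ContextImpl* context) {
  using ceres::internal::Vector;  // Eigen column vector of doubles.
  const int num_threads = 4;
  Vector x = Vector::Random(1 << 20);
  Vector y(x.rows());
  // Evaluate y = 2 * x over contiguous blocks in parallel; 2.0 * x is a
  // column-vector expression, so no temporary is materialized.
  ceres::internal::ParallelAssign(context, num_threads, y, 2.0 * x);
  // Zero y using up to num_threads threads.
  ceres::internal::ParallelSetZero(context, num_threads, y);
}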

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ProblemImpl;
class ResidualBlock;
@@ -382,8 +381,7 @@ class CERES_NO_EXPORT ParameterBlock {
friend class ProblemImpl;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,11 @@
#include "ceres/parameter_block_ordering.h"
#include <map>
#include <memory>
#include <set>
#include <unordered_set>
#include <vector>
#include "ceres/graph.h"
#include "ceres/graph_algorithms.h"
@@ -42,22 +45,18 @@
#include "ceres/wall_time.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::map;
using std::set;
using std::vector;
namespace ceres::internal {
int ComputeStableSchurOrdering(const Program& program,
vector<ParameterBlock*>* ordering) {
std::vector<ParameterBlock*>* ordering) {
CHECK(ordering != nullptr);
ordering->clear();
EventLogger event_logger("ComputeStableSchurOrdering");
auto graph = CreateHessianGraph(program);
event_logger.AddEvent("CreateHessianGraph");
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program.parameter_blocks();
const std::unordered_set<ParameterBlock*>& vertices = graph->vertices();
for (auto* parameter_block : parameter_blocks) {
if (vertices.count(parameter_block) > 0) {
@@ -81,13 +80,14 @@ int ComputeStableSchurOrdering(const Program& program,
}
int ComputeSchurOrdering(const Program& program,
vector<ParameterBlock*>* ordering) {
std::vector<ParameterBlock*>* ordering) {
CHECK(ordering != nullptr);
ordering->clear();
auto graph = CreateHessianGraph(program);
int independent_set_size = IndependentSetOrdering(*graph, ordering);
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program.parameter_blocks();
// Add the excluded blocks to the back of the ordering vector.
for (auto* parameter_block : parameter_blocks) {
@@ -103,13 +103,14 @@ void ComputeRecursiveIndependentSetOrdering(const Program& program,
ParameterBlockOrdering* ordering) {
CHECK(ordering != nullptr);
ordering->Clear();
const vector<ParameterBlock*> parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*> parameter_blocks =
program.parameter_blocks();
auto graph = CreateHessianGraph(program);
int num_covered = 0;
int round = 0;
while (num_covered < parameter_blocks.size()) {
vector<ParameterBlock*> independent_set_ordering;
std::vector<ParameterBlock*> independent_set_ordering;
const int independent_set_size =
IndependentSetOrdering(*graph, &independent_set_ordering);
for (int i = 0; i < independent_set_size; ++i) {
@@ -126,14 +127,16 @@ std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
const Program& program) {
auto graph = std::make_unique<Graph<ParameterBlock*>>();
CHECK(graph != nullptr);
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program.parameter_blocks();
for (auto* parameter_block : parameter_blocks) {
if (!parameter_block->IsConstant()) {
graph->AddVertex(parameter_block);
}
}
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
const std::vector<ResidualBlock*>& residual_blocks =
program.residual_blocks();
for (auto* residual_block : residual_blocks) {
const int num_parameter_blocks = residual_block->NumParameterBlocks();
ParameterBlock* const* parameter_blocks =
@@ -157,19 +160,20 @@ std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
}
void OrderingToGroupSizes(const ParameterBlockOrdering* ordering,
vector<int>* group_sizes) {
std::vector<int>* group_sizes) {
CHECK(group_sizes != nullptr);
group_sizes->clear();
if (ordering == nullptr) {
return;
}
const map<int, set<double*>>& group_to_elements =
// TODO(sameeragarwal): Investigate if this should be a set or an
// unordered_set.
const std::map<int, std::set<double*>>& group_to_elements =
ordering->group_to_elements();
for (const auto& g_t_e : group_to_elements) {
group_sizes->push_back(g_t_e.second.size());
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,15 +40,14 @@
#include "ceres/ordered_groups.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
class ParameterBlock;
// Uses an approximate independent set ordering to order the parameter
// blocks of a problem so that it is suitable for use with Schur
// complement based solvers. The output variable ordering contains an
// blocks of a problem so that it is suitable for use with Schur-
// complement-based solvers. The output variable ordering contains an
// ordering of the parameter blocks and the return value is size of
// the independent set or the number of e_blocks (see
// schur_complement_solver.h for an explanation). Constant parameters
@@ -88,8 +87,7 @@ CERES_NO_EXPORT std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
CERES_NO_EXPORT void OrderingToGroupSizes(
const ParameterBlockOrdering* ordering, std::vector<int>* group_sizes);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -0,0 +1,150 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: vitus@google.com (Michael Vitus),
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
#ifndef CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_
#define CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_
#include <algorithm>
#include <vector>
namespace ceres::internal {
// Check if it is possible to split the range [start, end) into at most
// max_num_partitions contiguous partitions of cost not greater than
// max_partition_cost. Inclusive integer cumulative costs are provided by
// cumulative_cost_data objects, with cumulative_cost_offset being the total
// cost of all indices (starting from zero) preceding the start element.
// Cumulative costs are returned by cumulative_cost_fun called with a
// reference to a cumulative_cost_data element with index from the range
// [start, end), and should be non-decreasing. The partition of the range is
// returned via the partition argument.
template <typename CumulativeCostData, typename CumulativeCostFun>
bool MaxPartitionCostIsFeasible(int start,
int end,
int max_num_partitions,
int max_partition_cost,
int cumulative_cost_offset,
const CumulativeCostData* cumulative_cost_data,
CumulativeCostFun&& cumulative_cost_fun,
std::vector<int>* partition) {
partition->clear();
partition->push_back(start);
int partition_start = start;
int cost_offset = cumulative_cost_offset;
while (partition_start < end) {
// Already have max_num_partitions partitions.
if (partition->size() > max_num_partitions) {
return false;
}
const int target = max_partition_cost + cost_offset;
const int partition_end =
std::partition_point(
cumulative_cost_data + partition_start,
cumulative_cost_data + end,
[&cumulative_cost_fun, target](const CumulativeCostData& item) {
return cumulative_cost_fun(item) <= target;
}) -
cumulative_cost_data;
// Unable to fit even a single element into the current partition.
if (partition_end == partition_start) {
return false;
}
const int cost_last =
cumulative_cost_fun(cumulative_cost_data[partition_end - 1]);
partition->push_back(partition_end);
partition_start = partition_end;
cost_offset = cost_last;
}
return true;
}
// Split the integer interval [start, end) into at most max_num_partitions
// contiguous intervals, minimizing the maximal total cost of a single
// interval. Inclusive integer cumulative costs for each (zero-based) index
// are provided by cumulative_cost_data objects, and are returned by a
// cumulative_cost_fun call with a reference to one of the objects from the
// range [start, end).
template <typename CumulativeCostData, typename CumulativeCostFun>
std::vector<int> PartitionRangeForParallelFor(
int start,
int end,
int max_num_partitions,
const CumulativeCostData* cumulative_cost_data,
CumulativeCostFun&& cumulative_cost_fun) {
// Given a maximal partition cost, it is possible to verify whether it is
// admissible and to obtain the corresponding partition using the
// MaxPartitionCostIsFeasible function. In order to find the lowest
// admissible value, a binary search over all potentially optimal cost
// values is performed.
const int cumulative_cost_last =
cumulative_cost_fun(cumulative_cost_data[end - 1]);
const int cumulative_cost_offset =
start ? cumulative_cost_fun(cumulative_cost_data[start - 1]) : 0;
const int total_cost = cumulative_cost_last - cumulative_cost_offset;
// The minimal maximal partition cost is not smaller than the average.
// We will use a non-inclusive lower bound.
int partition_cost_lower_bound = total_cost / max_num_partitions - 1;
// The minimal maximal partition cost is not larger than the total cost.
// The upper bound is inclusive.
int partition_cost_upper_bound = total_cost;
std::vector<int> partition;
// Range partition corresponding to the latest evaluated upper bound.
// A single segment covering the whole input interval [start, end) corresponds
// to minimal maximal partition cost of total_cost.
std::vector<int> partition_upper_bound = {start, end};
// Binary search over partition cost, returning the lowest admissible cost
while (partition_cost_upper_bound - partition_cost_lower_bound > 1) {
partition.reserve(max_num_partitions + 1);
const int partition_cost =
partition_cost_lower_bound +
(partition_cost_upper_bound - partition_cost_lower_bound) / 2;
bool admissible = MaxPartitionCostIsFeasible(
start,
end,
max_num_partitions,
partition_cost,
cumulative_cost_offset,
cumulative_cost_data,
std::forward<CumulativeCostFun>(cumulative_cost_fun),
&partition);
if (admissible) {
partition_cost_upper_bound = partition_cost;
std::swap(partition, partition_upper_bound);
} else {
partition_cost_lower_bound = partition_cost;
}
}
return partition_upper_bound;
}
} // namespace ceres::internal
#endif
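As a worked illustration (hypothetical data, not from the diff): partitioning six items whose per-item costs are 1, 1, 1, 10, 1, 1 into at most three intervals isolates the expensive item, and the binary search settles on a maximal partition cost of 10.

#include <cstdio>
#include <vector>
#include "ceres/partition_range_for_parallel_for.h"

int main() {
  // Inclusive cumulative costs of 6 items with per-item costs 1,1,1,10,1,1.
  const std::vector<int> cumulative_cost = {1, 2, 3, 13, 14, 15};
  const std::vector<int> partition =
      ceres::internal::PartitionRangeForParallelFor(
          0,
          static_cast<int>(cumulative_cost.size()),
          /*max_num_partitions=*/3,
          cumulative_cost.data(),
          [](const int& c) { return c; });
  // With one dominant item the best split isolates it: boundaries {0, 3, 4, 6},
  // i.e. intervals [0, 3), [3, 4), [4, 6) with costs 3, 10 and 2.
  for (const int boundary : partition) std::printf("%d ", boundary);
  return 0;
}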

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,7 @@
#include "ceres/linear_solver.h"
#include "ceres/partitioned_matrix_view.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default;
@@ -56,121 +55,121 @@ std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
(options.e_block_size == 2) &&
(options.f_block_size == 2)) {
return std::make_unique<PartitionedMatrixView<2,2, 2>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 2) &&
(options.f_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<2,2, 3>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 2) &&
(options.f_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<2,2, 4>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 2)) {
return std::make_unique<PartitionedMatrixView<2,2, Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 3) &&
(options.f_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<2,3, 3>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 3) &&
(options.f_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<2,3, 4>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 3) &&
(options.f_block_size == 6)) {
return std::make_unique<PartitionedMatrixView<2,3, 6>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 3) &&
(options.f_block_size == 9)) {
return std::make_unique<PartitionedMatrixView<2,3, 9>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<2,3, Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4) &&
(options.f_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<2,4, 3>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4) &&
(options.f_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<2,4, 4>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4) &&
(options.f_block_size == 6)) {
return std::make_unique<PartitionedMatrixView<2,4, 6>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4) &&
(options.f_block_size == 8)) {
return std::make_unique<PartitionedMatrixView<2,4, 8>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4) &&
(options.f_block_size == 9)) {
return std::make_unique<PartitionedMatrixView<2,4, 9>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 2) &&
(options.e_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<2,4, Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if (options.row_block_size == 2) {
return std::make_unique<PartitionedMatrixView<2,Eigen::Dynamic, Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 3) &&
(options.e_block_size == 3) &&
(options.f_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<3,3, 3>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 4) &&
(options.e_block_size == 4) &&
(options.f_block_size == 2)) {
return std::make_unique<PartitionedMatrixView<4,4, 2>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 4) &&
(options.e_block_size == 4) &&
(options.f_block_size == 3)) {
return std::make_unique<PartitionedMatrixView<4,4, 3>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 4) &&
(options.e_block_size == 4) &&
(options.f_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<4,4, 4>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
if ((options.row_block_size == 4) &&
(options.e_block_size == 4)) {
return std::make_unique<PartitionedMatrixView<4,4, Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
}
#endif
@@ -180,8 +179,7 @@ std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
return std::make_unique<PartitionedMatrixView<Eigen::Dynamic,
Eigen::Dynamic,
Eigen::Dynamic>>(
matrix, options.elimination_groups[0]);
options, matrix);
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -50,12 +50,13 @@
#include "ceres/small_blas.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ContextImpl;
// Given generalized bi-partite matrix A = [E F], with the same block
// structure as required by the Schur complement based solver, found
// in explicit_schur_complement_solver.h, provide access to the
// in schur_complement_solver.h, provide access to the
// matrices E and F and their outer products E'E and F'F with
// themselves.
//
@@ -68,16 +69,26 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase {
virtual ~PartitionedMatrixViewBase();
// y += E'x
virtual void LeftMultiplyE(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulateE(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x,
double* y) const = 0;
virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x,
double* y) const = 0;
// y += F'x
virtual void LeftMultiplyF(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulateF(const double* x, double* y) const = 0;
virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x,
double* y) const = 0;
virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x,
double* y) const = 0;
// y += Ex
virtual void RightMultiplyE(const double* x, double* y) const = 0;
virtual void RightMultiplyAndAccumulateE(const double* x,
double* y) const = 0;
// y += Fx
virtual void RightMultiplyF(const double* x, double* y) const = 0;
virtual void RightMultiplyAndAccumulateF(const double* x,
double* y) const = 0;
// Create and return the block diagonal of the matrix E'E.
virtual std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const = 0;
@@ -109,6 +120,8 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase {
virtual int num_cols_f() const = 0;
virtual int num_rows() const = 0;
virtual int num_cols() const = 0;
virtual const std::vector<int>& e_cols_partition() const = 0;
virtual const std::vector<int>& f_cols_partition() const = 0;
// clang-format on
static std::unique_ptr<PartitionedMatrixViewBase> Create(
@@ -122,17 +135,46 @@ class CERES_NO_EXPORT PartitionedMatrixView final
: public PartitionedMatrixViewBase {
public:
// matrix = [E F], where the matrix E contains the first
// num_col_blocks_a column blocks.
PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e);
// options.elimination_groups[0] column blocks.
PartitionedMatrixView(const LinearSolver::Options& options,
const BlockSparseMatrix& matrix);
// y += E'x
virtual void LeftMultiplyAndAccumulateE(const double* x,
double* y) const final;
virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x,
double* y) const final;
virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x,
double* y) const final;
// y += F'x
virtual void LeftMultiplyAndAccumulateF(const double* x,
double* y) const final;
virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x,
double* y) const final;
virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x,
double* y) const final;
// y += Ex
virtual void RightMultiplyAndAccumulateE(const double* x,
double* y) const final;
// y += Fx
virtual void RightMultiplyAndAccumulateF(const double* x,
double* y) const final;
void LeftMultiplyE(const double* x, double* y) const final;
void LeftMultiplyF(const double* x, double* y) const final;
void RightMultiplyE(const double* x, double* y) const final;
void RightMultiplyF(const double* x, double* y) const final;
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const final;
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalFtF() const final;
void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final;
void UpdateBlockDiagonalEtESingleThreaded(
BlockSparseMatrix* block_diagonal) const;
void UpdateBlockDiagonalEtEMultiThreaded(
BlockSparseMatrix* block_diagonal) const;
void UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const final;
void UpdateBlockDiagonalFtFSingleThreaded(
BlockSparseMatrix* block_diagonal) const;
void UpdateBlockDiagonalFtFMultiThreaded(
BlockSparseMatrix* block_diagonal) const;
// clang-format off
int num_col_blocks_e() const final { return num_col_blocks_e_; }
int num_col_blocks_f() const final { return num_col_blocks_f_; }
@@ -141,21 +183,29 @@ class CERES_NO_EXPORT PartitionedMatrixView final
int num_rows() const final { return matrix_.num_rows(); }
int num_cols() const final { return matrix_.num_cols(); }
// clang-format on
const std::vector<int>& e_cols_partition() const final {
return e_cols_partition_;
}
const std::vector<int>& f_cols_partition() const final {
return f_cols_partition_;
}
private:
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalMatrixLayout(
int start_col_block, int end_col_block) const;
const LinearSolver::Options options_;
const BlockSparseMatrix& matrix_;
int num_row_blocks_e_;
int num_col_blocks_e_;
int num_col_blocks_f_;
int num_cols_e_;
int num_cols_f_;
std::vector<int> e_cols_partition_;
std::vector<int> f_cols_partition_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,27 +36,31 @@
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/partition_range_for_parallel_for.h"
#include "ceres/partitioned_matrix_view.h"
#include "ceres/small_blas.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e)
: matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) {
PartitionedMatrixView(const LinearSolver::Options& options,
const BlockSparseMatrix& matrix)
: options_(options), matrix_(matrix) {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
CHECK(bs != nullptr);
num_col_blocks_e_ = options_.elimination_groups[0];
num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;
// Compute the number of row blocks in E. The number of row blocks
// in E may be less than the number of row blocks in the input matrix
// as some of the row blocks at the bottom may not have any
// e_blocks. For a definition of what an e_block is, please see
// explicit_schur_complement_solver.h
// schur_complement_solver.h
num_row_blocks_e_ = 0;
for (const auto& row : bs->rows) {
const std::vector<Cell>& cells = row.cells;
@@ -79,6 +83,25 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
auto transpose_bs = matrix_.transpose_block_structure();
const int num_threads = options_.num_threads;
if (transpose_bs != nullptr && num_threads > 1) {
const int kMaxPartitions = num_threads * 4;
e_cols_partition_ = PartitionRangeForParallelFor(
0,
num_col_blocks_e_,
kMaxPartitions,
transpose_bs->rows.data(),
[](const CompressedRow& row) { return row.cumulative_nnz; });
f_cols_partition_ = PartitionRangeForParallelFor(
num_col_blocks_e_,
num_col_blocks_e_ + num_col_blocks_f_,
kMaxPartitions,
transpose_bs->rows.data(),
[](const CompressedRow& row) { return row.cumulative_nnz; });
}
}
// The next four methods don't seem to be particularly cache
@@ -88,77 +111,101 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyE(const double* x, double* y) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
RightMultiplyAndAccumulateE(const double* x, double* y) const {
// Iterate over the first num_row_blocks_e_ row blocks, and multiply
// by the first cell in each row block.
auto bs = matrix_.block_structure();
const double* values = matrix_.values();
for (int r = 0; r < num_row_blocks_e_; ++r) {
const Cell& cell = bs->rows[r].cells[0];
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
values + cell.position, row_block_size, col_block_size,
x + col_block_pos,
y + row_block_pos);
// clang-format on
}
ParallelFor(options_.context,
0,
num_row_blocks_e_,
options_.num_threads,
[values, bs, x, y](int row_block_id) {
const Cell& cell = bs->rows[row_block_id].cells[0];
const int row_block_pos = bs->rows[row_block_id].block.position;
const int row_block_size = bs->rows[row_block_id].block.size;
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
values + cell.position, row_block_size, col_block_size,
x + col_block_pos,
y + row_block_pos);
// clang-format on
});
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyF(const double* x, double* y) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
RightMultiplyAndAccumulateF(const double* x, double* y) const {
// Iterate over row blocks, and if the row block is in E, then
// multiply by all the cells except the first one, which is of type
// E. If the row block is not in E (i.e. it is in the bottom
// num_row_blocks - num_row_blocks_e row blocks), then all the cells
// are of type F and multiply by them all.
const CompressedRowBlockStructure* bs = matrix_.block_structure();
const int num_row_blocks = bs->rows.size();
const int num_cols_e = num_cols_e_;
const double* values = matrix_.values();
for (int r = 0; r < num_row_blocks_e_; ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
const std::vector<Cell>& cells = bs->rows[r].cells;
for (int c = 1; c < cells.size(); ++c) {
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
values + cells[c].position, row_block_size, col_block_size,
x + col_block_pos - num_cols_e_,
y + row_block_pos);
// clang-format on
}
}
ParallelFor(options_.context,
0,
num_row_blocks_e_,
options_.num_threads,
[values, bs, num_cols_e, x, y](int row_block_id) {
const int row_block_pos = bs->rows[row_block_id].block.position;
const int row_block_size = bs->rows[row_block_id].block.size;
const auto& cells = bs->rows[row_block_id].cells;
for (int c = 1; c < cells.size(); ++c) {
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
values + cells[c].position, row_block_size, col_block_size,
x + col_block_pos - num_cols_e,
y + row_block_pos);
// clang-format on
}
});
ParallelFor(options_.context,
num_row_blocks_e_,
num_row_blocks,
options_.num_threads,
[values, bs, num_cols_e, x, y](int row_block_id) {
const int row_block_pos = bs->rows[row_block_id].block.position;
const int row_block_size = bs->rows[row_block_id].block.size;
const auto& cells = bs->rows[row_block_id].cells;
for (const auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position, row_block_size, col_block_size,
x + col_block_pos - num_cols_e,
y + row_block_pos);
// clang-format on
}
});
}
for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
const std::vector<Cell>& cells = bs->rows[r].cells;
for (const auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
// clang-format off
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position, row_block_size, col_block_size,
x + col_block_pos - num_cols_e_,
y + row_block_pos);
// clang-format on
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyAndAccumulateE(const double* x, double* y) const {
if (!num_col_blocks_e_) return;
if (!num_row_blocks_e_) return;
if (options_.num_threads == 1) {
LeftMultiplyAndAccumulateESingleThreaded(x, y);
} else {
CHECK(options_.context != nullptr);
LeftMultiplyAndAccumulateEMultiThreaded(x, y);
}
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyE(const double* x, double* y) const {
LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
// Iterate over the first num_row_blocks_e_ row blocks, and multiply
@@ -182,7 +229,55 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyF(const double* x, double* y) const {
LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const {
auto transpose_bs = matrix_.transpose_block_structure();
CHECK(transpose_bs != nullptr);
// Local copies of class members in order to avoid capturing a pointer to
// the whole object in the lambda function.
auto values = matrix_.values();
const int num_row_blocks_e = num_row_blocks_e_;
ParallelFor(
options_.context,
0,
num_col_blocks_e_,
options_.num_threads,
[values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) {
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
int row_block_size = transpose_bs->rows[row_block_id].block.size;
auto& cells = transpose_bs->rows[row_block_id].cells;
for (auto& cell : cells) {
const int col_block_id = cell.block_id;
const int col_block_size = transpose_bs->cols[col_block_id].size;
const int col_block_pos = transpose_bs->cols[col_block_id].position;
if (col_block_id >= num_row_blocks_e) break;
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
values + cell.position,
col_block_size,
row_block_size,
x + col_block_pos,
y + row_block_pos);
}
},
e_cols_partition());
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyAndAccumulateF(const double* x, double* y) const {
if (!num_col_blocks_f_) return;
if (options_.num_threads == 1) {
LeftMultiplyAndAccumulateFSingleThreaded(x, y);
} else {
CHECK(options_.context != nullptr);
LeftMultiplyAndAccumulateFMultiThreaded(x, y);
}
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
// Iterate over row blocks, and if the row block is in E, then
@@ -226,10 +321,63 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const {
auto transpose_bs = matrix_.transpose_block_structure();
CHECK(transpose_bs != nullptr);
// Local copies of class members in order to avoid capturing a pointer to
// the whole object in the lambda function.
auto values = matrix_.values();
const int num_row_blocks_e = num_row_blocks_e_;
const int num_cols_e = num_cols_e_;
ParallelFor(
options_.context,
num_col_blocks_e_,
num_col_blocks_e_ + num_col_blocks_f_,
options_.num_threads,
[values, transpose_bs, num_row_blocks_e, num_cols_e, x, y](
int row_block_id) {
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
int row_block_size = transpose_bs->rows[row_block_id].block.size;
auto& cells = transpose_bs->rows[row_block_id].cells;
const int num_cells = cells.size();
int cell_idx = 0;
for (; cell_idx < num_cells; ++cell_idx) {
auto& cell = cells[cell_idx];
const int col_block_id = cell.block_id;
const int col_block_size = transpose_bs->cols[col_block_id].size;
const int col_block_pos = transpose_bs->cols[col_block_id].position;
if (col_block_id >= num_row_blocks_e) break;
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
values + cell.position,
col_block_size,
row_block_size,
x + col_block_pos,
y + row_block_pos - num_cols_e);
}
for (; cell_idx < num_cells; ++cell_idx) {
auto& cell = cells[cell_idx];
const int col_block_id = cell.block_id;
const int col_block_size = transpose_bs->cols[col_block_id].size;
const int col_block_pos = transpose_bs->cols[col_block_id].position;
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position,
col_block_size,
row_block_size,
x + col_block_pos,
y + row_block_pos - num_cols_e);
}
},
f_cols_partition());
}
// Given a range of columns blocks of a matrix m, compute the block
// structure of the block diagonal of the matrix m(:,
// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
// and return a BlockSparseMatrix with the this block structure. The
// and return a BlockSparseMatrix with this block structure. The
// caller owns the result.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
std::unique_ptr<BlockSparseMatrix>
@@ -290,17 +438,17 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
return block_diagonal;
}
// Similar to the code in RightMultiplyE, except instead of the matrix
// vector multiply its an outer product.
// Similar to the code in RightMultiplyAndAccumulateE, except instead of the
// matrix-vector multiply it is an outer product.
//
// block_diagonal = block_diagonal(E'E)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
const CompressedRowBlockStructure* block_diagonal_structure =
block_diagonal->block_structure();
UpdateBlockDiagonalEtESingleThreaded(
BlockSparseMatrix* block_diagonal) const {
auto bs = matrix_.block_structure();
auto block_diagonal_structure = block_diagonal->block_structure();
block_diagonal->SetZero();
const double* values = matrix_.values();
@@ -323,17 +471,68 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
}
// Similar to the code in RightMultiplyF, except instead of the matrix
// vector multiply its an outer product.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalEtEMultiThreaded(
BlockSparseMatrix* block_diagonal) const {
auto transpose_block_structure = matrix_.transpose_block_structure();
CHECK(transpose_block_structure != nullptr);
auto block_diagonal_structure = block_diagonal->block_structure();
const double* values = matrix_.values();
double* values_diagonal = block_diagonal->mutable_values();
ParallelFor(
options_.context,
0,
num_col_blocks_e_,
options_.num_threads,
[values,
transpose_block_structure,
values_diagonal,
block_diagonal_structure](int col_block_id) {
int cell_position =
block_diagonal_structure->rows[col_block_id].cells[0].position;
double* cell_values = values_diagonal + cell_position;
int col_block_size =
transpose_block_structure->rows[col_block_id].block.size;
auto& cells = transpose_block_structure->rows[col_block_id].cells;
MatrixRef(cell_values, col_block_size, col_block_size).setZero();
for (auto& c : cells) {
int row_block_size = transpose_block_structure->cols[c.block_id].size;
// clang-format off
MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
values + c.position, row_block_size, col_block_size,
values + c.position, row_block_size, col_block_size,
cell_values, 0, 0, col_block_size, col_block_size);
// clang-format on
}
},
e_cols_partition_);
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
if (options_.num_threads == 1) {
UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
} else {
CHECK(options_.context != nullptr);
UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
}
}
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
// matrix-vector multiply it is an outer product.
//
// block_diagonal = block_diagonal(F'F)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
const CompressedRowBlockStructure* bs = matrix_.block_structure();
const CompressedRowBlockStructure* block_diagonal_structure =
block_diagonal->block_structure();
UpdateBlockDiagonalFtFSingleThreaded(
BlockSparseMatrix* block_diagonal) const {
auto bs = matrix_.block_structure();
auto block_diagonal_structure = block_diagonal->block_structure();
block_diagonal->SetZero();
const double* values = matrix_.values();
@@ -380,5 +579,82 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
}
} // namespace internal
} // namespace ceres
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalFtFMultiThreaded(
BlockSparseMatrix* block_diagonal) const {
auto transpose_block_structure = matrix_.transpose_block_structure();
CHECK(transpose_block_structure != nullptr);
auto block_diagonal_structure = block_diagonal->block_structure();
const double* values = matrix_.values();
double* values_diagonal = block_diagonal->mutable_values();
const int num_col_blocks_e = num_col_blocks_e_;
const int num_row_blocks_e = num_row_blocks_e_;
ParallelFor(
options_.context,
num_col_blocks_e_,
num_col_blocks_e + num_col_blocks_f_,
options_.num_threads,
[transpose_block_structure,
block_diagonal_structure,
num_col_blocks_e,
num_row_blocks_e,
values,
values_diagonal](int col_block_id) {
const int col_block_size =
transpose_block_structure->rows[col_block_id].block.size;
const int diagonal_block_id = col_block_id - num_col_blocks_e;
const int cell_position =
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
double* cell_values = values_diagonal + cell_position;
MatrixRef(cell_values, col_block_size, col_block_size).setZero();
auto& cells = transpose_block_structure->rows[col_block_id].cells;
const int num_cells = cells.size();
int i = 0;
for (; i < num_cells; ++i) {
auto& cell = cells[i];
const int row_block_id = cell.block_id;
if (row_block_id >= num_row_blocks_e) break;
const int row_block_size =
transpose_block_structure->cols[row_block_id].size;
// clang-format off
MatrixTransposeMatrixMultiply
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
values + cell.position, row_block_size, col_block_size,
values + cell.position, row_block_size, col_block_size,
cell_values, 0, 0, col_block_size, col_block_size);
// clang-format on
}
for (; i < num_cells; ++i) {
auto& cell = cells[i];
const int row_block_id = cell.block_id;
const int row_block_size =
transpose_block_structure->cols[row_block_id].size;
// clang-format off
MatrixTransposeMatrixMultiply
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
values + cell.position, row_block_size, col_block_size,
values + cell.position, row_block_size, col_block_size,
cell_values, 0, 0, col_block_size, col_block_size);
// clang-format on
}
},
f_cols_partition_);
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
if (options_.num_threads == 1) {
UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
} else {
CHECK(options_.context != nullptr);
UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
}
}
} // namespace ceres::internal

View File

@@ -0,0 +1,149 @@
# Ceres Solver - A fast non-linear least squares minimizer
# Copyright 2023 Google Inc. All rights reserved.
# http://ceres-solver.org/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Google Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: sameeragarwal@google.com (Sameer Agarwal)
#
# Script for explicitly generating template specializations of the
# PartitionedMatrixView class. Explicitly generating these
# instantiations in separate .cc files breaks the compilation into
# separate compilation units rather than one large .cc file.
#
# This script creates two sets of files.
#
# 1. partitioned_matrix_view_x_x_x.cc
# where the x indicates the template parameters and
#
# 2. partitioned_matrix_view.cc
#
# that contains a factory function for instantiating these classes
# based on runtime parameters.
#
# The list of tuples named specializations indicates the set of
# specializations that is generated.
HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// Template specialization of PartitionedMatrixView.
//
// ========================================
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
//=========================================
//
// This file is generated using generate_template_specializations.py.
"""
DYNAMIC_FILE = """
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres::internal {
template class PartitionedMatrixView<%s,
%s,
%s>;
} // namespace ceres::internal
"""
SPECIALIZATION_FILE = """
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
#include "ceres/partitioned_matrix_view_impl.h"
namespace ceres::internal {
template class PartitionedMatrixView<%s, %s, %s>;
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
"""
FACTORY_FILE_HEADER = """
#include <memory>
#include "ceres/linear_solver.h"
#include "ceres/partitioned_matrix_view.h"
namespace ceres::internal {
PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default;
std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
const LinearSolver::Options& options, const BlockSparseMatrix& matrix) {
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
"""
FACTORY = """ return std::make_unique<PartitionedMatrixView<%s,%s, %s>>(
options, matrix);"""
FACTORY_FOOTER = """
#endif
VLOG(1) << "Template specializations not found for <"
<< options.row_block_size << "," << options.e_block_size << ","
<< options.f_block_size << ">";
return std::make_unique<PartitionedMatrixView<Eigen::Dynamic,
Eigen::Dynamic,
Eigen::Dynamic>>(
options, matrix);
};
} // namespace ceres::internal
"""

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,10 +40,7 @@
#include "ceres/internal/export.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::vector;
namespace ceres::internal {
namespace {
@@ -326,7 +323,7 @@ void MinimizePolynomial(const Vector& polynomial,
}
}
Vector FindInterpolatingPolynomial(const vector<FunctionSample>& samples) {
Vector FindInterpolatingPolynomial(const std::vector<FunctionSample>& samples) {
const int num_samples = samples.size();
int num_constraints = 0;
for (int i = 0; i < num_samples; ++i) {
@@ -369,7 +366,7 @@ Vector FindInterpolatingPolynomial(const vector<FunctionSample>& samples) {
return lu.setThreshold(0.0).solve(rhs);
}
void MinimizeInterpolatingPolynomial(const vector<FunctionSample>& samples,
void MinimizeInterpolatingPolynomial(const std::vector<FunctionSample>& samples,
double x_min,
double x_max,
double* optimal_x,
@@ -389,5 +386,4 @@ void MinimizeInterpolatingPolynomial(const vector<FunctionSample>& samples,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct FunctionSample;
@@ -116,8 +115,7 @@ CERES_NO_EXPORT void MinimizeInterpolatingPolynomial(
double* optimal_x,
double* optimal_value);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -0,0 +1,88 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: markshachkov@gmail.com (Mark Shachkov)
#include "ceres/power_series_expansion_preconditioner.h"
#include "ceres/eigen_vector_ops.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/preconditioner.h"
namespace ceres::internal {
PowerSeriesExpansionPreconditioner::PowerSeriesExpansionPreconditioner(
const ImplicitSchurComplement* isc,
const int max_num_spse_iterations,
const double spse_tolerance,
const Preconditioner::Options& options)
: isc_(isc),
max_num_spse_iterations_(max_num_spse_iterations),
spse_tolerance_(spse_tolerance),
options_(options) {}
PowerSeriesExpansionPreconditioner::~PowerSeriesExpansionPreconditioner() =
default;
bool PowerSeriesExpansionPreconditioner::Update(const LinearOperator& /*A*/,
const double* /*D*/) {
return true;
}
void PowerSeriesExpansionPreconditioner::RightMultiplyAndAccumulate(
const double* x, double* y) const {
VectorRef yref(y, num_rows());
Vector series_term(num_rows());
Vector previous_series_term(num_rows());
ParallelSetZero(options_.context, options_.num_threads, yref);
isc_->block_diagonal_FtF_inverse()->RightMultiplyAndAccumulate(
x, y, options_.context, options_.num_threads);
ParallelAssign(
options_.context, options_.num_threads, previous_series_term, yref);
const double norm_threshold =
spse_tolerance_ * Norm(yref, options_.context, options_.num_threads);
for (int i = 1;; i++) {
ParallelSetZero(options_.context, options_.num_threads, series_term);
isc_->InversePowerSeriesOperatorRightMultiplyAccumulate(
previous_series_term.data(), series_term.data());
ParallelAssign(
options_.context, options_.num_threads, yref, yref + series_term);
if (i >= max_num_spse_iterations_ || series_term.norm() < norm_threshold) {
break;
}
std::swap(previous_series_term, series_term);
}
}
int PowerSeriesExpansionPreconditioner::num_rows() const {
return isc_->num_rows();
}
} // namespace ceres::internal
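A sketch of the series the loop above accumulates, assuming (per the cited Weber et al. paper) that S denotes the Schur complement and D the block diagonal of F^T F, whose inverse is applied by block_diagonal_FtF_inverse():

  S^{-1} x \approx \sum_{i=0}^{n} \left( D^{-1} (D - S) \right)^{i} D^{-1} x

The loop stops at i = max_num_spse_iterations_, or earlier once the norm of the current series term drops below spse_tolerance_ times the norm of the initial term D^{-1} x.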

View File

@@ -0,0 +1,71 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: markshachkov@gmail.com (Mark Shachkov)
#ifndef CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_
#define CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_
#include "ceres/implicit_schur_complement.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/preconditioner.h"
namespace ceres::internal {
// This is a preconditioner based on a power series expansion of the
// Schur complement inverse, as described in "Weber et al, Power Bundle
// Adjustment for Large-Scale 3D Reconstruction".
class CERES_NO_EXPORT PowerSeriesExpansionPreconditioner
: public Preconditioner {
public:
// TODO: Consider moving max_num_spse_iterations and spse_tolerance to
// Preconditioner::Options
PowerSeriesExpansionPreconditioner(const ImplicitSchurComplement* isc,
const int max_num_spse_iterations,
const double spse_tolerance,
const Preconditioner::Options& options);
PowerSeriesExpansionPreconditioner(
const PowerSeriesExpansionPreconditioner&) = delete;
void operator=(const PowerSeriesExpansionPreconditioner&) = delete;
~PowerSeriesExpansionPreconditioner() override;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
bool Update(const LinearOperator& A, const double* D) final;
int num_rows() const final;
private:
const ImplicitSchurComplement* isc_;
const int max_num_spse_iterations_;
const double spse_tolerance_;
const Preconditioner::Options options_;
};
} // namespace ceres::internal
#endif // CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,7 @@
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
Preconditioner::~Preconditioner() = default;
@@ -48,27 +47,27 @@ PreconditionerType Preconditioner::PreconditionerForZeroEBlocks(
}
SparseMatrixPreconditionerWrapper::SparseMatrixPreconditionerWrapper(
const SparseMatrix* matrix)
: matrix_(matrix) {
const SparseMatrix* matrix, const Preconditioner::Options& options)
: matrix_(matrix), options_(options) {
CHECK(matrix != nullptr);
}
SparseMatrixPreconditionerWrapper::~SparseMatrixPreconditionerWrapper() =
default;
bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& A,
const double* D) {
bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& /*A*/,
const double* /*D*/) {
return true;
}
void SparseMatrixPreconditionerWrapper::RightMultiply(const double* x,
double* y) const {
matrix_->RightMultiply(x, y);
void SparseMatrixPreconditionerWrapper::RightMultiplyAndAccumulate(
const double* x, double* y) const {
matrix_->RightMultiplyAndAccumulate(
x, y, options_.context, options_.num_threads);
}
int SparseMatrixPreconditionerWrapper::num_rows() const {
return matrix_->num_rows();
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,11 +39,11 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/linear_operator.h"
#include "ceres/linear_solver.h"
#include "ceres/sparse_matrix.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
class SparseMatrix;
@@ -51,10 +51,25 @@ class SparseMatrix;
class CERES_NO_EXPORT Preconditioner : public LinearOperator {
public:
struct Options {
Options() = default;
Options(const LinearSolver::Options& linear_solver_options)
: type(linear_solver_options.preconditioner_type),
visibility_clustering_type(
linear_solver_options.visibility_clustering_type),
sparse_linear_algebra_library_type(
linear_solver_options.sparse_linear_algebra_library_type),
num_threads(linear_solver_options.num_threads),
row_block_size(linear_solver_options.row_block_size),
e_block_size(linear_solver_options.e_block_size),
f_block_size(linear_solver_options.f_block_size),
elimination_groups(linear_solver_options.elimination_groups),
context(linear_solver_options.context) {}
PreconditionerType type = JACOBI;
VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS;
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
SUITE_SPARSE;
OrderingType ordering_type = OrderingType::NATURAL;
// When using the subset preconditioner, all row blocks starting
// from this row block are used to construct the preconditioner.
@@ -68,9 +83,6 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator {
// and the preconditioner is the inverse of the matrix Q'Q.
int subset_preconditioner_start_row_block = -1;
// See solver.h for information about these flags.
bool use_postordering = false;
// If possible, how many threads the preconditioner can use.
int num_threads = 1;
@@ -132,18 +144,37 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator {
virtual bool Update(const LinearOperator& A, const double* D) = 0;
// LinearOperator interface. Since the operator is symmetric,
// LeftMultiply and num_cols are just calls to RightMultiply and
// num_rows respectively. Update() must be called before
// RightMultiply can be called.
void RightMultiply(const double* x, double* y) const override = 0;
void LeftMultiply(const double* x, double* y) const override {
return RightMultiply(x, y);
// LeftMultiplyAndAccumulate and num_cols are just calls to
// RightMultiplyAndAccumulate and num_rows respectively. Update() must be
// called before RightMultiplyAndAccumulate can be called.
void RightMultiplyAndAccumulate(const double* x,
double* y) const override = 0;
void LeftMultiplyAndAccumulate(const double* x, double* y) const override {
return RightMultiplyAndAccumulate(x, y);
}
int num_rows() const override = 0;
int num_cols() const override { return num_rows(); }
};
class CERES_NO_EXPORT IdentityPreconditioner : public Preconditioner {
public:
IdentityPreconditioner(int num_rows) : num_rows_(num_rows) {}
bool Update(const LinearOperator& /*A*/, const double* /*D*/) final {
return true;
}
void RightMultiplyAndAccumulate(const double* x, double* y) const final {
VectorRef(y, num_rows_) += ConstVectorRef(x, num_rows_);
}
int num_rows() const final { return num_rows_; }
private:
int num_rows_ = -1;
};
// This templated subclass of Preconditioner serves as a base class for
// other preconditioners that depend on the particular matrix layout of
// the underlying linear operator.
@@ -171,20 +202,21 @@ class CERES_NO_EXPORT SparseMatrixPreconditionerWrapper final
: public SparseMatrixPreconditioner {
public:
// Wrapper does NOT take ownership of the matrix pointer.
explicit SparseMatrixPreconditionerWrapper(const SparseMatrix* matrix);
explicit SparseMatrixPreconditionerWrapper(
const SparseMatrix* matrix, const Preconditioner::Options& options);
~SparseMatrixPreconditionerWrapper() override;
// Preconditioner interface
void RightMultiply(const double* x, double* y) const override;
void RightMultiplyAndAccumulate(const double* x, double* y) const override;
int num_rows() const override;
private:
bool UpdateImpl(const SparseMatrix& A, const double* D) override;
const SparseMatrix* matrix_;
const Preconditioner::Options options_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,13 +35,12 @@
#include "ceres/callbacks.h"
#include "ceres/gradient_checking_cost_function.h"
#include "ceres/line_search_preprocessor.h"
#include "ceres/parallel_for.h"
#include "ceres/problem_impl.h"
#include "ceres/solver.h"
#include "ceres/thread_pool.h"
#include "ceres/trust_region_preprocessor.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
std::unique_ptr<Preprocessor> Preprocessor::Create(
MinimizerType minimizer_type) {
@@ -63,7 +62,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options) {
if (options->num_threads == 1) {
return;
}
const int num_threads_available = MaxNumThreadsAvailable();
const int num_threads_available = ThreadPool::MaxNumThreadsAvailable();
if (options->num_threads > num_threads_available) {
LOG(WARNING) << "Specified options.num_threads: " << options->num_threads
<< " exceeds maximum available from the threading model Ceres "
@@ -83,9 +82,11 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
double* reduced_parameters = pp->reduced_parameters.data();
program->ParameterBlocksToStateVector(reduced_parameters);
auto context = pp->problem->context();
Minimizer::Options& minimizer_options = pp->minimizer_options;
minimizer_options = Minimizer::Options(options);
minimizer_options.evaluator = pp->evaluator;
minimizer_options.context = context;
if (options.logging_type != SILENT) {
pp->logging_callback = std::make_unique<LoggingCallback>(
@@ -104,5 +105,4 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
#include "ceres/program.h"
#include "ceres/solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct PreprocessedProblem;
@@ -118,8 +117,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options);
CERES_NO_EXPORT
void SetupCommonMinimizerOptions(PreprocessedProblem* pp);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,6 @@
namespace ceres {
using std::vector;
Problem::Problem() : impl_(new internal::ProblemImpl) {}
Problem::Problem(const Problem::Options& options)
: impl_(new internal::ProblemImpl(options)) {}
@@ -52,7 +50,7 @@ Problem::~Problem() = default;
ResidualBlockId Problem::AddResidualBlock(
CostFunction* cost_function,
LossFunction* loss_function,
const vector<double*>& parameter_blocks) {
const std::vector<double*>& parameter_blocks) {
return impl_->AddResidualBlock(cost_function,
loss_function,
parameter_blocks.data(),
@@ -71,12 +69,6 @@ void Problem::AddParameterBlock(double* values, int size) {
impl_->AddParameterBlock(values, size);
}
void Problem::AddParameterBlock(double* values,
int size,
LocalParameterization* local_parameterization) {
impl_->AddParameterBlock(values, size, local_parameterization);
}
void Problem::AddParameterBlock(double* values, int size, Manifold* manifold) {
impl_->AddParameterBlock(values, size, manifold);
}
@@ -101,20 +93,6 @@ bool Problem::IsParameterBlockConstant(const double* values) const {
return impl_->IsParameterBlockConstant(values);
}
void Problem::SetParameterization(
double* values, LocalParameterization* local_parameterization) {
impl_->SetParameterization(values, local_parameterization);
}
const LocalParameterization* Problem::GetParameterization(
const double* values) const {
return impl_->GetParameterization(values);
}
bool Problem::HasParameterization(const double* values) const {
return impl_->HasParameterization(values);
}
void Problem::SetManifold(double* values, Manifold* manifold) {
impl_->SetManifold(values, manifold);
}
@@ -149,8 +127,8 @@ double Problem::GetParameterLowerBound(const double* values, int index) const {
bool Problem::Evaluate(const EvaluateOptions& evaluate_options,
double* cost,
vector<double>* residuals,
vector<double>* gradient,
std::vector<double>* residuals,
std::vector<double>* gradient,
CRSMatrix* jacobian) {
return impl_->Evaluate(evaluate_options, cost, residuals, gradient, jacobian);
}
@@ -194,10 +172,6 @@ int Problem::ParameterBlockSize(const double* values) const {
return impl_->ParameterBlockSize(values);
}
int Problem::ParameterBlockLocalSize(const double* values) const {
return impl_->ParameterBlockTangentSize(values);
}
int Problem::ParameterBlockTangentSize(const double* values) const {
return impl_->ParameterBlockTangentSize(values);
}
@@ -206,18 +180,18 @@ bool Problem::HasParameterBlock(const double* values) const {
return impl_->HasParameterBlock(values);
}
void Problem::GetParameterBlocks(vector<double*>* parameter_blocks) const {
void Problem::GetParameterBlocks(std::vector<double*>* parameter_blocks) const {
impl_->GetParameterBlocks(parameter_blocks);
}
void Problem::GetResidualBlocks(
vector<ResidualBlockId>* residual_blocks) const {
std::vector<ResidualBlockId>* residual_blocks) const {
impl_->GetResidualBlocks(residual_blocks);
}
void Problem::GetParameterBlocksForResidualBlock(
const ResidualBlockId residual_block,
vector<double*>* parameter_blocks) const {
std::vector<double*>* parameter_blocks) const {
impl_->GetParameterBlocksForResidualBlock(residual_block, parameter_blocks);
}
@@ -232,8 +206,12 @@ const LossFunction* Problem::GetLossFunctionForResidualBlock(
}
void Problem::GetResidualBlocksForParameterBlock(
const double* values, vector<ResidualBlockId>* residual_blocks) const {
const double* values, std::vector<ResidualBlockId>* residual_blocks) const {
impl_->GetResidualBlocksForParameterBlock(values, residual_blocks);
}
const Problem::Options& Problem::options() const { return impl_->options(); }
internal::ProblemImpl* Problem::mutable_impl() { return impl_.get(); }
} // namespace ceres
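The removed overloads complete the LocalParameterization to Manifold transition. A porting sketch, assuming a quaternion parameter block and the stock QuaternionManifold:

  #include "ceres/manifold.h"
  #include "ceres/problem.h"

  void AddQuaternionBlock(ceres::Problem& problem, double* q) {
    // Pre-2.2: problem.AddParameterBlock(q, 4, new ceres::QuaternionParameterization);
    problem.AddParameterBlock(q, 4, new ceres::QuaternionManifold);
  }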

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -53,7 +53,6 @@
#include "ceres/internal/fixed_array.h"
#include "ceres/loss_function.h"
#include "ceres/manifold.h"
#include "ceres/manifold_adapter.h"
#include "ceres/map_util.h"
#include "ceres/parameter_block.h"
#include "ceres/program.h"
@@ -64,8 +63,7 @@
#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
// Returns true if two regions of memory, a and b, with sizes size_a and size_b
// respectively, overlap.
@@ -257,10 +255,6 @@ ProblemImpl::~ProblemImpl() {
DeleteBlock(parameter_block);
}
// Delete the owned parameterizations.
STLDeleteUniqueContainerPointers(local_parameterizations_to_delete_.begin(),
local_parameterizations_to_delete_.end());
// Delete the owned manifolds.
STLDeleteUniqueContainerPointers(manifolds_to_delete_.begin(),
manifolds_to_delete_.end());
@@ -365,45 +359,15 @@ void ProblemImpl::AddParameterBlock(double* values, int size) {
InternalAddParameterBlock(values, size);
}
void ProblemImpl::InternalSetParameterization(
double* values,
ParameterBlock* parameter_block,
LocalParameterization* local_parameterization) {
parameter_block_to_local_param_[values] = local_parameterization;
Manifold* manifold = nullptr;
if (local_parameterization != nullptr) {
if (options_.local_parameterization_ownership == TAKE_OWNERSHIP) {
local_parameterizations_to_delete_.push_back(local_parameterization);
}
manifold = new ManifoldAdapter(local_parameterization);
// Add the manifold to manifolds_to_delete_ unconditionally since
// we own it and it will need to be deleted.
manifolds_to_delete_.push_back(manifold);
}
parameter_block->SetManifold(manifold);
}
void ProblemImpl::InternalSetManifold(double* values,
void ProblemImpl::InternalSetManifold(double* /*values*/,
ParameterBlock* parameter_block,
Manifold* manifold) {
// Reset any association between this parameter block and a local
// parameterization. This only needs to be done while we are in the
// transition from LocalParameterization to Manifold.
parameter_block_to_local_param_[values] = nullptr;
if (manifold != nullptr && options_.manifold_ownership == TAKE_OWNERSHIP) {
manifolds_to_delete_.push_back(manifold);
}
parameter_block->SetManifold(manifold);
}
void ProblemImpl::AddParameterBlock(
double* values, int size, LocalParameterization* local_parameterization) {
ParameterBlock* parameter_block = InternalAddParameterBlock(values, size);
InternalSetParameterization(values, parameter_block, local_parameterization);
}
void ProblemImpl::AddParameterBlock(double* values,
int size,
Manifold* manifold) {
@@ -539,19 +503,6 @@ void ProblemImpl::SetParameterBlockVariable(double* values) {
parameter_block->SetVarying();
}
void ProblemImpl::SetParameterization(
double* values, LocalParameterization* local_parameterization) {
ParameterBlock* parameter_block =
FindWithDefault(parameter_block_map_, values, nullptr);
if (parameter_block == nullptr) {
LOG(FATAL) << "Parameter block not found: " << values
<< ". You must add the parameter block to the problem before "
<< "you can set its local parameterization.";
}
InternalSetParameterization(values, parameter_block, local_parameterization);
}
void ProblemImpl::SetManifold(double* values, Manifold* manifold) {
ParameterBlock* parameter_block =
FindWithDefault(parameter_block_map_, values, nullptr);
@@ -564,22 +515,13 @@ void ProblemImpl::SetManifold(double* values, Manifold* manifold) {
InternalSetManifold(values, parameter_block, manifold);
}
const LocalParameterization* ProblemImpl::GetParameterization(
const double* values) const {
return FindWithDefault(parameter_block_to_local_param_, values, nullptr);
}
bool ProblemImpl::HasParameterization(const double* values) const {
return GetParameterization(values) != nullptr;
}
const Manifold* ProblemImpl::GetManifold(const double* values) const {
ParameterBlock* parameter_block = FindWithDefault(
parameter_block_map_, const_cast<double*>(values), nullptr);
if (parameter_block == nullptr) {
LOG(FATAL) << "Parameter block not found: " << values
<< ". You must add the parameter block to the problem before "
<< "you can get its local parameterization.";
<< "you can get its manifold.";
}
return parameter_block->manifold();
@@ -730,17 +672,7 @@ bool ProblemImpl::Evaluate(const Problem::EvaluateOptions& evaluate_options,
// the Evaluator decides the storage for the Jacobian based on the
// type of linear solver being used.
evaluator_options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
#ifdef CERES_NO_THREADS
if (evaluate_options.num_threads > 1) {
LOG(WARNING)
<< "No threading support is compiled into this binary; "
<< "only evaluate_options.num_threads = 1 is supported. Switching "
<< "to single threaded mode.";
}
evaluator_options.num_threads = 1;
#else
evaluator_options.num_threads = evaluate_options.num_threads;
#endif // CERES_NO_THREADS
// The main thread also does work so we only need to launch num_threads - 1.
context_impl_->EnsureMinimumThreads(evaluator_options.num_threads - 1);
@@ -968,5 +900,4 @@ void ProblemImpl::GetResidualBlocksForParameterBlock(
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2021 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,6 @@ namespace ceres {
class CostFunction;
class EvaluationCallback;
class LossFunction;
class LocalParameterization;
struct CRSMatrix;
namespace internal {
@@ -100,10 +99,6 @@ class CERES_NO_EXPORT ProblemImpl {
}
void AddParameterBlock(double* values, int size);
void AddParameterBlock(double* values,
int size,
LocalParameterization* local_parameterization);
void AddParameterBlock(double* values, int size, Manifold* manifold);
void RemoveResidualBlock(ResidualBlock* residual_block);
@@ -113,11 +108,6 @@ class CERES_NO_EXPORT ProblemImpl {
void SetParameterBlockVariable(double* values);
bool IsParameterBlockConstant(const double* values) const;
void SetParameterization(double* values,
LocalParameterization* local_parameterization);
const LocalParameterization* GetParameterization(const double* values) const;
bool HasParameterization(const double* values) const;
void SetManifold(double* values, Manifold* manifold);
const Manifold* GetManifold(const double* values) const;
bool HasManifold(const double* values) const;
@@ -176,14 +166,12 @@ class CERES_NO_EXPORT ProblemImpl {
return residual_block_set_;
}
const Problem::Options& options() const { return options_; }
ContextImpl* context() { return context_impl_; }
private:
ParameterBlock* InternalAddParameterBlock(double* values, int size);
void InternalSetParameterization(
double* values,
ParameterBlock* parameter_block,
LocalParameterization* local_parameterization);
void InternalSetManifold(double* values,
ParameterBlock* parameter_block,
Manifold* manifold);
@@ -214,15 +202,8 @@ class CERES_NO_EXPORT ProblemImpl {
std::unique_ptr<internal::Program> program_;
// TODO(sameeragarwal): Unify the shared object handling across object types.
// Right now we are using vectors for LocalParameterization and Manifold
// objects and reference counting for CostFunctions and LossFunctions. Ideally
// this should be done uniformly.
// When removing parameter blocks, parameterizations have ambiguous
// ownership. Instead of scanning the entire problem to see if the
// parameterization is shared with other parameter blocks, buffer
// them until destruction.
std::vector<LocalParameterization*> local_parameterizations_to_delete_;
// Right now we are using vectors for Manifold objects and reference counting
// for CostFunctions and LossFunctions. Ideally this should be done uniformly.
// When removing parameter blocks, manifolds have ambiguous
// ownership. Instead of scanning the entire problem to see if the
@@ -236,17 +217,6 @@ class CERES_NO_EXPORT ProblemImpl {
// destroyed.
CostFunctionRefCount cost_function_ref_count_;
LossFunctionRefCount loss_function_ref_count_;
// Because we wrap LocalParameterization objects using a ManifoldAdapter, when
// the user calls GetParameterization we cannot use the same logic as
// GetManifold as the ParameterBlock object only returns a Manifold object. So
// this map stores the association between parameter blocks and local
// parameterizations.
//
// This is a temporary object which will be removed once the
// LocalParameterization to Manifold transition is complete.
std::unordered_map<const double*, LocalParameterization*>
parameter_block_to_local_param_;
};
} // namespace internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -45,14 +45,14 @@
#include "ceres/loss_function.h"
#include "ceres/manifold.h"
#include "ceres/map_util.h"
#include "ceres/parallel_for.h"
#include "ceres/parameter_block.h"
#include "ceres/problem.h"
#include "ceres/residual_block.h"
#include "ceres/stl_util.h"
#include "ceres/triplet_sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
const std::vector<ParameterBlock*>& Program::parameter_blocks() const {
return parameter_blocks_;
@@ -109,16 +109,32 @@ bool Program::SetParameterBlockStatePtrsToUserStatePtrs() {
bool Program::Plus(const double* state,
const double* delta,
double* state_plus_delta) const {
for (auto* parameter_block : parameter_blocks_) {
if (!parameter_block->Plus(state, delta, state_plus_delta)) {
return false;
}
state += parameter_block->Size();
delta += parameter_block->TangentSize();
state_plus_delta += parameter_block->Size();
}
return true;
double* state_plus_delta,
ContextImpl* context,
int num_threads) const {
std::atomic<bool> abort(false);
auto* parameter_blocks = parameter_blocks_.data();
ParallelFor(
context,
0,
parameter_blocks_.size(),
num_threads,
[&abort, state, delta, state_plus_delta, parameter_blocks](int block_id) {
if (abort) {
return;
}
auto parameter_block = parameter_blocks[block_id];
auto block_state = state + parameter_block->state_offset();
auto block_delta = delta + parameter_block->delta_offset();
auto block_state_plus_delta =
state_plus_delta + parameter_block->state_offset();
if (!parameter_block->Plus(
block_state, block_delta, block_state_plus_delta)) {
abort = true;
}
});
return abort == false;
}
void Program::SetParameterOffsetsAndIndex() {
@@ -545,5 +561,4 @@ std::string Program::ToString() const {
return ret;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,13 +40,13 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ParameterBlock;
class ProblemImpl;
class ResidualBlock;
class TripletSparseMatrix;
class ContextImpl;
// A nonlinear least squares optimization problem. This is different from the
// similarly-named "Problem" object, which offers a mutation interface for
@@ -87,7 +87,9 @@ class CERES_NO_EXPORT Program {
// Update a state vector for the program given a delta.
bool Plus(const double* state,
const double* delta,
double* state_plus_delta) const;
double* state_plus_delta,
ContextImpl* context,
int num_threads) const;
// Set the parameter indices and offsets. This permits mapping backward
// from a ParameterBlock* to an index in the parameter_blocks() vector. For
@@ -192,8 +194,7 @@ class CERES_NO_EXPORT Program {
friend class ProblemImpl;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,7 @@
// residual jacobians are written directly into their final position in the
// block sparse matrix by the user's CostFunction; there is no copying.
//
// The evaluation is threaded with OpenMP or C++ threads.
// The evaluation is threaded with C++ threads.
//
// The EvaluatePreparer and JacobianWriter interfaces are as follows:
//
@@ -96,6 +96,7 @@
#include "ceres/execution_summary.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/parameter_block.h"
#include "ceres/program.h"
#include "ceres/residual_block.h"
@@ -105,7 +106,7 @@ namespace ceres {
namespace internal {
struct NullJacobianFinalizer {
void operator()(SparseMatrix* jacobian, int num_parameters) {}
void operator()(SparseMatrix* /*jacobian*/, int /*num_parameters*/) {}
};
template <typename EvaluatePreparer,
@@ -118,19 +119,11 @@ class ProgramEvaluator final : public Evaluator {
program_(program),
jacobian_writer_(options, program),
evaluate_preparers_(std::move(
jacobian_writer_.CreateEvaluatePreparers(options.num_threads))) {
#ifdef CERES_NO_THREADS
if (options_.num_threads > 1) {
LOG(WARNING) << "No threading support is compiled into this binary; "
<< "only options.num_threads = 1 is supported. Switching "
<< "to single threaded mode.";
options_.num_threads = 1;
}
#endif // CERES_NO_THREADS
jacobian_writer_.CreateEvaluatePreparers(options.num_threads))),
num_parameters_(program->NumEffectiveParameters()) {
BuildResidualLayout(*program, &residual_layout_);
evaluate_scratch_ =
std::move(CreateEvaluatorScratch(*program, options.num_threads));
evaluate_scratch_ = std::move(CreateEvaluatorScratch(
*program, static_cast<unsigned>(options.num_threads)));
}
// Implementation of Evaluator interface.
@@ -164,20 +157,24 @@ class ProgramEvaluator final : public Evaluator {
}
if (residuals != nullptr) {
VectorRef(residuals, program_->NumResiduals()).setZero();
ParallelSetZero(options_.context,
options_.num_threads,
residuals,
program_->NumResiduals());
}
if (jacobian != nullptr) {
jacobian->SetZero();
jacobian->SetZero(options_.context, options_.num_threads);
}
// Each thread gets its own cost and evaluate scratch space.
for (int i = 0; i < options_.num_threads; ++i) {
evaluate_scratch_[i].cost = 0.0;
if (gradient != nullptr) {
VectorRef(evaluate_scratch_[i].gradient.get(),
program_->NumEffectiveParameters())
.setZero();
ParallelSetZero(options_.context,
options_.num_threads,
evaluate_scratch_[i].gradient.get(),
num_parameters_);
}
}
@@ -259,38 +256,55 @@ class ProgramEvaluator final : public Evaluator {
}
});
if (!abort) {
const int num_parameters = program_->NumEffectiveParameters();
if (abort) {
return false;
}
// Sum the cost and gradient (if requested) from each thread.
(*cost) = 0.0;
// Sum the cost and gradient (if requested) from each thread.
(*cost) = 0.0;
if (gradient != nullptr) {
auto gradient_vector = VectorRef(gradient, num_parameters_);
ParallelSetZero(options_.context, options_.num_threads, gradient_vector);
}
for (int i = 0; i < options_.num_threads; ++i) {
(*cost) += evaluate_scratch_[i].cost;
if (gradient != nullptr) {
VectorRef(gradient, num_parameters).setZero();
}
for (int i = 0; i < options_.num_threads; ++i) {
(*cost) += evaluate_scratch_[i].cost;
if (gradient != nullptr) {
VectorRef(gradient, num_parameters) +=
VectorRef(evaluate_scratch_[i].gradient.get(), num_parameters);
}
}
// Finalize the Jacobian if it is available.
// `num_parameters` is passed to the finalizer so that additional
// storage can be reserved for additional diagonal elements if
// necessary.
if (jacobian != nullptr) {
JacobianFinalizer f;
f(jacobian, num_parameters);
auto gradient_vector = VectorRef(gradient, num_parameters_);
ParallelAssign(
options_.context,
options_.num_threads,
gradient_vector,
gradient_vector + VectorRef(evaluate_scratch_[i].gradient.get(),
num_parameters_));
}
}
return !abort;
// It is possible that after accumulation the cost has become infinite
// or a NaN.
if (!std::isfinite(*cost)) {
LOG(ERROR) << "Accumulated cost = " << *cost
<< " is not a finite number. Evaluation failed.";
return false;
}
// Finalize the Jacobian if it is available.
// `num_parameters` is passed to the finalizer so that additional
// storage can be reserved for additional diagonal elements if
// necessary.
if (jacobian != nullptr) {
JacobianFinalizer f;
f(jacobian, num_parameters_);
}
return true;
}
bool Plus(const double* state,
const double* delta,
double* state_plus_delta) const final {
return program_->Plus(state, delta, state_plus_delta);
return program_->Plus(
state, delta, state_plus_delta, options_.context, options_.num_threads);
}
int NumParameters() const final { return program_->NumParameters(); }
@@ -345,7 +359,7 @@ class ProgramEvaluator final : public Evaluator {
// Create scratch space for each thread evaluating the program.
static std::unique_ptr<EvaluateScratch[]> CreateEvaluatorScratch(
const Program& program, int num_threads) {
const Program& program, unsigned num_threads) {
int max_parameters_per_residual_block =
program.MaxParametersPerResidualBlock();
int max_scratch_doubles_needed_for_evaluate =
@@ -370,6 +384,7 @@ class ProgramEvaluator final : public Evaluator {
std::unique_ptr<EvaluatePreparer[]> evaluate_preparers_;
std::unique_ptr<EvaluateScratch[]> evaluate_scratch_;
std::vector<int> residual_layout_;
int num_parameters_;
::ceres::internal::ExecutionSummary execution_summary_;
};
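The per-thread scratch pattern above (each thread accumulates into its own gradient buffer, which the caller then reduces) generalizes; a standalone sketch using plain std::thread rather than Ceres' ParallelFor:

  #include <algorithm>
  #include <cstddef>
  #include <thread>
  #include <vector>

  // Each worker accumulates into its own slot and the caller reduces the
  // slots afterwards, so the hot path needs no locking.
  double ParallelSum(const std::vector<double>& values, int num_threads) {
    std::vector<double> partial(num_threads, 0.0);
    std::vector<std::thread> workers;
    const std::size_t chunk = (values.size() + num_threads - 1) / num_threads;
    for (int t = 0; t < num_threads; ++t) {
      workers.emplace_back([&values, &partial, chunk, t] {
        const std::size_t begin = t * chunk;
        const std::size_t end = std::min(values.size(), begin + chunk);
        for (std::size_t i = begin; i < end; ++i) partial[t] += values[i];
      });
    }
    for (auto& w : workers) w.join();
    double total = 0.0;
    for (const double p : partial) total += p;
    return total;
  }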

View File

@@ -1,73 +0,0 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)
// sameeragarwal@google.com (Sameer Agarwal)
#ifndef CERES_INTERNAL_RANDOM_H_
#define CERES_INTERNAL_RANDOM_H_
#include <cmath>
#include <cstdlib>
#include "ceres/internal/export.h"
namespace ceres {
inline void SetRandomState(int state) { srand(state); }
inline int Uniform(int n) {
if (n) {
return rand() % n;
} else {
return 0;
}
}
inline double RandDouble() {
auto r = static_cast<double>(rand());
return r / RAND_MAX;
}
// Box-Muller algorithm for normal random number generation.
// http://en.wikipedia.org/wiki/Box-Muller_transform
inline double RandNormal() {
double x1, x2, w;
do {
x1 = 2.0 * RandDouble() - 1.0;
x2 = 2.0 * RandDouble() - 1.0;
w = x1 * x1 + x2 * x2;
} while (w >= 1.0 || w == 0.0);
w = sqrt((-2.0 * log(w)) / w);
return x1 * w;
}
} // namespace ceres
#endif // CERES_INTERNAL_RANDOM_H_
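The deleted helpers predate <random>. A sketch of standard-library equivalents (the exact replacement Ceres adopted is not shown in this diff):

  #include <random>

  // A seeded engine replaces SetRandomState()/srand(), e.g. std::mt19937 prng(5);

  int Uniform(std::mt19937& prng, int n) {
    if (n <= 0) return 0;  // mirrors the old n == 0 guard
    return std::uniform_int_distribution<int>(0, n - 1)(prng);
  }

  double RandDouble(std::mt19937& prng) {
    return std::uniform_real_distribution<double>(0.0, 1.0)(prng);
  }

  // std::normal_distribution supersedes the hand-rolled Box-Muller transform.
  double RandNormal(std::mt19937& prng) {
    return std::normal_distribution<double>(0.0, 1.0)(prng);
  }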

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,12 +31,14 @@
#include "ceres/reorder_program.h"
#include <algorithm>
#include <map>
#include <memory>
#include <numeric>
#include <set>
#include <string>
#include <vector>
#include "Eigen/SparseCore"
#include "ceres/cxsparse.h"
#include "ceres/internal/config.h"
#include "ceres/internal/export.h"
#include "ceres/ordered_groups.h"
@@ -51,18 +53,19 @@
#include "ceres/types.h"
#ifdef CERES_USE_EIGEN_SPARSE
#ifndef CERES_NO_EIGEN_METIS
#include <iostream> // Need this because MetisSupport refers to std::cerr.
#include "Eigen/MetisSupport"
#endif
#include "Eigen/OrderingMethods"
#endif
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::map;
using std::set;
using std::string;
using std::vector;
namespace ceres::internal {
namespace {
@@ -86,7 +89,6 @@ static int MinParameterBlock(const ResidualBlock* residual_block,
return min_parameter_block_position;
}
#if defined(CERES_USE_EIGEN_SPARSE)
Eigen::SparseMatrix<int> CreateBlockJacobian(
const TripletSparseMatrix& block_jacobian_transpose) {
using SparseMatrix = Eigen::SparseMatrix<int>;
@@ -95,7 +97,7 @@ Eigen::SparseMatrix<int> CreateBlockJacobian(
const int* rows = block_jacobian_transpose.rows();
const int* cols = block_jacobian_transpose.cols();
int num_nonzeros = block_jacobian_transpose.num_nonzeros();
vector<Triplet> triplets;
std::vector<Triplet> triplets;
triplets.reserve(num_nonzeros);
for (int i = 0; i < num_nonzeros; ++i) {
triplets.emplace_back(cols[i], rows[i], 1);
@@ -106,14 +108,20 @@ Eigen::SparseMatrix<int> CreateBlockJacobian(
block_jacobian.setFromTriplets(triplets.begin(), triplets.end());
return block_jacobian;
}
#endif
void OrderingForSparseNormalCholeskyUsingSuiteSparse(
const LinearSolverOrderingType linear_solver_ordering_type,
const TripletSparseMatrix& tsm_block_jacobian_transpose,
const vector<ParameterBlock*>& parameter_blocks,
const std::vector<ParameterBlock*>& parameter_blocks,
const ParameterBlockOrdering& parameter_block_ordering,
int* ordering) {
#ifdef CERES_NO_SUITESPARSE
// "Void"ing values to avoid compiler warnings about unused parameters
(void)linear_solver_ordering_type;
(void)tsm_block_jacobian_transpose;
(void)parameter_blocks;
(void)parameter_block_ordering;
(void)ordering;
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
<< "Please report this error to the developers.";
#else
@@ -121,61 +129,47 @@ void OrderingForSparseNormalCholeskyUsingSuiteSparse(
cholmod_sparse* block_jacobian_transpose = ss.CreateSparseMatrix(
const_cast<TripletSparseMatrix*>(&tsm_block_jacobian_transpose));
// Either no CAMD is available or the user did not supply a useful
// ordering, so just use regular AMD.
if (parameter_block_ordering.NumGroups() <= 1 ||
!SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) {
ss.ApproximateMinimumDegreeOrdering(block_jacobian_transpose, &ordering[0]);
} else {
vector<int> constraints;
for (auto* parameter_block : parameter_blocks) {
constraints.push_back(parameter_block_ordering.GroupId(
parameter_block->mutable_user_state()));
if (linear_solver_ordering_type == ceres::AMD) {
if (parameter_block_ordering.NumGroups() <= 1) {
// The user did not supply a useful ordering so just go ahead
// and use AMD.
ss.Ordering(block_jacobian_transpose, OrderingType::AMD, ordering);
} else {
// The user supplied an ordering, so use CAMD.
std::vector<int> constraints;
constraints.reserve(parameter_blocks.size());
for (auto* parameter_block : parameter_blocks) {
constraints.push_back(parameter_block_ordering.GroupId(
parameter_block->mutable_user_state()));
}
// Renumber the entries of constraints to be contiguous integers
// as CAMD requires that the group ids be in the range [0,
// parameter_blocks.size() - 1].
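// For example (illustrative values): group ids {7, 3, 7, 9} are remapped
// to {1, 0, 1, 2}, preserving their relative order.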
MapValuesToContiguousRange(constraints.size(), constraints.data());
ss.ConstrainedApproximateMinimumDegreeOrdering(
block_jacobian_transpose, constraints.data(), ordering);
}
// Renumber the entries of constraints to be contiguous integers
// as CAMD requires that the group ids be in the range [0,
// parameter_blocks.size() - 1].
MapValuesToContiguousRange(constraints.size(), &constraints[0]);
ss.ConstrainedApproximateMinimumDegreeOrdering(
block_jacobian_transpose, &constraints[0], ordering);
} else if (linear_solver_ordering_type == ceres::NESDIS) {
// If nested dissection is chosen as an ordering algorithm, then
// ignore any user provided linear_solver_ordering.
CHECK(SuiteSparse::IsNestedDissectionAvailable())
<< "Congratulations, you found a Ceres bug! "
<< "Please report this error to the developers.";
ss.Ordering(block_jacobian_transpose, OrderingType::NESDIS, ordering);
} else {
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
<< "Please report this error to the developers.";
}
VLOG(2) << "Block ordering stats: "
<< " flops: " << ss.mutable_cc()->fl
<< " lnz : " << ss.mutable_cc()->lnz
<< " anz : " << ss.mutable_cc()->anz;
ss.Free(block_jacobian_transpose);
#endif // CERES_NO_SUITESPARSE
}
void OrderingForSparseNormalCholeskyUsingCXSparse(
const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
#ifdef CERES_NO_CXSPARSE
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
<< "Please report this error to the developers.";
#else
// CXSparse works with J'J instead of J'. So compute the block
// sparsity for J'J and compute an approximate minimum degree
// ordering.
CXSparse cxsparse;
cs_di* block_jacobian_transpose;
block_jacobian_transpose = cxsparse.CreateSparseMatrix(
const_cast<TripletSparseMatrix*>(&tsm_block_jacobian_transpose));
cs_di* block_jacobian = cxsparse.TransposeMatrix(block_jacobian_transpose);
cs_di* block_hessian =
cxsparse.MatrixMatrixMultiply(block_jacobian_transpose, block_jacobian);
cxsparse.Free(block_jacobian);
cxsparse.Free(block_jacobian_transpose);
cxsparse.ApproximateMinimumDegreeOrdering(block_hessian, ordering);
cxsparse.Free(block_hessian);
#endif // CERES_NO_CXSPARSE
}
void OrderingForSparseNormalCholeskyUsingEigenSparse(
const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
const LinearSolverOrderingType linear_solver_ordering_type,
const TripletSparseMatrix& tsm_block_jacobian_transpose,
int* ordering) {
#ifndef CERES_USE_EIGEN_SPARSE
LOG(FATAL) << "SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE "
"because Ceres was not built with support for "
@@ -183,12 +177,12 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
"This requires enabling building with -DEIGENSPARSE=ON.";
#else
// This conversion from a TripletSparseMatrix to a Eigen::Triplet
// matrix is unfortunate, but unavoidable for now. It is not a
// significant performance penalty in the grand scheme of
// things. The right thing to do here would be to get a compressed
// row sparse matrix representation of the jacobian and go from
// there. But that is a project for another day.
// TODO(sameeragarwal): This conversion from a TripletSparseMatrix
// to a Eigen::Triplet matrix is unfortunate, but unavoidable for
// now. It is not a significant performance penalty in the grand
// scheme of things. The right thing to do here would be to get a
// compressed row sparse matrix representation of the jacobian and
// go from there. But that is a project for another day.
using SparseMatrix = Eigen::SparseMatrix<int>;
const SparseMatrix block_jacobian =
@@ -196,9 +190,19 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
const SparseMatrix block_hessian =
block_jacobian.transpose() * block_jacobian;
Eigen::AMDOrdering<int> amd_ordering;
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
amd_ordering(block_hessian, perm);
if (linear_solver_ordering_type == ceres::AMD) {
Eigen::AMDOrdering<int> amd_ordering;
amd_ordering(block_hessian, perm);
} else {
#ifndef CERES_NO_EIGEN_METIS
Eigen::MetisOrdering<int> metis_ordering;
metis_ordering(block_hessian, perm);
#else
perm.setIdentity(block_hessian.rows());
#endif
}
for (int i = 0; i < block_hessian.rows(); ++i) {
ordering[i] = perm.indices()[i];
}
@@ -210,7 +214,7 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
const ParameterBlockOrdering& ordering,
Program* program,
string* error) {
std::string* error) {
const int num_parameter_blocks = program->NumParameterBlocks();
if (ordering.NumElements() != num_parameter_blocks) {
*error = StringPrintf(
@@ -222,13 +226,15 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
return false;
}
vector<ParameterBlock*>* parameter_blocks =
std::vector<ParameterBlock*>* parameter_blocks =
program->mutable_parameter_blocks();
parameter_blocks->clear();
const map<int, set<double*>>& groups = ordering.group_to_elements();
// TODO(sameeragarwal): Investigate whether this should be a set or an
// unordered_set.
const std::map<int, std::set<double*>>& groups = ordering.group_to_elements();
for (const auto& p : groups) {
const set<double*>& group = p.second;
const std::set<double*>& group = p.second;
for (double* parameter_block_ptr : group) {
auto it = parameter_map.find(parameter_block_ptr);
if (it == parameter_map.end()) {
@@ -248,16 +254,18 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
bool LexicographicallyOrderResidualBlocks(
const int size_of_first_elimination_group,
Program* program,
string* error) {
std::string* /*error*/) {
CHECK_GE(size_of_first_elimination_group, 1)
<< "Congratulations, you found a Ceres bug! Please report this error "
<< "to the developers.";
// Create a histogram of the number of residuals for each E block. There is an
// extra bucket at the end to catch all non-eliminated F blocks.
vector<int> residual_blocks_per_e_block(size_of_first_elimination_group + 1);
vector<ResidualBlock*>* residual_blocks = program->mutable_residual_blocks();
vector<int> min_position_per_residual(residual_blocks->size());
std::vector<int> residual_blocks_per_e_block(size_of_first_elimination_group +
1);
std::vector<ResidualBlock*>* residual_blocks =
program->mutable_residual_blocks();
std::vector<int> min_position_per_residual(residual_blocks->size());
for (int i = 0; i < residual_blocks->size(); ++i) {
ResidualBlock* residual_block = (*residual_blocks)[i];
int position =
@@ -270,7 +278,7 @@ bool LexicographicallyOrderResidualBlocks(
// Run a cumulative sum on the histogram, to obtain offsets to the start of
// each histogram bucket (where each bucket is for the residuals for that
// E-block).
vector<int> offsets(size_of_first_elimination_group + 1);
std::vector<int> offsets(size_of_first_elimination_group + 1);
std::partial_sum(residual_blocks_per_e_block.begin(),
residual_blocks_per_e_block.end(),
offsets.begin());
@@ -289,9 +297,9 @@ bool LexicographicallyOrderResidualBlocks(
// of the bucket. The filling order among the buckets is dictated by the
// residual blocks. This loop uses the offsets as counters; subtracting one
// from each offset as a residual block is placed in the bucket. When the
// filling is finished, the offset pointerts should have shifted down one
// filling is finished, the offset pointers should have shifted down one
// entry (this is verified below).
vector<ResidualBlock*> reordered_residual_blocks(
std::vector<ResidualBlock*> reordered_residual_blocks(
(*residual_blocks).size(), static_cast<ResidualBlock*>(nullptr));
for (int i = 0; i < residual_blocks->size(); ++i) {
int bucket = min_position_per_residual[i];
@@ -326,18 +334,18 @@ bool LexicographicallyOrderResidualBlocks(
return true;
}
// Pre-order the columns corresponding to the schur complement if
// Pre-order the columns corresponding to the Schur complement if
// possible.
static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
static void ReorderSchurComplementColumnsUsingSuiteSparse(
const ParameterBlockOrdering& parameter_block_ordering, Program* program) {
#ifndef CERES_NO_SUITESPARSE
#ifdef CERES_NO_SUITESPARSE
// "Void"ing values to avoid compiler warnings about unused parameters
(void)parameter_block_ordering;
(void)program;
#else
SuiteSparse ss;
if (!SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) {
return;
}
vector<int> constraints;
vector<ParameterBlock*>& parameter_blocks =
std::vector<int> constraints;
std::vector<ParameterBlock*>& parameter_blocks =
*(program->mutable_parameter_blocks());
for (auto* parameter_block : parameter_blocks) {
@@ -348,7 +356,7 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
// Renumber the entries of constraints to be contiguous integers as
// CAMD requires that the group ids be in the range [0,
// parameter_blocks.size() - 1].
MapValuesToContiguousRange(constraints.size(), &constraints[0]);
MapValuesToContiguousRange(constraints.size(), constraints.data());
// Compute a block sparse representation of J'.
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
@@ -357,12 +365,12 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
cholmod_sparse* block_jacobian_transpose =
ss.CreateSparseMatrix(tsm_block_jacobian_transpose.get());
vector<int> ordering(parameter_blocks.size(), 0);
std::vector<int> ordering(parameter_blocks.size(), 0);
ss.ConstrainedApproximateMinimumDegreeOrdering(
block_jacobian_transpose, &constraints[0], &ordering[0]);
block_jacobian_transpose, constraints.data(), ordering.data());
ss.Free(block_jacobian_transpose);
const vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
const std::vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
for (int i = 0; i < program->NumParameterBlocks(); ++i) {
parameter_blocks[i] = parameter_blocks_copy[ordering[i]];
}
@@ -371,14 +379,14 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
#endif
}
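MapValuesToContiguousRange is needed because CAMD expects the constraint (group) ids to be exactly 0..k-1. One way such a compaction could be implemented, shown here only as an assumed sketch of the behavior:

#include <algorithm>
#include <vector>

// Remaps arbitrary integer labels onto 0..k-1 while preserving their
// relative order, e.g. {10, 3, 10, 7} becomes {2, 0, 2, 1}.
void MapToContiguousRangeSketch(std::vector<int>& values) {
  std::vector<int> unique_sorted(values);
  std::sort(unique_sorted.begin(), unique_sorted.end());
  unique_sorted.erase(std::unique(unique_sorted.begin(), unique_sorted.end()),
                      unique_sorted.end());
  for (int& v : values) {
    v = static_cast<int>(std::lower_bound(unique_sorted.begin(),
                                          unique_sorted.end(), v) -
                         unique_sorted.begin());
  }
}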
static void MaybeReorderSchurComplementColumnsUsingEigen(
static void ReorderSchurComplementColumnsUsingEigen(
LinearSolverOrderingType ordering_type,
const int size_of_first_elimination_group,
const ProblemImpl::ParameterMap& parameter_map,
const ProblemImpl::ParameterMap& /*parameter_map*/,
Program* program) {
#if defined(CERES_USE_EIGEN_SPARSE)
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
program->CreateJacobianBlockSparsityTranspose());
using SparseMatrix = Eigen::SparseMatrix<int>;
const SparseMatrix block_jacobian =
CreateBlockJacobian(*tsm_block_jacobian_transpose);
@@ -399,12 +407,22 @@ static void MaybeReorderSchurComplementColumnsUsingEigen(
const SparseMatrix block_schur_complement =
F.transpose() * F - F.transpose() * E * E.transpose() * F;
Eigen::AMDOrdering<int> amd_ordering;
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
amd_ordering(block_schur_complement, perm);
if (ordering_type == ceres::AMD) {
Eigen::AMDOrdering<int> amd_ordering;
amd_ordering(block_schur_complement, perm);
} else {
#ifndef CERES_NO_EIGEN_METIS
Eigen::MetisOrdering<int> metis_ordering;
metis_ordering(block_schur_complement, perm);
#else
perm.setIdentity(block_schur_complement.rows());
#endif
}
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
vector<ParameterBlock*> ordering(num_cols);
const std::vector<ParameterBlock*>& parameter_blocks =
program->parameter_blocks();
std::vector<ParameterBlock*> ordering(num_cols);
// The ordering of the first size_of_first_elimination_group does
// not matter, so we preserve the existing ordering.
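For reference, the Eigen ordering call used above reduces to a few lines on any sparse pattern; a minimal sketch (assumes Eigen's OrderingMethods module):

#include <Eigen/OrderingMethods>
#include <Eigen/SparseCore>

int main() {
  // A small symmetric sparsity pattern; only the structure matters.
  Eigen::SparseMatrix<int> pattern(3, 3);
  pattern.insert(0, 0) = 1;
  pattern.insert(1, 1) = 1;
  pattern.insert(2, 2) = 1;
  pattern.insert(0, 2) = 1;
  pattern.insert(2, 0) = 1;
  pattern.makeCompressed();

  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
  Eigen::AMDOrdering<int> amd_ordering;
  amd_ordering(pattern, perm);  // perm now holds a fill-reducing ordering
  return 0;
}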
@@ -426,10 +444,11 @@ static void MaybeReorderSchurComplementColumnsUsingEigen(
bool ReorderProgramForSchurTypeLinearSolver(
const LinearSolverType linear_solver_type,
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
const LinearSolverOrderingType linear_solver_ordering_type,
const ProblemImpl::ParameterMap& parameter_map,
ParameterBlockOrdering* parameter_block_ordering,
Program* program,
string* error) {
std::string* error) {
if (parameter_block_ordering->NumElements() !=
program->NumParameterBlocks()) {
*error = StringPrintf(
@@ -447,7 +466,7 @@ bool ReorderProgramForSchurTypeLinearSolver(
// parameter block ordering as it sees fit. For Schur type solvers,
// this means that the user wishes for Ceres to identify the
// e_blocks, which we do by computing a maximal independent set.
vector<ParameterBlock*> schur_ordering;
std::vector<ParameterBlock*> schur_ordering;
const int size_of_first_elimination_group =
ComputeStableSchurOrdering(*program, &schur_ordering);
@@ -470,7 +489,10 @@ bool ReorderProgramForSchurTypeLinearSolver(
// group.
// Verify that the first elimination group is an independent set.
const set<double*>& first_elimination_group =
// TODO(sameeragarwal): Investigate if this should be a set or an
// unordered_set.
const std::set<double*>& first_elimination_group =
parameter_block_ordering->group_to_elements().begin()->second;
if (!program->IsParameterBlockSetIndependent(first_elimination_group)) {
*error = StringPrintf(
@@ -492,12 +514,20 @@ bool ReorderProgramForSchurTypeLinearSolver(
parameter_block_ordering->group_to_elements().begin()->second.size();
if (linear_solver_type == SPARSE_SCHUR) {
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
MaybeReorderSchurComplementColumnsUsingSuiteSparse(
*parameter_block_ordering, program);
if (sparse_linear_algebra_library_type == SUITE_SPARSE &&
linear_solver_ordering_type == ceres::AMD) {
// Pre-ordering support for the Schur complement only works with AMD
// for now, since we are using CAMD.
//
// TODO(sameeragarwal): It may be worth adding pre-ordering support for
// nested dissection too.
ReorderSchurComplementColumnsUsingSuiteSparse(*parameter_block_ordering,
program);
} else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
MaybeReorderSchurComplementColumnsUsingEigen(
size_of_first_elimination_group, parameter_map, program);
ReorderSchurComplementColumnsUsingEigen(linear_solver_ordering_type,
size_of_first_elimination_group,
parameter_map,
program);
}
}
@@ -509,10 +539,11 @@ bool ReorderProgramForSchurTypeLinearSolver(
bool ReorderProgramForSparseCholesky(
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
const LinearSolverOrderingType linear_solver_ordering_type,
const ParameterBlockOrdering& parameter_block_ordering,
int start_row_block,
Program* program,
string* error) {
std::string* error) {
if (parameter_block_ordering.NumElements() != program->NumParameterBlocks()) {
*error = StringPrintf(
"The program has %d parameter blocks, but the parameter block "
@@ -526,19 +557,17 @@ bool ReorderProgramForSparseCholesky(
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
program->CreateJacobianBlockSparsityTranspose(start_row_block));
vector<int> ordering(program->NumParameterBlocks(), 0);
vector<ParameterBlock*>& parameter_blocks =
std::vector<int> ordering(program->NumParameterBlocks(), 0);
std::vector<ParameterBlock*>& parameter_blocks =
*(program->mutable_parameter_blocks());
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
OrderingForSparseNormalCholeskyUsingSuiteSparse(
linear_solver_ordering_type,
*tsm_block_jacobian_transpose,
parameter_blocks,
parameter_block_ordering,
&ordering[0]);
} else if (sparse_linear_algebra_library_type == CX_SPARSE) {
OrderingForSparseNormalCholeskyUsingCXSparse(*tsm_block_jacobian_transpose,
&ordering[0]);
ordering.data());
} else if (sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
// Accelerate does not provide a function to perform reordering without
// performing a full symbolic factorisation. As such, we have nothing
@@ -550,11 +579,13 @@ bool ReorderProgramForSparseCholesky(
} else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
OrderingForSparseNormalCholeskyUsingEigenSparse(
*tsm_block_jacobian_transpose, &ordering[0]);
linear_solver_ordering_type,
*tsm_block_jacobian_transpose,
ordering.data());
}
// Apply ordering.
const vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
const std::vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
for (int i = 0; i < program->NumParameterBlocks(); ++i) {
parameter_blocks[i] = parameter_blocks_copy[ordering[i]];
}
@@ -575,5 +606,39 @@ int ReorderResidualBlocksByPartition(
return it - residual_blocks->begin();
}
} // namespace internal
} // namespace ceres
bool AreJacobianColumnsOrdered(
const LinearSolverType linear_solver_type,
const PreconditionerType preconditioner_type,
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
const LinearSolverOrderingType linear_solver_ordering_type) {
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
(linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
return true;
}
if (linear_solver_type == SPARSE_SCHUR &&
linear_solver_ordering_type == ceres::AMD) {
return true;
}
return false;
}
if (sparse_linear_algebra_library_type == ceres::EIGEN_SPARSE) {
if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
linear_solver_type == SPARSE_SCHUR ||
(linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
return true;
}
return false;
}
if (sparse_linear_algebra_library_type == ceres::ACCELERATE_SPARSE) {
// Apple's Accelerate framework does not allow direct access to
// ordering algorithms, so Jacobian columns are never pre-ordered.
return false;
}
return false;
}
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,12 +35,12 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
#include "ceres/parameter_block_ordering.h"
#include "ceres/problem_impl.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
@@ -76,6 +76,7 @@ CERES_NO_EXPORT bool LexicographicallyOrderResidualBlocks(
CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver(
LinearSolverType linear_solver_type,
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
LinearSolverOrderingType linear_solver_ordering_type,
const ProblemImpl::ParameterMap& parameter_map,
ParameterBlockOrdering* parameter_block_ordering,
Program* program,
@@ -93,6 +94,7 @@ CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver(
// ordering will take it into account, otherwise it will be ignored.
CERES_NO_EXPORT bool ReorderProgramForSparseCholesky(
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
LinearSolverOrderingType linear_solver_ordering_type,
const ParameterBlockOrdering& parameter_block_ordering,
int start_row_block,
Program* program,
@@ -112,8 +114,15 @@ CERES_NO_EXPORT int ReorderResidualBlocksByPartition(
const std::unordered_set<ResidualBlockId>& bottom_residual_blocks,
Program* program);
} // namespace internal
} // namespace ceres
// The return value of this function indicates whether the columns of
// the Jacobian can be reordered using a fill reducing ordering.
CERES_NO_EXPORT bool AreJacobianColumnsOrdered(
LinearSolverType linear_solver_type,
PreconditionerType preconditioner_type,
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
LinearSolverOrderingType linear_solver_ordering_type);
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
using Eigen::Dynamic;
namespace ceres {
namespace internal {
namespace ceres::internal {
ResidualBlock::ResidualBlock(
const CostFunction* cost_function,
@@ -114,8 +113,7 @@ bool ResidualBlock::Evaluate(const bool apply_loss_function,
return false;
}
if (!IsEvaluationValid(
*this, parameters.data(), cost, residuals, eval_jacobians)) {
if (!IsEvaluationValid(*this, parameters.data(), residuals, eval_jacobians)) {
// clang-format off
std::string message =
"\n\n"
@@ -216,5 +214,4 @@ int ResidualBlock::NumScratchDoublesForEvaluate() const {
return scratch_doubles;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
#include <cmath>
#include <cstddef>
#include <limits>
#include <string>
#include "ceres/array_utils.h"
#include "ceres/internal/eigen.h"
@@ -42,10 +43,7 @@
#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::string;
namespace ceres::internal {
void InvalidateEvaluation(const ResidualBlock& block,
double* cost,
@@ -64,17 +62,17 @@ void InvalidateEvaluation(const ResidualBlock& block,
}
}
string EvaluationToString(const ResidualBlock& block,
double const* const* parameters,
double* cost,
double* residuals,
double** jacobians) {
std::string EvaluationToString(const ResidualBlock& block,
double const* const* parameters,
double* cost,
double* residuals,
double** jacobians) {
CHECK(cost != nullptr);
CHECK(residuals != nullptr);
const int num_parameter_blocks = block.NumParameterBlocks();
const int num_residuals = block.NumResiduals();
string result = "";
std::string result = "";
// clang-format off
StringAppendF(&result,
@@ -89,7 +87,7 @@ string EvaluationToString(const ResidualBlock& block,
"to Inf or NaN is also an error. \n\n"; // NOLINT
// clang-format on
string space = "Residuals: ";
std::string space = "Residuals: ";
result += space;
AppendArrayToString(num_residuals, residuals, &result);
StringAppendF(&result, "\n\n");
@@ -117,9 +115,11 @@ string EvaluationToString(const ResidualBlock& block,
return result;
}
// TODO(sameeragarwal) Check cost value validness here
// The cost value is part of the evaluation but is not checked here because,
// per residual_block.cc, it is not yet valid when this method is called.
bool IsEvaluationValid(const ResidualBlock& block,
double const* const* parameters,
double* cost,
double const* const* /*parameters*/,
double* residuals,
double** jacobians) {
const int num_parameter_blocks = block.NumParameterBlocks();
@@ -141,5 +141,4 @@ bool IsEvaluationValid(const ResidualBlock& block,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
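After this change IsEvaluationValid no longer inspects the cost; it only scans the residual and Jacobian arrays for non-finite values. The core check amounts to the following sketch (hypothetical helper, not the Ceres function):

#include <cmath>

// True iff all n doubles are finite (no NaN or +/-Inf). A null array
// means the block was not requested and is therefore skipped.
inline bool AllFinite(const double* values, int n) {
  if (values == nullptr) return true;
  for (int i = 0; i < n; ++i) {
    if (!std::isfinite(values[i])) return false;
  }
  return true;
}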

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ResidualBlock;
@@ -64,7 +63,6 @@ void InvalidateEvaluation(const ResidualBlock& block,
CERES_NO_EXPORT
bool IsEvaluationValid(const ResidualBlock& block,
double const* const* parameters,
double* cost,
double* residuals,
double** jacobians);
@@ -78,7 +76,6 @@ std::string EvaluationToString(const ResidualBlock& block,
double* residuals,
double** jacobians);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_RESIDUAL_BLOCK_UTILS_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,7 @@
#include <ctime>
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include "Eigen/Dense"
@@ -52,58 +53,36 @@
#include "ceres/types.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
using std::make_pair;
using std::pair;
using std::set;
using std::vector;
namespace ceres::internal {
namespace {
class BlockRandomAccessSparseMatrixAdapter final : public LinearOperator {
class BlockRandomAccessSparseMatrixAdapter final
: public ConjugateGradientsLinearOperator<Vector> {
public:
explicit BlockRandomAccessSparseMatrixAdapter(
const BlockRandomAccessSparseMatrix& m)
: m_(m) {}
// y = y + Ax;
void RightMultiply(const double* x, double* y) const final {
m_.SymmetricRightMultiply(x, y);
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
m_.SymmetricRightMultiplyAndAccumulate(x.data(), y.data());
}
// y = y + A'x;
void LeftMultiply(const double* x, double* y) const final {
m_.SymmetricRightMultiply(x, y);
}
int num_rows() const final { return m_.num_rows(); }
int num_cols() const final { return m_.num_rows(); }
private:
const BlockRandomAccessSparseMatrix& m_;
};
class BlockRandomAccessDiagonalMatrixAdapter final : public LinearOperator {
class BlockRandomAccessDiagonalMatrixAdapter final
: public ConjugateGradientsLinearOperator<Vector> {
public:
explicit BlockRandomAccessDiagonalMatrixAdapter(
const BlockRandomAccessDiagonalMatrix& m)
: m_(m) {}
// y = y + Ax;
void RightMultiply(const double* x, double* y) const final {
m_.RightMultiply(x, y);
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
m_.RightMultiplyAndAccumulate(x.data(), y.data());
}
// y = y + A'x;
void LeftMultiply(const double* x, double* y) const final {
m_.RightMultiply(x, y);
}
int num_rows() const final { return m_.num_rows(); }
int num_cols() const final { return m_.num_rows(); }
private:
const BlockRandomAccessDiagonalMatrix& m_;
};
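The two adapters above wrap random-access matrices in the new vector-typed conjugate gradients operator interface. The same pattern applied to a plain dense Eigen matrix would look roughly like this (a sketch against an assumed interface shape, not the Ceres class):

#include <Eigen/Dense>

using Vector = Eigen::VectorXd;

// Assumed shape of the interface the adapters above implement.
template <typename DenseVectorType>
class ConjugateGradientsLinearOperatorSketch {
 public:
  virtual ~ConjugateGradientsLinearOperatorSketch() = default;
  // y = y + A x
  virtual void RightMultiplyAndAccumulate(const DenseVectorType& x,
                                          DenseVectorType& y) = 0;
};

class DenseMatrixAdapter final
    : public ConjugateGradientsLinearOperatorSketch<Vector> {
 public:
  explicit DenseMatrixAdapter(const Eigen::MatrixXd& m) : m_(m) {}
  void RightMultiplyAndAccumulate(const Vector& x, Vector& y) override {
    y.noalias() += m_ * x;  // accumulate, matching the y = y + Ax contract
  }

 private:
  const Eigen::MatrixXd& m_;
};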
@@ -126,7 +105,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
EventLogger event_logger("SchurComplementSolver::Solve");
const CompressedRowBlockStructure* bs = A->block_structure();
if (eliminator_.get() == nullptr) {
if (eliminator_ == nullptr) {
const int num_eliminate_blocks = options_.elimination_groups[0];
const int num_f_blocks = bs->cols.size() - num_eliminate_blocks;
@@ -161,7 +140,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
b,
per_solve_options.D,
lhs_.get(),
rhs_.get());
rhs_.data());
event_logger.AddEvent("Eliminate");
double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
@@ -169,7 +148,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
SolveReducedLinearSystem(per_solve_options, reduced_solution);
event_logger.AddEvent("ReducedSolve");
if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
if (summary.termination_type == LinearSolverTerminationType::SUCCESS) {
eliminator_->BackSubstitute(
BlockSparseMatrixData(*A), b, per_solve_options.D, reduced_solution, x);
event_logger.AddEvent("BackSubstitute");
@@ -190,24 +169,21 @@ void DenseSchurComplementSolver::InitStorage(
const CompressedRowBlockStructure* bs) {
const int num_eliminate_blocks = options().elimination_groups[0];
const int num_col_blocks = bs->cols.size();
vector<int> blocks(num_col_blocks - num_eliminate_blocks, 0);
for (int i = num_eliminate_blocks, j = 0; i < num_col_blocks; ++i, ++j) {
blocks[j] = bs->cols[i].size;
}
set_lhs(std::make_unique<BlockRandomAccessDenseMatrix>(blocks));
set_rhs(std::make_unique<double[]>(lhs()->num_rows()));
auto blocks = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
set_lhs(std::make_unique<BlockRandomAccessDenseMatrix>(
blocks, options().context, options().num_threads));
ResizeRhs(lhs()->num_rows());
}
// Solve the system Sx = r, assuming that the matrix S is stored in a
// BlockRandomAccessDenseMatrix. The linear system is solved using
// Eigen's Cholesky factorization.
LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem(
const LinearSolver::PerSolveOptions& per_solve_options, double* solution) {
const LinearSolver::PerSolveOptions& /*per_solve_options*/,
double* solution) {
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Success.";
auto* m = down_cast<BlockRandomAccessDenseMatrix*>(mutable_lhs());
@@ -221,7 +197,7 @@ LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem(
summary.num_iterations = 1;
summary.termination_type = cholesky_->FactorAndSolve(
num_rows, m->mutable_values(), rhs(), solution, &summary.message);
num_rows, m->mutable_values(), rhs().data(), solution, &summary.message);
return summary;
}
@@ -233,7 +209,14 @@ SparseSchurComplementSolver::SparseSchurComplementSolver(
}
}
SparseSchurComplementSolver::~SparseSchurComplementSolver() = default;
SparseSchurComplementSolver::~SparseSchurComplementSolver() {
for (int i = 0; i < 4; ++i) {
if (scratch_[i]) {
delete scratch_[i];
scratch_[i] = nullptr;
}
}
}
// Determine the non-zero blocks in the Schur complement matrix, and
// initialize a BlockRandomAccessSparseMatrix object.
@@ -243,14 +226,11 @@ void SparseSchurComplementSolver::InitStorage(
const int num_col_blocks = bs->cols.size();
const int num_row_blocks = bs->rows.size();
blocks_.resize(num_col_blocks - num_eliminate_blocks, 0);
for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) {
blocks_[i - num_eliminate_blocks] = bs->cols[i].size;
}
blocks_ = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
set<pair<int, int>> block_pairs;
std::set<std::pair<int, int>> block_pairs;
for (int i = 0; i < blocks_.size(); ++i) {
block_pairs.insert(make_pair(i, i));
block_pairs.emplace(i, i);
}
int r = 0;
@@ -259,7 +239,7 @@ void SparseSchurComplementSolver::InitStorage(
if (e_block_id >= num_eliminate_blocks) {
break;
}
vector<int> f_blocks;
std::vector<int> f_blocks;
// Add to the chunk until the first block in the row is
// different from the one in the first row for the chunk.
@@ -281,7 +261,7 @@ void SparseSchurComplementSolver::InitStorage(
f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
for (int i = 0; i < f_blocks.size(); ++i) {
for (int j = i + 1; j < f_blocks.size(); ++j) {
block_pairs.insert(make_pair(f_blocks[i], f_blocks[j]));
block_pairs.emplace(f_blocks[i], f_blocks[j]);
}
}
}
@@ -296,15 +276,15 @@ void SparseSchurComplementSolver::InitStorage(
for (const auto& cell : row.cells) {
int r_block2_id = cell.block_id - num_eliminate_blocks;
if (r_block1_id <= r_block2_id) {
block_pairs.insert(make_pair(r_block1_id, r_block2_id));
block_pairs.emplace(r_block1_id, r_block2_id);
}
}
}
}
set_lhs(
std::make_unique<BlockRandomAccessSparseMatrix>(blocks_, block_pairs));
set_rhs(std::make_unique<double[]>(lhs()->num_rows()));
set_lhs(std::make_unique<BlockRandomAccessSparseMatrix>(
blocks_, block_pairs, options().context, options().num_threads));
ResizeRhs(lhs()->num_rows());
}
LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem(
@@ -316,32 +296,39 @@ LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem(
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Success.";
const TripletSparseMatrix* tsm =
const BlockSparseMatrix* bsm =
down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix();
if (tsm->num_rows() == 0) {
if (bsm->num_rows() == 0) {
return summary;
}
std::unique_ptr<CompressedRowSparseMatrix> lhs;
const CompressedRowSparseMatrix::StorageType storage_type =
sparse_cholesky_->StorageType();
if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR);
if (storage_type ==
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
if (!crs_lhs_) {
crs_lhs_ = bsm->ToCompressedRowSparseMatrix();
crs_lhs_->set_storage_type(
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
} else {
bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get());
}
} else {
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR);
if (!crs_lhs_) {
crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose();
crs_lhs_->set_storage_type(
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
} else {
bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get());
}
}
*lhs->mutable_col_blocks() = blocks_;
*lhs->mutable_row_blocks() = blocks_;
summary.num_iterations = 1;
summary.termination_type = sparse_cholesky_->FactorAndSolve(
lhs.get(), rhs(), solution, &summary.message);
crs_lhs_.get(), rhs().data(), solution, &summary.message);
return summary;
}
@@ -355,7 +342,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
if (num_rows == 0) {
LinearSolver::Summary summary;
summary.num_iterations = 0;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Success.";
return summary;
}
@@ -363,9 +350,9 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
// Only SCHUR_JACOBI is supported here right now.
CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI);
if (preconditioner_.get() == nullptr) {
preconditioner_ =
std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks_);
if (preconditioner_ == nullptr) {
preconditioner_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
blocks_, options().context, options().num_threads);
}
auto* sc = down_cast<BlockRandomAccessSparseMatrix*>(mutable_lhs());
@@ -373,7 +360,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
// Extract block diagonal from the Schur complement to construct the
// schur_jacobi preconditioner.
for (int i = 0; i < blocks_.size(); ++i) {
const int block_size = blocks_[i];
const int block_size = blocks_[i].size;
int sc_r, sc_c, sc_row_stride, sc_col_stride;
CellInfo* sc_cell_info =
@@ -394,25 +381,28 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
VectorRef(solution, num_rows).setZero();
std::unique_ptr<LinearOperator> lhs_adapter =
std::make_unique<BlockRandomAccessSparseMatrixAdapter>(*sc);
std::unique_ptr<LinearOperator> preconditioner_adapter =
auto lhs = std::make_unique<BlockRandomAccessSparseMatrixAdapter>(*sc);
auto preconditioner =
std::make_unique<BlockRandomAccessDiagonalMatrixAdapter>(
*preconditioner_);
LinearSolver::Options cg_options;
ConjugateGradientsSolverOptions cg_options;
cg_options.min_num_iterations = options().min_num_iterations;
cg_options.max_num_iterations = options().max_num_iterations;
ConjugateGradientsSolver cg_solver(cg_options);
cg_options.residual_reset_period = options().residual_reset_period;
cg_options.q_tolerance = per_solve_options.q_tolerance;
cg_options.r_tolerance = per_solve_options.r_tolerance;
LinearSolver::PerSolveOptions cg_per_solve_options;
cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
cg_per_solve_options.preconditioner = preconditioner_adapter.get();
return cg_solver.Solve(
lhs_adapter.get(), rhs(), cg_per_solve_options, solution);
cg_solution_ = Vector::Zero(sc->num_rows());
for (int i = 0; i < 4; ++i) {
if (scratch_[i] == nullptr) {
scratch_[i] = new Vector(sc->num_rows());
}
}
auto summary = ConjugateGradientsSolver<Vector>(
cg_options, *lhs, rhs(), *preconditioner, scratch_, cg_solution_);
VectorRef(solution, sc->num_rows()) = cg_solution_;
return summary;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
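The four scratch vectors threaded through the solve above are roughly what a preconditioned CG iteration needs: the residual, the preconditioned residual, the search direction, and A times the direction. For orientation, a textbook unpreconditioned CG in Eigen looks like this (illustrative only, not the Ceres ConjugateGradientsSolver):

#include <cmath>

#include <Eigen/Dense>

using Vector = Eigen::VectorXd;
using Matrix = Eigen::MatrixXd;

// Minimal conjugate gradients for a symmetric positive definite A.
Vector SolveCG(const Matrix& A, const Vector& b, int max_iter, double tol) {
  Vector x = Vector::Zero(b.size());
  Vector r = b;         // residual b - A x, with x = 0
  Vector p = r;         // search direction
  Vector Ap(b.size());  // scratch for A * p
  double rTr = r.squaredNorm();
  for (int i = 0; i < max_iter && std::sqrt(rTr) > tol; ++i) {
    Ap.noalias() = A * p;
    const double alpha = rTr / p.dot(Ap);
    x += alpha * p;
    r -= alpha * Ap;
    const double rTr_new = r.squaredNorm();
    p = r + (rTr_new / rTr) * p;
    rTr = rTr_new;
  }
  return x;
}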

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -54,8 +54,7 @@
#include "ceres/internal/disable_warnings.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
class SparseCholesky;
@@ -66,7 +65,7 @@ class SparseCholesky;
//
// E y + F z = b
//
// Where x = [y;z] is a partition of the variables. The paritioning
// Where x = [y;z] is a partition of the variables. The partitioning
// of the variables is such that, E'E is a block diagonal
// matrix. Further, the rows of A are ordered so that for every
// variable block in y, all the rows containing that variable block
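Eliminating y from the normal equations of this partitioned system gives the reduced system the solvers below work with; in the comment's notation (a standard derivation, not text from the commit):

\[
\begin{aligned}
\begin{bmatrix} E^\top E & E^\top F \\ F^\top E & F^\top F \end{bmatrix}
\begin{bmatrix} y \\ z \end{bmatrix}
&= \begin{bmatrix} E^\top b \\ F^\top b \end{bmatrix}, \\
S &= F^\top F - F^\top E\,(E^\top E)^{-1}E^\top F, \qquad
S\,z = F^\top b - F^\top E\,(E^\top E)^{-1}E^\top b, \\
y &= (E^\top E)^{-1}E^\top\!\left(b - F z\right).
\end{aligned}
\]

Because E'E is block diagonal, inverting it is cheap, which is what makes the elimination worthwhile.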
@@ -131,9 +130,8 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver {
}
const BlockRandomAccessMatrix* lhs() const { return lhs_.get(); }
BlockRandomAccessMatrix* mutable_lhs() { return lhs_.get(); }
void set_rhs(std::unique_ptr<double[]> rhs) { rhs_ = std::move(rhs); }
const double* rhs() const { return rhs_.get(); }
void ResizeRhs(int n) { rhs_.resize(n); }
const Vector& rhs() const { return rhs_; }
private:
virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0;
@@ -145,7 +143,7 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver {
std::unique_ptr<SchurEliminatorBase> eliminator_;
std::unique_ptr<BlockRandomAccessMatrix> lhs_;
std::unique_ptr<double[]> rhs_;
Vector rhs_;
};
// Dense Cholesky factorization based solver.
@@ -185,14 +183,15 @@ class CERES_NO_EXPORT SparseSchurComplementSolver final
LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients(
const LinearSolver::PerSolveOptions& per_solve_options, double* solution);
// Size of the blocks in the Schur complement.
std::vector<int> blocks_;
std::vector<Block> blocks_;
std::unique_ptr<SparseCholesky> sparse_cholesky_;
std::unique_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_;
std::unique_ptr<CompressedRowSparseMatrix> crs_lhs_;
Vector cg_solution_;
Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,7 @@
#include "ceres/linear_solver.h"
#include "ceres/schur_eliminator.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
SchurEliminatorBase::~SchurEliminatorBase() = default;
@@ -161,5 +160,4 @@ std::unique_ptr<SchurEliminatorBase> SchurEliminatorBase::Create(
Eigen::Dynamic>>(options);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -46,8 +46,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Classes implementing the SchurEliminatorBase interface implement
// variable elimination for linear least squares problems. Assuming
@@ -169,9 +168,9 @@ class CERES_NO_EXPORT SchurEliminatorBase {
public:
virtual ~SchurEliminatorBase();
// Initialize the eliminator. It is the user's responsibilty to call
// Initialize the eliminator. It is the user's responsibility to call
// this function before calling Eliminate or BackSubstitute. It is
// also the caller's responsibilty to ensure that the
// also the caller's responsibility to ensure that the
// CompressedRowBlockStructure object passed to this method is the
// same one (or is equivalent to) the one associated with the
// BlockSparseMatrix objects below.
@@ -383,8 +382,9 @@ template <int kRowBlockSize = Eigen::Dynamic,
class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
: public SchurEliminatorBase {
public:
// TODO(sameeragarwal) Find out why "assume_full_rank_ete" is not used here
void Init(int num_eliminate_blocks,
bool assume_full_rank_ete,
bool /*assume_full_rank_ete*/,
const CompressedRowBlockStructure* bs) override {
CHECK_GT(num_eliminate_blocks, 0)
<< "SchurComplementSolver cannot be initialized with "
@@ -447,7 +447,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
const CompressedRowBlockStructure* bs = A.block_structure();
const double* values = A.values();
// Add the diagonal to the schur complement.
// Add the diagonal to the Schur complement.
if (D != nullptr) {
typename EigenTypes<kFBlockSize>::ConstVectorRef diag(
D + bs->cols[num_eliminate_blocks_].position, kFBlockSize);
@@ -479,7 +479,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
const Chunk& chunk = chunks_[i];
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
// Naming covention, e_t_e = e_block.transpose() * e_block;
// Naming convention, e_t_e = e_block.transpose() * e_block;
Eigen::Matrix<double, kEBlockSize, kEBlockSize> e_t_e;
Eigen::Matrix<double, kEBlockSize, kFBlockSize> e_t_f;
Eigen::Matrix<double, kEBlockSize, 1> e_t_b;
@@ -570,7 +570,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
// y_i = e_t_e_inverse * sum_i e_i^T * (b_i - f_i * z);
void BackSubstitute(const BlockSparseMatrixData& A,
const double* b,
const double* D,
const double* /*D*/,
const double* z_ptr,
double* y) override {
typename EigenTypes<kFBlockSize>::ConstVectorRef z(z_ptr, kFBlockSize);
@@ -623,8 +623,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
std::vector<double> e_t_e_inverse_matrices_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -69,8 +69,7 @@
#include "ceres/thread_token_provider.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() {
@@ -107,7 +106,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Init(
}
// TODO(sameeragarwal): Now that we may have subset block structure,
// we need to make sure that we account for the fact that somep
// we need to make sure that we account for the fact that some
// point blocks only have a "diagonal" row and nothing more.
//
// This likely requires a slightly different algorithm, which works
@@ -206,8 +205,6 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Eliminate(
const int block_size = bs->cols[i].size;
typename EigenTypes<Eigen::Dynamic>::ConstVectorRef diag(
D + bs->cols[i].position, block_size);
std::lock_guard<std::mutex> l(cell_info->m);
MatrixRef m(cell_info->values, row_stride, col_stride);
m.block(r, c, block_size, block_size).diagonal() +=
diag.array().square().matrix();
@@ -301,7 +298,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Eliminate(
thread_id, bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
});
// For rows with no e_blocks, the schur complement update reduces to
// For rows with no e_blocks, the Schur complement update reduces to
// S += F'F.
NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs);
}
@@ -410,7 +407,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::UpdateRhs(
const int block_id = row.cells[c].block_id;
const int block_size = bs->cols[block_id].size;
const int block = block_id - num_eliminate_blocks_;
std::lock_guard<std::mutex> l(*rhs_locks_[block]);
auto lock = MakeConditionalLock(num_threads_, *rhs_locks_[block]);
// clang-format off
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
values + row.cells[c].position,
@@ -433,7 +430,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::UpdateRhs(
//
// ete = y11 * y11' + y12 * y12'
//
// and the off diagonal blocks in the Guass Newton Hessian.
// and the off diagonal blocks in the Gauss Newton Hessian.
//
// buffer = [y11'(z11 + z12), y12' * z22, y11' * z51]
//
@@ -550,7 +547,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
if (cell_info != nullptr) {
const int block2_size = bs->cols[it2->first].size;
std::lock_guard<std::mutex> l(cell_info->m);
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
// clang-format off
MatrixMatrixMultiply
<kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
@@ -563,7 +560,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
}
// For rows with no e_blocks, the schur complement update reduces to S
// For rows with no e_blocks, the Schur complement update reduces to S
// += F'F. This function iterates over the rows of A with no e_block,
// and calls NoEBlockRowOuterProduct on each row.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
@@ -596,7 +593,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
// A row r of A, which has no e_blocks gets added to the Schur
// Complement as S += r r'. This function is responsible for computing
// complement as S += r r'. This function is responsible for computing
// the contribution of a single row r to the Schur complement. It is
// very similar in structure to EBlockRowOuterProduct except for
// one difference. It does not use any of the template
@@ -627,7 +624,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
CellInfo* cell_info =
lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride);
if (cell_info != nullptr) {
std::lock_guard<std::mutex> l(cell_info->m);
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
// This multiply currently ignores the fact that this is a
// symmetric outer product.
// clang-format off
@@ -648,7 +645,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
if (cell_info != nullptr) {
const int block2_size = bs->cols[row.cells[j].block_id].size;
std::lock_guard<std::mutex> l(cell_info->m);
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
// clang-format off
MatrixTransposeMatrixMultiply
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
@@ -682,7 +679,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
CellInfo* cell_info =
lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride);
if (cell_info != nullptr) {
std::lock_guard<std::mutex> l(cell_info->m);
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
// block += b1.transpose() * b1;
// clang-format off
MatrixTransposeMatrixMultiply
@@ -703,7 +700,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
if (cell_info != nullptr) {
// block += b1.transpose() * b2;
std::lock_guard<std::mutex> l(cell_info->m);
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
// clang-format off
MatrixTransposeMatrixMultiply
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
@@ -716,7 +713,6 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
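The repeated switch from std::lock_guard to MakeConditionalLock lets the single-threaded path skip mutex traffic entirely. A plausible shape for such a helper, shown as an assumption rather than the actual Ceres implementation:

#include <mutex>

// Engages the mutex only when the eliminator actually runs multi-threaded;
// a default-constructed unique_lock owns nothing and costs nothing.
inline std::unique_lock<std::mutex> MakeConditionalLockSketch(int num_threads,
                                                              std::mutex& m) {
  return num_threads > 1 ? std::unique_lock<std::mutex>(m)
                         : std::unique_lock<std::mutex>();
}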

View File

@@ -0,0 +1,150 @@
# Ceres Solver - A fast non-linear least squares minimizer
# Copyright 2023 Google Inc. All rights reserved.
# http://ceres-solver.org/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * Neither the name of Google Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: sameeragarwal@google.com (Sameer Agarwal)
#
# Script for explicitly generating template specialization of the
# SchurEliminator class. It is a rather large class
# and the number of explicit instantiations is also large. Explicitly
# generating these instantiations in separate .cc files breaks the
# compilation into separate compilation units rather than one large .cc
# file, which takes 2+ GB of RAM to compile.
#
# This script creates two sets of files.
#
# 1. schur_eliminator_x_x_x.cc
# where, the x indicates the template parameters and
#
# 2. schur_eliminator.cc
#
# that contains a factory function for instantiating these classes
# based on runtime parameters.
#
# The list of tuples, specializations, indicates the set of
# specializations that is generated.
# Set of template specializations to generate
HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// Template specialization of SchurEliminator.
//
// ========================================
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
//=========================================
//
// This file is generated using generate_template_specializations.py.
"""
DYNAMIC_FILE = """
#include "ceres/schur_eliminator_impl.h"
namespace ceres::internal {
template class SchurEliminator<%s, %s, %s>;
} // namespace ceres::internal
"""
SPECIALIZATION_FILE = """
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
#include "ceres/schur_eliminator_impl.h"
namespace ceres::internal {
template class SchurEliminator<%s, %s, %s>;
} // namespace ceres::internal
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
"""
FACTORY_FILE_HEADER = """
#include <memory>
#include "ceres/linear_solver.h"
#include "ceres/schur_eliminator.h"
namespace ceres::internal {
SchurEliminatorBase::~SchurEliminatorBase() = default;
std::unique_ptr<SchurEliminatorBase> SchurEliminatorBase::Create(
const LinearSolver::Options& options) {
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
"""
FACTORY = """ return std::make_unique<SchurEliminator<%s, %s, %s>>(options);"""
FACTORY_FOOTER = """
#endif
VLOG(1) << "Template specializations not found for <"
<< options.row_block_size << "," << options.e_block_size << ","
<< options.f_block_size << ">";
return std::make_unique<SchurEliminator<Eigen::Dynamic,
Eigen::Dynamic,
Eigen::Dynamic>>(options);
}
} // namespace ceres::internal
"""

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,6 +30,7 @@
#include "ceres/schur_jacobi_preconditioner.h"
#include <memory>
#include <utility>
#include <vector>
@@ -39,8 +40,7 @@
#include "ceres/schur_eliminator.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
SchurJacobiPreconditioner::SchurJacobiPreconditioner(
const CompressedRowBlockStructure& bs, Preconditioner::Options options)
@@ -52,12 +52,16 @@ SchurJacobiPreconditioner::SchurJacobiPreconditioner(
<< "SCHUR_JACOBI preconditioner.";
CHECK(options_.context != nullptr);
std::vector<int> blocks(num_blocks);
std::vector<Block> blocks(num_blocks);
int position = 0;
for (int i = 0; i < num_blocks; ++i) {
blocks[i] = bs.cols[i + options_.elimination_groups[0]].size;
blocks[i] =
Block(bs.cols[i + options_.elimination_groups[0]].size, position);
position += blocks[i].size;
}
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks);
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
blocks, options_.context, options_.num_threads);
InitEliminator(bs);
}
@@ -92,12 +96,11 @@ bool SchurJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
return true;
}
void SchurJacobiPreconditioner::RightMultiply(const double* x,
double* y) const {
m_->RightMultiply(x, y);
void SchurJacobiPreconditioner::RightMultiplyAndAccumulate(const double* x,
double* y) const {
m_->RightMultiplyAndAccumulate(x, y);
}
int SchurJacobiPreconditioner::num_rows() const { return m_->num_rows(); }
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -47,8 +47,7 @@
#include "ceres/internal/export.h"
#include "ceres/preconditioner.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockRandomAccessDiagonalMatrix;
class BlockSparseMatrix;
@@ -72,8 +71,10 @@ class SchurEliminatorBase;
// SchurJacobiPreconditioner preconditioner(
// *A.block_structure(), options);
// preconditioner.Update(A, nullptr);
// preconditioner.RightMultiply(x, y);
// preconditioner.RightMultiplyAndAccumulate(x, y);
//
// TODO(https://github.com/ceres-solver/ceres-solver/issues/935):
// SchurJacobiPreconditioner::RightMultiply will benefit from multithreading
class CERES_NO_EXPORT SchurJacobiPreconditioner
: public BlockSparseMatrixPreconditioner {
public:
@@ -91,7 +92,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner
~SchurJacobiPreconditioner() override;
// Preconditioner interface.
void RightMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
int num_rows() const final;
private:
@@ -104,8 +105,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner
std::unique_ptr<BlockRandomAccessDiagonalMatrix> m_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,14 +36,12 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
CERES_NO_EXPORT
void GetBestSchurTemplateSpecialization(int* row_block_size,
int* e_block_size,
int* f_block_size);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SCHUR_TEMPLATES_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -34,8 +34,7 @@
#include "ceres/internal/export.h"
#include "ceres/thread_token_provider.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Helper class for ThreadTokenProvider. This object acquires a token in its
// constructor and puts that token back with destruction.
@@ -55,7 +54,6 @@ class CERES_NO_EXPORT ScopedThreadToken {
int token_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SCOPED_THREAD_TOKEN_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,23 +36,22 @@
#include "ceres/program.h"
#include "ceres/residual_block.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
std::unique_ptr<ScratchEvaluatePreparer[]> ScratchEvaluatePreparer::Create(
const Program& program, int num_threads) {
const Program& program, unsigned num_threads) {
auto preparers = std::make_unique<ScratchEvaluatePreparer[]>(num_threads);
int max_derivatives_per_residual_block =
program.MaxDerivativesPerResidualBlock();
for (int i = 0; i < num_threads; i++) {
for (unsigned i = 0; i < num_threads; i++) {
preparers[i].Init(max_derivatives_per_residual_block);
}
return preparers;
}
void ScratchEvaluatePreparer::Init(int max_derivatives_per_residual_block) {
jacobian_scratch_ =
std::make_unique<double[]>(max_derivatives_per_residual_block);
jacobian_scratch_ = std::make_unique<double[]>(
static_cast<std::size_t>(max_derivatives_per_residual_block));
}
// Point the Jacobian blocks into the scratch area of this evaluate preparer.
@@ -75,5 +74,4 @@ void ScratchEvaluatePreparer::Prepare(const ResidualBlock* residual_block,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class Program;
class ResidualBlock;
@@ -51,7 +50,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer {
public:
// Create num_threads ScratchEvaluatePreparers.
static std::unique_ptr<ScratchEvaluatePreparer[]> Create(
const Program& program, int num_threads);
const Program& program, unsigned num_threads);
// EvaluatePreparer interface
void Init(int max_derivatives_per_residual_block);
@@ -66,8 +65,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer {
std::unique_ptr<double[]> jacobian_scratch_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -36,8 +36,7 @@
#include "ceres/graph.h"
#include "ceres/graph_algorithms.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
int ComputeSingleLinkageClustering(
const SingleLinkageClusteringOptions& options,
@@ -91,5 +90,4 @@ int ComputeSingleLinkageClustering(
return num_clusters;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct SingleLinkageClusteringOptions {
// Graph edges with edge weight less than min_similarity are ignored
@@ -61,8 +60,7 @@ CERES_NO_EXPORT int ComputeSingleLinkageClustering(
const WeightedGraph<int>& graph,
std::unordered_map<int, int>* membership);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "glog/logging.h"
#include "small_blas_generic.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// The following three macros are used to share code and reduce
// template junk across the various GEMM variants.
@@ -561,7 +560,6 @@ inline void MatrixTransposeVectorMultiply(const double* A,
#undef CERES_GEMM_STORE_SINGLE
#undef CERES_GEMM_STORE_PAIR
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SMALL_BLAS_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,38 +35,35 @@
#ifndef CERES_INTERNAL_SMALL_BLAS_GENERIC_H_
#define CERES_INTERNAL_SMALL_BLAS_GENERIC_H_
namespace ceres {
namespace internal {
namespace ceres::internal {
// The following macros are used to share code
#define CERES_GEMM_OPT_NAIVE_HEADER \
double c0 = 0.0; \
double c1 = 0.0; \
double c2 = 0.0; \
double c3 = 0.0; \
const double* pa = a; \
const double* pb = b; \
const int span = 4; \
int col_r = col_a & (span - 1); \
#define CERES_GEMM_OPT_NAIVE_HEADER \
double cvec4[4] = {0.0, 0.0, 0.0, 0.0}; \
const double* pa = a; \
const double* pb = b; \
const int span = 4; \
int col_r = col_a & (span - 1); \
int col_m = col_a - col_r;
#define CERES_GEMM_OPT_STORE_MAT1X4 \
if (kOperation > 0) { \
*c++ += c0; \
*c++ += c1; \
*c++ += c2; \
*c++ += c3; \
c[0] += cvec4[0]; \
c[1] += cvec4[1]; \
c[2] += cvec4[2]; \
c[3] += cvec4[3]; \
} else if (kOperation < 0) { \
*c++ -= c0; \
*c++ -= c1; \
*c++ -= c2; \
*c++ -= c3; \
c[0] -= cvec4[0]; \
c[1] -= cvec4[1]; \
c[2] -= cvec4[2]; \
c[3] -= cvec4[3]; \
} else { \
*c++ = c0; \
*c++ = c1; \
*c++ = c2; \
*c++ = c3; \
}
c[0] = cvec4[0]; \
c[1] = cvec4[1]; \
c[2] = cvec4[2]; \
c[3] = cvec4[3]; \
} \
c += 4;
// Matrix-Matrix Multiplication
// Figure out 1x4 of Matrix C in one batch
@@ -100,10 +97,10 @@ static inline void MMM_mat1x4(const int col_a,
#define CERES_GEMM_OPT_MMM_MAT1X4_MUL \
av = pa[k]; \
pb = b + bi; \
c0 += av * pb[0]; \
c1 += av * pb[1]; \
c2 += av * pb[2]; \
c3 += av * pb[3]; \
cvec4[0] += av * pb[0]; \
cvec4[1] += av * pb[1]; \
cvec4[2] += av * pb[2]; \
cvec4[3] += av * pb[3]; \
pb += 4; \
bi += col_stride_b; \
k++;
@@ -168,10 +165,10 @@ static inline void MTM_mat1x4(const int col_a,
#define CERES_GEMM_OPT_MTM_MAT1X4_MUL \
av = pa[ai]; \
pb = b + bi; \
c0 += av * pb[0]; \
c1 += av * pb[1]; \
c2 += av * pb[2]; \
c3 += av * pb[3]; \
cvec4[0] += av * pb[0]; \
cvec4[1] += av * pb[1]; \
cvec4[2] += av * pb[2]; \
cvec4[3] += av * pb[3]; \
pb += 4; \
ai += col_stride_a; \
bi += col_stride_b;
@@ -221,13 +218,13 @@ static inline void MVM_mat4x1(const int col_a,
double bv = 0.0;
// clang-format off
#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \
bv = *pb; \
c0 += *(pa ) * bv; \
c1 += *(pa + col_stride_a ) * bv; \
c2 += *(pa + col_stride_a * 2) * bv; \
c3 += *(pa + col_stride_a * 3) * bv; \
pa++; \
#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \
bv = *pb; \
cvec4[0] += *(pa ) * bv; \
cvec4[1] += *(pa + col_stride_a ) * bv; \
cvec4[2] += *(pa + col_stride_a * 2) * bv; \
cvec4[3] += *(pa + col_stride_a * 3) * bv; \
pa++; \
pb++;
// clang-format on
@@ -285,16 +282,14 @@ static inline void MTV_mat4x1(const int col_a,
CERES_GEMM_OPT_NAIVE_HEADER
double bv = 0.0;
// clang-format off
#define CERES_GEMM_OPT_MTV_MAT4X1_MUL \
bv = *pb; \
c0 += *(pa ) * bv; \
c1 += *(pa + 1) * bv; \
c2 += *(pa + 2) * bv; \
c3 += *(pa + 3) * bv; \
cvec4[0] += pa[0] * bv; \
cvec4[1] += pa[1] * bv; \
cvec4[2] += pa[2] * bv; \
cvec4[3] += pa[3] * bv; \
pa += col_stride_a; \
pb++;
// clang-format on
for (int k = 0; k < col_m; k += span) {
CERES_GEMM_OPT_MTV_MAT4X1_MUL
@@ -315,7 +310,6 @@ static inline void MTV_mat4x1(const int col_a,
#undef CERES_GEMM_OPT_NAIVE_HEADER
#undef CERES_GEMM_OPT_STORE_MAT1X4
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SMALL_BLAS_GENERIC_H_

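The change above replaces the four independent scalar accumulators (c0..c3) with a single four-element array, cvec4, and hoists the pointer advance (c += 4) out of the add/subtract/store branches. Keeping the partial sums in one array makes the reduction pattern easier for compilers to auto-vectorize. A minimal, self-contained sketch of the same accumulation pattern (the function name and driver loop are illustrative, not part of the diff):

#include <cstddef>

// y[0..3] += A^T * x for a row-major (rows x 4) matrix A. The four
// partial sums live in one array so the compiler can keep them in a
// single vector register across the loop.
void Mat1x4TransposeAccumulate(const double* A, const double* x,
                               std::size_t rows, double y[4]) {
  double acc[4] = {0.0, 0.0, 0.0, 0.0};
  for (std::size_t k = 0; k < rows; ++k) {
    const double av = x[k];
    acc[0] += av * A[4 * k + 0];
    acc[1] += av * A[4 * k + 1];
    acc[2] += av * A[4 * k + 2];
    acc[3] += av * A[4 * k + 3];
  }
  for (int i = 0; i < 4; ++i) {
    y[i] += acc[i];
  }
}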
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,14 +32,17 @@
#include "ceres/solver.h"
#include <algorithm>
#include <map>
#include <memory>
#include <sstream> // NOLINT
#include <string>
#include <vector>
#include "ceres/casts.h"
#include "ceres/context.h"
#include "ceres/context_impl.h"
#include "ceres/detect_structure.h"
#include "ceres/eigensparse.h"
#include "ceres/gradient_checking_cost_function.h"
#include "ceres/internal/export.h"
#include "ceres/parameter_block_ordering.h"
@@ -50,6 +53,7 @@
#include "ceres/schur_templates.h"
#include "ceres/solver_utils.h"
#include "ceres/stringprintf.h"
#include "ceres/suitesparse.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
@@ -58,32 +62,29 @@ namespace {
using internal::StringAppendF;
using internal::StringPrintf;
using std::map;
using std::string;
using std::vector;
#define OPTION_OP(x, y, OP) \
if (!(options.x OP y)) { \
std::stringstream ss; \
ss << "Invalid configuration. "; \
ss << string("Solver::Options::" #x " = ") << options.x << ". "; \
ss << "Violated constraint: "; \
ss << string("Solver::Options::" #x " " #OP " " #y); \
*error = ss.str(); \
return false; \
#define OPTION_OP(x, y, OP) \
if (!(options.x OP y)) { \
std::stringstream ss; \
ss << "Invalid configuration. "; \
ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \
ss << "Violated constraint: "; \
ss << std::string("Solver::Options::" #x " " #OP " " #y); \
*error = ss.str(); \
return false; \
}
#define OPTION_OP_OPTION(x, y, OP) \
if (!(options.x OP options.y)) { \
std::stringstream ss; \
ss << "Invalid configuration. "; \
ss << string("Solver::Options::" #x " = ") << options.x << ". "; \
ss << string("Solver::Options::" #y " = ") << options.y << ". "; \
ss << "Violated constraint: "; \
ss << string("Solver::Options::" #x); \
ss << string(#OP " Solver::Options::" #y "."); \
*error = ss.str(); \
return false; \
#define OPTION_OP_OPTION(x, y, OP) \
if (!(options.x OP options.y)) { \
std::stringstream ss; \
ss << "Invalid configuration. "; \
ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \
ss << std::string("Solver::Options::" #y " = ") << options.y << ". "; \
ss << "Violated constraint: "; \
ss << std::string("Solver::Options::" #x); \
ss << std::string(#OP " Solver::Options::" #y "."); \
*error = ss.str(); \
return false; \
}
#define OPTION_GE(x, y) OPTION_OP(x, y, >=);
@@ -93,7 +94,7 @@ using std::vector;
#define OPTION_LE_OPTION(x, y) OPTION_OP_OPTION(x, y, <=)
#define OPTION_LT_OPTION(x, y) OPTION_OP_OPTION(x, y, <)
bool CommonOptionsAreValid(const Solver::Options& options, string* error) {
bool CommonOptionsAreValid(const Solver::Options& options, std::string* error) {
OPTION_GE(max_num_iterations, 0);
OPTION_GE(max_solver_time_in_seconds, 0.0);
OPTION_GE(function_tolerance, 0.0);
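For reference, a single instantiation of the macro above, OPTION_GE(max_num_iterations, 0), hand-expands to roughly the following (with the stringized tokens spelled out):

if (!(options.max_num_iterations >= 0)) {
  std::stringstream ss;
  ss << "Invalid configuration. ";
  ss << std::string("Solver::Options::max_num_iterations = ")
     << options.max_num_iterations << ". ";
  ss << "Violated constraint: ";
  ss << std::string("Solver::Options::max_num_iterations >= 0");
  *error = ss.str();
  return false;
}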
@@ -107,7 +108,286 @@ bool CommonOptionsAreValid(const Solver::Options& options, string* error) {
return true;
}
bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
bool IsNestedDissectionAvailable(SparseLinearAlgebraLibraryType type) {
return (((type == SUITE_SPARSE) &&
internal::SuiteSparse::IsNestedDissectionAvailable()) ||
(type == ACCELERATE_SPARSE) ||
((type == EIGEN_SPARSE) &&
internal::EigenSparse::IsNestedDissectionAvailable()));
}
bool IsIterativeSolver(LinearSolverType type) {
return (type == CGNR || type == ITERATIVE_SCHUR);
}
bool OptionsAreValidForDenseSolver(const Solver::Options& options,
std::string* error) {
const char* library_name = DenseLinearAlgebraLibraryTypeToString(
options.dense_linear_algebra_library_type);
const char* solver_name =
LinearSolverTypeToString(options.linear_solver_type);
constexpr char kFormat[] =
"Can't use %s with dense_linear_algebra_library_type = %s "
"because support was not enabled when Ceres was built.";
if (!IsDenseLinearAlgebraLibraryTypeAvailable(
options.dense_linear_algebra_library_type)) {
*error = StringPrintf(kFormat, solver_name, library_name);
return false;
}
return true;
}
bool OptionsAreValidForSparseCholeskyBasedSolver(const Solver::Options& options,
std::string* error) {
const char* library_name = SparseLinearAlgebraLibraryTypeToString(
options.sparse_linear_algebra_library_type);
// Sparse factorization based solvers and some preconditioners require a
// sparse Cholesky factorization.
const char* solver_name =
IsIterativeSolver(options.linear_solver_type)
? PreconditionerTypeToString(options.preconditioner_type)
: LinearSolverTypeToString(options.linear_solver_type);
constexpr char kNoSparseFormat[] =
"Can't use %s with sparse_linear_algebra_library_type = %s.";
constexpr char kNoLibraryFormat[] =
"Can't use %s with sparse_linear_algebra_library_type = %s, because support "
"was not enabled when Ceres Solver was built.";
constexpr char kNoNesdisFormat[] =
"NESDIS is not available with sparse_linear_algebra_library_type = %s.";
constexpr char kMixedFormat[] =
"use_mixed_precision_solves with %s is not supported with "
"sparse_linear_algebra_library_type = %s";
constexpr char kDynamicSparsityFormat[] =
"dynamic sparsity is not supported with "
"sparse_linear_algebra_library_type = %s";
if (options.sparse_linear_algebra_library_type == NO_SPARSE) {
*error = StringPrintf(kNoSparseFormat, solver_name, library_name);
return false;
}
if (!IsSparseLinearAlgebraLibraryTypeAvailable(
options.sparse_linear_algebra_library_type)) {
*error = StringPrintf(kNoLibraryFormat, solver_name, library_name);
return false;
}
if (options.linear_solver_ordering_type == ceres::NESDIS &&
!IsNestedDissectionAvailable(
options.sparse_linear_algebra_library_type)) {
*error = StringPrintf(kNoNesdisFormat, library_name);
return false;
}
if (options.use_mixed_precision_solves &&
options.sparse_linear_algebra_library_type == SUITE_SPARSE) {
*error = StringPrintf(kMixedFormat, solver_name, library_name);
return false;
}
if (options.dynamic_sparsity &&
options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
*error = StringPrintf(kDynamicSparsityFormat, library_name);
return false;
}
return true;
}
bool OptionsAreValidForDenseNormalCholesky(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, DENSE_NORMAL_CHOLESKY);
return OptionsAreValidForDenseSolver(options, error);
}
bool OptionsAreValidForDenseQr(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, DENSE_QR);
if (!OptionsAreValidForDenseSolver(options, error)) {
return false;
}
if (options.use_mixed_precision_solves) {
*error = "Can't use use_mixed_precision_solves with DENSE_QR.";
return false;
}
return true;
}
bool OptionsAreValidForSparseNormalCholesky(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
return OptionsAreValidForSparseCholeskyBasedSolver(options, error);
}
bool OptionsAreValidForDenseSchur(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, DENSE_SCHUR);
if (options.dynamic_sparsity) {
*error = "dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY";
return false;
}
if (!OptionsAreValidForDenseSolver(options, error)) {
return false;
}
return true;
}
bool OptionsAreValidForSparseSchur(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, SPARSE_SCHUR);
if (options.dynamic_sparsity) {
*error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
return false;
}
return OptionsAreValidForSparseCholeskyBasedSolver(options, error);
}
bool OptionsAreValidForIterativeSchur(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, ITERATIVE_SCHUR);
if (options.dynamic_sparsity) {
*error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
return false;
}
if (options.use_explicit_schur_complement) {
if (options.preconditioner_type != SCHUR_JACOBI) {
*error =
"use_explicit_schur_complement only supports "
"SCHUR_JACOBI as the preconditioner.";
return false;
}
if (options.use_spse_initialization) {
*error =
"use_explicit_schur_complement does not support "
"use_spse_initialization.";
return false;
}
}
if (options.use_spse_initialization ||
options.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION) {
OPTION_GE(max_num_spse_iterations, 1)
OPTION_GE(spse_tolerance, 0.0)
}
if (options.use_mixed_precision_solves) {
*error = "Can't use use_mixed_precision_solves with ITERATIVE_SCHUR";
return false;
}
if (options.dynamic_sparsity) {
*error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
return false;
}
if (options.preconditioner_type == SUBSET) {
*error = "Can't use SUBSET preconditioner with ITERATIVE_SCHUR";
return false;
}
// CLUSTER_JACOBI and CLUSTER_TRIDIAGONAL require sparse Cholesky
// factorization.
if (options.preconditioner_type == CLUSTER_JACOBI ||
options.preconditioner_type == CLUSTER_TRIDIAGONAL) {
return OptionsAreValidForSparseCholeskyBasedSolver(options, error);
}
return true;
}
bool OptionsAreValidForCgnr(const Solver::Options& options,
std::string* error) {
CHECK_EQ(options.linear_solver_type, CGNR);
if (options.preconditioner_type != IDENTITY &&
options.preconditioner_type != JACOBI &&
options.preconditioner_type != SUBSET) {
*error =
StringPrintf("Can't use CGNR with preconditioner_type = %s.",
PreconditionerTypeToString(options.preconditioner_type));
return false;
}
if (options.use_mixed_precision_solves) {
*error = "use_mixed_precision_solves cannot be used with CGNR";
return false;
}
if (options.dynamic_sparsity) {
*error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
return false;
}
if (options.preconditioner_type == SUBSET) {
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
*error =
"Can't use CGNR with preconditioner_type = SUBSET when "
"sparse_linear_algebra_library_type = CUDA_SPARSE.";
return false;
}
if (options.residual_blocks_for_subset_preconditioner.empty()) {
*error =
"When using SUBSET preconditioner, "
"residual_blocks_for_subset_preconditioner cannot be empty";
return false;
}
// SUBSET preconditioner requires sparse Cholesky factorization.
if (!OptionsAreValidForSparseCholeskyBasedSolver(options, error)) {
return false;
}
}
// Check options for CGNR with CUDA_SPARSE.
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
if (!IsSparseLinearAlgebraLibraryTypeAvailable(CUDA_SPARSE)) {
*error =
"Can't use CGNR with sparse_linear_algebra_library_type = "
"CUDA_SPARSE because support was not enabled when Ceres was built.";
return false;
}
}
return true;
}
bool OptionsAreValidForLinearSolver(const Solver::Options& options,
std::string* error) {
switch (options.linear_solver_type) {
case DENSE_NORMAL_CHOLESKY:
return OptionsAreValidForDenseNormalCholesky(options, error);
case DENSE_QR:
return OptionsAreValidForDenseQr(options, error);
case SPARSE_NORMAL_CHOLESKY:
return OptionsAreValidForSparseNormalCholesky(options, error);
case DENSE_SCHUR:
return OptionsAreValidForDenseSchur(options, error);
case SPARSE_SCHUR:
return OptionsAreValidForSparseSchur(options, error);
case ITERATIVE_SCHUR:
return OptionsAreValidForIterativeSchur(options, error);
case CGNR:
return OptionsAreValidForCgnr(options, error);
default:
LOG(FATAL) << "Congratulations you have found a bug. Please report "
"this to the "
"Ceres Solver developers. Unknown linear solver type: "
<< LinearSolverTypeToString(options.linear_solver_type);
}
return false;
}
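The net effect of this refactoring is that every linear solver type now has a dedicated validator, so an invalid combination fails fast with a targeted message. A small usage sketch (the option values are chosen purely for illustration; the error text matches OptionsAreValidForDenseQr above):

ceres::Solver::Options options;
options.linear_solver_type = ceres::DENSE_QR;
options.use_mixed_precision_solves = true;

std::string error;
if (!options.IsValid(&error)) {
  // error == "Can't use use_mixed_precision_solves with DENSE_QR."
}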
bool TrustRegionOptionsAreValid(const Solver::Options& options,
std::string* error) {
OPTION_GT(initial_trust_region_radius, 0.0);
OPTION_GT(min_trust_region_radius, 0.0);
OPTION_GT(max_trust_region_radius, 0.0);
@@ -121,7 +401,7 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
OPTION_GE(max_num_consecutive_invalid_steps, 0);
OPTION_GT(eta, 0.0);
OPTION_GE(min_linear_solver_iterations, 0);
OPTION_GE(max_linear_solver_iterations, 1);
OPTION_GE(max_linear_solver_iterations, 0);
OPTION_LE_OPTION(min_linear_solver_iterations, max_linear_solver_iterations);
if (options.use_inner_iterations) {
@@ -132,80 +412,19 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
OPTION_GT(max_consecutive_nonmonotonic_steps, 0);
}
if (options.linear_solver_type == ITERATIVE_SCHUR &&
options.use_explicit_schur_complement &&
options.preconditioner_type != SCHUR_JACOBI) {
if ((options.trust_region_strategy_type == DOGLEG) &&
IsIterativeSolver(options.linear_solver_type)) {
*error =
"use_explicit_schur_complement only supports "
"SCHUR_JACOBI as the preconditioner.";
"DOGLEG only supports exact factorization based linear "
"solvers. If you want to use an iterative solver please "
"use LEVENBERG_MARQUARDT as the trust_region_strategy_type";
return false;
}
if (!IsDenseLinearAlgebraLibraryTypeAvailable(
options.dense_linear_algebra_library_type) &&
(options.linear_solver_type == DENSE_NORMAL_CHOLESKY ||
options.linear_solver_type == DENSE_QR ||
options.linear_solver_type == DENSE_SCHUR)) {
*error = StringPrintf(
"Can't use %s with "
"Solver::Options::dense_linear_algebra_library_type = %s "
"because %s was not enabled when Ceres was built.",
LinearSolverTypeToString(options.linear_solver_type),
DenseLinearAlgebraLibraryTypeToString(
options.dense_linear_algebra_library_type),
DenseLinearAlgebraLibraryTypeToString(
options.dense_linear_algebra_library_type));
if (!OptionsAreValidForLinearSolver(options, error)) {
return false;
}
{
const char* sparse_linear_algebra_library_name =
SparseLinearAlgebraLibraryTypeToString(
options.sparse_linear_algebra_library_type);
const char* name = nullptr;
if (options.linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
options.linear_solver_type == SPARSE_SCHUR) {
name = LinearSolverTypeToString(options.linear_solver_type);
} else if ((options.linear_solver_type == ITERATIVE_SCHUR &&
(options.preconditioner_type == CLUSTER_JACOBI ||
options.preconditioner_type == CLUSTER_TRIDIAGONAL)) ||
(options.linear_solver_type == CGNR &&
options.preconditioner_type == SUBSET)) {
name = PreconditionerTypeToString(options.preconditioner_type);
}
if (name) {
if (options.sparse_linear_algebra_library_type == NO_SPARSE) {
*error = StringPrintf(
"Can't use %s with "
"Solver::Options::sparse_linear_algebra_library_type = %s.",
name,
sparse_linear_algebra_library_name);
return false;
} else if (!IsSparseLinearAlgebraLibraryTypeAvailable(
options.sparse_linear_algebra_library_type)) {
*error = StringPrintf(
"Can't use %s with "
"Solver::Options::sparse_linear_algebra_library_type = %s, "
"because support was not enabled when Ceres Solver was built.",
name,
sparse_linear_algebra_library_name);
return false;
}
}
}
if (options.trust_region_strategy_type == DOGLEG) {
if (options.linear_solver_type == ITERATIVE_SCHUR ||
options.linear_solver_type == CGNR) {
*error =
"DOGLEG only supports exact factorization based linear "
"solvers. If you want to use an iterative solver please "
"use LEVENBERG_MARQUARDT as the trust_region_strategy_type";
return false;
}
}
if (!options.trust_region_minimizer_iterations_to_dump.empty() &&
options.trust_region_problem_dump_format_type != CONSOLE &&
options.trust_region_problem_dump_directory.empty()) {
@@ -213,33 +432,11 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
return false;
}
if (options.dynamic_sparsity) {
if (options.linear_solver_type != SPARSE_NORMAL_CHOLESKY) {
*error =
"Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
return false;
}
if (options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
*error =
"ACCELERATE_SPARSE is not currently supported with dynamic sparsity.";
return false;
}
}
if (options.linear_solver_type == CGNR &&
options.preconditioner_type == SUBSET &&
options.residual_blocks_for_subset_preconditioner.empty()) {
*error =
"When using SUBSET preconditioner, "
"Solver::Options::residual_blocks_for_subset_preconditioner cannot be "
"empty";
return false;
}
return true;
}
bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
bool LineSearchOptionsAreValid(const Solver::Options& options,
std::string* error) {
OPTION_GT(max_lbfgs_rank, 0);
OPTION_GT(min_line_search_step_size, 0.0);
OPTION_GT(max_line_search_step_contraction, 0.0);
@@ -259,9 +456,10 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
options.line_search_direction_type == ceres::LBFGS) &&
options.line_search_type != ceres::WOLFE) {
*error =
string("Invalid configuration: Solver::Options::line_search_type = ") +
string(LineSearchTypeToString(options.line_search_type)) +
string(
std::string(
"Invalid configuration: Solver::Options::line_search_type = ") +
std::string(LineSearchTypeToString(options.line_search_type)) +
std::string(
". When using (L)BFGS, "
"Solver::Options::line_search_type must be set to WOLFE.");
return false;
@@ -269,8 +467,8 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
// Warn user if they have requested BISECTION interpolation, but constraints
// on max/min step size change during line search prevent bisection scaling
// from occurring. Warn only, as this is likely a user mistake, but one which
// does not prevent us from continuing.
// from occurring. Warn only, as this is likely a user mistake, but one
// which does not prevent us from continuing.
if (options.line_search_interpolation_type == ceres::BISECTION &&
(options.max_line_search_step_contraction > 0.5 ||
options.min_line_search_step_contraction < 0.5)) {
@@ -295,7 +493,7 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
#undef OPTION_LE_OPTION
#undef OPTION_LT_OPTION
void StringifyOrdering(const vector<int>& ordering, string* report) {
void StringifyOrdering(const std::vector<int>& ordering, std::string* report) {
if (ordering.empty()) {
internal::StringAppendF(report, "AUTOMATIC");
return;
@@ -339,7 +537,7 @@ void PreSolveSummarize(const Solver::Options& options,
&(summary->inner_iteration_ordering_given));
// clang-format off
summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; // NOLINT
summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type;
summary->dogleg_type = options.dogleg_type;
summary->inner_iteration_time_in_seconds = 0.0;
summary->num_line_search_steps = 0;
@@ -348,18 +546,19 @@ void PreSolveSummarize(const Solver::Options& options,
summary->line_search_polynomial_minimization_time_in_seconds = 0.0;
summary->line_search_total_time_in_seconds = 0.0;
summary->inner_iterations_given = options.use_inner_iterations;
summary->line_search_direction_type = options.line_search_direction_type; // NOLINT
summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT
summary->line_search_direction_type = options.line_search_direction_type;
summary->line_search_interpolation_type = options.line_search_interpolation_type;
summary->line_search_type = options.line_search_type;
summary->linear_solver_type_given = options.linear_solver_type;
summary->max_lbfgs_rank = options.max_lbfgs_rank;
summary->minimizer_type = options.minimizer_type;
summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type; // NOLINT
summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type;
summary->num_threads_given = options.num_threads;
summary->preconditioner_type_given = options.preconditioner_type;
summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; // NOLINT
summary->trust_region_strategy_type = options.trust_region_strategy_type; // NOLINT
summary->visibility_clustering_type = options.visibility_clustering_type; // NOLINT
summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type;
summary->linear_solver_ordering_type = options.linear_solver_ordering_type;
summary->trust_region_strategy_type = options.trust_region_strategy_type;
summary->visibility_clustering_type = options.visibility_clustering_type;
// clang-format on
}
@@ -367,19 +566,23 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
Solver::Summary* summary) {
internal::OrderingToGroupSizes(pp.options.linear_solver_ordering.get(),
&(summary->linear_solver_ordering_used));
// TODO(sameeragarwal): Update the preprocessor to collapse the
// second and higher groups into one group when nested dissection is
// used.
internal::OrderingToGroupSizes(pp.options.inner_iteration_ordering.get(),
&(summary->inner_iteration_ordering_used));
// clang-format off
summary->inner_iterations_used = pp.inner_iteration_minimizer.get() != nullptr; // NOLINT
summary->inner_iterations_used = pp.inner_iteration_minimizer != nullptr;
summary->linear_solver_type_used = pp.linear_solver_options.type;
summary->mixed_precision_solves_used = pp.options.use_mixed_precision_solves;
summary->num_threads_used = pp.options.num_threads;
summary->preconditioner_type_used = pp.options.preconditioner_type;
// clang-format on
internal::SetSummaryFinalCost(summary);
if (pp.reduced_program.get() != nullptr) {
if (pp.reduced_program != nullptr) {
SummarizeReducedProgram(*pp.reduced_program, summary);
}
@@ -389,8 +592,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
// case if the preprocessor failed, or if the reduced problem did
// not contain any parameter blocks. Thus, only extract the
// evaluator statistics if one exists.
if (pp.evaluator.get() != nullptr) {
const map<string, CallStatistics>& evaluator_statistics =
if (pp.evaluator != nullptr) {
const std::map<std::string, CallStatistics>& evaluator_statistics =
pp.evaluator->Statistics();
{
const CallStatistics& call_stats = FindWithDefault(
@@ -411,8 +614,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
// Again, like the evaluator, there may or may not be a linear
// solver from which we can extract run time statistics. In
// particular the line search solver does not use a linear solver.
if (pp.linear_solver.get() != nullptr) {
const map<string, CallStatistics>& linear_solver_statistics =
if (pp.linear_solver != nullptr) {
const std::map<std::string, CallStatistics>& linear_solver_statistics =
pp.linear_solver->Statistics();
const CallStatistics& call_stats = FindWithDefault(
linear_solver_statistics, "LinearSolver::Solve", CallStatistics());
@@ -468,9 +671,23 @@ std::string SchurStructureToString(const int row_block_size,
return internal::StringPrintf("%s,%s,%s", row.c_str(), e.c_str(), f.c_str());
}
#ifndef CERES_NO_CUDA
bool IsCudaRequired(const Solver::Options& options) {
if (options.linear_solver_type == DENSE_NORMAL_CHOLESKY ||
options.linear_solver_type == DENSE_SCHUR ||
options.linear_solver_type == DENSE_QR) {
return (options.dense_linear_algebra_library_type == CUDA);
}
if (options.linear_solver_type == CGNR) {
return (options.sparse_linear_algebra_library_type == CUDA_SPARSE);
}
return false;
}
#endif
} // namespace
bool Solver::Options::IsValid(string* error) const {
bool Solver::Options::IsValid(std::string* error) const {
if (!CommonOptionsAreValid(*this, error)) {
return false;
}
@@ -509,10 +726,19 @@ void Solver::Solve(const Solver::Options& options,
return;
}
ProblemImpl* problem_impl = problem->impl_.get();
ProblemImpl* problem_impl = problem->mutable_impl();
Program* program = problem_impl->mutable_program();
PreSolveSummarize(options, problem_impl, summary);
#ifndef CERES_NO_CUDA
if (IsCudaRequired(options)) {
if (!problem_impl->context()->InitCuda(&summary->message)) {
LOG(ERROR) << "Terminating: " << summary->message;
return;
}
}
#endif // CERES_NO_CUDA
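With this hook, CUDA is initialized on the problem's context before preprocessing, but only when the selected configuration actually needs it, as decided by IsCudaRequired above. A sketch of a configuration that takes this path, assuming Ceres was built with CUDA support and that `problem` is an existing ceres::Problem:

ceres::Solver::Options options;
options.linear_solver_type = ceres::DENSE_SCHUR;
options.dense_linear_algebra_library_type = ceres::CUDA;

ceres::Solver::Summary summary;
ceres::Solve(options, &problem, &summary);  // InitCuda() runs before preprocessing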
// If gradient_checking is enabled, wrap all cost functions in a
// gradient checker and install a callback that terminates if any gradient
// error is detected.
@@ -582,7 +808,7 @@ void Solver::Solve(const Solver::Options& options,
}
const double postprocessor_start_time = WallTimeInSeconds();
problem_impl = problem->impl_.get();
problem_impl = problem->mutable_impl();
program = problem_impl->mutable_program();
// On exit, ensure that the parameter blocks again point at the user
// provided values and the parameter blocks are numbered according
@@ -610,7 +836,7 @@ void Solve(const Solver::Options& options,
solver.Solve(options, problem, summary);
}
string Solver::Summary::BriefReport() const {
std::string Solver::Summary::BriefReport() const {
return StringPrintf(
"Ceres Solver Report: "
"Iterations: %d, "
@@ -623,10 +849,12 @@ string Solver::Summary::BriefReport() const {
TerminationTypeToString(termination_type));
}
string Solver::Summary::FullReport() const {
std::string Solver::Summary::FullReport() const {
using internal::VersionString;
string report = string("\nSolver Summary (v " + VersionString() + ")\n\n");
// NOTE operator+ is not usable for concatenating a string and a string_view.
std::string report =
std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n";
StringAppendF(&report, "%45s %21s\n", "Original", "Reduced");
StringAppendF(&report,
@@ -660,21 +888,13 @@ string Solver::Summary::FullReport() const {
if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY ||
linear_solver_type_used == DENSE_SCHUR ||
linear_solver_type_used == DENSE_QR) {
const char* mixed_precision_suffix =
(mixed_precision_solves_used ? "(Mixed Precision)" : "");
StringAppendF(&report,
"\nDense linear algebra library %15s\n",
"\nDense linear algebra library %15s %s\n",
DenseLinearAlgebraLibraryTypeToString(
dense_linear_algebra_library_type));
}
if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
linear_solver_type_used == SPARSE_SCHUR ||
(linear_solver_type_used == ITERATIVE_SCHUR &&
(preconditioner_type_used == CLUSTER_JACOBI ||
preconditioner_type_used == CLUSTER_TRIDIAGONAL))) {
StringAppendF(&report,
"\nSparse linear algebra library %15s\n",
SparseLinearAlgebraLibraryTypeToString(
sparse_linear_algebra_library_type));
dense_linear_algebra_library_type),
mixed_precision_suffix);
}
StringAppendF(&report,
@@ -687,17 +907,50 @@ string Solver::Summary::FullReport() const {
StringAppendF(&report, " (SUBSPACE)");
}
}
StringAppendF(&report, "\n");
StringAppendF(&report, "\n");
const bool used_sparse_linear_algebra_library =
linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
linear_solver_type_used == SPARSE_SCHUR ||
linear_solver_type_used == CGNR ||
(linear_solver_type_used == ITERATIVE_SCHUR &&
(preconditioner_type_used == CLUSTER_JACOBI ||
preconditioner_type_used == CLUSTER_TRIDIAGONAL));
const bool linear_solver_ordering_required =
linear_solver_type_used == SPARSE_SCHUR ||
(linear_solver_type_used == ITERATIVE_SCHUR &&
(preconditioner_type_used == CLUSTER_JACOBI ||
preconditioner_type_used == CLUSTER_TRIDIAGONAL)) ||
(linear_solver_type_used == CGNR && preconditioner_type_used == SUBSET);
if (used_sparse_linear_algebra_library) {
const char* mixed_precision_suffix =
(mixed_precision_solves_used ? "(Mixed Precision)" : "");
if (linear_solver_ordering_required) {
StringAppendF(
&report,
"\nSparse linear algebra library %15s + %s %s\n",
SparseLinearAlgebraLibraryTypeToString(
sparse_linear_algebra_library_type),
LinearSolverOrderingTypeToString(linear_solver_ordering_type),
mixed_precision_suffix);
} else {
StringAppendF(&report,
"\nSparse linear algebra library %15s %s\n",
SparseLinearAlgebraLibraryTypeToString(
sparse_linear_algebra_library_type),
mixed_precision_suffix);
}
}
StringAppendF(&report, "\n");
StringAppendF(&report, "%45s %21s\n", "Given", "Used");
StringAppendF(&report,
"Linear solver %25s%25s\n",
LinearSolverTypeToString(linear_solver_type_given),
LinearSolverTypeToString(linear_solver_type_used));
if (linear_solver_type_given == CGNR ||
linear_solver_type_given == ITERATIVE_SCHUR) {
if (IsIterativeSolver(linear_solver_type_given)) {
StringAppendF(&report,
"Preconditioner %25s%25s\n",
PreconditionerTypeToString(preconditioner_type_given),
@@ -717,9 +970,9 @@ string Solver::Summary::FullReport() const {
num_threads_given,
num_threads_used);
string given;
std::string given;
StringifyOrdering(linear_solver_ordering_given, &given);
string used;
std::string used;
StringifyOrdering(linear_solver_ordering_used, &used);
StringAppendF(&report,
"Linear solver ordering %22s %24s\n",
@@ -740,9 +993,9 @@ string Solver::Summary::FullReport() const {
}
if (inner_iterations_used) {
string given;
std::string given;
StringifyOrdering(inner_iteration_ordering_given, &given);
string used;
std::string used;
StringifyOrdering(inner_iteration_ordering_used, &used);
StringAppendF(&report,
"Inner iteration ordering %20s %24s\n",
@@ -753,7 +1006,7 @@ string Solver::Summary::FullReport() const {
// LINE_SEARCH HEADER
StringAppendF(&report, "\nMinimizer %19s\n", "LINE_SEARCH");
string line_search_direction_string;
std::string line_search_direction_string;
if (line_search_direction_type == LBFGS) {
line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank);
} else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) {
@@ -768,7 +1021,7 @@ string Solver::Summary::FullReport() const {
"Line search direction %19s\n",
line_search_direction_string.c_str());
const string line_search_type_string = StringPrintf(
const std::string line_search_type_string = StringPrintf(
"%s %s",
LineSearchInterpolationTypeToString(line_search_interpolation_type),
LineSearchTypeToString(line_search_type));

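A note on the .append() construction used for the report header above: VersionString() now returns std::string_view, and the standard library provides no operator+ for mixing std::string and std::string_view, while std::string::append accepts a string_view directly. The constraint in isolation (Greet and the names here are illustrative):

#include <string>
#include <string_view>

std::string Greet(std::string_view name) {
  // return std::string("Hello, ") + name;  // ill-formed: no such operator+
  return std::string("Hello, ").append(name) + "!";
}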
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,6 @@
#include "ceres/solver_utils.h"
#include <string>
#include "Eigen/Core"
#include "ceres/internal/config.h"
#include "ceres/internal/export.h"
@@ -40,8 +38,7 @@
#include "cuda_runtime.h"
#endif // CERES_NO_CUDA
namespace ceres {
namespace internal {
namespace ceres::internal {
// clang-format off
#define CERES_EIGEN_VERSION \
@@ -50,52 +47,47 @@ namespace internal {
CERES_TO_STRING(EIGEN_MINOR_VERSION)
// clang-format on
std::string VersionString() {
std::string value = std::string(CERES_VERSION_STRING);
value += "-eigen-(" + std::string(CERES_EIGEN_VERSION) + ")";
constexpr char kVersion[] =
// clang-format off
CERES_VERSION_STRING
"-eigen-(" CERES_EIGEN_VERSION ")"
#ifdef CERES_NO_LAPACK
value += "-no_lapack";
"-no_lapack"
#else
value += "-lapack";
"-lapack"
#endif
#ifndef CERES_NO_SUITESPARSE
value += "-suitesparse-(" + std::string(CERES_SUITESPARSE_VERSION) + ")";
"-suitesparse-(" CERES_SUITESPARSE_VERSION ")"
#endif
#ifndef CERES_NO_CXSPARSE
value += "-cxsparse-(" + std::string(CERES_CXSPARSE_VERSION) + ")";
#if !defined(CERES_NO_EIGEN_METIS) || !defined(CERES_NO_CHOLMOD_PARTITION)
"-metis-(" CERES_METIS_VERSION ")"
#endif
#ifndef CERES_NO_ACCELERATE_SPARSE
value += "-acceleratesparse";
"-acceleratesparse"
#endif
#ifdef CERES_USE_EIGEN_SPARSE
value += "-eigensparse";
"-eigensparse"
#endif
#ifdef CERES_RESTRUCT_SCHUR_SPECIALIZATIONS
value += "-no_schur_specializations";
#endif
#ifdef CERES_USE_OPENMP
value += "-openmp";
#else
value += "-no_openmp";
"-no_schur_specializations"
#endif
#ifdef CERES_NO_CUSTOM_BLAS
value += "-no_custom_blas";
"-no_custom_blas"
#endif
#ifndef CERES_NO_CUDA
value += "-cuda-(" + std::to_string(CUDART_VERSION) + ")";
"-cuda-(" CERES_TO_STRING(CUDART_VERSION) ")"
#endif
;
// clang-format on
return value;
}
std::string_view VersionString() noexcept { return kVersion; }
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

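The rewrite swaps runtime += concatenation for preprocessor assembly: adjacent string literals are merged at translation time, so the entire version string becomes a single constexpr character array and VersionString() is reduced to returning a view of it. The same technique in isolation (all macro names here are illustrative, not Ceres macros):

#define MY_TO_STRING_HELPER(x) #x
#define MY_TO_STRING(x) MY_TO_STRING_HELPER(x)

#define MY_VERSION_MAJOR 2
#define MY_VERSION_MINOR 2

// Adjacent string literals merge into one array at compile time.
constexpr char kMyVersion[] =
    "mylib-" MY_TO_STRING(MY_VERSION_MAJOR) "." MY_TO_STRING(MY_VERSION_MINOR)
#ifdef MY_WITH_CUDA
    "-cuda"
#endif
    ;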
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,15 +32,14 @@
#define CERES_INTERNAL_SOLVER_UTILS_H_
#include <algorithm>
#include <string>
#include <string_view>
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/iteration_callback.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
template <typename SummaryType>
bool IsSolutionUsable(const SummaryType& summary) {
@@ -61,10 +60,9 @@ void SetSummaryFinalCost(SummaryType* summary) {
}
CERES_NO_EXPORT
std::string VersionString();
std::string_view VersionString() noexcept;
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,30 +31,28 @@
#include "ceres/sparse_cholesky.h"
#include <memory>
#include <utility>
#include "ceres/accelerate_sparse.h"
#include "ceres/cxsparse.h"
#include "ceres/eigensparse.h"
#include "ceres/float_cxsparse.h"
#include "ceres/float_suitesparse.h"
#include "ceres/iterative_refiner.h"
#include "ceres/suitesparse.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
std::unique_ptr<SparseCholesky> SparseCholesky::Create(
const LinearSolver::Options& options) {
const OrderingType ordering_type = options.use_postordering ? AMD : NATURAL;
std::unique_ptr<SparseCholesky> sparse_cholesky;
switch (options.sparse_linear_algebra_library_type) {
case SUITE_SPARSE:
#ifndef CERES_NO_SUITESPARSE
if (options.use_mixed_precision_solves) {
sparse_cholesky = FloatSuiteSparseCholesky::Create(ordering_type);
sparse_cholesky =
FloatSuiteSparseCholesky::Create(options.ordering_type);
} else {
sparse_cholesky = SuiteSparseCholesky::Create(ordering_type);
sparse_cholesky = SuiteSparseCholesky::Create(options.ordering_type);
}
break;
#else
@@ -64,9 +62,10 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
case EIGEN_SPARSE:
#ifdef CERES_USE_EIGEN_SPARSE
if (options.use_mixed_precision_solves) {
sparse_cholesky = FloatEigenSparseCholesky::Create(ordering_type);
sparse_cholesky =
FloatEigenSparseCholesky::Create(options.ordering_type);
} else {
sparse_cholesky = EigenSparseCholesky::Create(ordering_type);
sparse_cholesky = EigenSparseCholesky::Create(options.ordering_type);
}
break;
#else
@@ -74,25 +73,14 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
<< "Eigen's sparse Cholesky factorization routines.";
#endif
case CX_SPARSE:
#ifndef CERES_NO_CXSPARSE
if (options.use_mixed_precision_solves) {
sparse_cholesky = FloatCXSparseCholesky::Create(ordering_type);
} else {
sparse_cholesky = CXSparseCholesky::Create(ordering_type);
}
break;
#else
LOG(FATAL) << "Ceres was compiled without support for CXSparse.";
#endif
case ACCELERATE_SPARSE:
#ifndef CERES_NO_ACCELERATE_SPARSE
if (options.use_mixed_precision_solves) {
sparse_cholesky = AppleAccelerateCholesky<float>::Create(ordering_type);
sparse_cholesky =
AppleAccelerateCholesky<float>::Create(options.ordering_type);
} else {
sparse_cholesky =
AppleAccelerateCholesky<double>::Create(ordering_type);
AppleAccelerateCholesky<double>::Create(options.ordering_type);
}
break;
#else
@@ -107,10 +95,10 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
}
if (options.max_num_refinement_iterations > 0) {
std::unique_ptr<IterativeRefiner> refiner(
new IterativeRefiner(options.max_num_refinement_iterations));
sparse_cholesky = std::unique_ptr<SparseCholesky>(new RefinedSparseCholesky(
std::move(sparse_cholesky), std::move(refiner)));
auto refiner = std::make_unique<SparseIterativeRefiner>(
options.max_num_refinement_iterations);
sparse_cholesky = std::make_unique<RefinedSparseCholesky>(
std::move(sparse_cholesky), std::move(refiner));
}
return sparse_cholesky;
}
@@ -123,7 +111,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve(
double* solution,
std::string* message) {
LinearSolverTerminationType termination_type = Factorize(lhs, message);
if (termination_type == LINEAR_SOLVER_SUCCESS) {
if (termination_type == LinearSolverTerminationType::SUCCESS) {
termination_type = Solve(rhs, solution, message);
}
return termination_type;
@@ -131,7 +119,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve(
RefinedSparseCholesky::RefinedSparseCholesky(
std::unique_ptr<SparseCholesky> sparse_cholesky,
std::unique_ptr<IterativeRefiner> iterative_refiner)
std::unique_ptr<SparseIterativeRefiner> iterative_refiner)
: sparse_cholesky_(std::move(sparse_cholesky)),
iterative_refiner_(std::move(iterative_refiner)) {}
@@ -153,13 +141,12 @@ LinearSolverTerminationType RefinedSparseCholesky::Solve(const double* rhs,
std::string* message) {
CHECK(lhs_ != nullptr);
auto termination_type = sparse_cholesky_->Solve(rhs, solution, message);
if (termination_type != LINEAR_SOLVER_SUCCESS) {
if (termination_type != LinearSolverTerminationType::SUCCESS) {
return termination_type;
}
iterative_refiner_->Refine(*lhs_, rhs, sparse_cholesky_.get(), solution);
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

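Visible throughout this commit: the unscoped LINEAR_SOLVER_* constants are replaced by enumerators of a scoped enum, which no longer leak into the enclosing namespace and do not convert implicitly to int. The pattern in miniature (a sketch analogous to, but not, the actual LinearSolverTerminationType definition):

enum class TerminationType { SUCCESS, FAILURE, FATAL_ERROR };

bool Succeeded(TerminationType t) {
  // Enumerators must be qualified with the enum name.
  return t == TerminationType::SUCCESS;
}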
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/linear_solver.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// An interface that abstracts away the internal details of various
// sparse linear algebra libraries and offers a simple API for solving
@@ -63,11 +62,12 @@ namespace internal {
//
// CompressedRowSparseMatrix lhs = ...;
// std::string message;
// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message), LINEAR_SOLVER_SUCCESS);
// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message),
// LinearSolverTerminationType::SUCCESS);
// Vector rhs = ...;
// Vector solution = ...;
// CHECK_EQ(sparse_cholesky->Solve(rhs.data(), solution.data(), &message),
// LINEAR_SOLVER_SUCCESS);
// LinearSolverTerminationType::SUCCESS);
class CERES_NO_EXPORT SparseCholesky {
public:
@@ -105,21 +105,22 @@ class CERES_NO_EXPORT SparseCholesky {
// Convenience method which combines a call to Factorize and
// Solve. Solve is only called if Factorize returns
// LINEAR_SOLVER_SUCCESS.
// LinearSolverTerminationType::SUCCESS.
LinearSolverTerminationType FactorAndSolve(CompressedRowSparseMatrix* lhs,
const double* rhs,
double* solution,
std::string* message);
};
class IterativeRefiner;
class SparseIterativeRefiner;
// Computes an initial solution using the given instance of
// SparseCholesky, and then refines it using the IterativeRefiner.
// SparseCholesky, and then refines it using the SparseIterativeRefiner.
class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky {
public:
RefinedSparseCholesky(std::unique_ptr<SparseCholesky> sparse_cholesky,
std::unique_ptr<IterativeRefiner> iterative_refiner);
RefinedSparseCholesky(
std::unique_ptr<SparseCholesky> sparse_cholesky,
std::unique_ptr<SparseIterativeRefiner> iterative_refiner);
~RefinedSparseCholesky() override;
CompressedRowSparseMatrix::StorageType StorageType() const override;
@@ -131,12 +132,11 @@ class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky {
private:
std::unique_ptr<SparseCholesky> sparse_cholesky_;
std::unique_ptr<IterativeRefiner> iterative_refiner_;
std::unique_ptr<SparseIterativeRefiner> iterative_refiner_;
CompressedRowSparseMatrix* lhs_ = nullptr;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

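RefinedSparseCholesky is a decorator: it owns an inner SparseCholesky plus a SparseIterativeRefiner and itself implements the SparseCholesky interface, so callers are oblivious to whether refinement happens. A hedged sketch of the composition, mirroring the factory logic in sparse_cholesky.cc (the helper function name is illustrative):

std::unique_ptr<SparseCholesky> MaybeAddRefinement(
    std::unique_ptr<SparseCholesky> cholesky,
    int max_num_refinement_iterations) {
  if (max_num_refinement_iterations > 0) {
    auto refiner = std::make_unique<SparseIterativeRefiner>(
        max_num_refinement_iterations);
    cholesky = std::make_unique<RefinedSparseCholesky>(std::move(cholesky),
                                                       std::move(refiner));
  }
  return cholesky;  // callers see only the SparseCholesky interface
}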
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,10 +30,24 @@
#include "ceres/sparse_matrix.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
SparseMatrix::~SparseMatrix() = default;
} // namespace internal
} // namespace ceres
void SparseMatrix::SquaredColumnNorm(double* x,
ContextImpl* context,
int num_threads) const {
(void)context;
(void)num_threads;
SquaredColumnNorm(x);
}
void SparseMatrix::ScaleColumns(const double* scale,
ContextImpl* context,
int num_threads) {
(void)context;
(void)num_threads;
ScaleColumns(scale);
}
} // namespace ceres::internal

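The new overloads give SparseMatrix a context-aware API without forcing every subclass to implement threading: the base class simply forwards to the mandatory single-threaded virtual, and subclasses that can parallelize override the three-argument form. The shape of the pattern in isolation (Context and Matrix are stand-ins for ContextImpl and SparseMatrix):

struct Context;  // stand-in for ceres::internal::ContextImpl

class Matrix {
 public:
  virtual ~Matrix() = default;

  // Mandatory single-threaded implementation.
  virtual void ScaleColumns(const double* scale) = 0;

  // Optional threaded entry point; defaults to the serial path. Overriding
  // only one overload in a subclass hides the other, which is why
  // sparse_matrix.h re-exposes RightMultiplyAndAccumulate with a
  // using-declaration.
  virtual void ScaleColumns(const double* scale, Context* /*context*/,
                            int /*num_threads*/) {
    ScaleColumns(scale);
  }
};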
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,8 @@
#include "ceres/linear_operator.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class ContextImpl;
// This class defines the interface for storing and manipulating
// sparse matrices. The key property that differentiates different
@@ -69,18 +69,30 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator {
~SparseMatrix() override;
// y += Ax;
void RightMultiply(const double* x, double* y) const override = 0;
using LinearOperator::RightMultiplyAndAccumulate;
void RightMultiplyAndAccumulate(const double* x,
double* y) const override = 0;
// y += A'x;
void LeftMultiply(const double* x, double* y) const override = 0;
void LeftMultiplyAndAccumulate(const double* x, double* y) const override = 0;
// In MATLAB notation sum(A.*A, 1)
virtual void SquaredColumnNorm(double* x) const = 0;
virtual void SquaredColumnNorm(double* x,
ContextImpl* context,
int num_threads) const;
// A = A * diag(scale)
virtual void ScaleColumns(const double* scale) = 0;
virtual void ScaleColumns(const double* scale,
ContextImpl* context,
int num_threads);
// A = 0. A->num_nonzeros() == 0 is true after this call. The
// sparsity pattern is preserved.
virtual void SetZero() = 0;
virtual void SetZero(ContextImpl* /*context*/, int /*num_threads*/) {
SetZero();
}
// Resize and populate dense_matrix with a dense version of the
// sparse matrix.
@@ -103,7 +115,6 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator {
virtual int num_nonzeros() const = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SPARSE_MATRIX_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,7 @@
#include "ceres/types.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
SparseNormalCholeskySolver::SparseNormalCholeskySolver(
const LinearSolver::Options& options)
@@ -64,7 +63,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
EventLogger event_logger("SparseNormalCholeskySolver::Solve");
LinearSolver::Summary summary;
summary.num_iterations = 1;
summary.termination_type = LINEAR_SOLVER_SUCCESS;
summary.termination_type = LinearSolverTerminationType::SUCCESS;
summary.message = "Success.";
const int num_cols = A->num_cols();
@@ -72,7 +71,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
xref.setZero();
rhs_.resize(num_cols);
rhs_.setZero();
A->LeftMultiply(b, rhs_.data());
A->LeftMultiplyAndAccumulate(b, rhs_.data());
event_logger.AddEvent("Compute RHS");
if (per_solve_options.D != nullptr) {
@@ -110,5 +109,4 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
return summary;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CompressedRowSparseMatrix;
class InnerProductComputer;
@@ -75,7 +74,6 @@ class CERES_NO_EXPORT SparseNormalCholeskySolver
std::unique_ptr<InnerProductComputer> inner_product_computer_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,9 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
using std::string;
void StringAppendV(string* dst, const char* format, va_list ap) {
void StringAppendV(std::string* dst, const char* format, va_list ap) {
// First try with a small fixed size buffer
char space[1024];
@@ -93,16 +90,16 @@ void StringAppendV(string* dst, const char* format, va_list ap) {
delete[] buf;
}
string StringPrintf(const char* format, ...) {
std::string StringPrintf(const char* format, ...) {
va_list ap;
va_start(ap, format);
string result;
std::string result;
StringAppendV(&result, format, ap);
va_end(ap);
return result;
}
const string& SStringPrintf(string* dst, const char* format, ...) {
const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
va_list ap;
va_start(ap, format);
dst->clear();
@@ -111,12 +108,11 @@ const string& SStringPrintf(string* dst, const char* format, ...) {
return *dst;
}
void StringAppendF(string* dst, const char* format, ...) {
void StringAppendF(std::string* dst, const char* format, ...) {
va_list ap;
va_start(ap, format);
StringAppendV(dst, format, ap);
va_end(ap);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

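StringAppendV follows the classic two-pass vsnprintf idiom hinted at by the "First try with a small fixed size buffer" comment: format into a stack buffer, and only when the result does not fit, allocate exactly the reported size and format again. A condensed sketch of the idiom, not the Ceres implementation itself (note the va_list must be copied before the first pass, since vsnprintf consumes it):

#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

void AppendV(std::string* dst, const char* format, va_list ap) {
  char space[1024];
  va_list backup;
  va_copy(backup, ap);
  const int needed = std::vsnprintf(space, sizeof(space), format, backup);
  va_end(backup);
  if (needed < 0) {
    return;  // formatting error
  }
  if (static_cast<std::size_t>(needed) < sizeof(space)) {
    dst->append(space, needed);
    return;
  }
  // Result was truncated; allocate the exact size (+1 for the NUL).
  std::vector<char> buf(needed + 1);
  std::vsnprintf(buf.data(), buf.size(), format, ap);
  dst->append(buf.data(), needed);
}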
View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,8 +44,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
#if (defined(__GNUC__) || defined(__clang__))
// Tell the compiler to do printf format string checking if the compiler
@@ -90,8 +89,7 @@ CERES_NO_EXPORT extern void StringAppendV(std::string* dst,
#undef CERES_PRINTF_ATTRIBUTE
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,7 @@
#include "ceres/sparse_cholesky.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options,
const BlockSparseMatrix& A)
@@ -52,13 +51,14 @@ SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options,
LinearSolver::Options sparse_cholesky_options;
sparse_cholesky_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
sparse_cholesky_options.use_postordering = options_.use_postordering;
sparse_cholesky_options.ordering_type = options_.ordering_type;
sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options);
}
SubsetPreconditioner::~SubsetPreconditioner() = default;
void SubsetPreconditioner::RightMultiply(const double* x, double* y) const {
void SubsetPreconditioner::RightMultiplyAndAccumulate(const double* x,
double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
std::string message;
@@ -106,7 +106,7 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
const LinearSolverTerminationType termination_type =
sparse_cholesky_->Factorize(inner_product_computer_->mutable_result(),
&message);
if (termination_type != LINEAR_SOLVER_SUCCESS) {
if (termination_type != LinearSolverTerminationType::SUCCESS) {
LOG(ERROR) << "Preconditioner factorization failed: " << message;
return false;
}
@@ -114,5 +114,4 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/internal/export.h"
#include "ceres/preconditioner.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockSparseMatrix;
class SparseCholesky;
@@ -76,7 +75,7 @@ class CERES_NO_EXPORT SubsetPreconditioner
~SubsetPreconditioner() override;
// Preconditioner interface
void RightMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
int num_rows() const final { return num_cols_; }
int num_cols() const final { return num_cols_; }
@@ -89,8 +88,7 @@ class CERES_NO_EXPORT SubsetPreconditioner
std::unique_ptr<InnerProductComputer> inner_product_computer_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,9 @@
#include "ceres/internal/config.h"
#ifndef CERES_NO_SUITESPARSE
#include <memory>
#include <string>
#include <vector>
#include "ceres/compressed_col_sparse_matrix_utils.h"
@@ -42,11 +44,24 @@
#include "ceres/triplet_sparse_matrix.h"
#include "cholmod.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
int OrderingTypeToCHOLMODEnum(OrderingType ordering_type) {
if (ordering_type == OrderingType::AMD) {
return CHOLMOD_AMD;
}
if (ordering_type == OrderingType::NESDIS) {
return CHOLMOD_NESDIS;
}
using std::string;
using std::vector;
if (ordering_type == OrderingType::NATURAL) {
return CHOLMOD_NATURAL;
}
LOG(FATAL) << "Congratulations you have discovered a bug in Ceres Solver. "
<< "Please report it to the developers. " << ordering_type;
return -1;
}
} // namespace
SuiteSparse::SuiteSparse() { cholmod_start(&cc_); }
@@ -103,9 +118,11 @@ cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView(
m.x = reinterpret_cast<void*>(A->mutable_values());
m.z = nullptr;
if (A->storage_type() == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
if (A->storage_type() ==
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
m.stype = 1;
} else if (A->storage_type() == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
} else if (A->storage_type() ==
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
m.stype = -1;
} else {
m.stype = 0;
@@ -144,19 +161,18 @@ cholmod_dense* SuiteSparse::CreateDenseVector(const double* x,
}
cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A,
string* message) {
// Cholmod can try multiple re-ordering strategies to find a fill
// reducing ordering. Here we just tell it use AMD with automatic
// matrix dependence choice of supernodal versus simplicial
// factorization.
OrderingType ordering_type,
std::string* message) {
cc_.nmethods = 1;
cc_.method[0].ordering = CHOLMOD_AMD;
cc_.supernodal = CHOLMOD_AUTO;
cc_.method[0].ordering = OrderingTypeToCHOLMODEnum(ordering_type);
// postordering with a NATURAL ordering leads to a significant regression in
// performance. See https://github.com/ceres-solver/ceres-solver/issues/905
if (ordering_type == OrderingType::NATURAL) {
cc_.postorder = 0;
}
cholmod_factor* factor = cholmod_analyze(A, &cc_);
if (VLOG_IS_ON(2)) {
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
}
if (cc_.status != CHOLMOD_OK) {
*message =
@@ -165,32 +181,22 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A,
}
CHECK(factor != nullptr);
if (VLOG_IS_ON(2)) {
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
}
return factor;
}
cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(cholmod_sparse* A,
const vector<int>& row_blocks,
const vector<int>& col_blocks,
string* message) {
vector<int> ordering;
if (!BlockAMDOrdering(A, row_blocks, col_blocks, &ordering)) {
return nullptr;
}
return AnalyzeCholeskyWithUserOrdering(A, ordering, message);
}
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
cholmod_sparse* A, const vector<int>& ordering, string* message) {
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithGivenOrdering(
cholmod_sparse* A, const std::vector<int>& ordering, std::string* message) {
CHECK_EQ(ordering.size(), A->nrow);
cc_.nmethods = 1;
cc_.method[0].ordering = CHOLMOD_GIVEN;
cholmod_factor* factor =
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), nullptr, 0, &cc_);
if (VLOG_IS_ON(2)) {
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
}
cholmod_analyze_p(A, const_cast<int*>(ordering.data()), nullptr, 0, &cc_);
if (cc_.status != CHOLMOD_OK) {
*message =
StringPrintf("cholmod_analyze failed. error code: %d", cc_.status);
@@ -198,40 +204,33 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
}
CHECK(factor != nullptr);
return factor;
}
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithNaturalOrdering(
cholmod_sparse* A, string* message) {
cc_.nmethods = 1;
cc_.method[0].ordering = CHOLMOD_NATURAL;
cc_.postorder = 0;
cholmod_factor* factor = cholmod_analyze(A, &cc_);
if (VLOG_IS_ON(2)) {
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
}
if (cc_.status != CHOLMOD_OK) {
*message =
StringPrintf("cholmod_analyze failed. error code: %d", cc_.status);
return nullptr;
}
CHECK(factor != nullptr);
return factor;
}
bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
const vector<int>& row_blocks,
const vector<int>& col_blocks,
vector<int>* ordering) {
bool SuiteSparse::BlockOrdering(const cholmod_sparse* A,
OrderingType ordering_type,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::vector<int>* ordering) {
if (ordering_type == OrderingType::NATURAL) {
ordering->resize(A->nrow);
for (int i = 0; i < A->nrow; ++i) {
(*ordering)[i] = i;
}
return true;
}
const int num_row_blocks = row_blocks.size();
const int num_col_blocks = col_blocks.size();
// Arrays storing the compressed column structure of the matrix
// incoding the block sparsity of A.
vector<int> block_cols;
vector<int> block_rows;
// encoding the block sparsity of A.
std::vector<int> block_cols;
std::vector<int> block_rows;
CompressedColumnScalarMatrixToBlockMatrix(reinterpret_cast<const int*>(A->i),
reinterpret_cast<const int*>(A->p),
@@ -243,8 +242,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
block_matrix.nrow = num_row_blocks;
block_matrix.ncol = num_col_blocks;
block_matrix.nzmax = block_rows.size();
block_matrix.p = reinterpret_cast<void*>(&block_cols[0]);
block_matrix.i = reinterpret_cast<void*>(&block_rows[0]);
block_matrix.p = reinterpret_cast<void*>(block_cols.data());
block_matrix.i = reinterpret_cast<void*>(block_rows.data());
block_matrix.x = nullptr;
block_matrix.stype = A->stype;
block_matrix.itype = CHOLMOD_INT;
@@ -253,8 +252,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
block_matrix.sorted = 1;
block_matrix.packed = 1;
vector<int> block_ordering(num_row_blocks);
if (!cholmod_amd(&block_matrix, nullptr, 0, &block_ordering[0], &cc_)) {
std::vector<int> block_ordering(num_row_blocks);
if (!Ordering(&block_matrix, ordering_type, block_ordering.data())) {
return false;
}
@@ -262,9 +261,22 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
return true;
}
cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(
cholmod_sparse* A,
OrderingType ordering_type,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::string* message) {
std::vector<int> ordering;
if (!BlockOrdering(A, ordering_type, row_blocks, col_blocks, &ordering)) {
return nullptr;
}
return AnalyzeCholeskyWithGivenOrdering(A, ordering, message);
}
LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A,
cholmod_factor* L,
string* message) {
std::string* message) {
CHECK(A != nullptr);
CHECK(L != nullptr);
@@ -282,48 +294,48 @@ LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A,
switch (cc_.status) {
case CHOLMOD_NOT_INSTALLED:
*message = "CHOLMOD failure: Method not installed.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
case CHOLMOD_OUT_OF_MEMORY:
*message = "CHOLMOD failure: Out of memory.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
case CHOLMOD_TOO_LARGE:
*message = "CHOLMOD failure: Integer overflow occurred.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
case CHOLMOD_INVALID:
*message = "CHOLMOD failure: Invalid input.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
case CHOLMOD_NOT_POSDEF:
*message = "CHOLMOD warning: Matrix not positive definite.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
case CHOLMOD_DSMALL:
*message =
"CHOLMOD warning: D for LDL' or diag(L) or "
"LL' has tiny absolute value.";
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
case CHOLMOD_OK:
if (cholmod_status != 0) {
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
*message =
"CHOLMOD failure: cholmod_factorize returned false "
"but cholmod_common::status is CHOLMOD_OK."
"Please report this to ceres-solver@googlegroups.com.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
default:
*message = StringPrintf(
"Unknown cholmod return code: %d. "
"Please report this to ceres-solver@googlegroups.com.",
cc_.status);
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
cholmod_dense* SuiteSparse::Solve(cholmod_factor* L,
cholmod_dense* b,
string* message) {
std::string* message) {
if (cc_.status != CHOLMOD_OK) {
*message = "cholmod_solve failed. CHOLMOD status is not CHOLMOD_OK";
return nullptr;
@@ -332,22 +344,34 @@ cholmod_dense* SuiteSparse::Solve(cholmod_factor* L,
return cholmod_solve(CHOLMOD_A, L, b, &cc_);
}
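For orientation, the analyze/factorize/solve methods in this file compose into the usual direct-solver pipeline: symbolic analysis once, then numeric factorization and back-substitution per system. A minimal sketch, assuming a SuiteSparse instance ss, a valid cholmod_sparse* A and cholmod_dense* b (error handling elided):
  std::string message;
  // Symbolic analysis: fill reducing ordering + elimination tree.
  cholmod_factor* factor = ss.AnalyzeCholesky(A, OrderingType::AMD, &message);
  if (factor != nullptr &&
      ss.Cholesky(A, factor, &message) ==
          LinearSolverTerminationType::SUCCESS) {  // numeric factorization
    cholmod_dense* x = ss.Solve(factor, b, &message);  // solves A x = b
    ss.Free(x);
  }
  ss.Free(factor);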
bool SuiteSparse::ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix,
int* ordering) {
return cholmod_amd(matrix, nullptr, 0, ordering, &cc_);
bool SuiteSparse::Ordering(cholmod_sparse* matrix,
OrderingType ordering_type,
int* ordering) {
CHECK_NE(ordering_type, OrderingType::NATURAL);
if (ordering_type == OrderingType::AMD) {
return cholmod_amd(matrix, nullptr, 0, ordering, &cc_);
}
#ifdef CERES_NO_CHOLMOD_PARTITION
return false;
#else
std::vector<int> CParent(matrix->nrow, 0);
std::vector<int> CMember(matrix->nrow, 0);
return cholmod_nested_dissection(
matrix, nullptr, 0, ordering, CParent.data(), CMember.data(), &cc_);
#endif
}
bool SuiteSparse::ConstrainedApproximateMinimumDegreeOrdering(
cholmod_sparse* matrix, int* constraints, int* ordering) {
#ifndef CERES_NO_CAMD
return cholmod_camd(matrix, nullptr, 0, constraints, ordering, &cc_);
#else
LOG(FATAL) << "Congratulations you have found a bug in Ceres."
<< "Ceres Solver was compiled with SuiteSparse "
<< "version 4.1.0 or less. Calling this function "
<< "in that case is a bug. Please contact the"
<< "the Ceres Solver developers.";
}
bool SuiteSparse::IsNestedDissectionAvailable() {
#ifdef CERES_NO_CHOLMOD_PARTITION
return false;
#else
return true;
#endif
}
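Since nested dissection is only available when CHOLMOD is built with METIS (CERES_NO_CHOLMOD_PARTITION otherwise), callers would typically pick the ordering type at runtime. A hedged sketch, assuming a SuiteSparse instance ss and a cholmod_sparse* matrix:
  OrderingType ordering_type = OrderingType::AMD;
  if (SuiteSparse::IsNestedDissectionAvailable()) {
    // NESDIS can yield lower fill-in on large problems, at higher ordering cost.
    ordering_type = OrderingType::NESDIS;
  }
  std::vector<int> ordering(matrix->nrow);
  if (!ss.Ordering(matrix, ordering_type, ordering.data())) {
    // Ordering failed; fall back to AMD or report the error.
  }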
@@ -367,48 +391,61 @@ SuiteSparseCholesky::~SuiteSparseCholesky() {
}
LinearSolverTerminationType SuiteSparseCholesky::Factorize(
CompressedRowSparseMatrix* lhs, string* message) {
CompressedRowSparseMatrix* lhs, std::string* message) {
if (lhs == nullptr) {
*message = "Failure: Input lhs is nullptr.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
cholmod_sparse cholmod_lhs = ss_.CreateSparseMatrixTransposeView(lhs);
// If a factorization does not exist, compute the symbolic
// factorization first.
//
// If the ordering type is NATURAL, then there is no fill reducing
// ordering to be computed, regardless of block structure, so we can
// just call the scalar version of symbolic factorization. For
// SuiteSparse this is the common case since we have already
// pre-ordered the columns of the Jacobian.
//
// Similarly, regardless of ordering type, if there is no block
// structure in the matrix we call the scalar version of symbolic
// factorization.
if (factor_ == nullptr) {
if (ordering_type_ == NATURAL) {
factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(&cholmod_lhs, message);
if (ordering_type_ == OrderingType::NATURAL ||
(lhs->col_blocks().empty() || lhs->row_blocks().empty())) {
factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, ordering_type_, message);
} else {
if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) {
factor_ = ss_.BlockAnalyzeCholesky(
&cholmod_lhs, lhs->col_blocks(), lhs->row_blocks(), message);
} else {
factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, message);
}
}
if (factor_ == nullptr) {
return LINEAR_SOLVER_FATAL_ERROR;
factor_ = ss_.BlockAnalyzeCholesky(&cholmod_lhs,
ordering_type_,
lhs->col_blocks(),
lhs->row_blocks(),
message);
}
}
if (factor_ == nullptr) {
return LinearSolverTerminationType::FATAL_ERROR;
}
// Compute and return the numeric factorization.
return ss_.Cholesky(&cholmod_lhs, factor_, message);
}
CompressedRowSparseMatrix::StorageType SuiteSparseCholesky::StorageType()
const {
return ((ordering_type_ == NATURAL)
? CompressedRowSparseMatrix::UPPER_TRIANGULAR
: CompressedRowSparseMatrix::LOWER_TRIANGULAR);
return ((ordering_type_ == OrderingType::NATURAL)
? CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR
: CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
}
LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs,
double* solution,
string* message) {
std::string* message) {
// Error checking
if (factor_ == nullptr) {
*message = "Solve called without a call to Factorize first.";
return LINEAR_SOLVER_FATAL_ERROR;
return LinearSolverTerminationType::FATAL_ERROR;
}
const int num_cols = factor_->n;
@@ -417,15 +454,14 @@ LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs,
ss_.Solve(factor_, &cholmod_rhs, message);
if (cholmod_dense_solution == nullptr) {
return LINEAR_SOLVER_FAILURE;
return LinearSolverTerminationType::FAILURE;
}
memcpy(solution, cholmod_dense_solution->x, num_cols * sizeof(*solution));
ss_.Free(cholmod_dense_solution);
return LINEAR_SOLVER_SUCCESS;
return LinearSolverTerminationType::SUCCESS;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_NO_SUITESPARSE

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,37 +44,14 @@
#include <vector>
#include "SuiteSparseQR.hpp"
#include "ceres/block_structure.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/linear_solver.h"
#include "ceres/sparse_cholesky.h"
#include "cholmod.h"
#include "glog/logging.h"
// Before SuiteSparse version 4.2.0, cholmod_camd was only enabled
// if SuiteSparse was compiled with Metis support. This makes
// calling and linking into cholmod_camd problematic even though it
// has nothing to do with Metis. This has been fixed reliably in
// 4.2.0.
//
// The fix was actually committed in 4.1.0, but there is
// some confusion about a silent update to the tar ball, so we are
// being conservative and choosing the next minor version where
// things are stable.
#if (SUITESPARSE_VERSION < 4002)
#define CERES_NO_CAMD
#endif
// UF_long is deprecated but SuiteSparse_long is only available in
// newer versions of SuiteSparse. So for older versions of
// SuiteSparse, we define SuiteSparse_long to be the same as UF_long,
// which is what recent versions of SuiteSparse do anyways.
#ifndef SuiteSparse_long
#define SuiteSparse_long UF_long
#endif
#include "ceres/internal/disable_warnings.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CompressedRowSparseMatrix;
class TripletSparseMatrix;
@@ -91,7 +68,7 @@ class CERES_NO_EXPORT SuiteSparse {
// Functions for building cholmod_sparse objects from sparse
// matrices stored in triplet form. The matrix A is not
// modifed. Called owns the result.
// modified. Caller owns the result.
cholmod_sparse* CreateSparseMatrix(TripletSparseMatrix* A);
// This function works like CreateSparseMatrix, except that the
@@ -142,12 +119,11 @@ class CERES_NO_EXPORT SuiteSparse {
cholmod_sdmult(A, 0, alpha_, beta_, x, y, &cc_);
}
// Find an ordering of A or AA' (if A is unsymmetric) that minimizes
// the fill-in in the Cholesky factorization of the corresponding
// matrix. This is done by using the AMD algorithm.
//
// Using this ordering, the symbolic Cholesky factorization of A (or
// AA') is computed and returned.
// Compute a symbolic factorization for A or AA' (if A is
// unsymmetric). If ordering_type is NATURAL, then no fill reducing
// ordering is computed; otherwise, depending on the value of
// ordering_type, AMD or nested dissection is used to compute a fill
// reducing ordering before the symbolic factorization is computed.
//
// A is not modified, only the pattern of non-zeros of A is used,
// the actual numerical values in A are of no consequence.
@@ -155,11 +131,15 @@ class CERES_NO_EXPORT SuiteSparse {
// message contains an explanation of the failures if any.
//
// Caller owns the result.
cholmod_factor* AnalyzeCholesky(cholmod_sparse* A, std::string* message);
cholmod_factor* AnalyzeCholesky(cholmod_sparse* A,
OrderingType ordering_type,
std::string* message);
// Block oriented version of AnalyzeCholesky.
cholmod_factor* BlockAnalyzeCholesky(cholmod_sparse* A,
const std::vector<int>& row_blocks,
const std::vector<int>& col_blocks,
OrderingType ordering_type,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::string* message);
// If A is symmetric, then compute the symbolic Cholesky
@@ -173,20 +153,11 @@ class CERES_NO_EXPORT SuiteSparse {
// message contains an explanation of the failures if any.
//
// Caller owns the result.
cholmod_factor* AnalyzeCholeskyWithUserOrdering(
cholmod_factor* AnalyzeCholeskyWithGivenOrdering(
cholmod_sparse* A,
const std::vector<int>& ordering,
std::string* message);
// Perform a symbolic factorization of A without re-ordering A. No
// postordering of the elimination tree is performed. This ensures
// that the symbolic factor does not introduce an extra permutation
// on the matrix. See the documentation for CHOLMOD for more details.
//
// message contains an explanation of the failures if any.
cholmod_factor* AnalyzeCholeskyWithNaturalOrdering(cholmod_sparse* A,
std::string* message);
// Use the symbolic factorization in L, to find the numerical
// factorization for the matrix A or AA^T. Return true if
// successful, false otherwise. L contains the numeric factorization
@@ -206,51 +177,39 @@ class CERES_NO_EXPORT SuiteSparse {
cholmod_dense* b,
std::string* message);
// Find a fill reducing ordering. ordering is expected to be large
// enough to hold the ordering. ordering_type must be AMD or NESDIS.
bool Ordering(cholmod_sparse* matrix,
OrderingType ordering_type,
int* ordering);
// Find the block oriented fill reducing ordering of a matrix A,
// whose row and column blocks are given by row_blocks, and
// col_blocks respectively. The matrix may or may not be
// symmetric. The entries of col_blocks do not need to sum to the
// number of columns in A. If this is the case, only the first
// sum(col_blocks) are used to compute the ordering.
//
// By virtue of the modeling layer in Ceres being block oriented,
// all the matrices used by Ceres are also block oriented. When
// doing sparse direct factorization of these matrices the
// fill-reducing ordering algorithms (in particular AMD) can either
// be run on the block or the scalar form of these matrices. The two
// SuiteSparse::AnalyzeCholesky methods allow the client to
// compute the symbolic factorization of a matrix by either using
// AMD on the matrix or a user provided ordering of the rows.
//
// But since the underlying matrices are block oriented, it is worth
// running AMD on just the block structure of these matrices and then
// lifting these block orderings to a full scalar ordering. This
// preserves the block structure of the permuted matrix, and exposes
// more of the super-nodal structure of the matrix to the numerical
// factorization routines.
//
// Find the block oriented AMD ordering of a matrix A, whose row and
// column blocks are given by row_blocks, and col_blocks
// respectively. The matrix may or may not be symmetric. The entries
// of col_blocks do not need to sum to the number of columns in
// A. If this is the case, only the first sum(col_blocks) are used
// to compute the ordering.
bool BlockAMDOrdering(const cholmod_sparse* A,
const std::vector<int>& row_blocks,
const std::vector<int>& col_blocks,
std::vector<int>* ordering);
// fill-reducing ordering algorithms can either be run on the block
// or the scalar form of these matrices. But since the underlying
// matrices are block oriented, it is worth running the fill
// reducing ordering on just the block structure of these matrices
// and then lifting these block orderings to a full scalar
// ordering. This preserves the block structure of the permuted
// matrix, and exposes more of the super-nodal structure of the
// matrix to the numerical factorization routines.
bool BlockOrdering(const cholmod_sparse* A,
OrderingType ordering_type,
const std::vector<Block>& row_blocks,
const std::vector<Block>& col_blocks,
std::vector<int>* ordering);
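The lifting the comment describes is mechanical: permute whole blocks, then expand each block into its run of scalar column indices. A sketch, under the assumption that Block exposes its size and position (illustrative, not the actual Ceres helper):
  // Expand a block permutation into the equivalent scalar permutation.
  std::vector<int> scalar_ordering;
  for (const int block_id : block_ordering) {
    const Block& block = blocks[block_id];
    for (int i = 0; i < block.size; ++i) {
      scalar_ordering.push_back(block.position + i);
    }
  }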
// Find a fill reducing approximate minimum degree
// ordering. ordering is expected to be large enough to hold the
// ordering.
bool ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, int* ordering);
// Before SuiteSparse version 4.2.0, cholmod_camd was only enabled
// if SuiteSparse was compiled with Metis support. This makes
// calling and linking into cholmod_camd problematic even though it
// has nothing to do with Metis. This has been fixed reliably in
// 4.2.0.
//
// The fix was actually committed in 4.1.0, but there is
// some confusion about a silent update to the tar ball, so we are
// being conservative and choosing the next minor version where
// things are stable.
static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() {
return (SUITESPARSE_VERSION > 4001);
}
// Nested dissection is only available if SuiteSparse is compiled
// with Metis support.
static bool IsNestedDissectionAvailable();
// Find a fill reducing approximate minimum degree
// ordering. constraints is an array which associates with each
@@ -262,9 +221,6 @@ class CERES_NO_EXPORT SuiteSparse {
// Calling ApproximateMinimumDegreeOrdering is equivalent to calling
// ConstrainedApproximateMinimumDegreeOrdering with a constraint
// array that puts all columns in the same elimination group.
//
// If CERES_NO_CAMD is defined then calling this function will
// result in a crash.
bool ConstrainedApproximateMinimumDegreeOrdering(cholmod_sparse* matrix,
int* constraints,
int* ordering);
@@ -312,14 +268,13 @@ class CERES_NO_EXPORT SuiteSparseCholesky final : public SparseCholesky {
cholmod_factor* factor_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
#else // CERES_NO_SUITESPARSE
typedef void cholmod_factor;
using cholmod_factor = void;
#include "ceres/internal/disable_warnings.h"
@@ -328,17 +283,9 @@ namespace internal {
class CERES_NO_EXPORT SuiteSparse {
public:
// Defining this static function even when SuiteSparse is not
// available, allows client code to check for the presence of CAMD
// without checking for the absence of the CERES_NO_CAMD symbol.
//
// This is safer because the symbol maybe missing due to a user
// accidentally not including suitesparse.h in their code when
// checking for the symbol.
static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() {
return false;
}
// Nested dissection is only available if SuiteSparse is compiled
// with Metis support.
static bool IsNestedDissectionAvailable() { return false; }
void Free(void* /*arg*/) {}
};

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -28,18 +28,14 @@
//
// Author: vitus@google.com (Michael Vitus)
// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/config.h"
#ifdef CERES_USE_CXX_THREADS
#include "ceres/thread_pool.h"
#include <cmath>
#include <limits>
#include "ceres/thread_pool.h"
#include "ceres/internal/config.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
namespace {
// Constrain the total number of threads to the amount the hardware can support.
@@ -105,7 +101,4 @@ void ThreadPool::ThreadMainLoop() {
void ThreadPool::Stop() { task_queue_.StopWaiters(); }
} // namespace internal
} // namespace ceres
#endif // CERES_USE_CXX_THREADS
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@
#include "ceres/concurrent_queue.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// A thread-safe thread pool with an unbounded task queue and a resizable number
// of workers. The size of the thread pool can be increased but never decreased
@@ -115,7 +114,6 @@ class CERES_NO_EXPORT ThreadPool {
std::mutex thread_pool_mutex_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_THREAD_POOL_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,44 +30,20 @@
#include "ceres/thread_token_provider.h"
#ifdef CERES_USE_OPENMP
#include <omp.h>
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
ThreadTokenProvider::ThreadTokenProvider(int num_threads) {
(void)num_threads;
#ifdef CERES_USE_CXX_THREADS
for (int i = 0; i < num_threads; i++) {
pool_.Push(i);
}
#endif
}
int ThreadTokenProvider::Acquire() {
#ifdef CERES_USE_OPENMP
return omp_get_thread_num();
#endif
#ifdef CERES_NO_THREADS
return 0;
#endif
#ifdef CERES_USE_CXX_THREADS
int thread_id;
CHECK(pool_.Wait(&thread_id));
return thread_id;
#endif
}
void ThreadTokenProvider::Release(int thread_id) {
(void)thread_id;
#ifdef CERES_USE_CXX_THREADS
pool_.Push(thread_id);
#endif
}
void ThreadTokenProvider::Release(int thread_id) { pool_.Push(thread_id); }
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
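The intended usage pattern, as a sketch (parallel_for and the per-thread scratch array are illustrative assumptions, not part of this file):
  ThreadTokenProvider provider(num_threads);
  parallel_for(0, n, [&](int i) {
    const int thread_id = provider.Acquire();  // blocks until a token is free
    scratch[thread_id].Accumulate(i);          // index a per-thread workspace
    provider.Release(thread_id);               // hand the token back
  });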

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -31,15 +31,11 @@
#ifndef CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_
#define CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_
#include "ceres/concurrent_queue.h"
#include "ceres/internal/config.h"
#include "ceres/internal/export.h"
#ifdef CERES_USE_CXX_THREADS
#include "ceres/concurrent_queue.h"
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
// Helper for C++ thread number identification that is similar to
// omp_get_thread_num() behaviour. This is necessary to support C++
@@ -48,12 +44,6 @@ namespace internal {
// 0 to num_threads - 1 that can be acquired to identify the thread in a thread
// pool.
//
// If CERES_NO_THREADS is defined, Acquire() always returns 0 and Release()
// takes no action.
//
// If CERES_USE_OPENMP, omp_get_thread_num() is used to Acquire() with no action
// in Release()
//
//
// Example usage pseudocode:
//
@@ -78,20 +68,16 @@ class CERES_NO_EXPORT ThreadTokenProvider {
void Release(int thread_id);
private:
#ifdef CERES_USE_CXX_THREADS
// This queue initially holds a sequence from 0..num_threads-1. On every
// Acquire() call the first number is removed from here. When the token is no
// longer needed it shall be given back with a corresponding Release()
// call. This concurrent queue is more expensive than TBB's version, so you
// should not acquire the thread ID on every for loop iteration.
ConcurrentQueue<int> pool_;
#endif
ThreadTokenProvider(ThreadTokenProvider&) = delete;
ThreadTokenProvider& operator=(ThreadTokenProvider&) = delete;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,15 +32,16 @@
#include <algorithm>
#include <memory>
#include <random>
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/random.h"
#include "ceres/types.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
TripletSparseMatrix::TripletSparseMatrix()
: num_rows_(0), num_cols_(0), max_num_nonzeros_(0), num_nonzeros_(0) {}
@@ -168,13 +169,15 @@ void TripletSparseMatrix::CopyData(const TripletSparseMatrix& orig) {
}
}
void TripletSparseMatrix::RightMultiply(const double* x, double* y) const {
void TripletSparseMatrix::RightMultiplyAndAccumulate(const double* x,
double* y) const {
for (int i = 0; i < num_nonzeros_; ++i) {
y[rows_[i]] += values_[i] * x[cols_[i]];
}
}
void TripletSparseMatrix::LeftMultiply(const double* x, double* y) const {
void TripletSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
double* y) const {
for (int i = 0; i < num_nonzeros_; ++i) {
y[cols_[i]] += values_[i] * x[rows_[i]];
}
@@ -195,6 +198,11 @@ void TripletSparseMatrix::ScaleColumns(const double* scale) {
}
}
void TripletSparseMatrix::ToCRSMatrix(CRSMatrix* crs_matrix) const {
CompressedRowSparseMatrix::FromTripletSparseMatrix(*this)->ToCRSMatrix(
crs_matrix);
}
void TripletSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
dense_matrix->resize(num_rows_, num_cols_);
dense_matrix->setZero();
@@ -276,8 +284,34 @@ void TripletSparseMatrix::ToTextFile(FILE* file) const {
}
}
std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateFromTextFile(
FILE* file) {
CHECK(file != nullptr);
int num_rows = 0;
int num_cols = 0;
std::vector<int> rows;
std::vector<int> cols;
std::vector<double> values;
while (true) {
int row, col;
double value;
if (fscanf(file, "%d %d %lf", &row, &col, &value) != 3) {
break;
}
rows.push_back(row);
cols.push_back(col);
values.push_back(value);
num_rows = std::max(num_rows, row + 1);
num_cols = std::max(num_cols, col + 1);
}
VLOG(1) << "Read " << rows.size() << " nonzeros from file.";
return std::make_unique<TripletSparseMatrix>(
num_rows, num_cols, rows, cols, values);
}
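The format is one "row col value" triple per nonzero, which is exactly what ToTextFile emits, so the two functions round-trip. A sketch (the path is illustrative):
  FILE* file = fopen("/tmp/matrix.txt", "w");
  matrix.ToTextFile(file);  // one "row col value" line per nonzero
  fclose(file);

  file = fopen("/tmp/matrix.txt", "r");
  std::unique_ptr<TripletSparseMatrix> copy =
      TripletSparseMatrix::CreateFromTextFile(file);
  fclose(file);
Note that the dimensions are inferred from the largest indices seen, so trailing all-zero rows or columns do not survive the round trip.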
std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
const TripletSparseMatrix::RandomMatrixOptions& options) {
const TripletSparseMatrix::RandomMatrixOptions& options,
std::mt19937& prng) {
CHECK_GT(options.num_rows, 0);
CHECK_GT(options.num_cols, 0);
CHECK_GT(options.density, 0.0);
@@ -286,16 +320,18 @@ std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
std::vector<int> rows;
std::vector<int> cols;
std::vector<double> values;
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
std::normal_distribution<double> standard_normal;
while (rows.empty()) {
rows.clear();
cols.clear();
values.clear();
for (int r = 0; r < options.num_rows; ++r) {
for (int c = 0; c < options.num_cols; ++c) {
if (RandDouble() <= options.density) {
if (uniform01(prng) <= options.density) {
rows.push_back(r);
cols.push_back(c);
values.push_back(RandNormal());
values.push_back(standard_normal(prng));
}
}
}
@@ -305,5 +341,4 @@ std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
options.num_rows, options.num_cols, rows, cols, values);
}
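With the RNG now passed in by the caller, random matrices are reproducible across runs. A sketch:
  TripletSparseMatrix::RandomMatrixOptions options;
  options.num_rows = 100;
  options.num_cols = 50;
  options.density = 0.05;  // expected fraction of nonzero entries
  std::mt19937 prng(42);   // fixed seed => identical matrix every run
  std::unique_ptr<TripletSparseMatrix> matrix =
      TripletSparseMatrix::CreateRandomMatrix(options, prng);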
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,16 +32,17 @@
#define CERES_INTERNAL_TRIPLET_SPARSE_MATRIX_H_
#include <memory>
#include <random>
#include <vector>
#include "ceres/crs_matrix.h"
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/export.h"
#include "ceres/sparse_matrix.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// An implementation of the SparseMatrix interface to store and
// manipulate sparse matrices in triplet (i,j,s) form. This object is
@@ -65,10 +66,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
// Implementation of the SparseMatrix interface.
void SetZero() final;
void RightMultiply(const double* x, double* y) const final;
void LeftMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
void SquaredColumnNorm(double* x) const final;
void ScaleColumns(const double* scale) final;
void ToCRSMatrix(CRSMatrix* matrix) const;
void ToDenseMatrix(Matrix* dense_matrix) const final;
void ToTextFile(FILE* file) const final;
// clang-format off
@@ -134,7 +136,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
// normally distributed and whose structure is determined by
// RandomMatrixOptions.
static std::unique_ptr<TripletSparseMatrix> CreateRandomMatrix(
const TripletSparseMatrix::RandomMatrixOptions& options);
const TripletSparseMatrix::RandomMatrixOptions& options,
std::mt19937& prng);
// Load a triplet sparse matrix from a text file.
static std::unique_ptr<TripletSparseMatrix> CreateFromTextFile(FILE* file);
private:
void AllocateMemory();
@@ -154,8 +160,7 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
std::unique_ptr<double[]> values_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,11 @@
#include "Eigen/Core"
#include "ceres/array_utils.h"
#include "ceres/coordinate_descent_minimizer.h"
#include "ceres/eigen_vector_ops.h"
#include "ceres/evaluator.h"
#include "ceres/file.h"
#include "ceres/line_search.h"
#include "ceres/parallel_for.h"
#include "ceres/stringprintf.h"
#include "ceres/types.h"
#include "ceres/wall_time.h"
@@ -59,8 +61,7 @@
} \
} while (0)
namespace ceres {
namespace internal {
namespace ceres::internal {
void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
double* parameters,
@@ -79,6 +80,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
? options_.max_consecutive_nonmonotonic_steps
: 0);
bool atleast_one_successful_step = false;
while (FinalizeIterationAndCheckIfMinimizerCanContinue()) {
iteration_start_time_in_secs_ = WallTimeInSeconds();
@@ -106,7 +108,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
ComputeCandidatePointAndEvaluateCost();
DoInnerIterationsIfNeeded();
if (ParameterToleranceReached()) {
if (atleast_one_successful_step && ParameterToleranceReached()) {
return;
}
@@ -115,6 +117,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
}
if (IsStepSuccessful()) {
atleast_one_successful_step = true;
RETURN_IF_ERROR_AND_LOG(HandleSuccessfulStep());
} else {
// Declare the step unsuccessful and inform the trust region strategy.
@@ -137,8 +140,8 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
double* parameters,
Solver::Summary* solver_summary) {
options_ = options;
sort(options_.trust_region_minimizer_iterations_to_dump.begin(),
options_.trust_region_minimizer_iterations_to_dump.end());
std::sort(options_.trust_region_minimizer_iterations_to_dump.begin(),
options_.trust_region_minimizer_iterations_to_dump.end());
parameters_ = parameters;
@@ -166,7 +169,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
num_consecutive_invalid_steps_ = 0;
x_ = ConstVectorRef(parameters_, num_parameters_);
x_norm_ = x_.norm();
residuals_.resize(num_residuals_);
trust_region_step_.resize(num_effective_parameters_);
delta_.resize(num_effective_parameters_);
@@ -180,7 +182,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
// the Jacobian, we will compute and overwrite this vector.
jacobian_scaling_ = Vector::Ones(num_effective_parameters_);
x_norm_ = -1; // Invalid value
x_cost_ = std::numeric_limits<double>::max();
minimum_cost_ = x_cost_;
model_cost_change_ = 0.0;
@@ -214,10 +215,11 @@ bool TrustRegionMinimizer::IterationZero() {
}
x_ = candidate_x_;
x_norm_ = x_.norm();
}
if (!EvaluateGradientAndJacobian(/*new_evaluation_point=*/true)) {
solver_summary_->message =
"Initial residual and Jacobian evaluation failed.";
return false;
}
@@ -270,7 +272,8 @@ bool TrustRegionMinimizer::EvaluateGradientAndJacobian(
}
// jacobian = jacobian * diag(J'J) ^{-1}
jacobian_->ScaleColumns(jacobian_scaling_.data());
jacobian_->ScaleColumns(
jacobian_scaling_.data(), options_.context, options_.num_threads);
}
// The gradient exists in the local tangent space. To account for
@@ -357,13 +360,13 @@ bool TrustRegionMinimizer::FinalizeIterationAndCheckIfMinimizerCanContinue() {
// Compute the trust region step using the TrustRegionStrategy chosen
// by the user.
//
// If the strategy returns with LINEAR_SOLVER_FATAL_ERROR, which
// If the strategy returns with LinearSolverTerminationType::FATAL_ERROR, which
// indicates an unrecoverable error, return false. This is the only
// condition that returns false.
//
// If the strategy returns with LINEAR_SOLVER_FAILURE, which indicates
// a numerical failure that could be recovered from by retrying
// (e.g. by increasing the strength of the regularization), we set
// If the strategy returns with LinearSolverTerminationType::FAILURE, which
// indicates a numerical failure that could be recovered from by retrying (e.g.
// by increasing the strength of the regularization), we set
// iteration_summary_.step_is_valid to false and return true.
//
// In all other cases, we compute the decrease in the trust region
@@ -395,7 +398,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
residuals_.data(),
trust_region_step_.data());
if (strategy_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
if (strategy_summary.termination_type ==
LinearSolverTerminationType::FATAL_ERROR) {
solver_summary_->message =
"Linear solver failed due to unrecoverable "
"non-numeric causes. Please see the error log for clues. ";
@@ -407,7 +411,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
WallTimeInSeconds() - strategy_start_time;
iteration_summary_.linear_solver_iterations = strategy_summary.num_iterations;
if (strategy_summary.termination_type == LINEAR_SOLVER_FAILURE) {
if (strategy_summary.termination_type ==
LinearSolverTerminationType::FAILURE) {
return true;
}
@@ -419,10 +424,15 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
// = f'f/2 - 1/2 [ f'f + 2f'J * step + step' * J' * J * step]
// = -f'J * step - step' * J' * J * step / 2
// = -(J * step)'(f + J * step / 2)
model_residuals_.setZero();
jacobian_->RightMultiply(trust_region_step_.data(), model_residuals_.data());
model_cost_change_ =
-model_residuals_.dot(residuals_ + model_residuals_ / 2.0);
ParallelSetZero(options_.context, options_.num_threads, model_residuals_);
jacobian_->RightMultiplyAndAccumulate(trust_region_step_.data(),
model_residuals_.data(),
options_.context,
options_.num_threads);
model_cost_change_ = -Dot(model_residuals_,
residuals_ + model_residuals_ / 2.0,
options_.context,
options_.num_threads);
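As a sanity check of the algebra in the comment above: with dense Eigen stand-ins for J, f and the step (illustrative names), the direct and folded forms of the model cost change agree up to rounding:
  // model_cost(step) = 1/2 ||f + J * step||^2
  const Eigen::VectorXd Jstep = J * step;
  const double direct =
      0.5 * f.squaredNorm() - 0.5 * (f + Jstep).squaredNorm();
  const double folded = -Jstep.dot(f + Jstep / 2.0);
  // direct == folded, up to floating point rounding.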
// TODO(sameeragarwal)
//
@@ -432,7 +442,10 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
iteration_summary_.step_is_valid = (model_cost_change_ > 0.0);
if (iteration_summary_.step_is_valid) {
// Undo the Jacobian column scaling.
delta_ = (trust_region_step_.array() * jacobian_scaling_.array()).matrix();
ParallelAssign(options_.context,
options_.num_threads,
delta_,
(trust_region_step_.array() * jacobian_scaling_.array()));
num_consecutive_invalid_steps_ = 0;
}
@@ -702,10 +715,12 @@ bool TrustRegionMinimizer::MinTrustRegionRadiusReached() {
// Solver::Options::parameter_tolerance based convergence check.
bool TrustRegionMinimizer::ParameterToleranceReached() {
const double x_norm = x_.norm();
// Compute the norm of the step in the ambient space.
iteration_summary_.step_norm = (x_ - candidate_x_).norm();
const double step_size_tolerance =
options_.parameter_tolerance * (x_norm_ + options_.parameter_tolerance);
options_.parameter_tolerance * (x_norm + options_.parameter_tolerance);
if (iteration_summary_.step_norm > step_size_tolerance) {
return false;
@@ -714,7 +729,7 @@ bool TrustRegionMinimizer::ParameterToleranceReached() {
solver_summary_->message = StringPrintf(
"Parameter tolerance reached. "
"Relative step_norm: %e <= %e.",
(iteration_summary_.step_norm / (x_norm_ + options_.parameter_tolerance)),
(iteration_summary_.step_norm / (x_norm + options_.parameter_tolerance)),
options_.parameter_tolerance);
solver_summary_->termination_type = CONVERGENCE;
if (is_not_silent_) {
@@ -807,7 +822,6 @@ bool TrustRegionMinimizer::IsStepSuccessful() {
// evaluator know that the step has been accepted.
bool TrustRegionMinimizer::HandleSuccessfulStep() {
x_ = candidate_x_;
x_norm_ = x_.norm();
// Since the step was successful, this point has already had the residual
// evaluated (but not the jacobian). So indicate that to the evaluator.
@@ -821,5 +835,4 @@ bool TrustRegionMinimizer::HandleSuccessfulStep() {
return true;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/trust_region_strategy.h"
#include "ceres/types.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Generic trust region minimization algorithm.
//
@@ -139,8 +138,6 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer {
// Scaling vector to scale the columns of the Jacobian.
Vector jacobian_scaling_;
// Euclidean norm of x_.
double x_norm_;
// Cost at x_.
double x_cost_;
// Minimum cost encountered up till now.
@@ -160,8 +157,7 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer {
int num_consecutive_invalid_steps_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,7 @@
#include <numeric>
#include <string>
#include <vector>
#include "ceres/callbacks.h"
#include "ceres/context_impl.h"
@@ -48,10 +49,7 @@
#include "ceres/trust_region_strategy.h"
#include "ceres/wall_time.h"
namespace ceres {
namespace internal {
using std::vector;
namespace ceres::internal {
namespace {
@@ -59,7 +57,8 @@ std::shared_ptr<ParameterBlockOrdering> CreateDefaultLinearSolverOrdering(
const Program& program) {
std::shared_ptr<ParameterBlockOrdering> ordering =
std::make_shared<ParameterBlockOrdering>();
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
const std::vector<ParameterBlock*>& parameter_blocks =
program.parameter_blocks();
for (auto* parameter_block : parameter_blocks) {
ordering->AddElementToGroup(
const_cast<double*>(parameter_block->user_state()), 0);
@@ -114,6 +113,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
return ReorderProgramForSchurTypeLinearSolver(
options.linear_solver_type,
options.sparse_linear_algebra_library_type,
options.linear_solver_ordering_type,
pp->problem->parameter_map(),
options.linear_solver_ordering.get(),
pp->reduced_program.get(),
@@ -124,6 +124,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
!options.dynamic_sparsity) {
return ReorderProgramForSparseCholesky(
options.sparse_linear_algebra_library_type,
options.linear_solver_ordering_type,
*options.linear_solver_ordering,
0, /* use all the rows of the jacobian */
pp->reduced_program.get(),
@@ -139,6 +140,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
return ReorderProgramForSparseCholesky(
options.sparse_linear_algebra_library_type,
options.linear_solver_ordering_type,
*options.linear_solver_ordering,
pp->linear_solver_options.subset_preconditioner_start_row_block,
pp->reduced_program.get(),
@@ -197,10 +199,16 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
options.max_linear_solver_iterations;
pp->linear_solver_options.type = options.linear_solver_type;
pp->linear_solver_options.preconditioner_type = options.preconditioner_type;
pp->linear_solver_options.use_spse_initialization =
options.use_spse_initialization;
pp->linear_solver_options.spse_tolerance = options.spse_tolerance;
pp->linear_solver_options.max_num_spse_iterations =
options.max_num_spse_iterations;
pp->linear_solver_options.visibility_clustering_type =
options.visibility_clustering_type;
pp->linear_solver_options.sparse_linear_algebra_library_type =
options.sparse_linear_algebra_library_type;
pp->linear_solver_options.dense_linear_algebra_library_type =
options.dense_linear_algebra_library_type;
pp->linear_solver_options.use_explicit_schur_complement =
@@ -211,7 +219,6 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
pp->linear_solver_options.max_num_refinement_iterations =
options.max_num_refinement_iterations;
pp->linear_solver_options.num_threads = options.num_threads;
pp->linear_solver_options.use_postordering = options.use_postordering;
pp->linear_solver_options.context = pp->problem->context();
if (IsSchurType(pp->linear_solver_options.type)) {
@@ -225,26 +232,23 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
if (pp->linear_solver_options.elimination_groups.size() == 1) {
pp->linear_solver_options.elimination_groups.push_back(0);
}
}
if (options.linear_solver_type == SPARSE_SCHUR) {
// When using SPARSE_SCHUR, we ignore the user's postordering
// preferences in certain cases.
//
// 1. SUITE_SPARSE is the sparse linear algebra library requested
// but cholmod_camd is not available.
// 2. CX_SPARSE is the sparse linear algebra library requested.
//
// This ensures that the linear solver does not assume that a
// fill-reducing pre-ordering has been done.
//
// TODO(sameeragarwal): Implement the reordering of parameter
// blocks for CX_SPARSE.
if ((options.sparse_linear_algebra_library_type == SUITE_SPARSE &&
!SuiteSparse::
IsConstrainedApproximateMinimumDegreeOrderingAvailable()) ||
(options.sparse_linear_algebra_library_type == CX_SPARSE)) {
pp->linear_solver_options.use_postordering = true;
}
if (!options.dynamic_sparsity &&
AreJacobianColumnsOrdered(options.linear_solver_type,
options.preconditioner_type,
options.sparse_linear_algebra_library_type,
options.linear_solver_ordering_type)) {
pp->linear_solver_options.ordering_type = OrderingType::NATURAL;
} else {
if (options.linear_solver_ordering_type == ceres::AMD) {
pp->linear_solver_options.ordering_type = OrderingType::AMD;
} else if (options.linear_solver_ordering_type == ceres::NESDIS) {
pp->linear_solver_options.ordering_type = OrderingType::NESDIS;
} else {
LOG(FATAL) << "Congratulations you have found a bug in Ceres Solver."
<< " Please report this to the maintainers. : "
<< options.linear_solver_ordering_type;
}
}
@@ -257,6 +261,8 @@ bool SetupEvaluator(PreprocessedProblem* pp) {
const Solver::Options& options = pp->options;
pp->evaluator_options = Evaluator::Options();
pp->evaluator_options.linear_solver_type = options.linear_solver_type;
pp->evaluator_options.sparse_linear_algebra_library_type =
options.sparse_linear_algebra_library_type;
pp->evaluator_options.num_eliminate_blocks = 0;
if (IsSchurType(options.linear_solver_type)) {
pp->evaluator_options.num_eliminate_blocks =
@@ -330,13 +336,19 @@ bool SetupInnerIterationMinimizer(PreprocessedProblem* pp) {
}
// Configure and create a TrustRegionMinimizer object.
void SetupMinimizerOptions(PreprocessedProblem* pp) {
bool SetupMinimizerOptions(PreprocessedProblem* pp) {
const Solver::Options& options = pp->options;
SetupCommonMinimizerOptions(pp);
pp->minimizer_options.is_constrained =
pp->reduced_program->IsBoundsConstrained();
pp->minimizer_options.jacobian = pp->evaluator->CreateJacobian();
if (pp->minimizer_options.jacobian == nullptr) {
pp->error =
"Unable to create Jacobian matrix. Likely because it is too large.";
return false;
}
pp->minimizer_options.inner_iteration_minimizer =
pp->inner_iteration_minimizer;
@@ -349,9 +361,12 @@ void SetupMinimizerOptions(PreprocessedProblem* pp) {
strategy_options.trust_region_strategy_type =
options.trust_region_strategy_type;
strategy_options.dogleg_type = options.dogleg_type;
strategy_options.context = pp->problem->context();
strategy_options.num_threads = options.num_threads;
pp->minimizer_options.trust_region_strategy =
TrustRegionStrategy::Create(strategy_options);
CHECK(pp->minimizer_options.trust_region_strategy != nullptr);
return true;
}
} // namespace
@@ -387,9 +402,7 @@ bool TrustRegionPreprocessor::Preprocess(const Solver::Options& options,
return false;
}
SetupMinimizerOptions(pp);
return true;
return SetupMinimizerOptions(pp);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "ceres/internal/export.h"
#include "ceres/preprocessor.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor {
public:
@@ -45,8 +44,7 @@ class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor {
PreprocessedProblem* preprocessed_problem) override;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,8 +35,7 @@
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
TrustRegionStepEvaluator::TrustRegionStepEvaluator(
const double initial_cost, const int max_consecutive_nonmonotonic_steps)
@@ -111,5 +110,4 @@ void TrustRegionStepEvaluator::StepAccepted(const double cost,
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2016 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,7 @@
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// The job of the TrustRegionStepEvaluator is to evaluate the quality
// of a step, i.e., how the cost of a step compares with the reduction
@@ -118,7 +117,6 @@ class CERES_NO_EXPORT TrustRegionStepEvaluator {
int num_consecutive_nonmonotonic_steps_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_TRUST_REGION_STEP_EVALUATOR_H_

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -37,8 +37,7 @@
#include "ceres/dogleg_strategy.h"
#include "ceres/levenberg_marquardt_strategy.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
TrustRegionStrategy::~TrustRegionStrategy() = default;
@@ -59,5 +58,4 @@ std::unique_ptr<TrustRegionStrategy> TrustRegionStrategy::Create(
return nullptr;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,7 @@
#include "ceres/internal/export.h"
#include "ceres/linear_solver.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class LinearSolver;
class SparseMatrix;
@@ -74,6 +73,9 @@ class CERES_NO_EXPORT TrustRegionStrategy {
// Further specify which dogleg method to use
DoglegType dogleg_type = TRADITIONAL_DOGLEG;
ContextImpl* context = nullptr;
int num_threads = 1;
};
// Factory.
@@ -112,7 +114,8 @@ class CERES_NO_EXPORT TrustRegionStrategy {
int num_iterations = -1;
// Status of the linear solver used to solve the Newton system.
LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE;
LinearSolverTerminationType termination_type =
LinearSolverTerminationType::FAILURE;
};
// Use the current radius to solve for the trust region step.
@@ -141,8 +144,7 @@ class CERES_NO_EXPORT TrustRegionStrategy {
virtual double Radius() const = 0;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,14 +39,12 @@
namespace ceres {
using std::string;
// clang-format off
#define CASESTR(x) case x: return #x
#define STRENUM(x) if (value == #x) { *type = x; return true; }
// clang-format on
static void UpperCase(string* input) {
static void UpperCase(std::string* input) {
std::transform(input->begin(), input->end(), input->begin(), ::toupper);
}
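For readers unfamiliar with the macro pair: CASESTR maps an enum value to its name, STRENUM does the reverse. For example, CASESTR(IDENTITY) and STRENUM(IDENTITY) expand to:
  case IDENTITY: return "IDENTITY";
  if (value == "IDENTITY") { *type = IDENTITY; return true; }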
@@ -64,7 +62,7 @@ const char* LinearSolverTypeToString(LinearSolverType type) {
}
}
bool StringToLinearSolverType(string value, LinearSolverType* type) {
bool StringToLinearSolverType(std::string value, LinearSolverType* type) {
UpperCase(&value);
STRENUM(DENSE_NORMAL_CHOLESKY);
STRENUM(DENSE_QR);
@@ -81,6 +79,7 @@ const char* PreconditionerTypeToString(PreconditionerType type) {
CASESTR(IDENTITY);
CASESTR(JACOBI);
CASESTR(SCHUR_JACOBI);
CASESTR(SCHUR_POWER_SERIES_EXPANSION);
CASESTR(CLUSTER_JACOBI);
CASESTR(CLUSTER_TRIDIAGONAL);
CASESTR(SUBSET);
@@ -89,11 +88,12 @@ const char* PreconditionerTypeToString(PreconditionerType type) {
}
}
bool StringToPreconditionerType(string value, PreconditionerType* type) {
bool StringToPreconditionerType(std::string value, PreconditionerType* type) {
UpperCase(&value);
STRENUM(IDENTITY);
STRENUM(JACOBI);
STRENUM(SCHUR_JACOBI);
STRENUM(SCHUR_POWER_SERIES_EXPANSION);
STRENUM(CLUSTER_JACOBI);
STRENUM(CLUSTER_TRIDIAGONAL);
STRENUM(SUBSET);
@@ -104,9 +104,9 @@ const char* SparseLinearAlgebraLibraryTypeToString(
SparseLinearAlgebraLibraryType type) {
switch (type) {
CASESTR(SUITE_SPARSE);
CASESTR(CX_SPARSE);
CASESTR(EIGEN_SPARSE);
CASESTR(ACCELERATE_SPARSE);
CASESTR(CUDA_SPARSE);
CASESTR(NO_SPARSE);
default:
return "UNKNOWN";
@@ -114,16 +114,33 @@ const char* SparseLinearAlgebraLibraryTypeToString(
}
bool StringToSparseLinearAlgebraLibraryType(
string value, SparseLinearAlgebraLibraryType* type) {
std::string value, SparseLinearAlgebraLibraryType* type) {
UpperCase(&value);
STRENUM(SUITE_SPARSE);
STRENUM(CX_SPARSE);
STRENUM(EIGEN_SPARSE);
STRENUM(ACCELERATE_SPARSE);
STRENUM(CUDA_SPARSE);
STRENUM(NO_SPARSE);
return false;
}
const char* LinearSolverOrderingTypeToString(LinearSolverOrderingType type) {
switch (type) {
CASESTR(AMD);
CASESTR(NESDIS);
default:
return "UNKNOWN";
}
}
bool StringToLinearSolverOrderingType(std::string value,
LinearSolverOrderingType* type) {
UpperCase(&value);
STRENUM(AMD);
STRENUM(NESDIS);
return false;
}
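Because the input is upper-cased first, the string-to-enum conversion is case-insensitive. A usage sketch:
  LinearSolverOrderingType ordering_type;
  CHECK(StringToLinearSolverOrderingType("nesdis", &ordering_type));
  // ordering_type == NESDIS, and
  // LinearSolverOrderingTypeToString(ordering_type) returns "NESDIS".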
const char* DenseLinearAlgebraLibraryTypeToString(
DenseLinearAlgebraLibraryType type) {
switch (type) {
@@ -136,7 +153,7 @@ const char* DenseLinearAlgebraLibraryTypeToString(
}
bool StringToDenseLinearAlgebraLibraryType(
string value, DenseLinearAlgebraLibraryType* type) {
std::string value, DenseLinearAlgebraLibraryType* type) {
UpperCase(&value);
STRENUM(EIGEN);
STRENUM(LAPACK);
@@ -153,7 +170,7 @@ const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) {
}
}
bool StringToTrustRegionStrategyType(string value,
bool StringToTrustRegionStrategyType(std::string value,
TrustRegionStrategyType* type) {
UpperCase(&value);
STRENUM(LEVENBERG_MARQUARDT);
@@ -170,7 +187,7 @@ const char* DoglegTypeToString(DoglegType type) {
}
}
bool StringToDoglegType(string value, DoglegType* type) {
bool StringToDoglegType(std::string value, DoglegType* type) {
UpperCase(&value);
STRENUM(TRADITIONAL_DOGLEG);
STRENUM(SUBSPACE_DOGLEG);
@@ -186,7 +203,7 @@ const char* MinimizerTypeToString(MinimizerType type) {
}
}
bool StringToMinimizerType(string value, MinimizerType* type) {
bool StringToMinimizerType(std::string value, MinimizerType* type) {
UpperCase(&value);
STRENUM(TRUST_REGION);
STRENUM(LINE_SEARCH);
@@ -204,7 +221,7 @@ const char* LineSearchDirectionTypeToString(LineSearchDirectionType type) {
}
}
bool StringToLineSearchDirectionType(string value,
bool StringToLineSearchDirectionType(std::string value,
LineSearchDirectionType* type) {
UpperCase(&value);
STRENUM(STEEPEST_DESCENT);
@@ -223,7 +240,7 @@ const char* LineSearchTypeToString(LineSearchType type) {
}
}
bool StringToLineSearchType(string value, LineSearchType* type) {
bool StringToLineSearchType(std::string value, LineSearchType* type) {
UpperCase(&value);
STRENUM(ARMIJO);
STRENUM(WOLFE);
@@ -241,7 +258,7 @@ const char* LineSearchInterpolationTypeToString(
}
}
bool StringToLineSearchInterpolationType(string value,
bool StringToLineSearchInterpolationType(std::string value,
LineSearchInterpolationType* type) {
UpperCase(&value);
STRENUM(BISECTION);
@@ -262,7 +279,7 @@ const char* NonlinearConjugateGradientTypeToString(
}
bool StringToNonlinearConjugateGradientType(
string value, NonlinearConjugateGradientType* type) {
std::string value, NonlinearConjugateGradientType* type) {
UpperCase(&value);
STRENUM(FLETCHER_REEVES);
STRENUM(POLAK_RIBIERE);
@@ -279,7 +296,7 @@ const char* CovarianceAlgorithmTypeToString(CovarianceAlgorithmType type) {
}
}
bool StringToCovarianceAlgorithmType(string value,
bool StringToCovarianceAlgorithmType(std::string value,
CovarianceAlgorithmType* type) {
UpperCase(&value);
STRENUM(DENSE_SVD);
@@ -297,7 +314,8 @@ const char* NumericDiffMethodTypeToString(NumericDiffMethodType type) {
}
}
bool StringToNumericDiffMethodType(string value, NumericDiffMethodType* type) {
bool StringToNumericDiffMethodType(std::string value,
NumericDiffMethodType* type) {
UpperCase(&value);
STRENUM(CENTRAL);
STRENUM(FORWARD);
@@ -314,7 +332,7 @@ const char* VisibilityClusteringTypeToString(VisibilityClusteringType type) {
}
}
bool StringToVisibilityClusteringType(string value,
bool StringToVisibilityClusteringType(std::string value,
VisibilityClusteringType* type) {
UpperCase(&value);
STRENUM(CANONICAL_VIEWS);
@@ -387,14 +405,6 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable(
#endif
}
if (type == CX_SPARSE) {
#ifdef CERES_NO_CXSPARSE
return false;
#else
return true;
#endif
}
if (type == ACCELERATE_SPARSE) {
#ifdef CERES_NO_ACCELERATE_SPARSE
return false;
@@ -411,6 +421,18 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable(
#endif
}
if (type == CUDA_SPARSE) {
#ifdef CERES_NO_CUDA
return false;
#else
return true;
#endif
}
if (type == NO_SPARSE) {
return true;
}
LOG(WARNING) << "Unknown sparse linear algebra library " << type;
return false;
}

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -44,18 +44,11 @@
#include "ceres/pair_hash.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::make_pair;
using std::max;
using std::pair;
using std::set;
using std::vector;
namespace ceres::internal {
void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
const int num_eliminate_blocks,
vector<set<int>>* visibility) {
std::vector<std::set<int>>* visibility) {
CHECK(visibility != nullptr);
// Clear the visibility vector and resize it to hold a
@@ -64,7 +57,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
visibility->resize(block_structure.cols.size() - num_eliminate_blocks);
for (const auto& row : block_structure.rows) {
const vector<Cell>& cells = row.cells;
const std::vector<Cell>& cells = row.cells;
int block_id = cells[0].block_id;
// If the first block is not an e_block, then skip this row block.
if (block_id >= num_eliminate_blocks) {
@@ -81,7 +74,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
}
std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
const vector<set<int>>& visibility) {
const std::vector<std::set<int>>& visibility) {
const time_t start_time = time(nullptr);
// Compute the number of e_blocks/point blocks. Since the visibility
// set for each f_block/camera contains the set of e_blocks/points
@@ -89,7 +82,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
int num_points = 0;
for (const auto& visible : visibility) {
if (!visible.empty()) {
num_points = max(num_points, (*visible.rbegin()) + 1);
num_points = std::max(num_points, (*visible.rbegin()) + 1);
}
}
@@ -98,9 +91,9 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
// cameras. However, to compute the sparsity structure of the Schur
// Complement efficiently, it's better to have the point->camera
// mapping.
vector<set<int>> inverse_visibility(num_points);
std::vector<std::set<int>> inverse_visibility(num_points);
for (int i = 0; i < visibility.size(); i++) {
const set<int>& visibility_set = visibility[i];
const std::set<int>& visibility_set = visibility[i];
for (int v : visibility_set) {
inverse_visibility[v].insert(i);
}
@@ -108,7 +101,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
// Map from camera pairs to number of points visible to both cameras
// in the pair.
std::unordered_map<pair<int, int>, int, pair_hash> camera_pairs;
std::unordered_map<std::pair<int, int>, int, pair_hash> camera_pairs;
// Count the number of points visible to each camera/f_block pair.
for (const auto& inverse_visibility_set : inverse_visibility) {
@@ -117,7 +110,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
++camera1) {
auto camera2 = camera1;
for (++camera2; camera2 != inverse_visibility_set.end(); ++camera2) {
++(camera_pairs[make_pair(*camera1, *camera2)]);
++(camera_pairs[std::make_pair(*camera1, *camera2)]);
}
}
}
@@ -151,5 +144,4 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
return graph;
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
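For reference, the pair-counting loop above can be exercised in isolation. Below is a small sketch of the same idiom; PairHash is a stand-in for Ceres' internal pair_hash, and the visibility data is made up.

#include <cstdint>
#include <functional>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for ceres::internal::pair_hash (an assumption, not the real one).
struct PairHash {
  std::size_t operator()(const std::pair<int, int>& p) const {
    return std::hash<std::int64_t>()(
        (static_cast<std::int64_t>(p.first) << 32) ^
        static_cast<std::uint32_t>(p.second));
  }
};

int main() {
  // inverse_visibility[p] = cameras observing point p.
  std::vector<std::set<int>> inverse_visibility = {{0, 1, 2}, {1, 2}};
  std::unordered_map<std::pair<int, int>, int, PairHash> camera_pairs;
  for (const auto& cameras : inverse_visibility) {
    for (auto c1 = cameras.begin(); c1 != cameras.end(); ++c1) {
      auto c2 = c1;
      for (++c2; c2 != cameras.end(); ++c2) {
        ++camera_pairs[std::make_pair(*c1, *c2)];  // shared-point count
      }
    }
  }
  // camera_pairs is now {(0,1):1, (0,2):1, (1,2):2} — the edge weights
  // of the Schur complement graph.
}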

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -43,8 +43,7 @@
#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
struct CompressedRowBlockStructure;
@@ -77,8 +76,7 @@ CERES_NO_EXPORT void ComputeVisibility(
CERES_NO_EXPORT std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
const std::vector<std::set<int>>& visibility);
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
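A recurring change across these files is the switch to C++17's nested-namespace syntax. The two spellings below declare exactly the same namespace; Foo is a placeholder for illustration.

// Pre-C++17 spelling, as previously used throughout Ceres:
namespace ceres {
namespace internal {
void Foo();
}  // namespace internal
}  // namespace ceres

// C++17 nested-namespace spelling, now used instead:
namespace ceres::internal {
void Foo();
}  // namespace ceres::internal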

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2022 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,8 @@
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
@@ -50,14 +52,7 @@
#include "ceres/visibility.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
using std::make_pair;
using std::pair;
using std::set;
using std::swap;
using std::vector;
namespace ceres::internal {
// TODO(sameeragarwal): Currently these are magic weights for the
// preconditioner construction. Move these higher up into the Options
@@ -82,10 +77,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner(
CHECK(options_.context != nullptr);
// Vector of camera block sizes
block_size_.resize(num_blocks_);
for (int i = 0; i < num_blocks_; ++i) {
block_size_[i] = bs.cols[i + options_.elimination_groups[0]].size;
}
blocks_ = Tail(bs.cols, bs.cols.size() - options_.elimination_groups[0]);
const time_t start_time = time(nullptr);
switch (options_.type) {
@@ -107,14 +99,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner(
LinearSolver::Options sparse_cholesky_options;
sparse_cholesky_options.sparse_linear_algebra_library_type =
options_.sparse_linear_algebra_library_type;
// The preconditioner's sparsity is not available in the
// preprocessor, so the columns of the Jacobian have not been
// reordered to minimize fill in when computing its sparse Cholesky
// factorization. So we must tell the SparseCholesky object to
// perform approximate minimum-degree reordering, which is done by
// setting use_postordering to true.
sparse_cholesky_options.use_postordering = true;
sparse_cholesky_options.ordering_type = options_.ordering_type;
sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options);
const time_t init_time = time(nullptr);
@@ -132,13 +117,13 @@ VisibilityBasedPreconditioner::~VisibilityBasedPreconditioner() = default;
// preconditioner matrix.
void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity(
const CompressedRowBlockStructure& bs) {
vector<set<int>> visibility;
std::vector<std::set<int>> visibility;
ComputeVisibility(bs, options_.elimination_groups[0], &visibility);
CHECK_EQ(num_blocks_, visibility.size());
ClusterCameras(visibility);
cluster_pairs_.clear();
for (int i = 0; i < num_clusters_; ++i) {
cluster_pairs_.insert(make_pair(i, i));
cluster_pairs_.insert(std::make_pair(i, i));
}
}
@@ -150,7 +135,7 @@ void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity(
// of edges in this forest are the cluster pairs.
void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
const CompressedRowBlockStructure& bs) {
vector<set<int>> visibility;
std::vector<std::set<int>> visibility;
ComputeVisibility(bs, options_.elimination_groups[0], &visibility);
CHECK_EQ(num_blocks_, visibility.size());
ClusterCameras(visibility);
@@ -159,7 +144,7 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
// edges are the number of 3D points/e_blocks visible in both the
// clusters at the ends of the edge. Return an approximate degree-2
// maximum spanning forest of this graph.
vector<set<int>> cluster_visibility;
std::vector<std::set<int>> cluster_visibility;
ComputeClusterVisibility(visibility, &cluster_visibility);
auto cluster_graph = CreateClusterGraph(cluster_visibility);
CHECK(cluster_graph != nullptr);
@@ -172,8 +157,8 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
void VisibilityBasedPreconditioner::InitStorage(
const CompressedRowBlockStructure& bs) {
ComputeBlockPairsInPreconditioner(bs);
m_ = std::make_unique<BlockRandomAccessSparseMatrix>(block_size_,
block_pairs_);
m_ = std::make_unique<BlockRandomAccessSparseMatrix>(
blocks_, block_pairs_, options_.context, options_.num_threads);
}
// Call the canonical views algorithm and cluster the cameras based on
@@ -183,14 +168,14 @@ void VisibilityBasedPreconditioner::InitStorage(
// The cluster_membership_ vector is updated to indicate cluster
// memberships for each camera block.
void VisibilityBasedPreconditioner::ClusterCameras(
const vector<set<int>>& visibility) {
const std::vector<std::set<int>>& visibility) {
auto schur_complement_graph = CreateSchurComplementGraph(visibility);
CHECK(schur_complement_graph != nullptr);
std::unordered_map<int, int> membership;
if (options_.visibility_clustering_type == CANONICAL_VIEWS) {
vector<int> centers;
std::vector<int> centers;
CanonicalViewsClusteringOptions clustering_options;
clustering_options.size_penalty_weight = kCanonicalViewsSizePenaltyWeight;
clustering_options.similarity_penalty_weight =
@@ -236,7 +221,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
const CompressedRowBlockStructure& bs) {
block_pairs_.clear();
for (int i = 0; i < num_blocks_; ++i) {
block_pairs_.insert(make_pair(i, i));
block_pairs_.insert(std::make_pair(i, i));
}
int r = 0;
@@ -264,7 +249,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
break;
}
set<int> f_blocks;
std::set<int> f_blocks;
for (; r < num_row_blocks; ++r) {
const CompressedRow& row = bs.rows[r];
if (row.cells.front().block_id != e_block_id) {
@@ -303,7 +288,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
const int block2 = cell.block_id - num_eliminate_blocks;
if (block1 <= block2) {
if (IsBlockPairInPreconditioner(block1, block2)) {
block_pairs_.insert(make_pair(block1, block2));
block_pairs_.insert(std::make_pair(block1, block2));
}
}
}
@@ -354,7 +339,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
// scaling is not needed, which is quite often in our experience.
LinearSolverTerminationType status = Factorize();
if (status == LINEAR_SOLVER_FATAL_ERROR) {
if (status == LinearSolverTerminationType::FATAL_ERROR) {
return false;
}
@@ -363,7 +348,8 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
// belong to the edges of the degree-2 forest. In the CLUSTER_JACOBI
// case, the preconditioner is guaranteed to be positive
// semidefinite.
if (status == LINEAR_SOLVER_FAILURE && options_.type == CLUSTER_TRIDIAGONAL) {
if (status == LinearSolverTerminationType::FAILURE &&
options_.type == CLUSTER_TRIDIAGONAL) {
VLOG(1) << "Unscaled factorization failed. Retrying with off-diagonal "
<< "scaling";
ScaleOffDiagonalCells();
@@ -371,7 +357,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
}
VLOG(2) << "Compute time: " << time(nullptr) - start_time;
return (status == LINEAR_SOLVER_SUCCESS);
return (status == LinearSolverTerminationType::SUCCESS);
}
// Consider the preconditioner matrix as a meta-block matrix, whose
@@ -399,35 +385,44 @@ void VisibilityBasedPreconditioner::ScaleOffDiagonalCells() {
// dominance. See Lemma 1 in "Visibility Based Preconditioning
// For Bundle Adjustment".
MatrixRef m(cell_info->values, row_stride, col_stride);
m.block(r, c, block_size_[block1], block_size_[block2]) *= 0.5;
m.block(r, c, blocks_[block1].size, blocks_[block2].size) *= 0.5;
}
}
// Compute the sparse Cholesky factorization of the preconditioner
// matrix.
LinearSolverTerminationType VisibilityBasedPreconditioner::Factorize() {
// Extract the TripletSparseMatrix that is used for actually storing
// Extract the BlockSparseMatrix that is used for actually storing
// S and convert it into a CompressedRowSparseMatrix.
const TripletSparseMatrix* tsm =
down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->mutable_matrix();
std::unique_ptr<CompressedRowSparseMatrix> lhs;
const BlockSparseMatrix* bsm =
down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->matrix();
const CompressedRowSparseMatrix::StorageType storage_type =
sparse_cholesky_->StorageType();
if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR);
if (storage_type ==
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
if (!m_crs_) {
m_crs_ = bsm->ToCompressedRowSparseMatrix();
m_crs_->set_storage_type(
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
} else {
bsm->UpdateCompressedRowSparseMatrix(m_crs_.get());
}
} else {
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR);
if (!m_crs_) {
m_crs_ = bsm->ToCompressedRowSparseMatrixTranspose();
m_crs_->set_storage_type(
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
} else {
bsm->UpdateCompressedRowSparseMatrixTranspose(m_crs_.get());
}
}
std::string message;
return sparse_cholesky_->Factorize(lhs.get(), &message);
return sparse_cholesky_->Factorize(m_crs_.get(), &message);
}
void VisibilityBasedPreconditioner::RightMultiply(const double* x,
double* y) const {
void VisibilityBasedPreconditioner::RightMultiplyAndAccumulate(
const double* x, double* y) const {
CHECK(x != nullptr);
CHECK(y != nullptr);
CHECK(sparse_cholesky_ != nullptr);
@@ -445,9 +440,9 @@ bool VisibilityBasedPreconditioner::IsBlockPairInPreconditioner(
int cluster1 = cluster_membership_[block1];
int cluster2 = cluster_membership_[block2];
if (cluster1 > cluster2) {
swap(cluster1, cluster2);
std::swap(cluster1, cluster2);
}
return (cluster_pairs_.count(make_pair(cluster1, cluster2)) > 0);
return (cluster_pairs_.count(std::make_pair(cluster1, cluster2)) > 0);
}
bool VisibilityBasedPreconditioner::IsBlockPairOffDiagonal(
@@ -459,7 +454,7 @@ bool VisibilityBasedPreconditioner::IsBlockPairOffDiagonal(
// each vertex.
void VisibilityBasedPreconditioner::ForestToClusterPairs(
const WeightedGraph<int>& forest,
std::unordered_set<pair<int, int>, pair_hash>* cluster_pairs) const {
std::unordered_set<std::pair<int, int>, pair_hash>* cluster_pairs) const {
CHECK(cluster_pairs != nullptr);
cluster_pairs->clear();
const std::unordered_set<int>& vertices = forest.vertices();
@@ -468,11 +463,11 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs(
// Add all the cluster pairs corresponding to the edges in the
// forest.
for (const int cluster1 : vertices) {
cluster_pairs->insert(make_pair(cluster1, cluster1));
cluster_pairs->insert(std::make_pair(cluster1, cluster1));
const std::unordered_set<int>& neighbors = forest.Neighbors(cluster1);
for (const int cluster2 : neighbors) {
if (cluster1 < cluster2) {
cluster_pairs->insert(make_pair(cluster1, cluster2));
cluster_pairs->insert(std::make_pair(cluster1, cluster2));
}
}
}
@@ -482,8 +477,8 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs(
// of all its cameras. In other words, the set of points visible to
// any camera in the cluster.
void VisibilityBasedPreconditioner::ComputeClusterVisibility(
const vector<set<int>>& visibility,
vector<set<int>>* cluster_visibility) const {
const std::vector<std::set<int>>& visibility,
std::vector<std::set<int>>* cluster_visibility) const {
CHECK(cluster_visibility != nullptr);
cluster_visibility->resize(0);
cluster_visibility->resize(num_clusters_);
@@ -499,7 +494,7 @@ void VisibilityBasedPreconditioner::ComputeClusterVisibility(
// vertices.
std::unique_ptr<WeightedGraph<int>>
VisibilityBasedPreconditioner::CreateClusterGraph(
const vector<set<int>>& cluster_visibility) const {
const std::vector<std::set<int>>& cluster_visibility) const {
auto cluster_graph = std::make_unique<WeightedGraph<int>>();
for (int i = 0; i < num_clusters_; ++i) {
@@ -507,15 +502,15 @@ VisibilityBasedPreconditioner::CreateClusterGraph(
}
for (int i = 0; i < num_clusters_; ++i) {
const set<int>& cluster_i = cluster_visibility[i];
const std::set<int>& cluster_i = cluster_visibility[i];
for (int j = i + 1; j < num_clusters_; ++j) {
vector<int> intersection;
const set<int>& cluster_j = cluster_visibility[j];
set_intersection(cluster_i.begin(),
cluster_i.end(),
cluster_j.begin(),
cluster_j.end(),
back_inserter(intersection));
std::vector<int> intersection;
const std::set<int>& cluster_j = cluster_visibility[j];
std::set_intersection(cluster_i.begin(),
cluster_i.end(),
cluster_j.begin(),
cluster_j.end(),
std::back_inserter(intersection));
if (intersection.size() > 0) {
// Clusters interact strongly when they share a large number
@@ -540,7 +535,7 @@ VisibilityBasedPreconditioner::CreateClusterGraph(
// of integers so that the cluster ids are in [0, num_clusters_).
void VisibilityBasedPreconditioner::FlattenMembershipMap(
const std::unordered_map<int, int>& membership_map,
vector<int>* membership_vector) const {
std::vector<int>* membership_vector) const {
CHECK(membership_vector != nullptr);
membership_vector->resize(0);
membership_vector->resize(num_blocks_, -1);
@@ -576,5 +571,4 @@ void VisibilityBasedPreconditioner::FlattenMembershipMap(
}
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
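Two smaller shifts in this file are worth noting. First, termination statuses moved from unscoped constants such as LINEAR_SOLVER_SUCCESS to a scoped enum, so every use must now be qualified. A minimal illustration (the surrounding function is hypothetical, only the enumerator names follow the diff):

enum class LinearSolverTerminationType { SUCCESS, FAILURE, FATAL_ERROR };

bool HandleStatus(LinearSolverTerminationType status) {
  if (status == LinearSolverTerminationType::FATAL_ERROR) {
    return false;  // unrecoverable, matching the UpdateImpl logic above
  }
  return status == LinearSolverTerminationType::SUCCESS;
}

Second, Factorize now builds the CompressedRowSparseMatrix once, caches it in m_crs_, and only refreshes its values on later calls, instead of reconverting from a triplet matrix on every update; since the preconditioner's sparsity pattern is fixed after InitStorage, only the numeric values change between iterations.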

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2017 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -55,14 +55,14 @@
#include <utility>
#include <vector>
#include "ceres/block_structure.h"
#include "ceres/graph.h"
#include "ceres/linear_solver.h"
#include "ceres/pair_hash.h"
#include "ceres/preconditioner.h"
#include "ceres/sparse_cholesky.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
class BlockRandomAccessSparseMatrix;
class BlockSparseMatrix;
@@ -123,7 +123,7 @@ class SchurEliminatorBase;
// VisibilityBasedPreconditioner preconditioner(
// *A.block_structure(), options);
// preconditioner.Update(A, nullptr);
// preconditioner.RightMultiply(x, y);
// preconditioner.RightMultiplyAndAccumulate(x, y);
class CERES_NO_EXPORT VisibilityBasedPreconditioner
: public BlockSparseMatrixPreconditioner {
public:
@@ -141,7 +141,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
~VisibilityBasedPreconditioner() override;
// Preconditioner interface
void RightMultiply(const double* x, double* y) const final;
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
int num_rows() const final;
friend class VisibilityBasedPreconditionerTest;
@@ -177,7 +177,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
int num_clusters_;
// Sizes of the blocks in the Schur complement.
std::vector<int> block_size_;
std::vector<Block> blocks_;
// Mapping from cameras to clusters.
std::vector<int> cluster_membership_;
@@ -194,10 +194,10 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
// Preconditioner matrix.
std::unique_ptr<BlockRandomAccessSparseMatrix> m_;
std::unique_ptr<CompressedRowSparseMatrix> m_crs_;
std::unique_ptr<SparseCholesky> sparse_cholesky_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#endif // CERES_INTERNAL_VISIBILITY_BASED_PRECONDITIONER_H_
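The RightMultiply -> RightMultiplyAndAccumulate rename makes the contract explicit: the preconditioner adds its product into y rather than overwriting it. A dense stand-in sketch of that contract follows; it is illustrative only, as the real implementation applies the sparse Cholesky factor.

#include <cstddef>
#include <vector>

// Computes y += M * x; callers must pre-initialise y.
void RightMultiplyAndAccumulate(const std::vector<std::vector<double>>& M,
                                const double* x, double* y) {
  for (std::size_t r = 0; r < M.size(); ++r) {
    for (std::size_t c = 0; c < M[r].size(); ++c) {
      y[r] += M[r][c] * x[c];
    }
  }
}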

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -30,13 +30,9 @@
#include "ceres/wall_time.h"
#include "ceres/internal/config.h"
#ifdef CERES_USE_OPENMP
#include <omp.h>
#else
#include <ctime>
#endif
#include "ceres/internal/config.h"
#ifdef _WIN32
#include <windows.h>
@@ -44,13 +40,9 @@
#include <sys/time.h>
#endif
namespace ceres {
namespace internal {
namespace ceres::internal {
double WallTimeInSeconds() {
#ifdef CERES_USE_OPENMP
return omp_get_wtime();
#else
#ifdef _WIN32
LARGE_INTEGER count;
LARGE_INTEGER frequency;
@@ -63,7 +55,6 @@ double WallTimeInSeconds() {
gettimeofday(&time_val, nullptr);
return (time_val.tv_sec + time_val.tv_usec * 1e-6);
#endif
#endif
}
EventLogger::EventLogger(const std::string& logger_name) {
@@ -74,7 +65,7 @@ EventLogger::EventLogger(const std::string& logger_name) {
start_time_ = WallTimeInSeconds();
last_event_time_ = start_time_;
events_ = StringPrintf(
"\n%s\n Delta Cumulative\n",
"\n%s\n Delta Cumulative\n",
logger_name.c_str());
}
@@ -103,5 +94,4 @@ void EventLogger::AddEvent(const std::string& event_name) {
absolute_time_delta);
}
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
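With the OpenMP branch removed, WallTimeInSeconds keeps only the Windows and gettimeofday paths shown above. For comparison, an equivalent portable sketch in standard C++ (not what Ceres ships):

#include <chrono>

double WallTimeInSeconds() {
  // Seconds from an arbitrary fixed origin; microsecond-or-better
  // granularity on common platforms.
  using clock = std::chrono::steady_clock;
  static const clock::time_point origin = clock::now();
  return std::chrono::duration<double>(clock::now() - origin).count();
}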

View File

@@ -1,5 +1,5 @@
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2015 Google Inc. All rights reserved.
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
@@ -39,13 +39,10 @@
#include "ceres/stringprintf.h"
#include "glog/logging.h"
namespace ceres {
namespace internal {
namespace ceres::internal {
// Returns time, in seconds, from some arbitrary starting point. If
// OpenMP is available then the high precision openmp_get_wtime()
// function is used. Otherwise on unixes, gettimeofday is used. The
// granularity is in seconds on windows systems.
// Returns time, in seconds, from some arbitrary starting point. On unixes,
// gettimeofday is used. The granularity is microseconds.
CERES_NO_EXPORT double WallTimeInSeconds();
// Log a series of events, recording for each event the time elapsed
@@ -84,8 +81,7 @@ class CERES_NO_EXPORT EventLogger {
std::string events_;
};
} // namespace internal
} // namespace ceres
} // namespace ceres::internal
#include "ceres/internal/reenable_warnings.h"
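Typical use of the EventLogger declared here, for orientation. This is a hypothetical call site, not taken from the diff; only the constructor and AddEvent signatures are as shown above.

#include "ceres/wall_time.h"

void SolveProblem() {
  ceres::internal::EventLogger logger("SolveProblem");
  // ... build the program ...
  logger.AddEvent("Setup");     // records delta since construction
  // ... run the minimizer ...
  logger.AddEvent("Minimize");  // records delta since "Setup" plus cumulative
}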