Update Ceres to version 2.2.0
Brings a lot of performance improvements and bug fixes. Keyframe selection in bundle-adjustment.blend goes down from 4.5 seconds to 3.0 seconds on M2 Ultra. The reconstruction itself stays within 0.2 seconds. The full change log can be found at http://ceres-solver.org/version_history.html. Pull Request: https://projects.blender.org/blender/blender/pulls/136896
This commit is contained in:
committed by
Sergey Sharybin
parent
0eccadd452
commit
59991e54f5
47
extern/ceres/CMakeLists.txt
vendored
47
extern/ceres/CMakeLists.txt
vendored
@@ -17,11 +17,11 @@ set(INC_SYS
|
||||
set(SRC
|
||||
include/ceres/autodiff_cost_function.h
|
||||
include/ceres/autodiff_first_order_function.h
|
||||
include/ceres/autodiff_local_parameterization.h
|
||||
include/ceres/autodiff_manifold.h
|
||||
include/ceres/c_api.h
|
||||
include/ceres/ceres.h
|
||||
include/ceres/conditioned_cost_function.h
|
||||
include/ceres/constants.h
|
||||
include/ceres/context.h
|
||||
include/ceres/cost_function.h
|
||||
include/ceres/cost_function_to_functor.h
|
||||
@@ -41,7 +41,6 @@ set(SRC
|
||||
include/ceres/jet.h
|
||||
include/ceres/jet_fwd.h
|
||||
include/ceres/line_manifold.h
|
||||
include/ceres/local_parameterization.h
|
||||
include/ceres/loss_function.h
|
||||
include/ceres/manifold.h
|
||||
include/ceres/manifold_test_utils.h
|
||||
@@ -66,6 +65,7 @@ set(SRC
|
||||
include/ceres/internal/autodiff.h
|
||||
include/ceres/internal/disable_warnings.h
|
||||
include/ceres/internal/eigen.h
|
||||
include/ceres/internal/euler_angles.h
|
||||
include/ceres/internal/fixed_array.h
|
||||
include/ceres/internal/householder_vector.h
|
||||
include/ceres/internal/integer_sequence_algorithm.h
|
||||
@@ -107,7 +107,6 @@ set(SRC
|
||||
internal/ceres/canonical_views_clustering.cc
|
||||
internal/ceres/canonical_views_clustering.h
|
||||
internal/ceres/casts.h
|
||||
internal/ceres/cgnr_linear_operator.h
|
||||
internal/ceres/cgnr_solver.cc
|
||||
internal/ceres/cgnr_solver.h
|
||||
internal/ceres/compressed_col_sparse_matrix_utils.cc
|
||||
@@ -118,7 +117,6 @@ set(SRC
|
||||
internal/ceres/compressed_row_sparse_matrix.h
|
||||
internal/ceres/concurrent_queue.h
|
||||
internal/ceres/conditioned_cost_function.cc
|
||||
internal/ceres/conjugate_gradients_solver.cc
|
||||
internal/ceres/conjugate_gradients_solver.h
|
||||
internal/ceres/context.cc
|
||||
internal/ceres/context_impl.cc
|
||||
@@ -131,9 +129,23 @@ set(SRC
|
||||
internal/ceres/covariance.cc
|
||||
internal/ceres/covariance_impl.cc
|
||||
internal/ceres/covariance_impl.h
|
||||
internal/ceres/cuda_block_sparse_crs_view.cc
|
||||
internal/ceres/cuda_block_sparse_crs_view.h
|
||||
internal/ceres/cuda_block_structure.cc
|
||||
internal/ceres/cuda_block_structure.h
|
||||
internal/ceres/cuda_buffer.h
|
||||
internal/ceres/cxsparse.cc
|
||||
internal/ceres/cxsparse.h
|
||||
# internal/ceres/cuda_kernels_bsm_to_crs.cu.cc
|
||||
# internal/ceres/cuda_kernels_bsm_to_crs.h
|
||||
internal/ceres/cuda_kernels_utils.h
|
||||
# internal/ceres/cuda_kernels_vector_ops.cu.cc
|
||||
internal/ceres/cuda_kernels_vector_ops.h
|
||||
internal/ceres/cuda_partitioned_block_sparse_crs_view.cc
|
||||
internal/ceres/cuda_partitioned_block_sparse_crs_view.h
|
||||
internal/ceres/cuda_sparse_matrix.cc
|
||||
internal/ceres/cuda_sparse_matrix.h
|
||||
internal/ceres/cuda_streamed_buffer.h
|
||||
internal/ceres/cuda_vector.cc
|
||||
internal/ceres/cuda_vector.h
|
||||
internal/ceres/dense_cholesky.cc
|
||||
internal/ceres/dense_cholesky.h
|
||||
internal/ceres/dense_jacobian_writer.h
|
||||
@@ -156,21 +168,25 @@ set(SRC
|
||||
internal/ceres/dynamic_compressed_row_sparse_matrix.h
|
||||
internal/ceres/dynamic_sparse_normal_cholesky_solver.cc
|
||||
internal/ceres/dynamic_sparse_normal_cholesky_solver.h
|
||||
internal/ceres/eigen_vector_ops.h
|
||||
internal/ceres/eigensparse.cc
|
||||
internal/ceres/eigensparse.h
|
||||
internal/ceres/evaluation_callback.cc
|
||||
internal/ceres/evaluator.cc
|
||||
internal/ceres/evaluator.h
|
||||
internal/ceres/execution_summary.h
|
||||
internal/ceres/fake_bundle_adjustment_jacobian.cc
|
||||
internal/ceres/fake_bundle_adjustment_jacobian.h
|
||||
internal/ceres/file.cc
|
||||
internal/ceres/file.h
|
||||
internal/ceres/first_order_function.cc
|
||||
internal/ceres/float_cxsparse.cc
|
||||
internal/ceres/float_cxsparse.h
|
||||
internal/ceres/float_suitesparse.cc
|
||||
internal/ceres/float_suitesparse.h
|
||||
internal/ceres/function_sample.cc
|
||||
internal/ceres/function_sample.h
|
||||
internal/ceres/generate_bundle_adjustment_tests.py
|
||||
internal/ceres/generate_template_specializations.py
|
||||
internal/ceres/generated
|
||||
internal/ceres/gradient_checker.cc
|
||||
internal/ceres/gradient_checking_cost_function.cc
|
||||
internal/ceres/gradient_checking_cost_function.h
|
||||
@@ -207,31 +223,34 @@ set(SRC
|
||||
internal/ceres/linear_operator.h
|
||||
internal/ceres/linear_solver.cc
|
||||
internal/ceres/linear_solver.h
|
||||
internal/ceres/local_parameterization.cc
|
||||
internal/ceres/loss_function.cc
|
||||
internal/ceres/low_rank_inverse_hessian.cc
|
||||
internal/ceres/low_rank_inverse_hessian.h
|
||||
internal/ceres/manifold.cc
|
||||
internal/ceres/manifold_adapter.h
|
||||
internal/ceres/map_util.h
|
||||
internal/ceres/minimizer.cc
|
||||
internal/ceres/minimizer.h
|
||||
internal/ceres/normal_prior.cc
|
||||
internal/ceres/pair_hash.h
|
||||
internal/ceres/parallel_for.h
|
||||
internal/ceres/parallel_for_cxx.cc
|
||||
internal/ceres/parallel_for_nothreads.cc
|
||||
internal/ceres/parallel_for_openmp.cc
|
||||
internal/ceres/parallel_invoke.cc
|
||||
internal/ceres/parallel_invoke.h
|
||||
internal/ceres/parallel_utils.cc
|
||||
internal/ceres/parallel_utils.h
|
||||
internal/ceres/parallel_vector_ops.cc
|
||||
internal/ceres/parallel_vector_ops.h
|
||||
internal/ceres/parameter_block.h
|
||||
internal/ceres/parameter_block_ordering.cc
|
||||
internal/ceres/parameter_block_ordering.h
|
||||
internal/ceres/partition_range_for_parallel_for.h
|
||||
internal/ceres/partitioned_matrix_view.cc
|
||||
internal/ceres/partitioned_matrix_view.h
|
||||
internal/ceres/partitioned_matrix_view_impl.h
|
||||
internal/ceres/partitioned_matrix_view_template.py
|
||||
internal/ceres/polynomial.cc
|
||||
internal/ceres/polynomial.h
|
||||
internal/ceres/power_series_expansion_preconditioner.cc
|
||||
internal/ceres/power_series_expansion_preconditioner.h
|
||||
internal/ceres/preconditioner.cc
|
||||
internal/ceres/preconditioner.h
|
||||
internal/ceres/preprocessor.cc
|
||||
@@ -242,7 +261,6 @@ set(SRC
|
||||
internal/ceres/program.cc
|
||||
internal/ceres/program.h
|
||||
internal/ceres/program_evaluator.h
|
||||
internal/ceres/random.h
|
||||
internal/ceres/reorder_program.cc
|
||||
internal/ceres/reorder_program.h
|
||||
internal/ceres/residual_block.cc
|
||||
@@ -254,6 +272,7 @@ set(SRC
|
||||
internal/ceres/schur_eliminator.cc
|
||||
internal/ceres/schur_eliminator.h
|
||||
internal/ceres/schur_eliminator_impl.h
|
||||
internal/ceres/schur_eliminator_template.py
|
||||
internal/ceres/schur_jacobi_preconditioner.cc
|
||||
internal/ceres/schur_jacobi_preconditioner.h
|
||||
internal/ceres/schur_templates.cc
|
||||
|
||||
2
extern/ceres/LICENSE
vendored
2
extern/ceres/LICENSE
vendored
@@ -1,5 +1,5 @@
|
||||
Ceres Solver - A fast non-linear least squares minimizer
|
||||
Copyright 2015 Google Inc. All rights reserved.
|
||||
Copyright 2023 Google Inc. All rights reserved.
|
||||
http://ceres-solver.org/
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
4
extern/ceres/README.blender
vendored
4
extern/ceres/README.blender
vendored
@@ -1,6 +1,6 @@
|
||||
Project: Ceres Solver
|
||||
URL: http://ceres-solver.org/
|
||||
License: SPDX:BSD-3-Clause
|
||||
Upstream version 2.1.0
|
||||
Copyright: Copyright 2015 Google Inc. All rights reserved.
|
||||
Upstream version 2.2.0
|
||||
Copyright: Copyright 2023 Google Inc. All rights reserved.
|
||||
Local modifications: None
|
||||
|
||||
34
extern/ceres/config/ceres/internal/config.h
vendored
34
extern/ceres/config/ceres/internal/config.h
vendored
@@ -50,9 +50,6 @@
|
||||
// If defined, Ceres was compiled without SuiteSparse.
|
||||
#define CERES_NO_SUITESPARSE
|
||||
|
||||
// If defined, Ceres was compiled without CXSparse.
|
||||
#define CERES_NO_CXSPARSE
|
||||
|
||||
// If defined, Ceres was compiled without CUDA.
|
||||
#define CERES_NO_CUDA
|
||||
|
||||
@@ -61,7 +58,6 @@
|
||||
|
||||
#if defined(CERES_NO_SUITESPARSE) && \
|
||||
defined(CERES_NO_ACCELERATE_SPARSE) && \
|
||||
defined(CERES_NO_CXSPARSE) && \
|
||||
!defined(CERES_USE_EIGEN_SPARSE) // NOLINT
|
||||
// If defined Ceres was compiled without any sparse linear algebra support.
|
||||
#define CERES_NO_SPARSE
|
||||
@@ -74,12 +70,11 @@
|
||||
// routines.
|
||||
// #define CERES_NO_CUSTOM_BLAS
|
||||
|
||||
// If defined, Ceres was compiled without multithreading support.
|
||||
// #define CERES_NO_THREADS
|
||||
// If defined Ceres was compiled with OpenMP multithreading.
|
||||
// #define CERES_USE_OPENMP
|
||||
// If defined Ceres was compiled with modern C++ multithreading.
|
||||
#define CERES_USE_CXX_THREADS
|
||||
// If defined, Ceres was compiled with a version of SuiteSparse/CHOLMOD without
|
||||
// the Partition module (requires METIS).
|
||||
#define CERES_NO_CHOLMOD_PARTITION
|
||||
// If defined Ceres was compiled without support for METIS via Eigen.
|
||||
#define CERES_NO_EIGEN_METIS
|
||||
|
||||
// If defined, Ceres was compiled with a version MSVC >= 2005 which
|
||||
// deprecated the standard POSIX names for bessel functions, replacing them
|
||||
@@ -88,22 +83,6 @@
|
||||
#define CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS
|
||||
#endif
|
||||
|
||||
#if defined(CERES_USE_OPENMP)
|
||||
#if defined(CERES_USE_CXX_THREADS) || defined(CERES_NO_THREADS)
|
||||
#error CERES_USE_OPENMP is mutually exclusive to CERES_USE_CXX_THREADS and CERES_NO_THREADS
|
||||
#endif
|
||||
#elif defined(CERES_USE_CXX_THREADS)
|
||||
#if defined(CERES_USE_OPENMP) || defined(CERES_NO_THREADS)
|
||||
#error CERES_USE_CXX_THREADS is mutually exclusive to CERES_USE_OPENMP, CERES_USE_CXX_THREADS and CERES_NO_THREADS
|
||||
#endif
|
||||
#elif defined(CERES_NO_THREADS)
|
||||
#if defined(CERES_USE_OPENMP) || defined(CERES_USE_CXX_THREADS)
|
||||
#error CERES_NO_THREADS is mutually exclusive to CERES_USE_OPENMP and CERES_USE_CXX_THREADS
|
||||
#endif
|
||||
#else
|
||||
# error One of CERES_USE_OPENMP, CERES_USE_CXX_THREADS or CERES_NO_THREADS must be defined.
|
||||
#endif
|
||||
|
||||
// CERES_NO_SPARSE should be automatically defined by config.h if Ceres was
|
||||
// compiled without any sparse back-end. Verify that it has not subsequently
|
||||
// been inconsistently redefined.
|
||||
@@ -111,9 +90,6 @@
|
||||
#if !defined(CERES_NO_SUITESPARSE)
|
||||
#error CERES_NO_SPARSE requires CERES_NO_SUITESPARSE.
|
||||
#endif
|
||||
#if !defined(CERES_NO_CXSPARSE)
|
||||
#error CERES_NO_SPARSE requires CERES_NO_CXSPARSE
|
||||
#endif
|
||||
#if !defined(CERES_NO_ACCELERATE_SPARSE)
|
||||
#error CERES_NO_SPARSE requires CERES_NO_ACCELERATE_SPARSE
|
||||
#endif
|
||||
|
||||
1
extern/ceres/config/ceres/internal/export.h
vendored
1
extern/ceres/config/ceres/internal/export.h
vendored
@@ -33,6 +33,7 @@
|
||||
# define CERES_DEPRECATED_NO_EXPORT CERES_NO_EXPORT CERES_DEPRECATED
|
||||
#endif
|
||||
|
||||
/* NOLINTNEXTLINE(readability-avoid-unconditional-preprocessor-if) */
|
||||
#if 0 /* DEFINE_NO_DEPRECATED */
|
||||
# ifndef CERES_NO_DEPRECATED
|
||||
# define CERES_NO_DEPRECATED
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,158 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sergey.vfx@gmail.com (Sergey Sharybin)
|
||||
// mierle@gmail.com (Keir Mierle)
|
||||
// sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
|
||||
#define CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/internal/autodiff.h"
|
||||
#include "ceres/local_parameterization.h"
|
||||
|
||||
namespace ceres {
|
||||
|
||||
// WARNING: LocalParameterizations are deprecated, so is
|
||||
// AutoDiffLocalParameterization. They will be removed from Ceres Solver in
|
||||
// version 2.2.0. Please use Manifolds and AutoDiffManifold instead.
|
||||
|
||||
// Create local parameterization with Jacobians computed via automatic
|
||||
// differentiation. For more information on local parameterizations,
|
||||
// see include/ceres/local_parameterization.h
|
||||
//
|
||||
// To get an auto differentiated local parameterization, you must define
|
||||
// a class with a templated operator() (a functor) that computes
|
||||
//
|
||||
// x_plus_delta = Plus(x, delta);
|
||||
//
|
||||
// the template parameter T. The autodiff framework substitutes appropriate
|
||||
// "Jet" objects for T in order to compute the derivative when necessary, but
|
||||
// this is hidden, and you should write the function as if T were a scalar type
|
||||
// (e.g. a double-precision floating point number).
|
||||
//
|
||||
// The function must write the computed value in the last argument (the only
|
||||
// non-const one) and return true to indicate success.
|
||||
//
|
||||
// For example, Quaternions have a three dimensional local
|
||||
// parameterization. Its plus operation can be implemented as (taken
|
||||
// from internal/ceres/auto_diff_local_parameterization_test.cc)
|
||||
//
|
||||
// struct QuaternionPlus {
|
||||
// template<typename T>
|
||||
// bool operator()(const T* x, const T* delta, T* x_plus_delta) const {
|
||||
// const T squared_norm_delta =
|
||||
// delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
|
||||
//
|
||||
// T q_delta[4];
|
||||
// if (squared_norm_delta > T(0.0)) {
|
||||
// T norm_delta = sqrt(squared_norm_delta);
|
||||
// const T sin_delta_by_delta = sin(norm_delta) / norm_delta;
|
||||
// q_delta[0] = cos(norm_delta);
|
||||
// q_delta[1] = sin_delta_by_delta * delta[0];
|
||||
// q_delta[2] = sin_delta_by_delta * delta[1];
|
||||
// q_delta[3] = sin_delta_by_delta * delta[2];
|
||||
// } else {
|
||||
// // We do not just use q_delta = [1,0,0,0] here because that is a
|
||||
// // constant and when used for automatic differentiation will
|
||||
// // lead to a zero derivative. Instead we take a first order
|
||||
// // approximation and evaluate it at zero.
|
||||
// q_delta[0] = T(1.0);
|
||||
// q_delta[1] = delta[0];
|
||||
// q_delta[2] = delta[1];
|
||||
// q_delta[3] = delta[2];
|
||||
// }
|
||||
//
|
||||
// QuaternionProduct(q_delta, x, x_plus_delta);
|
||||
// return true;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// Then given this struct, the auto differentiated local
|
||||
// parameterization can now be constructed as
|
||||
//
|
||||
// LocalParameterization* local_parameterization =
|
||||
// new AutoDiffLocalParameterization<QuaternionPlus, 4, 3>;
|
||||
// | |
|
||||
// Global Size ---------------+ |
|
||||
// Local Size -------------------+
|
||||
//
|
||||
// WARNING: Since the functor will get instantiated with different types for
|
||||
// T, you must convert from other numeric types to T before mixing
|
||||
// computations with other variables of type T. In the example above, this is
|
||||
// seen where instead of using k_ directly, k_ is wrapped with T(k_).
|
||||
|
||||
template <typename Functor, int kGlobalSize, int kLocalSize>
|
||||
class CERES_DEPRECATED_WITH_MSG("Use AutoDiffManifold instead.")
|
||||
AutoDiffLocalParameterization : public LocalParameterization {
|
||||
public:
|
||||
AutoDiffLocalParameterization() : functor_(new Functor()) {}
|
||||
|
||||
// Takes ownership of functor.
|
||||
explicit AutoDiffLocalParameterization(Functor* functor)
|
||||
: functor_(functor) {}
|
||||
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override {
|
||||
return (*functor_)(x, delta, x_plus_delta);
|
||||
}
|
||||
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override {
|
||||
double zero_delta[kLocalSize];
|
||||
for (int i = 0; i < kLocalSize; ++i) {
|
||||
zero_delta[i] = 0.0;
|
||||
}
|
||||
|
||||
double x_plus_delta[kGlobalSize];
|
||||
for (int i = 0; i < kGlobalSize; ++i) {
|
||||
x_plus_delta[i] = 0.0;
|
||||
}
|
||||
|
||||
const double* parameter_ptrs[2] = {x, zero_delta};
|
||||
double* jacobian_ptrs[2] = {nullptr, jacobian};
|
||||
return internal::AutoDifferentiate<
|
||||
kGlobalSize,
|
||||
internal::StaticParameterDims<kGlobalSize, kLocalSize>>(
|
||||
*functor_, parameter_ptrs, kGlobalSize, x_plus_delta, jacobian_ptrs);
|
||||
}
|
||||
|
||||
int GlobalSize() const override { return kGlobalSize; }
|
||||
int LocalSize() const override { return kLocalSize; }
|
||||
|
||||
const Functor& functor() const { return *functor_; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<Functor> functor_;
|
||||
};
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
2
extern/ceres/include/ceres/c_api.h
vendored
2
extern/ceres/include/ceres/c_api.h
vendored
@@ -1,5 +1,5 @@
|
||||
/* Ceres Solver - A fast non-linear least squares minimizer
|
||||
* Copyright 2019 Google Inc. All rights reserved.
|
||||
* Copyright 2023 Google Inc. All rights reserved.
|
||||
* http://ceres-solver.org/
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
||||
7
extern/ceres/include/ceres/ceres.h
vendored
7
extern/ceres/include/ceres/ceres.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,11 +34,12 @@
|
||||
#ifndef CERES_PUBLIC_CERES_H_
|
||||
#define CERES_PUBLIC_CERES_H_
|
||||
|
||||
// IWYU pragma: begin_exports
|
||||
#include "ceres/autodiff_cost_function.h"
|
||||
#include "ceres/autodiff_first_order_function.h"
|
||||
#include "ceres/autodiff_local_parameterization.h"
|
||||
#include "ceres/autodiff_manifold.h"
|
||||
#include "ceres/conditioned_cost_function.h"
|
||||
#include "ceres/constants.h"
|
||||
#include "ceres/context.h"
|
||||
#include "ceres/cost_function.h"
|
||||
#include "ceres/cost_function_to_functor.h"
|
||||
@@ -56,7 +57,6 @@
|
||||
#include "ceres/iteration_callback.h"
|
||||
#include "ceres/jet.h"
|
||||
#include "ceres/line_manifold.h"
|
||||
#include "ceres/local_parameterization.h"
|
||||
#include "ceres/loss_function.h"
|
||||
#include "ceres/manifold.h"
|
||||
#include "ceres/numeric_diff_cost_function.h"
|
||||
@@ -70,5 +70,6 @@
|
||||
#include "ceres/sphere_manifold.h"
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/version.h"
|
||||
// IWYU pragma: end_exports
|
||||
|
||||
#endif // CERES_PUBLIC_CERES_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,24 +26,17 @@
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
// Author: hellston20a@gmail.com (H S Helson Go)
|
||||
|
||||
#include "ceres/float_cxsparse.h"
|
||||
#ifndef CERES_PUBLIC_CONSTANTS_H_
|
||||
#define CERES_PUBLIC_CONSTANTS_H_
|
||||
|
||||
#include <memory>
|
||||
// TODO(HSHelson): This header should no longer be necessary once C++20's
|
||||
// <numbers> (e.g. std::numbers::pi_v) becomes usable
|
||||
namespace ceres::constants {
|
||||
template <typename T>
|
||||
inline constexpr T pi_v(3.141592653589793238462643383279502884);
|
||||
inline constexpr double pi = pi_v<double>;
|
||||
} // namespace ceres::constants
|
||||
|
||||
#if !defined(CERES_NO_CXSPARSE)
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
std::unique_ptr<SparseCholesky> FloatCXSparseCholesky::Create(
|
||||
OrderingType ordering_type) {
|
||||
LOG(FATAL) << "FloatCXSparseCholesky is not available.";
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // !defined(CERES_NO_CXSPARSE)
|
||||
#endif // CERES_PUBLIC_CONSTANTS_H_
|
||||
2
extern/ceres/include/ceres/context.h
vendored
2
extern/ceres/include/ceres/context.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
2
extern/ceres/include/ceres/cost_function.h
vendored
2
extern/ceres/include/ceres/cost_function.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -120,7 +120,7 @@ class CostFunctionToFunctor {
|
||||
if (parameter_block_sizes.size() == num_parameter_blocks) {
|
||||
for (int block = 0; block < num_parameter_blocks; ++block) {
|
||||
CHECK_EQ(ParameterDims::GetDim(block), parameter_block_sizes[block])
|
||||
<< "Parameter block size missmatch. The specified static parameter "
|
||||
<< "Parameter block size mismatch. The specified static parameter "
|
||||
"block dimension does not match the one from the cost function.";
|
||||
}
|
||||
}
|
||||
|
||||
33
extern/ceres/include/ceres/covariance.h
vendored
33
extern/ceres/include/ceres/covariance.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -146,7 +146,7 @@ class CovarianceImpl;
|
||||
// a. The rank deficiency arises from overparameterization. e.g., a
|
||||
// four dimensional quaternion used to parameterize SO(3), which is
|
||||
// a three dimensional manifold. In cases like this, the user should
|
||||
// use an appropriate LocalParameterization/Manifold. Not only will this lead
|
||||
// use an appropriate Manifold. Not only will this lead
|
||||
// to better numerical behaviour of the Solver, it will also expose
|
||||
// the rank deficiency to the Covariance object so that it can
|
||||
// handle it correctly.
|
||||
@@ -246,6 +246,20 @@ class CERES_EXPORT Covariance {
|
||||
// used.
|
||||
CovarianceAlgorithmType algorithm_type = SPARSE_QR;
|
||||
|
||||
// During QR factorization, if a column with Euclidean norm less
|
||||
// than column_pivot_threshold is encountered it is treated as
|
||||
// zero.
|
||||
//
|
||||
// If column_pivot_threshold < 0, then an automatic default value
|
||||
// of 20*(m+n)*eps*sqrt(max(diag(J'*J))) is used. Here m and n are
|
||||
// the number of rows and columns of the Jacobian (J)
|
||||
// respectively.
|
||||
//
|
||||
// This is an advanced option meant for users who know enough
|
||||
// about their Jacobian matrices that they can determine a value
|
||||
// better than the default.
|
||||
double column_pivot_threshold = -1;
|
||||
|
||||
// If the Jacobian matrix is near singular, then inverting J'J
|
||||
// will result in unreliable results, e.g., if
|
||||
//
|
||||
@@ -266,7 +280,7 @@ class CERES_EXPORT Covariance {
|
||||
//
|
||||
// min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
|
||||
//
|
||||
// where min_sigma and max_sigma are the minimum and maxiumum
|
||||
// where min_sigma and max_sigma are the minimum and maximum
|
||||
// singular values of J respectively.
|
||||
//
|
||||
// 2. SPARSE_QR
|
||||
@@ -394,11 +408,9 @@ class CERES_EXPORT Covariance {
|
||||
const double* parameter_block2,
|
||||
double* covariance_block) const;
|
||||
|
||||
// Return the block of the cross-covariance matrix corresponding to
|
||||
// parameter_block1 and parameter_block2.
|
||||
// Returns cross-covariance in the tangent space if a local
|
||||
// parameterization is associated with either parameter block;
|
||||
// else returns cross-covariance in the ambient space.
|
||||
// Returns the block of the cross-covariance in the tangent space if a
|
||||
// manifold is associated with either parameter block; else returns
|
||||
// cross-covariance in the ambient space.
|
||||
//
|
||||
// Compute must be called before the first call to
|
||||
// GetCovarianceBlock and the pair <parameter_block1,
|
||||
@@ -430,9 +442,8 @@ class CERES_EXPORT Covariance {
|
||||
double* covariance_matrix) const;
|
||||
|
||||
// Return the covariance matrix corresponding to parameter_blocks
|
||||
// in the tangent space if a local parameterization is associated
|
||||
// with one of the parameter blocks else returns the covariance
|
||||
// matrix in the ambient space.
|
||||
// in the tangent space if a manifold is associated with one of the parameter
|
||||
// blocks else returns the covariance matrix in the ambient space.
|
||||
//
|
||||
// Compute must be called before calling GetCovarianceMatrix and all
|
||||
// parameter_blocks must have been present in the vector
|
||||
|
||||
2
extern/ceres/include/ceres/crs_matrix.h
vendored
2
extern/ceres/include/ceres/crs_matrix.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -368,7 +368,7 @@ class BiCubicInterpolator {
|
||||
//
|
||||
// f001, f002, f011, f012, ...
|
||||
//
|
||||
// A commonly occuring example are color images (RGB) where the three
|
||||
// A commonly occurring example are color images (RGB) where the three
|
||||
// channels are stored interleaved.
|
||||
//
|
||||
// If kInterleaved = false, then it is stored as
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -264,11 +264,23 @@ class DynamicAutoDiffCostFunction final : public DynamicCostFunction {
|
||||
return true;
|
||||
}
|
||||
|
||||
const CostFunctor& functor() const { return *functor_; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<CostFunctor> functor_;
|
||||
Ownership ownership_;
|
||||
};
|
||||
|
||||
// Deduction guide that allows the user to avoid explicitly specifying the
|
||||
// template parameter of DynamicAutoDiffCostFunction. The class can instead be
|
||||
// instantiated as follows:
|
||||
//
|
||||
// new DynamicAutoDiffCostFunction{new MyCostFunctor{}};
|
||||
//
|
||||
template <typename CostFunctor>
|
||||
DynamicAutoDiffCostFunction(CostFunctor* functor, Ownership ownership)
|
||||
-> DynamicAutoDiffCostFunction<CostFunctor>;
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -76,7 +76,7 @@ namespace ceres {
|
||||
// cost_function.AddParameterBlock(5);
|
||||
// cost_function.AddParameterBlock(10);
|
||||
// cost_function.SetNumResiduals(21);
|
||||
template <typename CostFunctor, NumericDiffMethodType method = CENTRAL>
|
||||
template <typename CostFunctor, NumericDiffMethodType kMethod = CENTRAL>
|
||||
class DynamicNumericDiffCostFunction final : public DynamicCostFunction {
|
||||
public:
|
||||
explicit DynamicNumericDiffCostFunction(
|
||||
@@ -134,7 +134,7 @@ class DynamicNumericDiffCostFunction final : public DynamicCostFunction {
|
||||
for (size_t block = 0; block < block_sizes.size(); ++block) {
|
||||
if (jacobians[block] != nullptr &&
|
||||
!NumericDiff<CostFunctor,
|
||||
method,
|
||||
kMethod,
|
||||
ceres::DYNAMIC,
|
||||
internal::DynamicParameterDims,
|
||||
ceres::DYNAMIC,
|
||||
|
||||
10
extern/ceres/include/ceres/evaluation_callback.h
vendored
10
extern/ceres/include/ceres/evaluation_callback.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -66,8 +66,12 @@ class CERES_EXPORT EvaluationCallback {
|
||||
|
||||
// Called before Ceres requests residuals or jacobians for a given setting of
|
||||
// the parameters. User parameters (the double* values provided to the cost
|
||||
// functions) are fixed until the next call to PrepareForEvaluation(). If
|
||||
// new_evaluation_point == true, then this is a new point that is different
|
||||
// functions) are fixed until the next call to PrepareForEvaluation().
|
||||
//
|
||||
// If evaluate_jacobians == true, then the user provided CostFunctions will be
|
||||
// asked to evaluate one or more of their Jacobians.
|
||||
//
|
||||
// If new_evaluation_point == true, then this is a new point that is different
|
||||
// from the last evaluated point. Otherwise, it is the same point that was
|
||||
// evaluated previously (either jacobian or residual) and the user can use
|
||||
// cached results from previous evaluations.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
47
extern/ceres/include/ceres/gradient_checker.h
vendored
47
extern/ceres/include/ceres/gradient_checker.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -25,7 +25,7 @@
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
// Copyright 2007 Google Inc. All Rights Reserved.
|
||||
// Copyright 2023 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Authors: wjr@google.com (William Rucklidge),
|
||||
// keir@google.com (Keir Mierle),
|
||||
@@ -44,7 +44,6 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/internal/fixed_array.h"
|
||||
#include "ceres/local_parameterization.h"
|
||||
#include "ceres/manifold.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
@@ -59,37 +58,15 @@ namespace ceres {
|
||||
// ------------------------------------ < relative_precision
|
||||
// max(J_actual(i, j), J_numeric(i, j))
|
||||
//
|
||||
// where J_actual(i, j) is the jacobian as computed by the supplied cost
|
||||
// function (by the user) multiplied by the local parameterization Jacobian
|
||||
// and J_numeric is the jacobian as computed by finite differences, multiplied
|
||||
// by the local parameterization Jacobian as well.
|
||||
// where J_actual(i, j) is the Jacobian as computed by the supplied cost
|
||||
// function (by the user) multiplied by the manifold Jacobian and J_numeric is
|
||||
// the Jacobian as computed by finite differences, multiplied by the manifold
|
||||
// Jacobian as well.
|
||||
//
|
||||
// How to use: Fill in an array of pointers to parameter blocks for your
|
||||
// CostFunction, and then call Probe(). Check that the return value is 'true'.
|
||||
class CERES_EXPORT GradientChecker {
|
||||
public:
|
||||
// This constructor will not take ownership of the cost function or local
|
||||
// parameterizations.
|
||||
//
|
||||
// function: The cost function to probe.
|
||||
//
|
||||
// local_parameterizations: A vector of local parameterizations, one for each
|
||||
// parameter block. May be nullptr or contain nullptrs to indicate that the
|
||||
// respective parameter does not have a local parameterization.
|
||||
//
|
||||
// options: Options to use for numerical differentiation.
|
||||
//
|
||||
// NOTE: This constructor is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Please transition to using the Manifold based
|
||||
// version.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"Local Parameterizations are deprecated. Use the constructor that uses "
|
||||
"Manifolds instead.")
|
||||
GradientChecker(
|
||||
const CostFunction* function,
|
||||
const std::vector<const LocalParameterization*>* local_parameterizations,
|
||||
const NumericDiffOptions& options);
|
||||
|
||||
// This will not take ownership of the cost function or manifolds.
|
||||
//
|
||||
// function: The cost function to probe.
|
||||
@@ -102,7 +79,6 @@ class CERES_EXPORT GradientChecker {
|
||||
GradientChecker(const CostFunction* function,
|
||||
const std::vector<const Manifold*>* manifolds,
|
||||
const NumericDiffOptions& options);
|
||||
~GradientChecker();
|
||||
|
||||
// Contains results from a call to Probe for later inspection.
|
||||
struct CERES_EXPORT ProbeResults {
|
||||
@@ -166,17 +142,6 @@ class CERES_EXPORT GradientChecker {
|
||||
GradientChecker(const GradientChecker&) = delete;
|
||||
void operator=(const GradientChecker&) = delete;
|
||||
|
||||
// This bool is used to determine whether the constructor with the
|
||||
// LocalParameterizations is called or the one with Manifolds is called. If
|
||||
// the former, then the vector of manifolds is a vector of ManifoldAdapter
|
||||
// objects which we own and should be deleted. If the latter then they are
|
||||
// real Manifold objects owned by the caller and will not be deleted.
|
||||
//
|
||||
// This bool is only needed during the LocalParameterization to Manifold
|
||||
// transition, once this transition is complete the LocalParameterization
|
||||
// based constructor and this bool will be removed.
|
||||
const bool delete_manifolds_ = false;
|
||||
|
||||
std::vector<const Manifold*> manifolds_;
|
||||
const CostFunction* function_;
|
||||
std::unique_ptr<CostFunction> finite_diff_cost_function_;
|
||||
|
||||
62
extern/ceres/include/ceres/gradient_problem.h
vendored
62
extern/ceres/include/ceres/gradient_problem.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "ceres/first_order_function.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/local_parameterization.h"
|
||||
#include "ceres/manifold.h"
|
||||
|
||||
namespace ceres {
|
||||
@@ -90,47 +89,19 @@ class FirstOrderFunction;
|
||||
// };
|
||||
//
|
||||
// ceres::GradientProblem problem(new Rosenbrock());
|
||||
//
|
||||
// NOTE: We are currently in the process of transitioning from
|
||||
// LocalParameterization to Manifolds in the Ceres API. During this period,
|
||||
// GradientProblem will support using both Manifold and LocalParameterization
|
||||
// objects interchangably. For methods in the API affected by this change, see
|
||||
// their documentation below.
|
||||
class CERES_EXPORT GradientProblem {
|
||||
public:
|
||||
// Takes ownership of the function.
|
||||
explicit GradientProblem(FirstOrderFunction* function);
|
||||
|
||||
// Takes ownership of the function and the parameterization.
|
||||
//
|
||||
// NOTE: This constructor is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Please move to using the Manifold based
|
||||
// constructor.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Please use the constructor that "
|
||||
"uses Manifold instead.")
|
||||
GradientProblem(FirstOrderFunction* function,
|
||||
LocalParameterization* parameterization);
|
||||
|
||||
// Takes ownership of the function and the manifold.
|
||||
GradientProblem(FirstOrderFunction* function, Manifold* manifold);
|
||||
|
||||
int NumParameters() const;
|
||||
|
||||
// Dimension of the manifold (and its tangent space).
|
||||
//
|
||||
// During the transition from LocalParameterization to Manifold, this method
|
||||
// reports the LocalSize of the LocalParameterization or the TangentSize of
|
||||
// the Manifold object associated with this problem.
|
||||
int NumTangentParameters() const;
|
||||
|
||||
// Dimension of the manifold (and its tangent space).
|
||||
//
|
||||
// NOTE: This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Please move to using NumTangentParameters()
|
||||
// instead.
|
||||
int NumLocalParameters() const { return NumTangentParameters(); }
|
||||
|
||||
// This call is not thread safe.
|
||||
bool Evaluate(const double* parameters, double* cost, double* gradient) const;
|
||||
bool Plus(const double* x, const double* delta, double* x_plus_delta) const;
|
||||
@@ -138,42 +109,11 @@ class CERES_EXPORT GradientProblem {
|
||||
const FirstOrderFunction* function() const { return function_.get(); }
|
||||
FirstOrderFunction* mutable_function() { return function_.get(); }
|
||||
|
||||
// NOTE: During the transition from LocalParameterization to Manifold we need
|
||||
// to support both The LocalParameterization and Manifold based constructors.
|
||||
//
|
||||
// When the user uses the LocalParameterization, internally the solver will
|
||||
// wrap it in a ManifoldAdapter object and return it when manifold or
|
||||
// mutable_manifold are called.
|
||||
//
|
||||
// As a result this method will return a non-nullptr result if a Manifold or a
|
||||
// LocalParameterization was used when constructing the GradientProblem.
|
||||
const Manifold* manifold() const { return manifold_.get(); }
|
||||
Manifold* mutable_manifold() { return manifold_.get(); }
|
||||
|
||||
// If the problem is constructed without a LocalParameterization or with a
|
||||
// Manifold this method will return a nullptr.
|
||||
//
|
||||
// NOTE: This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver.
|
||||
CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.")
|
||||
const LocalParameterization* parameterization() const {
|
||||
return parameterization_.get();
|
||||
}
|
||||
|
||||
// If the problem is constructed without a LocalParameterization or with a
|
||||
// Manifold this method will return a nullptr.
|
||||
//
|
||||
// NOTE: This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver.
|
||||
CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.")
|
||||
LocalParameterization* mutable_parameterization() {
|
||||
return parameterization_.get();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<FirstOrderFunction> function_;
|
||||
CERES_DEPRECATED_WITH_MSG("")
|
||||
std::unique_ptr<LocalParameterization> parameterization_;
|
||||
std::unique_ptr<Manifold> manifold_;
|
||||
std::unique_ptr<double[]> scratch_;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -305,10 +305,6 @@ class CERES_EXPORT GradientProblemSolver {
|
||||
// Number of parameters in the problem.
|
||||
int num_parameters = -1;
|
||||
|
||||
// Dimension of the tangent space of the problem.
|
||||
CERES_DEPRECATED_WITH_MSG("Use num_tangent_parameters.")
|
||||
int num_local_parameters = -1;
|
||||
|
||||
// Dimension of the tangent space of the problem.
|
||||
int num_tangent_parameters = -1;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2020 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/internal/fixed_array.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// StaticFixedArray selects the best array implementation based on template
|
||||
// arguments. If the size is not known at compile-time, pass
|
||||
@@ -91,7 +90,6 @@ struct ArraySelector<T, num_elements, max_num_elements_on_stack, false, false>
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_ARRAY_SELECTOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -164,8 +164,7 @@
|
||||
#define CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK 20
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Extends src by a 1st order perturbation for every dimension and puts it in
|
||||
// dst. The size of src is N. Since this is also used for perturbations in
|
||||
@@ -359,7 +358,6 @@ inline bool AutoDifferentiate(const Functor& functor,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_AUTODIFF_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
2
extern/ceres/include/ceres/internal/eigen.h
vendored
2
extern/ceres/include/ceres/internal/eigen.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
199
extern/ceres/include/ceres/internal/euler_angles.h
vendored
Normal file
199
extern/ceres/include/ceres/internal/euler_angles.h
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
|
||||
#define CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// The EulerSystem struct represents an Euler Angle Convention in compile time.
|
||||
// It acts like a trait structure and is also used as a tag for dispatching
|
||||
// Euler angle conversion function templates
|
||||
//
|
||||
// Internally, it implements the convention laid out in "Euler angle
|
||||
// conversion", Ken Shoemake, Graphics Gems IV, where a choice of axis for the
|
||||
// first rotation (out of 3) and 3 binary choices compactly specify all 24
|
||||
// rotation conventions
|
||||
//
|
||||
// - InnerAxis: Axis for the first rotation. This is specified by struct tags
|
||||
// axis::X, axis::Y, and axis::Z
|
||||
//
|
||||
// - Parity: Defines the parity of the axis permutation. The axis sequence has
|
||||
// Even parity if the second axis of rotation is 'greater-than' the first axis
|
||||
// of rotation according to the order X<Y<Z<X, otherwise it has Odd parity.
|
||||
// This is specified by struct tags Even and Odd
|
||||
//
|
||||
// - AngleConvention: Defines whether Proper Euler Angles (originally defined
|
||||
// by Euler, which has the last axis repeated, i.e. ZYZ, ZXZ, etc), or
|
||||
// Tait-Bryan Angles (introduced by the nautical and aerospace fields, i.e.
|
||||
// using ZYX for roll-pitch-yaw) are used. This is specified by struct tags
|
||||
// ProperEuler and TaitBryan.
|
||||
//
|
||||
// - FrameConvention: Defines whether the three rotations are in a global
|
||||
// frame of reference (extrinsic) or in a body centred frame of reference
|
||||
// (intrinsic). This is specified by struct tags Extrinsic and Intrinsic
|
||||
|
||||
namespace axis {
|
||||
struct X : std::integral_constant<int, 0> {};
|
||||
struct Y : std::integral_constant<int, 1> {};
|
||||
struct Z : std::integral_constant<int, 2> {};
|
||||
} // namespace axis
|
||||
|
||||
struct Even;
|
||||
struct Odd;
|
||||
|
||||
struct ProperEuler;
|
||||
struct TaitBryan;
|
||||
|
||||
struct Extrinsic;
|
||||
struct Intrinsic;
|
||||
|
||||
template <typename InnerAxisType,
|
||||
typename ParityType,
|
||||
typename AngleConventionType,
|
||||
typename FrameConventionType>
|
||||
struct EulerSystem {
|
||||
static constexpr bool kIsParityOdd = std::is_same_v<ParityType, Odd>;
|
||||
static constexpr bool kIsProperEuler =
|
||||
std::is_same_v<AngleConventionType, ProperEuler>;
|
||||
static constexpr bool kIsIntrinsic =
|
||||
std::is_same_v<FrameConventionType, Intrinsic>;
|
||||
|
||||
static constexpr int kAxes[3] = {
|
||||
InnerAxisType::value,
|
||||
(InnerAxisType::value + 1 + static_cast<int>(kIsParityOdd)) % 3,
|
||||
(InnerAxisType::value + 2 - static_cast<int>(kIsParityOdd)) % 3};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Define human readable aliases to the type of the tags
|
||||
using ExtrinsicXYZ = internal::EulerSystem<internal::axis::X,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicXYX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicXZY = internal::EulerSystem<internal::axis::X,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicXZX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicYZX = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicYZY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicYXZ = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicYXY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicZXY = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicZXZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicZYX = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Extrinsic>;
|
||||
using ExtrinsicZYZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Extrinsic>;
|
||||
/* Rotating axes */
|
||||
using IntrinsicZYX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicXYX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicYZX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicXZX = internal::EulerSystem<internal::axis::X,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicXZY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicYZY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicZXY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicYXY = internal::EulerSystem<internal::axis::Y,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicYXZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Even,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicZXZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Even,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicXYZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Odd,
|
||||
internal::TaitBryan,
|
||||
internal::Intrinsic>;
|
||||
using IntrinsicZYZ = internal::EulerSystem<internal::axis::Z,
|
||||
internal::Odd,
|
||||
internal::ProperEuler,
|
||||
internal::Intrinsic>;
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_
|
||||
@@ -41,8 +41,7 @@
|
||||
#include "ceres/internal/memory.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
constexpr static auto kFixedArrayUseDefault = static_cast<size_t>(-1);
|
||||
|
||||
@@ -372,8 +371,8 @@ class FixedArray {
|
||||
return std::addressof(ptr->array);
|
||||
}
|
||||
|
||||
static_assert(sizeof(StorageElement) == sizeof(value_type), "");
|
||||
static_assert(alignof(StorageElement) == alignof(value_type), "");
|
||||
static_assert(sizeof(StorageElement) == sizeof(value_type));
|
||||
static_assert(alignof(StorageElement) == alignof(value_type));
|
||||
|
||||
class NonEmptyInlinedStorage {
|
||||
public:
|
||||
@@ -461,7 +460,6 @@ template <typename T, size_t N, typename A>
|
||||
constexpr typename FixedArray<T, N, A>::size_type
|
||||
FixedArray<T, N, A>::inline_elements;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_FIXED_ARRAY_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://code.google.com/p/ceres-solver/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,8 +34,7 @@
|
||||
#include "Eigen/Core"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Algorithm 5.1.1 from 'Matrix Computations' by Golub et al. (Johns Hopkins
|
||||
// Studies in Mathematical Sciences) but using the nth element of the input
|
||||
@@ -90,7 +89,6 @@ typename Derived::PlainObject ApplyHouseholderVector(
|
||||
return (y - v * (beta * (v.transpose() * y)));
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_HOUSEHOLDER_VECTOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,70 +40,7 @@
|
||||
|
||||
#include "ceres/jet_fwd.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Implementation of calculating the sum of an integer sequence.
|
||||
// Recursively instantiate SumImpl and calculate the sum of the N first
|
||||
// numbers. This reduces the number of instantiations and speeds up
|
||||
// compilation.
|
||||
//
|
||||
// Examples:
|
||||
// 1) integer_sequence<int, 5>:
|
||||
// Value = 5
|
||||
//
|
||||
// 2) integer_sequence<int, 4, 2>:
|
||||
// Value = 4 + 2 + SumImpl<integer_sequence<int>>::Value
|
||||
// Value = 4 + 2 + 0
|
||||
//
|
||||
// 3) integer_sequence<int, 2, 1, 4>:
|
||||
// Value = 2 + 1 + SumImpl<integer_sequence<int, 4>>::Value
|
||||
// Value = 2 + 1 + 4
|
||||
template <typename Seq>
|
||||
struct SumImpl;
|
||||
|
||||
// Strip of and sum the first number.
|
||||
template <typename T, T N, T... Ns>
|
||||
struct SumImpl<std::integer_sequence<T, N, Ns...>> {
|
||||
static constexpr T Value =
|
||||
N + SumImpl<std::integer_sequence<T, Ns...>>::Value;
|
||||
};
|
||||
|
||||
// Strip of and sum the first two numbers.
|
||||
template <typename T, T N1, T N2, T... Ns>
|
||||
struct SumImpl<std::integer_sequence<T, N1, N2, Ns...>> {
|
||||
static constexpr T Value =
|
||||
N1 + N2 + SumImpl<std::integer_sequence<T, Ns...>>::Value;
|
||||
};
|
||||
|
||||
// Strip of and sum the first four numbers.
|
||||
template <typename T, T N1, T N2, T N3, T N4, T... Ns>
|
||||
struct SumImpl<std::integer_sequence<T, N1, N2, N3, N4, Ns...>> {
|
||||
static constexpr T Value =
|
||||
N1 + N2 + N3 + N4 + SumImpl<std::integer_sequence<T, Ns...>>::Value;
|
||||
};
|
||||
|
||||
// Only one number is left. 'Value' is just that number ('recursion' ends).
|
||||
template <typename T, T N>
|
||||
struct SumImpl<std::integer_sequence<T, N>> {
|
||||
static constexpr T Value = N;
|
||||
};
|
||||
|
||||
// No number is left. 'Value' is the identity element (for sum this is zero).
|
||||
template <typename T>
|
||||
struct SumImpl<std::integer_sequence<T>> {
|
||||
static constexpr T Value = T(0);
|
||||
};
|
||||
|
||||
// Calculate the sum of an integer sequence. The resulting sum will be stored in
|
||||
// 'Value'.
|
||||
template <typename Seq>
|
||||
class Sum {
|
||||
using T = typename Seq::value_type;
|
||||
|
||||
public:
|
||||
static constexpr T Value = SumImpl<Seq>::Value;
|
||||
};
|
||||
namespace ceres::internal {
|
||||
|
||||
// Implementation of calculating an exclusive scan (exclusive prefix sum) of an
|
||||
// integer sequence. Exclusive means that the i-th input element is not included
|
||||
@@ -232,40 +169,11 @@ struct RemoveValue
|
||||
template <typename Sequence, typename Sequence::value_type ValueToRemove>
|
||||
using RemoveValue_t = typename RemoveValue<Sequence, ValueToRemove>::type;
|
||||
|
||||
// Determines whether the values of an integer sequence are all the same.
|
||||
// Returns true if all elements of Values are equal to HeadValue.
|
||||
//
|
||||
// The integer sequence must contain at least one value. The predicate is
|
||||
// undefined for empty sequences. The evaluation result of the predicate for a
|
||||
// sequence containing only one value is defined to be true.
|
||||
template <typename... Sequence>
|
||||
struct AreAllEqual;
|
||||
|
||||
// The predicate result for a sequence containing one element is defined to be
|
||||
// true.
|
||||
template <typename T, T Value>
|
||||
struct AreAllEqual<std::integer_sequence<T, Value>> : std::true_type {};
|
||||
|
||||
// Recursion end.
|
||||
template <typename T, T Value1, T Value2>
|
||||
struct AreAllEqual<std::integer_sequence<T, Value1, Value2>>
|
||||
: std::integral_constant<bool, Value1 == Value2> {};
|
||||
|
||||
// Recursion for sequences containing at least two elements.
|
||||
template <typename T, T Value1, T Value2, T... Values>
|
||||
// clang-format off
|
||||
struct AreAllEqual<std::integer_sequence<T, Value1, Value2, Values...> >
|
||||
: std::integral_constant
|
||||
<
|
||||
bool,
|
||||
AreAllEqual<std::integer_sequence<T, Value1, Value2> >::value &&
|
||||
AreAllEqual<std::integer_sequence<T, Value2, Values...> >::value
|
||||
>
|
||||
// clang-format on
|
||||
{};
|
||||
|
||||
// Convenience variable template for AreAllEqual.
|
||||
template <class Sequence>
|
||||
constexpr bool AreAllEqual_v = AreAllEqual<Sequence>::value;
|
||||
// Returns true if Values is empty.
|
||||
template <typename T, T HeadValue, T... Values>
|
||||
inline constexpr bool AreAllEqual_v = ((HeadValue == Values) && ...);
|
||||
|
||||
// Predicate determining whether an integer sequence is either empty or all
|
||||
// values are equal.
|
||||
@@ -279,13 +187,13 @@ struct IsEmptyOrAreAllEqual<std::integer_sequence<T>> : std::true_type {};
|
||||
// General case for sequences containing at least one value.
|
||||
template <typename T, T HeadValue, T... Values>
|
||||
struct IsEmptyOrAreAllEqual<std::integer_sequence<T, HeadValue, Values...>>
|
||||
: AreAllEqual<std::integer_sequence<T, HeadValue, Values...>> {};
|
||||
: std::integral_constant<bool, AreAllEqual_v<T, HeadValue, Values...>> {};
|
||||
|
||||
// Convenience variable template for IsEmptyOrAreAllEqual.
|
||||
template <class Sequence>
|
||||
constexpr bool IsEmptyOrAreAllEqual_v = IsEmptyOrAreAllEqual<Sequence>::value;
|
||||
inline constexpr bool IsEmptyOrAreAllEqual_v =
|
||||
IsEmptyOrAreAllEqual<Sequence>::value;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_INTEGER_SEQUENCE_ALGORITHM_H_
|
||||
|
||||
46
extern/ceres/include/ceres/internal/jet_traits.h
vendored
46
extern/ceres/include/ceres/internal/jet_traits.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,17 +42,6 @@
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Predicate that determines whether T is a Jet.
|
||||
template <typename T, typename E = void>
|
||||
struct IsJet : std::false_type {};
|
||||
|
||||
template <typename T, int N>
|
||||
struct IsJet<Jet<T, N>> : std::true_type {};
|
||||
|
||||
// Convenience variable template for IsJet.
|
||||
template <typename T>
|
||||
constexpr bool IsJet_v = IsJet<T>::value;
|
||||
|
||||
// Predicate that determines whether any of the Types is a Jet.
|
||||
template <typename... Types>
|
||||
struct AreAnyJet : std::false_type {};
|
||||
@@ -65,7 +54,7 @@ struct AreAnyJet<Jet<T, N>, Types...> : std::true_type {};
|
||||
|
||||
// Convenience variable template for AreAnyJet.
|
||||
template <typename... Types>
|
||||
constexpr bool AreAnyJet_v = AreAnyJet<Types...>::value;
|
||||
inline constexpr bool AreAnyJet_v = AreAnyJet<Types...>::value;
|
||||
|
||||
// Extracts the underlying floating-point from a type T.
|
||||
template <typename T, typename E = void>
|
||||
@@ -84,27 +73,8 @@ using UnderlyingScalar_t = typename UnderlyingScalar<T>::type;
|
||||
//
|
||||
// Specifically, the predicate applies std::is_same recursively to pairs of
|
||||
// Types in the pack.
|
||||
//
|
||||
// The predicate is defined only for template packs containing at least two
|
||||
// types.
|
||||
template <typename T1, typename T2, typename... Types>
|
||||
// clang-format off
|
||||
struct AreAllSame : std::integral_constant
|
||||
<
|
||||
bool,
|
||||
AreAllSame<T1, T2>::value &&
|
||||
AreAllSame<T2, Types...>::value
|
||||
>
|
||||
// clang-format on
|
||||
{};
|
||||
|
||||
// AreAllSame pairwise test.
|
||||
template <typename T1, typename T2>
|
||||
struct AreAllSame<T1, T2> : std::is_same<T1, T2> {};
|
||||
|
||||
// Convenience variable template for AreAllSame.
|
||||
template <typename... Types>
|
||||
constexpr bool AreAllSame_v = AreAllSame<Types...>::value;
|
||||
template <typename T1, typename... Types>
|
||||
inline constexpr bool AreAllSame_v = (std::is_same<T1, Types>::value && ...);
|
||||
|
||||
// Determines the rank of a type. This allows to ensure that types passed as
|
||||
// arguments are compatible to each other. The rank of Jet is determined by the
|
||||
@@ -124,7 +94,7 @@ struct Rank<Jet<T, N>> : std::integral_constant<int, N> {};
|
||||
|
||||
// Convenience variable template for Rank.
|
||||
template <typename T>
|
||||
constexpr int Rank_v = Rank<T>::value;
|
||||
inline constexpr int Rank_v = Rank<T>::value;
|
||||
|
||||
// Constructs an integer sequence of ranks for each of the Types in the pack.
|
||||
template <typename... Types>
|
||||
@@ -186,7 +156,8 @@ struct CompatibleJetOperands<> : std::false_type {};
|
||||
// This trait is a candidate for a concept definition once C++20 features can
|
||||
// be used.
|
||||
template <typename... Types>
|
||||
constexpr bool CompatibleJetOperands_v = CompatibleJetOperands<Types...>::value;
|
||||
inline constexpr bool CompatibleJetOperands_v =
|
||||
CompatibleJetOperands<Types...>::value;
|
||||
|
||||
// Type trait ensuring at least one of the types is a Jet,
|
||||
// the underlying scalar types are compatible among each other and Jet
|
||||
@@ -216,7 +187,8 @@ struct PromotableJetOperands : std::integral_constant
|
||||
// This trait is a candidate for a concept definition once C++20 features can
|
||||
// be used.
|
||||
template <typename... Types>
|
||||
constexpr bool PromotableJetOperands_v = PromotableJetOperands<Types...>::value;
|
||||
inline constexpr bool PromotableJetOperands_v =
|
||||
PromotableJetOperands<Types...>::value;
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2020 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
6
extern/ceres/include/ceres/internal/memory.h
vendored
6
extern/ceres/include/ceres/internal/memory.h
vendored
@@ -40,8 +40,7 @@
|
||||
} while (false)
|
||||
#endif // CERES_HAVE_EXCEPTIONS
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template <typename Allocator, typename Iterator, typename... Args>
|
||||
void ConstructRange(Allocator& alloc,
|
||||
@@ -84,7 +83,6 @@ void CopyRange(Allocator& alloc,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_MEMORY_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// This is split from the main class because C++ doesn't allow partial template
|
||||
// specializations for member functions. The alternative is to repeat the main
|
||||
@@ -502,7 +501,6 @@ struct EvaluateJacobianForParameterBlocks<ParameterDims,
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_NUMERIC_DIFF_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,22 +36,7 @@
|
||||
|
||||
#include "ceres/internal/integer_sequence_algorithm.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Checks, whether the given parameter block sizes are valid. Valid means every
|
||||
// dimension is bigger than zero.
|
||||
constexpr bool IsValidParameterDimensionSequence(std::integer_sequence<int>) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <int N, int... Ts>
|
||||
constexpr bool IsValidParameterDimensionSequence(
|
||||
std::integer_sequence<int, N, Ts...>) {
|
||||
return (N <= 0) ? false
|
||||
: IsValidParameterDimensionSequence(
|
||||
std::integer_sequence<int, Ts...>());
|
||||
}
|
||||
namespace ceres::internal {
|
||||
|
||||
// Helper class that represents the parameter dimensions. The parameter
|
||||
// dimensions are either dynamic or the sizes are known at compile time. It is
|
||||
@@ -70,8 +55,7 @@ class ParameterDims {
|
||||
|
||||
// The parameter dimensions are only valid if all parameter block dimensions
|
||||
// are greater than zero.
|
||||
static constexpr bool kIsValid =
|
||||
IsValidParameterDimensionSequence(Parameters());
|
||||
static constexpr bool kIsValid = ((Ns > 0) && ...);
|
||||
static_assert(kIsValid,
|
||||
"Invalid parameter block dimension detected. Each parameter "
|
||||
"block dimension must be bigger than zero.");
|
||||
@@ -81,8 +65,7 @@ class ParameterDims {
|
||||
static_assert(kIsDynamic || kNumParameterBlocks > 0,
|
||||
"At least one parameter block must be specified.");
|
||||
|
||||
static constexpr int kNumParameters =
|
||||
Sum<std::integer_sequence<int, Ns...>>::Value;
|
||||
static constexpr int kNumParameters = (Ns + ... + 0);
|
||||
|
||||
static constexpr int GetDim(int dim) { return params_[dim]; }
|
||||
|
||||
@@ -118,7 +101,6 @@ template <int... Ns>
|
||||
using StaticParameterDims = ParameterDims<false, Ns...>;
|
||||
using DynamicParameterDims = ParameterDims<true>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_PARAMETER_DIMS_H_
|
||||
|
||||
21
extern/ceres/include/ceres/internal/port.h
vendored
21
extern/ceres/include/ceres/internal/port.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,14 +47,6 @@
|
||||
#define CERES_GET_FLAG(X) X
|
||||
#endif
|
||||
|
||||
// Indicates whether C++17 is currently active
|
||||
#ifndef CERES_HAS_CPP17
|
||||
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
|
||||
#define CERES_HAS_CPP17
|
||||
#endif // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >=
|
||||
// 201703L)
|
||||
#endif // !defined(CERES_HAS_CPP17)
|
||||
|
||||
// Indicates whether C++20 is currently active
|
||||
#ifndef CERES_HAS_CPP20
|
||||
#if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
|
||||
@@ -85,4 +77,15 @@
|
||||
//
|
||||
#define CERES_PREVENT_MACRO_SUBSTITUTION // Yes, it's empty
|
||||
|
||||
// CERES_DISABLE_DEPRECATED_WARNING and CERES_RESTORE_DEPRECATED_WARNING allow
|
||||
// temporarily disabling deprecation warnings.
|
||||
#if defined(_MSC_VER)
|
||||
#define CERES_DISABLE_DEPRECATED_WARNING \
|
||||
_Pragma("warning(push)") _Pragma("warning(disable : 4996)")
|
||||
#define CERES_RESTORE_DEPRECATED_WARNING _Pragma("warning(pop)")
|
||||
#else // defined(_MSC_VER)
|
||||
#define CERES_DISABLE_DEPRECATED_WARNING
|
||||
#define CERES_RESTORE_DEPRECATED_WARNING
|
||||
#endif // defined(_MSC_VER)
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_PORT_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,6 +32,7 @@
|
||||
#ifndef CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_
|
||||
#define CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_
|
||||
|
||||
#include "ceres/constants.h"
|
||||
#include "ceres/internal/householder_vector.h"
|
||||
|
||||
// This module contains functions to compute the SphereManifold plus and minus
|
||||
@@ -58,26 +59,23 @@
|
||||
// used in order to allow also Eigen::Ref and Eigen block expressions to
|
||||
// be passed to the function.
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template <typename VT, typename XT, typename DeltaT, typename XPlusDeltaT>
|
||||
inline void ComputeSphereManifoldPlus(const VT& v,
|
||||
double beta,
|
||||
const XT& x,
|
||||
const DeltaT& delta,
|
||||
double norm_delta,
|
||||
const double norm_delta,
|
||||
XPlusDeltaT* x_plus_delta) {
|
||||
constexpr int AmbientDim = VT::RowsAtCompileTime;
|
||||
|
||||
// Map the delta from the minimum representation to the over parameterized
|
||||
// homogeneous vector. See B.2 p.25 equation (106) - (107) for more details.
|
||||
const double norm_delta_div_2 = 0.5 * norm_delta;
|
||||
const double sin_delta_by_delta =
|
||||
std::sin(norm_delta_div_2) / norm_delta_div_2;
|
||||
const double sin_delta_by_delta = std::sin(norm_delta) / norm_delta;
|
||||
|
||||
Eigen::Matrix<double, AmbientDim, 1> y(v.size());
|
||||
y << 0.5 * sin_delta_by_delta * delta, std::cos(norm_delta_div_2);
|
||||
y << sin_delta_by_delta * delta, std::cos(norm_delta);
|
||||
|
||||
// Apply the delta update to remain on the sphere.
|
||||
*x_plus_delta = x.norm() * ApplyHouseholderVector(y, v, beta);
|
||||
@@ -99,11 +97,11 @@ inline void ComputeSphereManifoldPlusJacobian(const VT& x,
|
||||
// have trouble deducing the type of v automatically.
|
||||
ComputeHouseholderVector<VT, double, AmbientSpaceDim>(x, &v, &beta);
|
||||
|
||||
// The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the
|
||||
// The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the
|
||||
// Householder matrix (H = I - beta * v * v').
|
||||
for (int i = 0; i < tangent_size; ++i) {
|
||||
(*jacobian).col(i) = -0.5 * beta * v(i) * v;
|
||||
(*jacobian)(i, i) += 0.5;
|
||||
(*jacobian).col(i) = -beta * v(i) * v;
|
||||
(*jacobian)(i, i) += 1.0;
|
||||
}
|
||||
(*jacobian) *= x.norm();
|
||||
}
|
||||
@@ -116,18 +114,19 @@ inline void ComputeSphereManifoldMinus(
|
||||
AmbientSpaceDim == Eigen::Dynamic ? Eigen::Dynamic : AmbientSpaceDim - 1;
|
||||
using AmbientVector = Eigen::Matrix<double, AmbientSpaceDim, 1>;
|
||||
|
||||
const int tanget_size = v.size() - 1;
|
||||
const int tangent_size = v.size() - 1;
|
||||
|
||||
const AmbientVector hy = ApplyHouseholderVector(y, v, beta) / x.norm();
|
||||
|
||||
// Calculate y - x. See B.2 p.25 equation (108).
|
||||
double y_last = hy[tanget_size];
|
||||
double hy_norm = hy.template head<TangentSpaceDim>(tanget_size).norm();
|
||||
const double y_last = hy[tangent_size];
|
||||
const double hy_norm = hy.template head<TangentSpaceDim>(tangent_size).norm();
|
||||
if (hy_norm == 0.0) {
|
||||
y_minus_x->setZero();
|
||||
y_minus_x->data()[tangent_size - 1] = y_last >= 0 ? 0.0 : constants::pi;
|
||||
} else {
|
||||
*y_minus_x = 2.0 * std::atan2(hy_norm, y_last) / hy_norm *
|
||||
hy.template head<TangentSpaceDim>(tanget_size);
|
||||
*y_minus_x = std::atan2(hy_norm, y_last) / hy_norm *
|
||||
hy.template head<TangentSpaceDim>(tangent_size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,16 +146,18 @@ inline void ComputeSphereManifoldMinusJacobian(const VT& x,
|
||||
// have trouble deducing the type of v automatically.
|
||||
ComputeHouseholderVector<VT, double, AmbientSpaceDim>(x, &v, &beta);
|
||||
|
||||
// The Jacobian is equal to J = 2.0 * H.leftCols(size_ - 1) where H is the
|
||||
// The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the
|
||||
// Householder matrix (H = I - beta * v * v').
|
||||
for (int i = 0; i < tangent_size; ++i) {
|
||||
(*jacobian).row(i) = -2.0 * beta * v(i) * v;
|
||||
(*jacobian)(i, i) += 2.0;
|
||||
// NOTE: The transpose is used for correctness (the product is expected to
|
||||
// be a row vector), although here there seems to be no difference between
|
||||
// transposing or not for Eigen (possibly a compile-time auto fix).
|
||||
(*jacobian).row(i) = -beta * v(i) * v.transpose();
|
||||
(*jacobian)(i, i) += 1.0;
|
||||
}
|
||||
(*jacobian) /= x.norm();
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/cost_function.h"
|
||||
#include "ceres/internal/parameter_dims.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// For fixed size cost functors
|
||||
template <typename Functor, typename T, int... Indices>
|
||||
@@ -50,7 +49,7 @@ inline bool VariadicEvaluateImpl(const Functor& functor,
|
||||
T* output,
|
||||
std::false_type /*is_dynamic*/,
|
||||
std::integer_sequence<int, Indices...>) {
|
||||
static_assert(sizeof...(Indices),
|
||||
static_assert(sizeof...(Indices) > 0,
|
||||
"Invalid number of parameter blocks. At least one parameter "
|
||||
"block must be specified.");
|
||||
return functor(input[Indices]..., output);
|
||||
@@ -107,7 +106,29 @@ inline bool VariadicEvaluate(const Functor& functor,
|
||||
return VariadicEvaluateImpl<ParameterDims>(functor, input, output, &functor);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
// When differentiating dynamically sized CostFunctions, VariadicEvaluate
|
||||
// expects a functor with the signature:
|
||||
//
|
||||
// bool operator()(double const* const* parameters, double* cost) const
|
||||
//
|
||||
// However for NumericDiffFirstOrderFunction, the functor has the signature
|
||||
//
|
||||
// bool operator()(double const* parameters, double* cost) const
|
||||
//
|
||||
// This thin wrapper adapts the latter to the former.
|
||||
template <typename Functor>
|
||||
class FirstOrderFunctorAdapter {
|
||||
public:
|
||||
explicit FirstOrderFunctorAdapter(const Functor& functor)
|
||||
: functor_(functor) {}
|
||||
bool operator()(double const* const* parameters, double* cost) const {
|
||||
return functor_(*parameters, cost);
|
||||
}
|
||||
|
||||
private:
|
||||
const Functor& functor_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
94
extern/ceres/include/ceres/jet.h
vendored
94
extern/ceres/include/ceres/jet.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -724,7 +724,6 @@ inline Jet<T, N> hypot(const Jet<T, N>& x, const Jet<T, N>& y) {
|
||||
return Jet<T, N>(tmp, x.a / tmp * x.v + y.a / tmp * y.v);
|
||||
}
|
||||
|
||||
#ifdef CERES_HAS_CPP17
|
||||
// Like sqrt(x^2 + y^2 + z^2),
|
||||
// but acts to prevent underflow/overflow for small/large x/y/z.
|
||||
// Note that the function is non-smooth at x=y=z=0,
|
||||
@@ -744,7 +743,6 @@ inline Jet<T, N> hypot(const Jet<T, N>& x,
|
||||
const T tmp = hypot(x.a, y.a, z.a);
|
||||
return Jet<T, N>(tmp, x.a / tmp * x.v + y.a / tmp * y.v + z.a / tmp * z.v);
|
||||
}
|
||||
#endif // defined(CERES_HAS_CPP17)
|
||||
|
||||
// Like x * y + z but rounded only once.
|
||||
template <typename T, int N>
|
||||
@@ -757,28 +755,76 @@ inline Jet<T, N> fma(const Jet<T, N>& x,
|
||||
return Jet<T, N>(fma(x.a, y.a, z.a), y.a * x.v + x.a * y.v + z.v);
|
||||
}
|
||||
|
||||
// Returns the larger of the two arguments. NaNs are treated as missing data.
|
||||
// Return value of fmax() and fmin() on equality
|
||||
// ---------------------------------------------
|
||||
//
|
||||
// There is arguably no good answer to what fmax() & fmin() should return on
|
||||
// equality, which for Jets by definition ONLY compares the scalar parts. We
|
||||
// choose what we think is the least worst option (averaging as Jets) which
|
||||
// minimises undesirable/unexpected behaviour as used, and also supports client
|
||||
// code written against Ceres versions prior to type promotion being supported
|
||||
// in Jet comparisons (< v2.1).
|
||||
//
|
||||
// The std::max() convention of returning the first argument on equality is
|
||||
// problematic, as it means that the derivative component may or may not be
|
||||
// preserved (when comparing a Jet with a scalar) depending upon the ordering.
|
||||
//
|
||||
// Always returning the Jet in {Jet, scalar} cases on equality is problematic
|
||||
// as it is inconsistent with the behaviour that would be obtained if the scalar
|
||||
// was first cast to Jet and the {Jet, Jet} case was used. Prior to type
|
||||
// promotion (Ceres v2.1) client code would typically cast constants to Jets
|
||||
// e.g: fmax(x, T(2.0)) which means the {Jet, Jet} case predominates, and we
|
||||
// still want the result to be order independent.
|
||||
//
|
||||
// Our intuition is that preserving a non-zero derivative is best, even if
|
||||
// its value does not match either of the inputs. Averaging achieves this
|
||||
// whilst ensuring argument ordering independence. This is also the approach
|
||||
// used by the Jax library, and TensorFlow's reduce_max().
|
||||
|
||||
// Returns the larger of the two arguments, with Jet averaging on equality.
|
||||
// NaNs are treated as missing data.
|
||||
//
|
||||
// NOTE: This function is NOT subject to any of the error conditions specified
|
||||
// in `math_errhandling`.
|
||||
// in `math_errhandling`.
|
||||
template <typename Lhs,
|
||||
typename Rhs,
|
||||
std::enable_if_t<CompatibleJetOperands_v<Lhs, Rhs>>* = nullptr>
|
||||
inline decltype(auto) fmax(const Lhs& f, const Rhs& g) {
|
||||
inline decltype(auto) fmax(const Lhs& x, const Rhs& y) {
|
||||
using J = std::common_type_t<Lhs, Rhs>;
|
||||
return (isnan(g) || isgreater(f, g)) ? J{f} : J{g};
|
||||
// Comparing with x == y may set FP exceptions in the presence of NaNs when
|
||||
// used with non-default compiler options, so we avoid its use here.
|
||||
if (isnan(x) || isnan(y) || islessgreater(x, y)) {
|
||||
return isnan(x) || isless(x, y) ? J{y} : J{x};
|
||||
}
|
||||
// x == y (scalar parts) return the average of their Jet representations.
|
||||
#if defined(CERES_HAS_CPP20)
|
||||
return midpoint(J{x}, J{y});
|
||||
#else
|
||||
return (J{x} + J{y}) * typename J::Scalar(0.5);
|
||||
#endif // defined(CERES_HAS_CPP20)
|
||||
}
|
||||
|
||||
// Returns the smaller of the two arguments. NaNs are treated as missing data.
|
||||
// Returns the smaller of the two arguments, with Jet averaging on equality.
|
||||
// NaNs are treated as missing data.
|
||||
//
|
||||
// NOTE: This function is NOT subject to any of the error conditions specified
|
||||
// in `math_errhandling`.
|
||||
// in `math_errhandling`.
|
||||
template <typename Lhs,
|
||||
typename Rhs,
|
||||
std::enable_if_t<CompatibleJetOperands_v<Lhs, Rhs>>* = nullptr>
|
||||
inline decltype(auto) fmin(const Lhs& f, const Rhs& g) {
|
||||
inline decltype(auto) fmin(const Lhs& x, const Rhs& y) {
|
||||
using J = std::common_type_t<Lhs, Rhs>;
|
||||
return (isnan(f) || isless(g, f)) ? J{g} : J{f};
|
||||
// Comparing with x == y may set FP exceptions in the presence of NaNs when
|
||||
// used with non-default compiler options, so we avoid its use here.
|
||||
if (isnan(x) || isnan(y) || islessgreater(x, y)) {
|
||||
return isnan(x) || isgreater(x, y) ? J{y} : J{x};
|
||||
}
|
||||
// x == y (scalar parts) return the average of their Jet representations.
|
||||
#if defined(CERES_HAS_CPP20)
|
||||
return midpoint(J{x}, J{y});
|
||||
#else
|
||||
return (J{x} + J{y}) * typename J::Scalar(0.5);
|
||||
#endif // defined(CERES_HAS_CPP20)
|
||||
}
|
||||
|
||||
// Returns the positive difference (f - g) of two arguments and zero if f <= g.
|
||||
@@ -804,7 +850,7 @@ template <typename T, int N>
|
||||
inline Jet<T, N> erf(const Jet<T, N>& x) {
|
||||
// We evaluate the constant as follows:
|
||||
// 2 / sqrt(pi) = 1 / sqrt(atan(1.))
|
||||
// On POSIX sytems it is defined as M_2_SQRTPI, but this is not
|
||||
// On POSIX systems it is defined as M_2_SQRTPI, but this is not
|
||||
// portable and the type may not be T. The above expression
|
||||
// evaluates to full precision with IEEE arithmetic and, since it's
|
||||
// constant, the compiler can generate exactly the same code. gcc
|
||||
@@ -828,25 +874,19 @@ inline Jet<T, N> erfc(const Jet<T, N>& x) {
|
||||
// function errors in client code (the specific warning is suppressed when
|
||||
// Ceres itself is built).
|
||||
inline double BesselJ0(double x) {
|
||||
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
|
||||
return _j0(x);
|
||||
#else
|
||||
CERES_DISABLE_DEPRECATED_WARNING
|
||||
return j0(x);
|
||||
#endif
|
||||
CERES_RESTORE_DEPRECATED_WARNING
|
||||
}
|
||||
inline double BesselJ1(double x) {
|
||||
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
|
||||
return _j1(x);
|
||||
#else
|
||||
CERES_DISABLE_DEPRECATED_WARNING
|
||||
return j1(x);
|
||||
#endif
|
||||
CERES_RESTORE_DEPRECATED_WARNING
|
||||
}
|
||||
inline double BesselJn(int n, double x) {
|
||||
#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS)
|
||||
return _jn(n, x);
|
||||
#else
|
||||
CERES_DISABLE_DEPRECATED_WARNING
|
||||
return jn(n, x);
|
||||
#endif
|
||||
CERES_RESTORE_DEPRECATED_WARNING
|
||||
}
|
||||
|
||||
// For the formulae of the derivatives of the Bessel functions see the book:
|
||||
@@ -1264,8 +1304,13 @@ struct numeric_limits<ceres::Jet<T, N>> {
|
||||
static constexpr bool is_bounded = std::numeric_limits<T>::is_bounded;
|
||||
static constexpr bool is_modulo = std::numeric_limits<T>::is_modulo;
|
||||
|
||||
// has_denorm (and has_denorm_loss, not defined for Jet) has been deprecated
|
||||
// in C++23, although without an intent to remove the declaration. Disable
|
||||
// deprecation warnings temporarily just for the corresponding symbols.
|
||||
CERES_DISABLE_DEPRECATED_WARNING
|
||||
static constexpr std::float_denorm_style has_denorm =
|
||||
std::numeric_limits<T>::has_denorm;
|
||||
CERES_RESTORE_DEPRECATED_WARNING
|
||||
static constexpr std::float_round_style round_style =
|
||||
std::numeric_limits<T>::round_style;
|
||||
|
||||
@@ -1335,6 +1380,7 @@ struct NumTraits<ceres::Jet<T, N>> {
|
||||
}
|
||||
|
||||
static inline int digits10() { return NumTraits<T>::digits10(); }
|
||||
static inline int max_digits10() { return NumTraits<T>::max_digits10(); }
|
||||
|
||||
enum {
|
||||
IsComplex = 0,
|
||||
|
||||
2
extern/ceres/include/ceres/jet_fwd.h
vendored
2
extern/ceres/include/ceres/jet_fwd.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
13
extern/ceres/include/ceres/line_manifold.h
vendored
13
extern/ceres/include/ceres/line_manifold.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -156,7 +156,7 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
|
||||
//
|
||||
// The direction update function Plus_d is the same as the SphereManifold:
|
||||
//
|
||||
// d* = H_{v(d)} [0.5 sinc(0.5 |delta_d|) delta_d, cos(0.5 |delta_d|)]^T
|
||||
// d* = H_{v(d)} [sinc(|delta_d|) delta_d, cos(|delta_d|)]^T
|
||||
//
|
||||
// where H is the householder matrix
|
||||
// H_{v} = I - (2 / |v|^2) v v^T
|
||||
@@ -165,7 +165,7 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
|
||||
//
|
||||
// The origin point update function Plus_o is defined as
|
||||
//
|
||||
// o* = o + H_{v(d)} [0.5 delta_o, 0]^T.
|
||||
// o* = o + H_{v(d)} [delta_o, 0]^T.
|
||||
|
||||
Eigen::Map<const AmbientVector> o(x_ptr, size_);
|
||||
Eigen::Map<const AmbientVector> d(x_ptr + size_, size_);
|
||||
@@ -208,11 +208,8 @@ bool LineManifold<AmbientSpaceDimension>::Plus(const double* x_ptr,
|
||||
// perpendicular to the line direction. This is achieved by using the
|
||||
// householder matrix of the direction and allow only movements
|
||||
// perpendicular to e_n.
|
||||
//
|
||||
// The factor of 0.5 is used to be consistent with the line direction
|
||||
// update.
|
||||
AmbientVector y(size_);
|
||||
y << 0.5 * delta_o, 0;
|
||||
y << delta_o, 0;
|
||||
o_plus_delta += internal::ApplyHouseholderVector(y, v, beta);
|
||||
|
||||
return true;
|
||||
@@ -266,7 +263,7 @@ bool LineManifold<AmbientSpaceDimension>::Minus(const double* y_ptr,
|
||||
|
||||
AmbientVector delta_o = y_o - x_o;
|
||||
const AmbientVector h_delta_o =
|
||||
2.0 * internal::ApplyHouseholderVector(delta_o, v, beta);
|
||||
internal::ApplyHouseholderVector(delta_o, v, beta);
|
||||
y_minus_x_o = h_delta_o.template head<TangentSpaceDimension>(size_ - 1);
|
||||
|
||||
return true;
|
||||
|
||||
371
extern/ceres/include/ceres/local_parameterization.h
vendored
371
extern/ceres/include/ceres/local_parameterization.h
vendored
@@ -1,371 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: keir@google.com (Keir Mierle)
|
||||
// sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
|
||||
#define CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/internal/port.h"
|
||||
|
||||
namespace ceres {
|
||||
|
||||
// WARNING: LocalParameterizations are deprecated. They will be removed from
|
||||
// Ceres Solver in version 2.2.0. Please use Manifolds instead.
|
||||
|
||||
// Purpose: Sometimes parameter blocks x can overparameterize a problem
|
||||
//
|
||||
// min f(x)
|
||||
// x
|
||||
//
|
||||
// In that case it is desirable to choose a parameterization for the
|
||||
// block itself to remove the null directions of the cost. More
|
||||
// generally, if x lies on a manifold of a smaller dimension than the
|
||||
// ambient space that it is embedded in, then it is numerically and
|
||||
// computationally more effective to optimize it using a
|
||||
// parameterization that lives in the tangent space of that manifold
|
||||
// at each point.
|
||||
//
|
||||
// For example, a sphere in three dimensions is a 2 dimensional
|
||||
// manifold, embedded in a three dimensional space. At each point on
|
||||
// the sphere, the plane tangent to it defines a two dimensional
|
||||
// tangent space. For a cost function defined on this sphere, given a
|
||||
// point x, moving in the direction normal to the sphere at that point
|
||||
// is not useful. Thus a better way to do a local optimization is to
|
||||
// optimize over two dimensional vector delta in the tangent space at
|
||||
// that point and then "move" to the point x + delta, where the move
|
||||
// operation involves projecting back onto the sphere. Doing so
|
||||
// removes a redundant dimension from the optimization, making it
|
||||
// numerically more robust and efficient.
|
||||
//
|
||||
// More generally we can define a function
|
||||
//
|
||||
// x_plus_delta = Plus(x, delta),
|
||||
//
|
||||
// where x_plus_delta has the same size as x, and delta is of size
|
||||
// less than or equal to x. The function Plus, generalizes the
|
||||
// definition of vector addition. Thus it satisfies the identity
|
||||
//
|
||||
// Plus(x, 0) = x, for all x.
|
||||
//
|
||||
// A trivial version of Plus is when delta is of the same size as x
|
||||
// and
|
||||
//
|
||||
// Plus(x, delta) = x + delta
|
||||
//
|
||||
// A more interesting case is when x is a two dimensional vector, and the
|
||||
// user wishes to hold the first coordinate constant. Then, delta is a
|
||||
// scalar and Plus is defined as
|
||||
//
|
||||
// Plus(x, delta) = x + [0] * delta
|
||||
// [1]
|
||||
//
|
||||
// An example that occurs commonly in Structure from Motion problems
|
||||
// is when camera rotations are parameterized using Quaternion. There,
|
||||
// it is useful to only make updates orthogonal to that 4-vector
|
||||
// defining the quaternion. One way to do this is to let delta be a 3
|
||||
// dimensional vector and define Plus to be
|
||||
//
|
||||
// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x
|
||||
//
|
||||
// The multiplication between the two 4-vectors on the RHS is the
|
||||
// standard quaternion product.
|
||||
//
|
||||
// Given f and a point x, optimizing f can now be restated as
|
||||
//
|
||||
// min f(Plus(x, delta))
|
||||
// delta
|
||||
//
|
||||
// Given a solution delta to this problem, the optimal value is then
|
||||
// given by
|
||||
//
|
||||
// x* = Plus(x, delta)
|
||||
//
|
||||
// The class LocalParameterization defines the function Plus and its
|
||||
// Jacobian which is needed to compute the Jacobian of f w.r.t delta.
|
||||
class CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations will be removed from the Ceres Solver API in "
|
||||
"version 2.2.0. Use Manifolds instead.")
|
||||
CERES_EXPORT LocalParameterization {
|
||||
public:
|
||||
virtual ~LocalParameterization();
|
||||
|
||||
// Generalization of the addition operation,
|
||||
//
|
||||
// x_plus_delta = Plus(x, delta)
|
||||
//
|
||||
// with the condition that Plus(x, 0) = x.
|
||||
//
|
||||
virtual bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const = 0;
|
||||
|
||||
// The jacobian of Plus(x, delta) w.r.t delta at delta = 0.
|
||||
//
|
||||
// jacobian is a row-major GlobalSize() x LocalSize() matrix.
|
||||
virtual bool ComputeJacobian(const double* x, double* jacobian) const = 0;
|
||||
|
||||
// local_matrix = global_matrix * jacobian
|
||||
//
|
||||
// global_matrix is a num_rows x GlobalSize row major matrix.
|
||||
// local_matrix is a num_rows x LocalSize row major matrix.
|
||||
// jacobian(x) is the matrix returned by ComputeJacobian at x.
|
||||
//
|
||||
// This is only used by GradientProblem. For most normal uses, it is
|
||||
// okay to use the default implementation.
|
||||
virtual bool MultiplyByJacobian(const double* x,
|
||||
const int num_rows,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const;
|
||||
|
||||
// Size of x.
|
||||
virtual int GlobalSize() const = 0;
|
||||
|
||||
// Size of delta.
|
||||
virtual int LocalSize() const = 0;
|
||||
};
|
||||
|
||||
// Some basic parameterizations
|
||||
|
||||
// Identity Parameterization: Plus(x, delta) = x + delta
|
||||
class CERES_DEPRECATED_WITH_MSG("Use EuclideanManifold instead.")
|
||||
CERES_EXPORT IdentityParameterization : public LocalParameterization {
|
||||
public:
|
||||
explicit IdentityParameterization(int size);
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
bool MultiplyByJacobian(const double* x,
|
||||
const int num_cols,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const override;
|
||||
int GlobalSize() const override { return size_; }
|
||||
int LocalSize() const override { return size_; }
|
||||
|
||||
private:
|
||||
const int size_;
|
||||
};
|
||||
|
||||
// Hold a subset of the parameters inside a parameter block constant.
|
||||
class CERES_DEPRECATED_WITH_MSG("Use SubsetManifold instead.")
|
||||
CERES_EXPORT SubsetParameterization : public LocalParameterization {
|
||||
public:
|
||||
explicit SubsetParameterization(int size,
|
||||
const std::vector<int>& constant_parameters);
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
bool MultiplyByJacobian(const double* x,
|
||||
const int num_cols,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const override;
|
||||
int GlobalSize() const override {
|
||||
return static_cast<int>(constancy_mask_.size());
|
||||
}
|
||||
int LocalSize() const override { return local_size_; }
|
||||
|
||||
private:
|
||||
const int local_size_;
|
||||
std::vector<char> constancy_mask_;
|
||||
};
|
||||
|
||||
// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x
|
||||
// with * being the quaternion multiplication operator. Here we assume
|
||||
// that the first element of the quaternion vector is the real (cos
|
||||
// theta) part.
|
||||
class CERES_DEPRECATED_WITH_MSG("Use QuaternionManifold instead.")
|
||||
CERES_EXPORT QuaternionParameterization : public LocalParameterization {
|
||||
public:
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
int GlobalSize() const override { return 4; }
|
||||
int LocalSize() const override { return 3; }
|
||||
};
|
||||
|
||||
// Implements the quaternion local parameterization for Eigen's representation
|
||||
// of the quaternion. Eigen uses a different internal memory layout for the
|
||||
// elements of the quaternion than what is commonly used. Specifically, Eigen
|
||||
// stores the elements in memory as [x, y, z, w] where the real part is last
|
||||
// whereas it is typically stored first. Note, when creating an Eigen quaternion
|
||||
// through the constructor the elements are accepted in w, x, y, z order. Since
|
||||
// Ceres operates on parameter blocks which are raw double pointers this
|
||||
// difference is important and requires a different parameterization.
|
||||
//
|
||||
// Plus(x, delta) = [sin(|delta|) delta / |delta|, cos(|delta|)] * x
|
||||
// with * being the quaternion multiplication operator.
|
||||
class CERES_DEPRECATED_WITH_MSG("Use EigenQuaternionManifold instead.")
|
||||
CERES_EXPORT EigenQuaternionParameterization
|
||||
: public ceres::LocalParameterization {
|
||||
public:
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
int GlobalSize() const override { return 4; }
|
||||
int LocalSize() const override { return 3; }
|
||||
};
|
||||
|
||||
// This provides a parameterization for homogeneous vectors which are commonly
|
||||
// used in Structure from Motion problems. One example where they are used is
|
||||
// in representing points whose triangulation is ill-conditioned. Here it is
|
||||
// advantageous to use an over-parameterization since homogeneous vectors can
|
||||
// represent points at infinity.
|
||||
//
|
||||
// The plus operator is defined as
|
||||
// Plus(x, delta) =
|
||||
// [sin(0.5 * |delta|) * delta / |delta|, cos(0.5 * |delta|)] * x
|
||||
//
|
||||
// with * defined as an operator which applies the update orthogonal to x to
|
||||
// remain on the sphere. We assume that the last element of x is the scalar
|
||||
// component. The size of the homogeneous vector is required to be greater than
|
||||
// 1.
|
||||
class CERES_DEPRECATED_WITH_MSG("Use SphereManifold instead.") CERES_EXPORT
|
||||
HomogeneousVectorParameterization : public LocalParameterization {
|
||||
public:
|
||||
explicit HomogeneousVectorParameterization(int size);
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
int GlobalSize() const override { return size_; }
|
||||
int LocalSize() const override { return size_ - 1; }
|
||||
|
||||
private:
|
||||
const int size_;
|
||||
};
|
||||
|
||||
// This provides a parameterization for lines, where the line is
|
||||
// over-parameterized by an origin point and a direction vector. So the
|
||||
// parameter vector size needs to be two times the ambient space dimension,
|
||||
// where the first half is interpreted as the origin point and the second half
|
||||
// as the direction.
|
||||
//
|
||||
// The plus operator for the line direction is the same as for the
|
||||
// HomogeneousVectorParameterization. The update of the origin point is
|
||||
// perpendicular to the line direction before the update.
|
||||
//
|
||||
// This local parameterization is a special case of the affine Grassmannian
|
||||
// manifold (see https://en.wikipedia.org/wiki/Affine_Grassmannian_(manifold))
|
||||
// for the case Graff_1(R^n).
|
||||
template <int AmbientSpaceDimension>
|
||||
class CERES_DEPRECATED_WITH_MSG("Use LineManifold instead.")
|
||||
LineParameterization : public LocalParameterization {
|
||||
public:
|
||||
static_assert(AmbientSpaceDimension >= 2,
|
||||
"The ambient space must be at least 2");
|
||||
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
int GlobalSize() const override { return 2 * AmbientSpaceDimension; }
|
||||
int LocalSize() const override { return 2 * (AmbientSpaceDimension - 1); }
|
||||
};
|
||||
|
||||
// Construct a local parameterization by taking the Cartesian product
|
||||
// of a number of other local parameterizations. This is useful, when
|
||||
// a parameter block is the cartesian product of two or more
|
||||
// manifolds. For example the parameters of a camera consist of a
|
||||
// rotation and a translation, i.e., SO(3) x R^3.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// ProductParameterization product_param(new QuaternionParameterization(),
|
||||
// new IdentityParameterization(3));
|
||||
//
|
||||
// is the local parameterization for a rigid transformation, where the
|
||||
// rotation is represented using a quaternion.
|
||||
//
|
||||
class CERES_DEPRECATED_WITH_MSG("Use ProductManifold instead.")
|
||||
CERES_EXPORT ProductParameterization : public LocalParameterization {
|
||||
public:
|
||||
ProductParameterization(const ProductParameterization&) = delete;
|
||||
ProductParameterization& operator=(const ProductParameterization&) = delete;
|
||||
//
|
||||
// NOTE: The constructor takes ownership of the input local
|
||||
// parameterizations.
|
||||
//
|
||||
template <typename... LocalParams>
|
||||
explicit ProductParameterization(LocalParams*... local_params)
|
||||
: local_params_(sizeof...(LocalParams)) {
|
||||
constexpr int kNumLocalParams = sizeof...(LocalParams);
|
||||
static_assert(kNumLocalParams >= 2,
|
||||
"At least two local parameterizations must be specified.");
|
||||
|
||||
using LocalParameterizationPtr = std::unique_ptr<LocalParameterization>;
|
||||
|
||||
// Wrap all raw pointers into std::unique_ptr for exception safety.
|
||||
std::array<LocalParameterizationPtr, kNumLocalParams> local_params_array{
|
||||
LocalParameterizationPtr(local_params)...};
|
||||
|
||||
// Initialize internal state.
|
||||
for (int i = 0; i < kNumLocalParams; ++i) {
|
||||
LocalParameterizationPtr& param = local_params_[i];
|
||||
param = std::move(local_params_array[i]);
|
||||
|
||||
buffer_size_ =
|
||||
std::max(buffer_size_, param->LocalSize() * param->GlobalSize());
|
||||
global_size_ += param->GlobalSize();
|
||||
local_size_ += param->LocalSize();
|
||||
}
|
||||
}
|
||||
|
||||
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override;
|
||||
bool ComputeJacobian(const double* x, double* jacobian) const override;
|
||||
int GlobalSize() const override { return global_size_; }
|
||||
int LocalSize() const override { return local_size_; }
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<LocalParameterization>> local_params_;
|
||||
int local_size_{0};
|
||||
int global_size_{0};
|
||||
int buffer_size_{0};
|
||||
};
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
// clang-format off
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
// clang-format on
|
||||
|
||||
#include "ceres/internal/line_parameterization.h"
|
||||
|
||||
#endif // CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_
|
||||
2
extern/ceres/include/ceres/loss_function.h
vendored
2
extern/ceres/include/ceres/loss_function.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
2
extern/ceres/include/ceres/manifold.h
vendored
2
extern/ceres/include/ceres/manifold.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
73
extern/ceres/include/ceres/manifold_test_utils.h
vendored
73
extern/ceres/include/ceres/manifold_test_utils.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,24 +42,54 @@
|
||||
|
||||
namespace ceres {
|
||||
|
||||
// Matchers and macros for help with testing Manifold objects.
|
||||
// Matchers and macros to simplify testing of custom Manifold objects using the
|
||||
// gtest testing framework.
|
||||
//
|
||||
// Testing a Manifold has two parts.
|
||||
//
|
||||
// 1. Checking that Manifold::Plus is correctly defined. This requires per
|
||||
// manifold tests.
|
||||
// 1. Checking that Manifold::Plus() and Manifold::Minus() are correctly
|
||||
// defined. This requires per manifold tests.
|
||||
//
|
||||
// 2. The other methods of the manifold have mathematical properties that make
|
||||
// it compatible with Plus, as described in:
|
||||
// them compatible with Plus() and Minus(), as described in [1].
|
||||
//
|
||||
// "Integrating Generic Sensor Fusion Algorithms with Sound State
|
||||
// Representations through Encapsulation of Manifolds"
|
||||
// By C. Hertzberg, R. Wagner, U. Frese and L. Schroder
|
||||
// https://arxiv.org/pdf/1107.1119.pdf
|
||||
// To verify these general requirements for a custom Manifold, use the
|
||||
// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD() macro from within a gtest test. Note
|
||||
// that additional domain-specific tests may also be prudent, e.g. to verify the
|
||||
// behaviour of a Quaternion Manifold about pi.
|
||||
//
|
||||
// These tests are implemented using generic matchers defined below which can
|
||||
// all be called by the macro EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x,
|
||||
// delta, y, tolerance). See manifold_test.cc for example usage.
|
||||
// [1] "Integrating Generic Sensor Fusion Algorithms with Sound State
|
||||
// Representations through Encapsulation of Manifolds", C. Hertzberg,
|
||||
// R. Wagner, U. Frese and L. Schroder, https://arxiv.org/pdf/1107.1119.pdf
|
||||
|
||||
// Verifies the general requirements for a custom Manifold are satisfied to
|
||||
// within the specified (numerical) tolerance.
|
||||
//
|
||||
// Example usage for a custom Manifold: ExampleManifold:
|
||||
//
|
||||
// TEST(ExampleManifold, ManifoldInvariantsHold) {
|
||||
// constexpr double kTolerance = 1.0e-9;
|
||||
// ExampleManifold manifold;
|
||||
// ceres::Vector x = ceres::Vector::Zero(manifold.AmbientSize());
|
||||
// ceres::Vector y = ceres::Vector::Zero(manifold.AmbientSize());
|
||||
// ceres::Vector delta = ceres::Vector::Zero(manifold.TangentSize());
|
||||
// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, kTolerance);
|
||||
// }
|
||||
#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \
|
||||
::ceres::Vector zero_tangent = \
|
||||
::ceres::Vector::Zero(manifold.TangentSize()); \
|
||||
EXPECT_THAT(manifold, ::ceres::XPlusZeroIsXAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::XMinusXIsZeroAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::MinusPlusIsIdentityAt(x, delta, tolerance)); \
|
||||
EXPECT_THAT(manifold, \
|
||||
::ceres::MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, x, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, y, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::HasCorrectPlusJacobianAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::HasCorrectMinusJacobianAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, ::ceres::MinusPlusJacobianIsIdentityAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, \
|
||||
::ceres::HasCorrectRightMultiplyByPlusJacobianAt(x, tolerance));
|
||||
|
||||
// Checks that the invariant Plus(x, 0) == x holds.
|
||||
MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") {
|
||||
@@ -69,7 +99,7 @@ MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") {
|
||||
Vector actual = Vector::Zero(ambient_size);
|
||||
Vector zero = Vector::Zero(tangent_size);
|
||||
EXPECT_TRUE(arg.Plus(x.data(), zero.data(), actual.data()));
|
||||
const double n = (actual - x).norm();
|
||||
const double n = (actual - Vector{x}).norm();
|
||||
const double d = x.norm();
|
||||
const double diffnorm = (d == 0.0) ? n : (n / d);
|
||||
if (diffnorm > tolerance) {
|
||||
@@ -159,7 +189,7 @@ MATCHER_P3(MinusPlusIsIdentityAt, x, delta, tolerance, "") {
|
||||
Vector actual = Vector::Zero(tangent_size);
|
||||
EXPECT_TRUE(arg.Minus(x_plus_delta.data(), x.data(), actual.data()));
|
||||
|
||||
const double n = (actual - delta).norm();
|
||||
const double n = (actual - Vector{delta}).norm();
|
||||
const double d = delta.norm();
|
||||
const double diffnorm = (d == 0.0) ? n : (n / d);
|
||||
if (diffnorm > tolerance) {
|
||||
@@ -184,7 +214,7 @@ MATCHER_P3(PlusMinusIsIdentityAt, x, y, tolerance, "") {
|
||||
Vector actual = Vector::Zero(ambient_size);
|
||||
EXPECT_TRUE(arg.Plus(x.data(), y_minus_x.data(), actual.data()));
|
||||
|
||||
const double n = (actual - y).norm();
|
||||
const double n = (actual - Vector{y}).norm();
|
||||
const double d = y.norm();
|
||||
const double diffnorm = (d == 0.0) ? n : (n / d);
|
||||
if (diffnorm > tolerance) {
|
||||
@@ -312,17 +342,4 @@ MATCHER_P2(HasCorrectRightMultiplyByPlusJacobianAt, x, tolerance, "") {
|
||||
return true;
|
||||
}
|
||||
|
||||
#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \
|
||||
Vector zero_tangent = Vector::Zero(manifold.TangentSize()); \
|
||||
EXPECT_THAT(manifold, XPlusZeroIsXAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, XMinusXIsZeroAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, delta, tolerance)); \
|
||||
EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \
|
||||
EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, x, tolerance)); \
|
||||
EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, y, tolerance)); \
|
||||
EXPECT_THAT(manifold, HasCorrectPlusJacobianAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, HasCorrectMinusJacobianAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, MinusPlusJacobianIsIdentityAt(x, tolerance)); \
|
||||
EXPECT_THAT(manifold, HasCorrectRightMultiplyByPlusJacobianAt(x, tolerance));
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
4
extern/ceres/include/ceres/normal_prior.h
vendored
4
extern/ceres/include/ceres/normal_prior.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -61,7 +61,7 @@ class CERES_EXPORT NormalPrior final : public CostFunction {
|
||||
public:
|
||||
// Check that the number of rows in the vector b are the same as the
|
||||
// number of columns in the matrix A, crash otherwise.
|
||||
NormalPrior(const Matrix& A, const Vector& b);
|
||||
NormalPrior(const Matrix& A, Vector b);
|
||||
bool Evaluate(double const* const* parameters,
|
||||
double* residuals,
|
||||
double** jacobians) const override;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -176,7 +176,7 @@
|
||||
namespace ceres {
|
||||
|
||||
template <typename CostFunctor,
|
||||
NumericDiffMethodType method = CENTRAL,
|
||||
NumericDiffMethodType kMethod = CENTRAL,
|
||||
int kNumResiduals = 0, // Number of residuals, or ceres::DYNAMIC
|
||||
int... Ns> // Parameters dimensions for each block.
|
||||
class NumericDiffCostFunction final
|
||||
@@ -236,7 +236,7 @@ class NumericDiffCostFunction final
|
||||
}
|
||||
|
||||
internal::EvaluateJacobianForParameterBlocks<ParameterDims>::
|
||||
template Apply<method, kNumResiduals>(
|
||||
template Apply<kMethod, kNumResiduals>(
|
||||
functor_.get(),
|
||||
residuals,
|
||||
options_,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "ceres/internal/variadic_evaluate.h"
|
||||
#include "ceres/numeric_diff_options.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
|
||||
@@ -99,19 +100,55 @@ namespace ceres {
|
||||
// "QuadraticCostFunctor", "CENTRAL, 4", describe the finite
|
||||
// differencing scheme as "central differencing" and the functor as
|
||||
// computing its cost from a 4 dimensional input.
|
||||
//
|
||||
// If the size of the parameter vector is not known at compile time, then an
|
||||
// alternate construction syntax can be used:
|
||||
//
|
||||
// FirstOrderFunction* function
|
||||
// = new NumericDiffFirstOrderFunction<MyScalarCostFunctor, CENTRAL>(
|
||||
// new QuadraticCostFunctor(1.0), 4);
|
||||
//
|
||||
// Note that instead of passing 4 as a template argument, it is now passed as
|
||||
// the second argument to the constructor.
|
||||
template <typename FirstOrderFunctor,
|
||||
NumericDiffMethodType method,
|
||||
int kNumParameters>
|
||||
NumericDiffMethodType kMethod,
|
||||
int kNumParameters = DYNAMIC>
|
||||
class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
|
||||
public:
|
||||
// Constructor for the case where the parameter size is known at compile time.
|
||||
explicit NumericDiffFirstOrderFunction(
|
||||
FirstOrderFunctor* functor,
|
||||
Ownership ownership = TAKE_OWNERSHIP,
|
||||
const NumericDiffOptions& options = NumericDiffOptions())
|
||||
: functor_(functor), ownership_(ownership), options_(options) {
|
||||
: functor_(functor),
|
||||
num_parameters_(kNumParameters),
|
||||
ownership_(ownership),
|
||||
options_(options) {
|
||||
static_assert(kNumParameters != DYNAMIC,
|
||||
"Number of parameters must be static when defined via the "
|
||||
"template parameter. Use the other constructor for "
|
||||
"dynamically sized functions.");
|
||||
static_assert(kNumParameters > 0, "kNumParameters must be positive");
|
||||
}
|
||||
|
||||
// Constructor for the case where the parameter size is specified at run time.
|
||||
explicit NumericDiffFirstOrderFunction(
|
||||
FirstOrderFunctor* functor,
|
||||
int num_parameters,
|
||||
Ownership ownership = TAKE_OWNERSHIP,
|
||||
const NumericDiffOptions& options = NumericDiffOptions())
|
||||
: functor_(functor),
|
||||
num_parameters_(num_parameters),
|
||||
ownership_(ownership),
|
||||
options_(options) {
|
||||
static_assert(
|
||||
kNumParameters == DYNAMIC,
|
||||
"Template parameter must be DYNAMIC when using this constructor. If "
|
||||
"you want to provide the number of parameters statically use the other "
|
||||
"constructor.");
|
||||
CHECK_GT(num_parameters, 0);
|
||||
}
|
||||
|
||||
~NumericDiffFirstOrderFunction() override {
|
||||
if (ownership_ != TAKE_OWNERSHIP) {
|
||||
functor_.release();
|
||||
@@ -121,12 +158,8 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
|
||||
bool Evaluate(const double* const parameters,
|
||||
double* cost,
|
||||
double* gradient) const override {
|
||||
using ParameterDims = internal::StaticParameterDims<kNumParameters>;
|
||||
constexpr int kNumResiduals = 1;
|
||||
|
||||
// Get the function value (cost) at the point to evaluate.
|
||||
if (!internal::VariadicEvaluate<ParameterDims>(
|
||||
*functor_, &parameters, cost)) {
|
||||
if (!(*functor_)(parameters, cost)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -135,27 +168,47 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction {
|
||||
}
|
||||
|
||||
// Create a copy of the parameters which will get mutated.
|
||||
internal::FixedArray<double, 32> parameters_copy(kNumParameters);
|
||||
std::copy_n(parameters, kNumParameters, parameters_copy.data());
|
||||
internal::FixedArray<double, 32> parameters_copy(num_parameters_);
|
||||
std::copy_n(parameters, num_parameters_, parameters_copy.data());
|
||||
double* parameters_ptr = parameters_copy.data();
|
||||
internal::EvaluateJacobianForParameterBlocks<
|
||||
ParameterDims>::template Apply<method, kNumResiduals>(functor_.get(),
|
||||
cost,
|
||||
options_,
|
||||
kNumResiduals,
|
||||
&parameters_ptr,
|
||||
&gradient);
|
||||
return true;
|
||||
constexpr int kNumResiduals = 1;
|
||||
if constexpr (kNumParameters == DYNAMIC) {
|
||||
internal::FirstOrderFunctorAdapter<FirstOrderFunctor> fofa(*functor_);
|
||||
return internal::NumericDiff<
|
||||
internal::FirstOrderFunctorAdapter<FirstOrderFunctor>,
|
||||
kMethod,
|
||||
kNumResiduals,
|
||||
internal::DynamicParameterDims,
|
||||
0,
|
||||
DYNAMIC>::EvaluateJacobianForParameterBlock(&fofa,
|
||||
cost,
|
||||
options_,
|
||||
kNumResiduals,
|
||||
0,
|
||||
num_parameters_,
|
||||
&parameters_ptr,
|
||||
gradient);
|
||||
} else {
|
||||
return internal::EvaluateJacobianForParameterBlocks<
|
||||
internal::StaticParameterDims<kNumParameters>>::
|
||||
template Apply<kMethod, 1>(functor_.get(),
|
||||
cost,
|
||||
options_,
|
||||
kNumResiduals,
|
||||
&parameters_ptr,
|
||||
&gradient);
|
||||
}
|
||||
}
|
||||
|
||||
int NumParameters() const override { return kNumParameters; }
|
||||
int NumParameters() const override { return num_parameters_; }
|
||||
|
||||
const FirstOrderFunctor& functor() const { return *functor_; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<FirstOrderFunctor> functor_;
|
||||
Ownership ownership_;
|
||||
NumericDiffOptions options_;
|
||||
const int num_parameters_;
|
||||
const Ownership ownership_;
|
||||
const NumericDiffOptions options_;
|
||||
};
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
2
extern/ceres/include/ceres/ordered_groups.h
vendored
2
extern/ceres/include/ceres/ordered_groups.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
201
extern/ceres/include/ceres/problem.h
vendored
201
extern/ceres/include/ceres/problem.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -53,7 +53,6 @@ namespace ceres {
|
||||
class CostFunction;
|
||||
class EvaluationCallback;
|
||||
class LossFunction;
|
||||
class LocalParameterization;
|
||||
class Manifold;
|
||||
class Solver;
|
||||
struct CRSMatrix;
|
||||
@@ -118,29 +117,17 @@ using ResidualBlockId = internal::ResidualBlock*;
|
||||
// problem.AddResidualBlock(new MyBinaryCostFunction(...), nullptr, x2, x3);
|
||||
//
|
||||
// Please see cost_function.h for details of the CostFunction object.
|
||||
//
|
||||
// NOTE: We are currently in the process of transitioning from
|
||||
// LocalParameterization to Manifolds in the Ceres API. During this period,
|
||||
// Problem will support using both Manifold and LocalParameterization objects
|
||||
// interchangably. In particular, adding a LocalParameterization to a parameter
|
||||
// block is the same as adding a Manifold to that parameter block. For methods
|
||||
// in the API affected by this change, see their documentation below.
|
||||
class CERES_EXPORT Problem {
|
||||
public:
|
||||
struct CERES_EXPORT Options {
|
||||
// These flags control whether the Problem object owns the CostFunctions,
|
||||
// LossFunctions, LocalParameterizations, and Manifolds passed into the
|
||||
// Problem.
|
||||
// LossFunctions, and Manifolds passed into the Problem.
|
||||
//
|
||||
// If set to TAKE_OWNERSHIP, then the problem object will delete the
|
||||
// corresponding object on destruction. The destructor is careful to delete
|
||||
// the pointers only once, since sharing objects is allowed.
|
||||
Ownership cost_function_ownership = TAKE_OWNERSHIP;
|
||||
Ownership loss_function_ownership = TAKE_OWNERSHIP;
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"Local Parameterizations are deprecated. Use Manifold and "
|
||||
"manifold_ownership instead.")
|
||||
Ownership local_parameterization_ownership = TAKE_OWNERSHIP;
|
||||
Ownership manifold_ownership = TAKE_OWNERSHIP;
|
||||
|
||||
// If true, trades memory for faster RemoveResidualBlock() and
|
||||
@@ -271,66 +258,23 @@ class CERES_EXPORT Problem {
|
||||
// pointer but a different size will result in a crash.
|
||||
void AddParameterBlock(double* values, int size);
|
||||
|
||||
// Add a parameter block with appropriate size and parameterization to the
|
||||
// problem. It is okay for local_parameterization to be nullptr.
|
||||
//
|
||||
// Repeated calls with the same arguments are ignored. Repeated calls
|
||||
// with the same double pointer but a different size results in a crash
|
||||
// (unless Solver::Options::diable_all_safety_checks is set to true).
|
||||
//
|
||||
// Repeated calls with the same double pointer and size but different
|
||||
// LocalParameterization is equivalent to calling
|
||||
// SetParameterization(local_parameterization), i.e., any previously
|
||||
// associated LocalParameterization or Manifold object will be replaced with
|
||||
// the local_parameterization.
|
||||
//
|
||||
// NOTE:
|
||||
// ----
|
||||
//
|
||||
// This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Please move to using the Manifold based version of
|
||||
// AddParameterBlock.
|
||||
//
|
||||
// During the transition from LocalParameterization to Manifold, internally
|
||||
// the LocalParameterization is treated as a Manifold by wrapping it using a
|
||||
// ManifoldAdapter object. So HasManifold() will return true, GetManifold()
|
||||
// will return the wrapped object and ParameterBlockTangentSize() will return
|
||||
// the LocalSize of the LocalParameterization.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Use the version with Manifolds "
|
||||
"instead.")
|
||||
void AddParameterBlock(double* values,
|
||||
int size,
|
||||
LocalParameterization* local_parameterization);
|
||||
|
||||
// Add a parameter block with appropriate size and Manifold to the
|
||||
// problem. It is okay for manifold to be nullptr.
|
||||
//
|
||||
// Repeated calls with the same arguments are ignored. Repeated calls
|
||||
// with the same double pointer but a different size results in a crash
|
||||
// (unless Solver::Options::diable_all_safety_checks is set to true).
|
||||
// (unless Solver::Options::disable_all_safety_checks is set to true).
|
||||
//
|
||||
// Repeated calls with the same double pointer and size but different Manifold
|
||||
// is equivalent to calling SetManifold(manifold), i.e., any previously
|
||||
// associated LocalParameterization or Manifold object will be replaced with
|
||||
// the manifold.
|
||||
//
|
||||
// Note:
|
||||
// ----
|
||||
//
|
||||
// During the transition from LocalParameterization to Manifold, calling
|
||||
// AddParameterBlock with a Manifold when a LocalParameterization is already
|
||||
// associated with the parameter block is okay. It is equivalent to calling
|
||||
// SetManifold(manifold), i.e., any previously associated
|
||||
// LocalParameterization or Manifold object will be replaced with the
|
||||
// manifold.
|
||||
// associated Manifold object will be replaced with the manifold.
|
||||
void AddParameterBlock(double* values, int size, Manifold* manifold);
|
||||
|
||||
// Remove a parameter block from the problem. The LocalParameterization or
|
||||
// Manifold of the parameter block, if it exists, will persist until the
|
||||
// deletion of the problem (similar to cost/loss functions in residual block
|
||||
// removal). Any residual blocks that depend on the parameter are also
|
||||
// removed, as described above in RemoveResidualBlock().
|
||||
// Remove a parameter block from the problem. The Manifold of the parameter
|
||||
// block, if it exists, will persist until the deletion of the problem
|
||||
// (similar to cost/loss functions in residual block removal). Any residual
|
||||
// blocks that depend on the parameter are also removed, as described above
|
||||
// in RemoveResidualBlock().
|
||||
//
|
||||
// If Problem::Options::enable_fast_removal is true, then the removal is fast
|
||||
// (almost constant time). Otherwise, removing a parameter block will incur a
|
||||
@@ -361,76 +305,15 @@ class CERES_EXPORT Problem {
|
||||
|
||||
// Returns true if a parameter block is set constant, and false otherwise. A
|
||||
// parameter block may be set constant in two ways: either by calling
|
||||
// SetParameterBlockConstant or by associating a LocalParameterization or
|
||||
// Manifold with a zero dimensional tangent space with it.
|
||||
// SetParameterBlockConstant or by associating a Manifold with a zero
|
||||
// dimensional tangent space with it.
|
||||
bool IsParameterBlockConstant(const double* values) const;
|
||||
|
||||
// Set the LocalParameterization for the parameter block. Calling
|
||||
// SetParameterization with nullptr will clear any previously set
|
||||
// LocalParameterization or Manifold for the parameter block.
|
||||
//
|
||||
// Repeated calls will cause any previously associated LocalParameterization
|
||||
// or Manifold object to be replaced with the local_parameterization.
|
||||
//
|
||||
// The local_parameterization is owned by the Problem by default (See
|
||||
// Problem::Options to override this behaviour).
|
||||
//
|
||||
// It is acceptable to set the same LocalParameterization for multiple
|
||||
// parameter blocks; the destructor is careful to delete
|
||||
// LocalParamaterizations only once.
|
||||
//
|
||||
// NOTE:
|
||||
// ----
|
||||
//
|
||||
// This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Please move to using the SetManifold instead.
|
||||
//
|
||||
// During the transition from LocalParameterization to Manifold, internally
|
||||
// the LocalParameterization is treated as a Manifold by wrapping it using a
|
||||
// ManifoldAdapter object. So HasManifold() will return true, GetManifold()
|
||||
// will return the wrapped object and ParameterBlockTangentSize will return
|
||||
// the same value of ParameterBlockLocalSize.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Use SetManifold instead.")
|
||||
void SetParameterization(double* values,
|
||||
LocalParameterization* local_parameterization);
|
||||
|
||||
// Get the LocalParameterization object associated with this parameter block.
|
||||
// If there is no LocalParameterization associated then nullptr is returned.
|
||||
//
|
||||
// NOTE: This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Use GetManifold instead.
|
||||
//
|
||||
// Note also that if a LocalParameterization is associated with a parameter
|
||||
// block, HasManifold will return true and GetManifold will return the
|
||||
// LocalParameterization wrapped in a ManifoldAdapter.
|
||||
//
|
||||
// The converse is NOT true, i.e., if a Manifold is associated with a
|
||||
// parameter block, HasParameterization will return false and
|
||||
// GetParameterization will return a nullptr.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Use GetManifold "
|
||||
"instead.")
|
||||
const LocalParameterization* GetParameterization(const double* values) const;
|
||||
|
||||
// Returns true if a LocalParameterization is associated with this parameter
|
||||
// block, false otherwise.
|
||||
//
|
||||
// NOTE: This method is deprecated and will be removed in the next public
|
||||
// release of Ceres Solver. Use HasManifold instead.
|
||||
//
|
||||
// Note also that if a Manifold is associated with the parameter block, this
|
||||
// method will return false.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Use HasManifold instead.")
|
||||
bool HasParameterization(const double* values) const;
|
||||
|
||||
// Set the Manifold for the parameter block. Calling SetManifold with nullptr
|
||||
// will clear any previously set LocalParameterization or Manifold for the
|
||||
// parameter block.
|
||||
// will clear any previously set Manifold for the parameter block.
|
||||
//
|
||||
// Repeated calls will result in any previously associated
|
||||
// LocalParameterization or Manifold object to be replaced with the manifold.
|
||||
// Repeated calls will result in any previously associated Manifold object to
|
||||
// be replaced with the manifold.
|
||||
//
|
||||
// The manifold is owned by the Problem by default (See Problem::Options to
|
||||
// override this behaviour).
|
||||
@@ -440,18 +323,11 @@ class CERES_EXPORT Problem {
|
||||
|
||||
// Get the Manifold object associated with this parameter block.
|
||||
//
|
||||
// If there is no Manifold Or LocalParameterization object associated then
|
||||
// nullptr is returned.
|
||||
//
|
||||
// NOTE: During the transition from LocalParameterization to Manifold,
|
||||
// internally the LocalParameterization is treated as a Manifold by wrapping
|
||||
// it using a ManifoldAdapter object. So calling GetManifold on a parameter
|
||||
// block with a LocalParameterization associated with it will return the
|
||||
// LocalParameterization wrapped in a ManifoldAdapter
|
||||
// If there is no Manifold object associated then nullptr is returned.
|
||||
const Manifold* GetManifold(const double* values) const;
|
||||
|
||||
// Returns true if a Manifold or a LocalParameterization is associated with
|
||||
// this parameter block, false otherwise.
|
||||
// Returns true if a Manifold is associated with this parameter block, false
|
||||
// otherwise.
|
||||
bool HasManifold(const double* values) const;
|
||||
|
||||
// Set the lower/upper bound for the parameter at position "index".
|
||||
@@ -484,19 +360,9 @@ class CERES_EXPORT Problem {
|
||||
// The size of the parameter block.
|
||||
int ParameterBlockSize(const double* values) const;
|
||||
|
||||
// The dimension of the tangent space of the LocalParameterization or Manifold
|
||||
// for the parameter block. If there is no LocalParameterization or Manifold
|
||||
// associated with this parameter block, then ParameterBlockLocalSize =
|
||||
// ParameterBlockSize.
|
||||
CERES_DEPRECATED_WITH_MSG(
|
||||
"LocalParameterizations are deprecated. Use ParameterBlockTangentSize "
|
||||
"instead.")
|
||||
int ParameterBlockLocalSize(const double* values) const;
|
||||
|
||||
// The dimenion of the tangent space of the LocalParameterization or Manifold
|
||||
// for the parameter block. If there is no LocalParameterization or Manifold
|
||||
// associated with this parameter block, then ParameterBlockTangentSize =
|
||||
// ParameterBlockSize.
|
||||
// The dimension of the tangent space of the Manifold for the parameter block.
|
||||
// If there is no Manifold associated with this parameter block, then
|
||||
// ParameterBlockTangentSize = ParameterBlockSize.
|
||||
int ParameterBlockTangentSize(const double* values) const;
|
||||
|
||||
// Is the given parameter block present in this problem or not?
|
||||
@@ -596,11 +462,11 @@ class CERES_EXPORT Problem {
|
||||
//
|
||||
// is the way to do so.
|
||||
//
|
||||
// Note 2: If no LocalParameterizations or Manifolds are used, then the size
|
||||
// of the gradient vector (and the number of columns in the jacobian) is the
|
||||
// sum of the sizes of all the parameter blocks. If a parameter block has a
|
||||
// LocalParameterization or Manifold, then it contributes "TangentSize"
|
||||
// entries to the gradient vector (and the number of columns in the jacobian).
|
||||
// Note 2: If no Manifolds are used, then the size of the gradient vector (and
|
||||
// the number of columns in the jacobian) is the sum of the sizes of all the
|
||||
// parameter blocks. If a parameter block has a Manifold, then it contributes
|
||||
// "TangentSize" entries to the gradient vector (and the number of columns in
|
||||
// the jacobian).
|
||||
//
|
||||
// Note 3: This function cannot be called while the problem is being solved,
|
||||
// for example it cannot be called from an IterationCallback at the end of an
|
||||
@@ -631,11 +497,10 @@ class CERES_EXPORT Problem {
|
||||
// returns false, the caller should expect the output memory locations to have
|
||||
// been modified.
|
||||
//
|
||||
// The returned cost and jacobians have had robustification and
|
||||
// LocalParameterization/Manifold applied already; for example, the jacobian
|
||||
// for a 4-dimensional quaternion parameter using the
|
||||
// "QuaternionParameterization" is num_residuals by 3 instead of num_residuals
|
||||
// by 4.
|
||||
// The returned cost and jacobians have had robustification and Manifold
|
||||
// applied already; for example, the jacobian for a 4-dimensional quaternion
|
||||
// parameter using the "QuaternionManifold" is num_residuals by 3
|
||||
// instead of num_residuals by 4.
|
||||
//
|
||||
// apply_loss_function as the name implies allows the user to switch the
|
||||
// application of the loss function on and off.
|
||||
@@ -672,9 +537,13 @@ class CERES_EXPORT Problem {
|
||||
double* residuals,
|
||||
double** jacobians) const;
|
||||
|
||||
// Returns reference to the options with which the Problem was constructed.
|
||||
const Options& options() const;
|
||||
|
||||
// Returns pointer to Problem implementation
|
||||
internal::ProblemImpl* mutable_impl();
|
||||
|
||||
private:
|
||||
friend class Solver;
|
||||
friend class Covariance;
|
||||
std::unique_ptr<internal::ProblemImpl> impl_;
|
||||
};
|
||||
|
||||
|
||||
17
extern/ceres/include/ceres/product_manifold.h
vendored
17
extern/ceres/include/ceres/product_manifold.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -257,28 +257,21 @@ class ProductManifold final : public Manifold {
|
||||
template <typename T, std::size_t N>
|
||||
static std::array<T, N> ExclusiveScan(const std::array<T, N>& values) {
|
||||
std::array<T, N> result;
|
||||
// TODO Replace with std::exclusive_scan once all platforms have full C++17
|
||||
// STL support.
|
||||
T init = 0;
|
||||
|
||||
// TODO Replace by std::exclusive_scan once C++17 is available
|
||||
for (std::size_t i = 0; i != N; ++i) {
|
||||
result[i] = init;
|
||||
init += values[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO Replace by std::void_t once C++17 is available
|
||||
template <typename... Types>
|
||||
struct Void {
|
||||
using type = void;
|
||||
};
|
||||
|
||||
template <typename T, typename E = void>
|
||||
struct IsDereferenceable : std::false_type {};
|
||||
|
||||
template <typename T>
|
||||
struct IsDereferenceable<T, typename Void<decltype(*std::declval<T>())>::type>
|
||||
struct IsDereferenceable<T, std::void_t<decltype(*std::declval<T>())>>
|
||||
: std::true_type {};
|
||||
|
||||
template <typename T,
|
||||
@@ -311,7 +304,6 @@ class ProductManifold final : public Manifold {
|
||||
int tangent_size_;
|
||||
};
|
||||
|
||||
#ifdef CERES_HAS_CPP17
|
||||
// C++17 deduction guide that allows the user to avoid explicitly specifying
|
||||
// the template parameters of ProductManifold. The class can instead be
|
||||
// instantiated as follows:
|
||||
@@ -321,7 +313,6 @@ class ProductManifold final : public Manifold {
|
||||
template <typename Manifold0, typename Manifold1, typename... Manifolds>
|
||||
ProductManifold(Manifold0&&, Manifold1&&, Manifolds&&...)
|
||||
-> ProductManifold<Manifold0, Manifold1, Manifolds...>;
|
||||
#endif
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
|
||||
242
extern/ceres/include/ceres/rotation.h
vendored
242
extern/ceres/include/ceres/rotation.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,9 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "ceres/constants.h"
|
||||
#include "ceres/internal/euler_angles.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
@@ -60,7 +61,7 @@ namespace ceres {
|
||||
//
|
||||
// the expression M(i, j) is equivalent to
|
||||
//
|
||||
// arrary[i * row_stride + j * col_stride]
|
||||
// array[i * row_stride + j * col_stride]
|
||||
//
|
||||
// Conversion functions to and from rotation matrices accept
|
||||
// MatrixAdapters to permit using row-major and column-major layouts,
|
||||
@@ -136,6 +137,71 @@ template <typename T, int row_stride, int col_stride>
|
||||
void EulerAnglesToRotationMatrix(
|
||||
const T* euler, const MatrixAdapter<T, row_stride, col_stride>& R);
|
||||
|
||||
// Convert a generic Euler Angle sequence (in radians) to a 3x3 rotation matrix.
|
||||
//
|
||||
// Euler Angles define a sequence of 3 rotations about a sequence of axes,
|
||||
// typically taken to be the X, Y, or Z axes. The last axis may be the same as
|
||||
// the first axis (e.g. ZYZ) per Euler's original definition of his angles
|
||||
// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in
|
||||
// the nautical and aerospace fields (Tait-Bryan angles). The three rotations
|
||||
// may be in a global frame of reference (Extrinsic) or in a body fixed frame of
|
||||
// reference (Intrinsic) that moves with the rotating object.
|
||||
//
|
||||
// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from
|
||||
// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the
|
||||
// first rotation and 3 binary choices:
|
||||
// 1. Parity of the axis permutation. The axis sequence has Even parity if the
|
||||
// second axis of rotation is 'greater-than' the first axis of rotation
|
||||
// according to the order X<Y<Z<X, otherwise it has Odd parity.
|
||||
// 2. Proper Euler Angles v.s. Tait-Bryan Angles
|
||||
// 3. Extrinsic Rotations v.s. Intrinsic Rotations
|
||||
// compactly represent all 24 possible Euler Angle Conventions
|
||||
//
|
||||
// One template parameter: EulerSystem must be explicitly given. This parameter
|
||||
// is a tag named by 'Extrinsic' or 'Intrinsic' followed by three characters in
|
||||
// the set '[XYZ]', specifying the axis sequence, e.g. ceres::ExtrinsicYZY
|
||||
// (robotic arms), ceres::IntrinsicZYX (for aerospace), etc.
|
||||
//
|
||||
// The order of elements in the input array 'euler' follows the axis sequence
|
||||
template <typename EulerSystem, typename T>
|
||||
inline void EulerAnglesToRotation(const T* euler, T* R);
|
||||
|
||||
template <typename EulerSystem, typename T, int row_stride, int col_stride>
|
||||
void EulerAnglesToRotation(const T* euler,
|
||||
const MatrixAdapter<T, row_stride, col_stride>& R);
|
||||
|
||||
// Convert a 3x3 rotation matrix to a generic Euler Angle sequence (in radians)
|
||||
//
|
||||
// Euler Angles define a sequence of 3 rotations about a sequence of axes,
|
||||
// typically taken to be the X, Y, or Z axes. The last axis may be the same as
|
||||
// the first axis (e.g. ZYZ) per Euler's original definition of his angles
|
||||
// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in
|
||||
// the nautical and aerospace fields (Tait-Bryan angles). The three rotations
|
||||
// may be in a global frame of reference (Extrinsic) or in a body fixed frame of
|
||||
// reference (Intrinsic) that moves with the rotating object.
|
||||
//
|
||||
// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from
|
||||
// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the
|
||||
// first rotation and 3 binary choices:
|
||||
// 1. Oddness of the axis permutation, that defines whether the second axis is
|
||||
// 'greater-than' the first axis (according to the order X>Y>Z>X)
|
||||
// 2. Proper Euler Angles v.s. Tait-Bryan Angles
|
||||
// 3. Extrinsic Rotations v.s. Intrinsic Rotations
|
||||
// compactly represent all 24 possible Euler Angle Conventions
|
||||
//
|
||||
// One template parameter: EulerSystem must be explicitly given. This parameter
|
||||
// is a tag named by 'Extrinsic' or 'Intrinsic' followed by three characters in
|
||||
// the set '[XYZ]', specifying the axis sequence, e.g. ceres::ExtrinsicYZY
|
||||
// (robotic arms), ceres::IntrinsicZYX (for aerospace), etc.
|
||||
//
|
||||
// The order of elements in the output array 'euler' follows the axis sequence
|
||||
template <typename EulerSystem, typename T>
|
||||
inline void RotationMatrixToEulerAngles(const T* R, T* euler);
|
||||
|
||||
template <typename EulerSystem, typename T, int row_stride, int col_stride>
|
||||
void RotationMatrixToEulerAngles(
|
||||
const MatrixAdapter<const T, row_stride, col_stride>& R, T* euler);
|
||||
|
||||
// Convert a 4-vector to a 3x3 scaled rotation matrix.
|
||||
//
|
||||
// The choice of rotation is such that the quaternion [1 0 0 0] goes to an
|
||||
@@ -247,14 +313,15 @@ MatrixAdapter<T, 3, 1> RowMajorAdapter3x3(T* pointer) {
|
||||
|
||||
template <typename T>
|
||||
inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) {
|
||||
using std::fpclassify;
|
||||
using std::hypot;
|
||||
const T& a0 = angle_axis[0];
|
||||
const T& a1 = angle_axis[1];
|
||||
const T& a2 = angle_axis[2];
|
||||
const T theta_squared = a0 * a0 + a1 * a1 + a2 * a2;
|
||||
const T theta = hypot(a0, a1, a2);
|
||||
|
||||
// For points not at the origin, the full conversion is numerically stable.
|
||||
if (theta_squared > T(0.0)) {
|
||||
const T theta = sqrt(theta_squared);
|
||||
if (fpclassify(theta) != FP_ZERO) {
|
||||
const T half_theta = theta * T(0.5);
|
||||
const T k = sin(half_theta) / theta;
|
||||
quaternion[0] = cos(half_theta);
|
||||
@@ -276,15 +343,16 @@ inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) {
|
||||
|
||||
template <typename T>
|
||||
inline void QuaternionToAngleAxis(const T* quaternion, T* angle_axis) {
|
||||
using std::fpclassify;
|
||||
using std::hypot;
|
||||
const T& q1 = quaternion[1];
|
||||
const T& q2 = quaternion[2];
|
||||
const T& q3 = quaternion[3];
|
||||
const T sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3;
|
||||
const T sin_theta = hypot(q1, q2, q3);
|
||||
|
||||
// For quaternions representing non-zero rotation, the conversion
|
||||
// is numerically stable.
|
||||
if (sin_squared_theta > T(0.0)) {
|
||||
const T sin_theta = sqrt(sin_squared_theta);
|
||||
if (fpclassify(sin_theta) != FP_ZERO) {
|
||||
const T& cos_theta = quaternion[0];
|
||||
|
||||
// If cos_theta is negative, theta is greater than pi/2, which
|
||||
@@ -385,13 +453,14 @@ inline void AngleAxisToRotationMatrix(const T* angle_axis, T* R) {
|
||||
template <typename T, int row_stride, int col_stride>
|
||||
void AngleAxisToRotationMatrix(
|
||||
const T* angle_axis, const MatrixAdapter<T, row_stride, col_stride>& R) {
|
||||
using std::fpclassify;
|
||||
using std::hypot;
|
||||
static const T kOne = T(1.0);
|
||||
const T theta2 = DotProduct(angle_axis, angle_axis);
|
||||
if (theta2 > T(std::numeric_limits<double>::epsilon())) {
|
||||
const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]);
|
||||
if (fpclassify(theta) != FP_ZERO) {
|
||||
// We want to be careful to only evaluate the square root if the
|
||||
// norm of the angle_axis vector is greater than zero. Otherwise
|
||||
// we get a division by zero.
|
||||
const T theta = sqrt(theta2);
|
||||
const T wx = angle_axis[0] / theta;
|
||||
const T wy = angle_axis[1] / theta;
|
||||
const T wz = angle_axis[2] / theta;
|
||||
@@ -411,7 +480,7 @@ void AngleAxisToRotationMatrix(
|
||||
R(2, 2) = costheta + wz*wz*(kOne - costheta);
|
||||
// clang-format on
|
||||
} else {
|
||||
// Near zero, we switch to using the first order Taylor expansion.
|
||||
// At zero, we switch to using the first order Taylor expansion.
|
||||
R(0, 0) = kOne;
|
||||
R(1, 0) = angle_axis[2];
|
||||
R(2, 0) = -angle_axis[1];
|
||||
@@ -424,6 +493,141 @@ void AngleAxisToRotationMatrix(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename EulerSystem, typename T>
|
||||
inline void EulerAnglesToRotation(const T* euler, T* R) {
|
||||
EulerAnglesToRotation<EulerSystem>(euler, RowMajorAdapter3x3(R));
|
||||
}
|
||||
|
||||
template <typename EulerSystem, typename T, int row_stride, int col_stride>
|
||||
void EulerAnglesToRotation(const T* euler,
|
||||
const MatrixAdapter<T, row_stride, col_stride>& R) {
|
||||
using std::cos;
|
||||
using std::sin;
|
||||
|
||||
const auto [i, j, k] = EulerSystem::kAxes;
|
||||
|
||||
T ea[3];
|
||||
ea[1] = euler[1];
|
||||
if constexpr (EulerSystem::kIsIntrinsic) {
|
||||
ea[0] = euler[2];
|
||||
ea[2] = euler[0];
|
||||
} else {
|
||||
ea[0] = euler[0];
|
||||
ea[2] = euler[2];
|
||||
}
|
||||
if constexpr (EulerSystem::kIsParityOdd) {
|
||||
ea[0] = -ea[0];
|
||||
ea[1] = -ea[1];
|
||||
ea[2] = -ea[2];
|
||||
}
|
||||
|
||||
const T ci = cos(ea[0]);
|
||||
const T cj = cos(ea[1]);
|
||||
const T ch = cos(ea[2]);
|
||||
const T si = sin(ea[0]);
|
||||
const T sj = sin(ea[1]);
|
||||
const T sh = sin(ea[2]);
|
||||
const T cc = ci * ch;
|
||||
const T cs = ci * sh;
|
||||
const T sc = si * ch;
|
||||
const T ss = si * sh;
|
||||
if constexpr (EulerSystem::kIsProperEuler) {
|
||||
R(i, i) = cj;
|
||||
R(i, j) = sj * si;
|
||||
R(i, k) = sj * ci;
|
||||
R(j, i) = sj * sh;
|
||||
R(j, j) = -cj * ss + cc;
|
||||
R(j, k) = -cj * cs - sc;
|
||||
R(k, i) = -sj * ch;
|
||||
R(k, j) = cj * sc + cs;
|
||||
R(k, k) = cj * cc - ss;
|
||||
} else {
|
||||
R(i, i) = cj * ch;
|
||||
R(i, j) = sj * sc - cs;
|
||||
R(i, k) = sj * cc + ss;
|
||||
R(j, i) = cj * sh;
|
||||
R(j, j) = sj * ss + cc;
|
||||
R(j, k) = sj * cs - sc;
|
||||
R(k, i) = -sj;
|
||||
R(k, j) = cj * si;
|
||||
R(k, k) = cj * ci;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename EulerSystem, typename T>
|
||||
inline void RotationMatrixToEulerAngles(const T* R, T* euler) {
|
||||
RotationMatrixToEulerAngles<EulerSystem>(RowMajorAdapter3x3(R), euler);
|
||||
}
|
||||
|
||||
template <typename EulerSystem, typename T, int row_stride, int col_stride>
|
||||
void RotationMatrixToEulerAngles(
|
||||
const MatrixAdapter<const T, row_stride, col_stride>& R, T* euler) {
|
||||
using std::atan2;
|
||||
using std::fpclassify;
|
||||
using std::hypot;
|
||||
|
||||
const auto [i, j, k] = EulerSystem::kAxes;
|
||||
|
||||
T ea[3];
|
||||
if constexpr (EulerSystem::kIsProperEuler) {
|
||||
const T sy = hypot(R(i, j), R(i, k));
|
||||
if (fpclassify(sy) != FP_ZERO) {
|
||||
ea[0] = atan2(R(i, j), R(i, k));
|
||||
ea[1] = atan2(sy, R(i, i));
|
||||
ea[2] = atan2(R(j, i), -R(k, i));
|
||||
} else {
|
||||
ea[0] = atan2(-R(j, k), R(j, j));
|
||||
ea[1] = atan2(sy, R(i, i));
|
||||
ea[2] = T(0.0);
|
||||
}
|
||||
} else {
|
||||
const T cy = hypot(R(i, i), R(j, i));
|
||||
if (fpclassify(cy) != FP_ZERO) {
|
||||
ea[0] = atan2(R(k, j), R(k, k));
|
||||
ea[1] = atan2(-R(k, i), cy);
|
||||
ea[2] = atan2(R(j, i), R(i, i));
|
||||
} else {
|
||||
ea[0] = atan2(-R(j, k), R(j, j));
|
||||
ea[1] = atan2(-R(k, i), cy);
|
||||
ea[2] = T(0.0);
|
||||
}
|
||||
}
|
||||
if constexpr (EulerSystem::kIsParityOdd) {
|
||||
ea[0] = -ea[0];
|
||||
ea[1] = -ea[1];
|
||||
ea[2] = -ea[2];
|
||||
}
|
||||
euler[1] = ea[1];
|
||||
if constexpr (EulerSystem::kIsIntrinsic) {
|
||||
euler[0] = ea[2];
|
||||
euler[2] = ea[0];
|
||||
} else {
|
||||
euler[0] = ea[0];
|
||||
euler[2] = ea[2];
|
||||
}
|
||||
|
||||
// Proper euler angles are defined for angles in
|
||||
// [-pi, pi) x [0, pi] x [-pi, pi)
|
||||
// which is enforced here
|
||||
if constexpr (EulerSystem::kIsProperEuler) {
|
||||
const T kPi(constants::pi);
|
||||
const T kTwoPi(2.0 * kPi);
|
||||
if (euler[1] < T(0.0) || ea[1] > kPi) {
|
||||
euler[0] += kPi;
|
||||
euler[1] = -euler[1];
|
||||
euler[2] -= kPi;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
if (euler[i] < -kPi) {
|
||||
euler[i] += kTwoPi;
|
||||
} else if (euler[i] > kPi) {
|
||||
euler[i] -= kTwoPi;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void EulerAnglesToRotationMatrix(const T* euler,
|
||||
const int row_stride_parameter,
|
||||
@@ -589,9 +793,12 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
|
||||
const T pt[3],
|
||||
T result[3]) {
|
||||
DCHECK_NE(pt, result) << "Inplace rotation is not supported.";
|
||||
using std::fpclassify;
|
||||
using std::hypot;
|
||||
|
||||
const T theta2 = DotProduct(angle_axis, angle_axis);
|
||||
if (theta2 > T(std::numeric_limits<double>::epsilon())) {
|
||||
const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]);
|
||||
|
||||
if (fpclassify(theta) != FP_ZERO) {
|
||||
// Away from zero, use the rodriguez formula
|
||||
//
|
||||
// result = pt costheta +
|
||||
@@ -602,7 +809,6 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
|
||||
// norm of the angle_axis vector is greater than zero. Otherwise
|
||||
// we get a division by zero.
|
||||
//
|
||||
const T theta = sqrt(theta2);
|
||||
const T costheta = cos(theta);
|
||||
const T sintheta = sin(theta);
|
||||
const T theta_inverse = T(1.0) / theta;
|
||||
@@ -623,7 +829,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
|
||||
result[1] = pt[1] * costheta + w_cross_pt[1] * sintheta + w[1] * tmp;
|
||||
result[2] = pt[2] * costheta + w_cross_pt[2] * sintheta + w[2] * tmp;
|
||||
} else {
|
||||
// Near zero, the first order Taylor approximation of the rotation
|
||||
// At zero, the first order Taylor approximation of the rotation
|
||||
// matrix R corresponding to a vector w and angle theta is
|
||||
//
|
||||
// R = I + hat(w) * sin(theta)
|
||||
@@ -635,7 +841,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3],
|
||||
// and actually performing multiplication with the point pt, gives us
|
||||
// R * pt = pt + angle_axis x pt.
|
||||
//
|
||||
// Switching to the Taylor expansion near zero provides meaningful
|
||||
// Switching to the Taylor expansion at zero provides meaningful
|
||||
// derivatives when evaluated using Jets.
|
||||
//
|
||||
// Explicitly inlined evaluation of the cross product for
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
264
extern/ceres/include/ceres/solver.h
vendored
264
extern/ceres/include/ceres/solver.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -64,8 +64,6 @@ class CERES_EXPORT Solver {
|
||||
// with a message describing the problem.
|
||||
bool IsValid(std::string* error) const;
|
||||
|
||||
// Minimizer options ----------------------------------------
|
||||
|
||||
// Ceres supports the two major families of optimization strategies -
|
||||
// Trust Region and Line Search.
|
||||
//
|
||||
@@ -378,88 +376,144 @@ class CERES_EXPORT Solver {
|
||||
DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN;
|
||||
|
||||
// Ceres supports using multiple sparse linear algebra libraries for sparse
|
||||
// matrix ordering and factorizations. Currently, SUITE_SPARSE and CX_SPARSE
|
||||
// are the valid choices, depending on whether they are linked into Ceres at
|
||||
// build time.
|
||||
// matrix ordering and factorizations.
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
|
||||
#if !defined(CERES_NO_SUITESPARSE)
|
||||
SUITE_SPARSE;
|
||||
#elif defined(CERES_USE_EIGEN_SPARSE)
|
||||
EIGEN_SPARSE;
|
||||
#elif !defined(CERES_NO_CXSPARSE)
|
||||
CX_SPARSE;
|
||||
#elif !defined(CERES_NO_ACCELERATE_SPARSE)
|
||||
ACCELERATE_SPARSE;
|
||||
#elif defined(CERES_USE_EIGEN_SPARSE)
|
||||
EIGEN_SPARSE;
|
||||
#else
|
||||
NO_SPARSE;
|
||||
#endif
|
||||
|
||||
// The order in which variables are eliminated in a linear solver
|
||||
// can have a significant of impact on the efficiency and accuracy
|
||||
// of the method. e.g., when doing sparse Cholesky factorization,
|
||||
// can have a significant impact on the efficiency and accuracy of
|
||||
// the method. e.g., when doing sparse Cholesky factorization,
|
||||
// there are matrices for which a good ordering will give a
|
||||
// Cholesky factor with O(n) storage, whereas a bad ordering will
|
||||
// result in a completely dense factor.
|
||||
//
|
||||
// Ceres allows the user to provide varying amounts of hints to
|
||||
// the solver about the variable elimination ordering to use. This
|
||||
// can range from no hints, where the solver is free to decide the
|
||||
// best possible ordering based on the user's choices like the
|
||||
// linear solver being used, to an exact order in which the
|
||||
// variables should be eliminated, and a variety of possibilities
|
||||
// in between.
|
||||
// Sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
|
||||
// SPARSE_SCHUR use a fill reducing ordering of the columns and
|
||||
// rows of the matrix being factorized before computing the
|
||||
// numeric factorization.
|
||||
//
|
||||
// Instances of the ParameterBlockOrdering class are used to
|
||||
// communicate this information to Ceres.
|
||||
// This enum controls the type of algorithm used to compute
|
||||
// this fill reducing ordering. There is no single algorithm
|
||||
// that works on all matrices, so determining which algorithm
|
||||
// works better is a matter of empirical experimentation.
|
||||
//
|
||||
// Formally an ordering is an ordered partitioning of the
|
||||
// parameter blocks, i.e, each parameter block belongs to exactly
|
||||
// one group, and each group has a unique non-negative integer
|
||||
// associated with it, that determines its order in the set of
|
||||
// groups.
|
||||
// The exact behaviour of this setting is affected by the value of
|
||||
// linear_solver_ordering as described below.
|
||||
LinearSolverOrderingType linear_solver_ordering_type = AMD;
|
||||
|
||||
// Besides specifying the fill reducing ordering via
|
||||
// linear_solver_ordering_type, Ceres allows the user to provide varying
|
||||
// amounts of hints to the linear solver about the variable elimination
|
||||
// ordering to use. This can range from no hints, where the solver is free
|
||||
// to decide the best possible ordering based on the user's choices like the
|
||||
// linear solver being used, to an exact order in which the variables should
|
||||
// be eliminated, and a variety of possibilities in between.
|
||||
//
|
||||
// Given such an ordering, Ceres ensures that the parameter blocks in
|
||||
// the lowest numbered group are eliminated first, and then the
|
||||
// parameter blocks in the next lowest numbered group and so on. Within
|
||||
// each group, Ceres is free to order the parameter blocks as it
|
||||
// chooses.
|
||||
// Instances of the ParameterBlockOrdering class are used to communicate
|
||||
// this information to Ceres.
|
||||
//
|
||||
// If nullptr, then all parameter blocks are assumed to be in the
|
||||
// same group and the solver is free to decide the best
|
||||
// ordering.
|
||||
// Formally an ordering is an ordered partitioning of the parameter blocks,
|
||||
// i.e, each parameter block belongs to exactly one group, and each group
|
||||
// has a unique non-negative integer associated with it, that determines its
|
||||
// order in the set of groups.
|
||||
//
|
||||
// e.g. Consider the linear system
|
||||
//
|
||||
// x + y = 3
|
||||
// 2x + 3y = 7
|
||||
//
|
||||
// There are two ways in which it can be solved. First eliminating x
|
||||
// from the two equations, solving for y and then back substituting
|
||||
// for x, or first eliminating y, solving for x and back substituting
|
||||
// for y. The user can construct three orderings here.
|
||||
// There are two ways in which it can be solved. First eliminating x from
|
||||
// the two equations, solving for y and then back substituting for x, or
|
||||
// first eliminating y, solving for x and back substituting for y. The user
|
||||
// can construct three orderings here.
|
||||
//
|
||||
// {0: x}, {1: y} - eliminate x first.
|
||||
// {0: y}, {1: x} - eliminate y first.
|
||||
// {0: x, y} - Solver gets to decide the elimination order.
|
||||
//
|
||||
// Thus, to have Ceres determine the ordering automatically using
|
||||
// heuristics, put all the variables in group 0 and to control the
|
||||
// ordering for every variable, create groups 0..N-1, one per
|
||||
// variable, in the desired order.
|
||||
// Thus, to have Ceres determine the ordering automatically, put all the
|
||||
// variables in group 0 and to control the ordering for every variable
|
||||
// create groups 0 ... N-1, one per variable, in the desired
|
||||
// order.
|
||||
//
|
||||
// linear_solver_ordering == nullptr and an ordering where all the parameter
|
||||
// blocks are in one elimination group mean the same thing - the solver is
|
||||
// free to choose what it thinks is the best elimination ordering. Therefore
|
||||
// in the following we will only consider the case where
|
||||
// linear_solver_ordering is nullptr.
|
||||
//
|
||||
// The exact interpretation of this information depends on the values of
|
||||
// linear_solver_ordering_type and linear_solver_type/preconditioner_type
|
||||
// and sparse_linear_algebra_type.
|
||||
//
|
||||
// Bundle Adjustment
|
||||
// -----------------
|
||||
// =================
|
||||
//
|
||||
// A particular case of interest is bundle adjustment, where the user
|
||||
// has two options. The default is to not specify an ordering at all,
|
||||
// the solver will see that the user wants to use a Schur type solver
|
||||
// and figure out the right elimination ordering.
|
||||
// If the user is using one of the Schur solvers (DENSE_SCHUR,
|
||||
// SPARSE_SCHUR, ITERATIVE_SCHUR) and chooses to specify an
|
||||
// ordering, it must have one important property. The lowest
|
||||
// numbered elimination group must form an independent set in the
|
||||
// graph corresponding to the Hessian, or in other words, no two
|
||||
// parameter blocks in the first elimination group should
|
||||
// co-occur in the same residual block. For the best performance,
|
||||
// this elimination group should be as large as possible. For
|
||||
// standard bundle adjustment problems, this corresponds to the
|
||||
// first elimination group containing all the 3d points, and the
|
||||
// second containing all the camera parameter blocks.
|
||||
//
|
||||
// But if the user already knows what parameter blocks are points and
|
||||
// what are cameras, they can save preprocessing time by partitioning
|
||||
// the parameter blocks into two groups, one for the points and one
|
||||
// for the cameras, where the group containing the points has an id
|
||||
// smaller than the group containing cameras.
|
||||
// If the user leaves the choice to Ceres, then the solver uses an
|
||||
// approximate maximum independent set algorithm to identify the first
|
||||
// elimination group.
|
||||
//
|
||||
// sparse_linear_algebra_library_type = SUITE_SPARSE
|
||||
// =================================================
|
||||
//
|
||||
// linear_solver_ordering_type = AMD
|
||||
// ---------------------------------
|
||||
//
|
||||
// A Constrained Approximate Minimum Degree (CAMD) ordering is used where the
|
||||
// parameter blocks in the lowest numbered group are eliminated first, and
|
||||
// then the parameter blocks in the next lowest numbered group and so
|
||||
// on. Within each group, CAMD is free to order the parameter blocks as it
|
||||
// chooses.
|
||||
//
|
||||
// linear_solver_ordering_type = NESDIS
|
||||
// -------------------------------------
|
||||
//
|
||||
// a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
|
||||
// linear_solver_type = CGNR and preconditioner_type = SUBSET
|
||||
//
|
||||
// The value of linear_solver_ordering is ignored and a Nested Dissection
|
||||
// algorithm is used to compute a fill reducing ordering.
|
||||
//
|
||||
// b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
|
||||
//
|
||||
// ONLY the lowest group is used to compute the Schur complement, and
|
||||
// Nested Dissection is used to compute a fill reducing ordering for the
|
||||
// Schur Complement (or its preconditioner).
|
||||
//
|
||||
// sparse_linear_algebra_library_type = EIGEN_SPARSE or ACCELERATE_SPARSE
|
||||
// ======================================================================
|
||||
//
|
||||
// a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or
|
||||
// linear_solver_type = CGNR and preconditioner_type = SUBSET
|
||||
//
|
||||
// then the value of linear_solver_ordering is ignored and AMD or NESDIS is
|
||||
// used to compute a fill reducing ordering as requested by the user.
|
||||
//
|
||||
// b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR
|
||||
//
|
||||
// ONLY the lowest group is used to compute the Schur complement, and AMD
|
||||
// or NESDIS is used to compute a fill reducing ordering for the Schur
|
||||
// Complement (or its preconditioner).
|
||||
std::shared_ptr<ParameterBlockOrdering> linear_solver_ordering;
|
||||
|
||||
// Use an explicitly computed Schur complement matrix with
|
||||
@@ -500,12 +554,6 @@ class CERES_EXPORT Solver {
|
||||
// Jacobian matrix and generally speaking, there is no performance
|
||||
// penalty for doing so.
|
||||
|
||||
// In some rare cases, it is worth using a more complicated
|
||||
// reordering algorithm which has slightly better runtime
|
||||
// performance at the expense of an extra copy of the Jacobian
|
||||
// matrix. Setting use_postordering to true enables this tradeoff.
|
||||
bool use_postordering = false;
|
||||
|
||||
// Some non-linear least squares problems are symbolically dense but
|
||||
// numerically sparse. i.e. at any given state only a small number
|
||||
// of jacobian entries are non-zero, but the position and number of
|
||||
@@ -521,11 +569,6 @@ class CERES_EXPORT Solver {
|
||||
// This setting only affects the SPARSE_NORMAL_CHOLESKY solver.
|
||||
bool dynamic_sparsity = false;
|
||||
|
||||
// TODO(sameeragarwal): Further expand the documentation for the
|
||||
// following two options.
|
||||
|
||||
// NOTE1: EXPERIMENTAL FEATURE, UNDER DEVELOPMENT, USE AT YOUR OWN RISK.
|
||||
//
|
||||
// If use_mixed_precision_solves is true, the Gauss-Newton matrix
|
||||
// is computed in double precision, but its factorization is
|
||||
// computed in single precision. This can result in significant
|
||||
@@ -536,16 +579,57 @@ class CERES_EXPORT Solver {
|
||||
// If use_mixed_precision_solves is true, we recommend setting
|
||||
// max_num_refinement_iterations to 2-3.
|
||||
//
|
||||
// NOTE2: The following two options are currently only applicable
|
||||
// if sparse_linear_algebra_library_type is EIGEN_SPARSE or
|
||||
// ACCELERATE_SPARSE, and linear_solver_type is SPARSE_NORMAL_CHOLESKY
|
||||
// or SPARSE_SCHUR.
|
||||
// This option is available when the linear solver uses sparse or dense
|
||||
// cholesky factorization, except when sparse_linear_algebra_library_type =
|
||||
// SUITE_SPARSE.
|
||||
bool use_mixed_precision_solves = false;
|
||||
|
||||
// Number of steps of the iterative refinement process to run when
|
||||
// computing the Gauss-Newton step.
|
||||
int max_num_refinement_iterations = 0;
|
||||
|
||||
// Minimum number of iterations for which the linear solver should
|
||||
// run, even if the convergence criterion is satisfied.
|
||||
int min_linear_solver_iterations = 0;
|
||||
|
||||
// Maximum number of iterations for which the linear solver should
|
||||
// run. If the solver does not converge in less than
|
||||
// max_linear_solver_iterations, then it returns MAX_ITERATIONS,
|
||||
// as its termination type.
|
||||
int max_linear_solver_iterations = 500;
|
||||
|
||||
// Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION.
|
||||
// Each iteration corresponds to one more term in the power series expansion
|
||||
// of the inverse of the Schur complement. This value controls the maximum
|
||||
// number of iterations whether it is used as a preconditioner or just to
|
||||
// initialize the solution for ITERATIVE_SCHUR.
|
||||
int max_num_spse_iterations = 5;
|
||||
|
||||
// Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for
|
||||
// ITERATIVE_SCHUR. This option can be set true regardless of what
|
||||
// preconditioner is being used.
|
||||
bool use_spse_initialization = false;
|
||||
|
||||
// When use_spse_initialization is true, this parameter along with
|
||||
// max_num_spse_iterations controls the number of
|
||||
// SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It
|
||||
// is not used to control the preconditioner.
|
||||
double spse_tolerance = 0.1;
|
||||
|
||||
// Forcing sequence parameter. The truncated Newton solver uses
|
||||
// this number to control the relative accuracy with which the
|
||||
// Newton step is computed.
|
||||
//
|
||||
// This constant is passed to ConjugateGradientsSolver which uses
|
||||
// it to terminate the iterations when
|
||||
//
|
||||
// (Q_i - Q_{i-1})/Q_i < eta/i
|
||||
double eta = 1e-1;
|
||||
|
||||
// Normalize the jacobian using Jacobi scaling before calling
|
||||
// the linear least squares solver.
|
||||
bool jacobi_scaling = true;
|
||||
|
||||
// Some non-linear least squares problems have additional
|
||||
// structure in the way the parameter blocks interact that it is
|
||||
// beneficial to modify the way the trust region step is computed.
|
||||
@@ -629,32 +713,6 @@ class CERES_EXPORT Solver {
|
||||
// iterations is disabled.
|
||||
double inner_iteration_tolerance = 1e-3;
|
||||
|
||||
// Minimum number of iterations for which the linear solver should
|
||||
// run, even if the convergence criterion is satisfied.
|
||||
int min_linear_solver_iterations = 0;
|
||||
|
||||
// Maximum number of iterations for which the linear solver should
|
||||
// run. If the solver does not converge in less than
|
||||
// max_linear_solver_iterations, then it returns MAX_ITERATIONS,
|
||||
// as its termination type.
|
||||
int max_linear_solver_iterations = 500;
|
||||
|
||||
// Forcing sequence parameter. The truncated Newton solver uses
|
||||
// this number to control the relative accuracy with which the
|
||||
// Newton step is computed.
|
||||
//
|
||||
// This constant is passed to ConjugateGradientsSolver which uses
|
||||
// it to terminate the iterations when
|
||||
//
|
||||
// (Q_i - Q_{i-1})/Q_i < eta/i
|
||||
double eta = 1e-1;
|
||||
|
||||
// Normalize the jacobian using Jacobi scaling before calling
|
||||
// the linear least squares solver.
|
||||
bool jacobi_scaling = true;
|
||||
|
||||
// Logging options ---------------------------------------------------------
|
||||
|
||||
LoggingType logging_type = PER_MINIMIZER_ITERATION;
|
||||
|
||||
// By default the Minimizer progress is logged to VLOG(1), which
|
||||
@@ -791,10 +849,9 @@ class CERES_EXPORT Solver {
|
||||
// IterationSummary for each minimizer iteration in order.
|
||||
std::vector<IterationSummary> iterations;
|
||||
|
||||
// Number of minimizer iterations in which the step was
|
||||
// accepted. Unless use_non_monotonic_steps is true this is also
|
||||
// the number of steps in which the objective function value/cost
|
||||
// went down.
|
||||
// Number of minimizer iterations in which the step was accepted. Unless
|
||||
// use_nonmonotonic_steps is true this is also the number of steps in which
|
||||
// the objective function value/cost went down.
|
||||
int num_successful_steps = -1;
|
||||
|
||||
// Number of minimizer iterations in which the step was rejected
|
||||
@@ -884,7 +941,7 @@ class CERES_EXPORT Solver {
|
||||
// Dimension of the tangent space of the problem (or the number of
|
||||
// columns in the Jacobian for the problem). This is different
|
||||
// from num_parameters if a parameter block is associated with a
|
||||
// LocalParameterization/Manifold.
|
||||
// Manifold.
|
||||
int num_effective_parameters = -1;
|
||||
|
||||
// Number of residual blocks in the problem.
|
||||
@@ -905,7 +962,7 @@ class CERES_EXPORT Solver {
|
||||
// number of columns in the Jacobian for the reduced
|
||||
// problem). This is different from num_parameters_reduced if a
|
||||
// parameter block in the reduced problem is associated with a
|
||||
// LocalParameterization/Manifold.
|
||||
// Manifold.
|
||||
int num_effective_parameters_reduced = -1;
|
||||
|
||||
// Number of residual blocks in the reduced problem.
|
||||
@@ -922,8 +979,7 @@ class CERES_EXPORT Solver {
|
||||
int num_threads_given = -1;
|
||||
|
||||
// Number of threads actually used by the solver for Jacobian and
|
||||
// residual evaluation. This number is not equal to
|
||||
// num_threads_given if OpenMP is not available.
|
||||
// residual evaluation.
|
||||
int num_threads_used = -1;
|
||||
|
||||
// Type of the linear solver requested by the user.
|
||||
@@ -946,6 +1002,10 @@ class CERES_EXPORT Solver {
|
||||
SPARSE_NORMAL_CHOLESKY;
|
||||
#endif
|
||||
|
||||
bool mixed_precision_solves_used = false;
|
||||
|
||||
LinearSolverOrderingType linear_solver_ordering_type = AMD;
|
||||
|
||||
// Size of the elimination groups given by the user as hints to
|
||||
// the linear solver.
|
||||
std::vector<int> linear_solver_ordering_given;
|
||||
@@ -1005,7 +1065,7 @@ class CERES_EXPORT Solver {
|
||||
PreconditionerType preconditioner_type_used = IDENTITY;
|
||||
|
||||
// Type of clustering algorithm used for visibility based
|
||||
// preconditioning. Only meaningful when the preconditioner_type
|
||||
// preconditioning. Only meaningful when the preconditioner_type_used
|
||||
// is CLUSTER_JACOBI or CLUSTER_TRIDIAGONAL.
|
||||
VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS;
|
||||
|
||||
|
||||
9
extern/ceres/include/ceres/sphere_manifold.h
vendored
9
extern/ceres/include/ceres/sphere_manifold.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -114,12 +114,17 @@ class SphereManifold final : public Manifold {
|
||||
static constexpr int TangentSpaceDimension =
|
||||
AmbientSpaceDimension > 0 ? AmbientSpaceDimension - 1 : Eigen::Dynamic;
|
||||
|
||||
// NOTE: Eigen does not allow to have a RowMajor column vector.
|
||||
// In that case, change the storage order
|
||||
static constexpr int SafeRowMajor =
|
||||
TangentSpaceDimension == 1 ? Eigen::ColMajor : Eigen::RowMajor;
|
||||
|
||||
using AmbientVector = Eigen::Matrix<double, AmbientSpaceDimension, 1>;
|
||||
using TangentVector = Eigen::Matrix<double, TangentSpaceDimension, 1>;
|
||||
using MatrixPlusJacobian = Eigen::Matrix<double,
|
||||
AmbientSpaceDimension,
|
||||
TangentSpaceDimension,
|
||||
Eigen::RowMajor>;
|
||||
SafeRowMajor>;
|
||||
using MatrixMinusJacobian = Eigen::Matrix<double,
|
||||
TangentSpaceDimension,
|
||||
AmbientSpaceDimension,
|
||||
|
||||
12
extern/ceres/include/ceres/tiny_solver.h
vendored
12
extern/ceres/include/ceres/tiny_solver.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -248,10 +248,9 @@ class TinySolver {
|
||||
jtj_regularized_ = jtj_;
|
||||
const Scalar min_diagonal = 1e-6;
|
||||
const Scalar max_diagonal = 1e32;
|
||||
for (int i = 0; i < lm_diagonal_.rows(); ++i) {
|
||||
lm_diagonal_[i] = std::sqrt(
|
||||
u * (std::min)((std::max)(jtj_(i, i), min_diagonal), max_diagonal));
|
||||
jtj_regularized_(i, i) += lm_diagonal_[i] * lm_diagonal_[i];
|
||||
for (int i = 0; i < dx_.rows(); ++i) {
|
||||
jtj_regularized_(i, i) +=
|
||||
u * (std::min)((std::max)(jtj_(i, i), min_diagonal), max_diagonal);
|
||||
}
|
||||
|
||||
// TODO(sameeragarwal): Check for failure and deal with it.
|
||||
@@ -338,7 +337,7 @@ class TinySolver {
|
||||
// linear system. This allows reusing the intermediate storage across solves.
|
||||
LinearSolver linear_solver_;
|
||||
Scalar cost_;
|
||||
Parameters dx_, x_new_, g_, jacobi_scaling_, lm_diagonal_, lm_step_;
|
||||
Parameters dx_, x_new_, g_, jacobi_scaling_, lm_step_;
|
||||
Eigen::Matrix<Scalar, NUM_RESIDUALS, 1> residuals_, f_x_new_;
|
||||
Eigen::Matrix<Scalar, NUM_RESIDUALS, NUM_PARAMETERS> jacobian_;
|
||||
Eigen::Matrix<Scalar, NUM_PARAMETERS, NUM_PARAMETERS> jtj_, jtj_regularized_;
|
||||
@@ -385,7 +384,6 @@ class TinySolver {
|
||||
x_new_.resize(num_parameters);
|
||||
g_.resize(num_parameters);
|
||||
jacobi_scaling_.resize(num_parameters);
|
||||
lm_diagonal_.resize(num_parameters);
|
||||
lm_step_.resize(num_parameters);
|
||||
residuals_.resize(num_residuals);
|
||||
f_x_new_.resize(num_residuals);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -171,7 +171,7 @@ class TinySolverAutoDiffFunction {
|
||||
const CostFunctor& cost_functor_;
|
||||
|
||||
// The number of residuals at runtime.
|
||||
// This will be overriden if NUM_RESIDUALS == Eigen::Dynamic.
|
||||
// This will be overridden if NUM_RESIDUALS == Eigen::Dynamic.
|
||||
int num_residuals_ = kNumResiduals;
|
||||
|
||||
// To evaluate the cost function with jets, temporary storage is needed. These
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
50
extern/ceres/include/ceres/types.h
vendored
50
extern/ceres/include/ceres/types.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -67,8 +67,7 @@ enum LinearSolverType {
|
||||
// Eigen.
|
||||
DENSE_QR,
|
||||
|
||||
// Solve the normal equations using a sparse cholesky solver; requires
|
||||
// SuiteSparse or CXSparse.
|
||||
// Solve the normal equations using a sparse cholesky solver;
|
||||
SPARSE_NORMAL_CHOLESKY,
|
||||
|
||||
// Specialized solvers, specific to problems with a generalized
|
||||
@@ -98,7 +97,7 @@ enum PreconditionerType {
|
||||
// Block diagonal of the Gauss-Newton Hessian.
|
||||
JACOBI,
|
||||
|
||||
// Note: The following three preconditioners can only be used with
|
||||
// Note: The following four preconditioners can only be used with
|
||||
// the ITERATIVE_SCHUR solver. They are well suited for Structure
|
||||
// from Motion problems.
|
||||
|
||||
@@ -106,6 +105,10 @@ enum PreconditionerType {
|
||||
// only be used with the ITERATIVE_SCHUR solver.
|
||||
SCHUR_JACOBI,
|
||||
|
||||
// Use power series expansion to approximate the inversion of Schur complement
|
||||
// as a preconditioner.
|
||||
SCHUR_POWER_SERIES_EXPANSION,
|
||||
|
||||
// Visibility clustering based preconditioners.
|
||||
//
|
||||
// The following two preconditioners use the visibility structure of
|
||||
@@ -134,7 +137,7 @@ enum PreconditionerType {
|
||||
// well the matrix Q approximates J'J, or how well the chosen
|
||||
// residual blocks approximate the non-linear least squares
|
||||
// problem.
|
||||
SUBSET,
|
||||
SUBSET
|
||||
};
|
||||
|
||||
enum VisibilityClusteringType {
|
||||
@@ -165,11 +168,6 @@ enum SparseLinearAlgebraLibraryType {
|
||||
// minimum degree ordering.
|
||||
SUITE_SPARSE,
|
||||
|
||||
// A lightweight replacement for SuiteSparse, which does not require
|
||||
// a LAPACK/BLAS implementation. Consequently, its performance is
|
||||
// also a bit lower than SuiteSparse.
|
||||
CX_SPARSE,
|
||||
|
||||
// Eigen's sparse linear algebra routines. In particular Ceres uses
|
||||
// the Simplicial LDLT routines.
|
||||
EIGEN_SPARSE,
|
||||
@@ -177,12 +175,39 @@ enum SparseLinearAlgebraLibraryType {
|
||||
// Apple's Accelerate framework sparse linear algebra routines.
|
||||
ACCELERATE_SPARSE,
|
||||
|
||||
// Nvidia's cuSPARSE library.
|
||||
CUDA_SPARSE,
|
||||
|
||||
// No sparse linear solver should be used. This does not necessarily
|
||||
// imply that Ceres was built without any sparse library, although that
|
||||
// is the likely use case, merely that one should not be used.
|
||||
NO_SPARSE
|
||||
};
|
||||
|
||||
// The order in which variables are eliminated in a linear solver
|
||||
// can have a significant impact on the efficiency and accuracy
|
||||
// of the method. e.g., when doing sparse Cholesky factorization,
|
||||
// there are matrices for which a good ordering will give a
|
||||
// Cholesky factor with O(n) storage, whereas a bad ordering will
|
||||
// result in a completely dense factor.
|
||||
//
|
||||
// So sparse direct solvers like SPARSE_NORMAL_CHOLESKY and
|
||||
// SPARSE_SCHUR and preconditioners like SUBSET, CLUSTER_JACOBI &
|
||||
// CLUSTER_TRIDIAGONAL use a fill reducing ordering of the columns and
|
||||
// rows of the matrix being factorized before actually computing the numeric
|
||||
// factorization.
|
||||
//
|
||||
// This enum controls the class of algorithm used to compute this
|
||||
// fill reducing ordering. There is no single algorithm that works
|
||||
// on all matrices, so determining which algorithm works better is a
|
||||
// matter of empirical experimentation.
|
||||
enum LinearSolverOrderingType {
|
||||
// Approximate Minimum Degree.
|
||||
AMD,
|
||||
// Nested Dissection.
|
||||
NESDIS
|
||||
};
|
||||
|
||||
enum DenseLinearAlgebraLibraryType {
|
||||
EIGEN,
|
||||
LAPACK,
|
||||
@@ -467,6 +492,11 @@ CERES_EXPORT const char* SparseLinearAlgebraLibraryTypeToString(
|
||||
CERES_EXPORT bool StringToSparseLinearAlgebraLibraryType(
|
||||
std::string value, SparseLinearAlgebraLibraryType* type);
|
||||
|
||||
CERES_EXPORT const char* LinearSolverOrderingTypeToString(
|
||||
LinearSolverOrderingType type);
|
||||
CERES_EXPORT bool StringToLinearSolverOrderingType(
|
||||
std::string value, LinearSolverOrderingType* type);
|
||||
|
||||
CERES_EXPORT const char* DenseLinearAlgebraLibraryTypeToString(
|
||||
DenseLinearAlgebraLibraryType type);
|
||||
CERES_EXPORT bool StringToDenseLinearAlgebraLibraryType(
|
||||
|
||||
4
extern/ceres/include/ceres/version.h
vendored
4
extern/ceres/include/ceres/version.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,7 +32,7 @@
|
||||
#define CERES_PUBLIC_VERSION_H_
|
||||
|
||||
#define CERES_VERSION_MAJOR 2
|
||||
#define CERES_VERSION_MINOR 1
|
||||
#define CERES_VERSION_MINOR 2
|
||||
#define CERES_VERSION_REVISION 0
|
||||
|
||||
// Classic CPP stringification; the extra level of indirection allows the
|
||||
|
||||
50
extern/ceres/internal/ceres/accelerate_sparse.cc
vendored
50
extern/ceres/internal/ceres/accelerate_sparse.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -61,7 +61,7 @@ const char* SparseStatusToString(SparseStatus_t status) {
|
||||
CASESTR(SparseParameterError);
|
||||
CASESTR(SparseStatusReleased);
|
||||
default:
|
||||
return "UKNOWN";
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
} // namespace.
|
||||
@@ -114,12 +114,12 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
|
||||
// Accelerate's columnStarts is a long*, not an int*. These types might be
|
||||
// different (e.g. ARM on iOS) so always make a copy.
|
||||
column_starts_.resize(A->num_rows() + 1); // +1 for final column length.
|
||||
std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]);
|
||||
std::copy_n(A->rows(), column_starts_.size(), column_starts_.data());
|
||||
|
||||
ASSparseMatrix At;
|
||||
At.structure.rowCount = A->num_cols();
|
||||
At.structure.columnCount = A->num_rows();
|
||||
At.structure.columnStarts = &column_starts_[0];
|
||||
At.structure.columnStarts = column_starts_.data();
|
||||
At.structure.rowIndices = A->mutable_cols();
|
||||
At.structure.attributes.transpose = false;
|
||||
At.structure.attributes.triangle = SparseUpperTriangle;
|
||||
@@ -127,8 +127,8 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
|
||||
At.structure.attributes._reserved = 0;
|
||||
At.structure.attributes._allocatedBySparse = 0;
|
||||
At.structure.blockSize = 1;
|
||||
if (std::is_same<Scalar, double>::value) {
|
||||
At.data = reinterpret_cast<Scalar*>(A->mutable_values());
|
||||
if constexpr (std::is_same_v<Scalar, double>) {
|
||||
At.data = A->mutable_values();
|
||||
} else {
|
||||
values_ =
|
||||
ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>();
|
||||
@@ -139,8 +139,23 @@ AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
|
||||
|
||||
template <typename Scalar>
|
||||
typename AccelerateSparse<Scalar>::SymbolicFactorization
|
||||
AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) {
|
||||
return SparseFactor(SparseFactorizationCholesky, A->structure);
|
||||
AccelerateSparse<Scalar>::AnalyzeCholesky(OrderingType ordering_type,
|
||||
ASSparseMatrix* A) {
|
||||
SparseSymbolicFactorOptions sfoption;
|
||||
sfoption.control = SparseDefaultControl;
|
||||
sfoption.orderMethod = SparseOrderDefault;
|
||||
sfoption.order = nullptr;
|
||||
sfoption.ignoreRowsAndColumns = nullptr;
|
||||
sfoption.malloc = malloc;
|
||||
sfoption.free = free;
|
||||
sfoption.reportError = nullptr;
|
||||
|
||||
if (ordering_type == OrderingType::AMD) {
|
||||
sfoption.orderMethod = SparseOrderAMD;
|
||||
} else if (ordering_type == OrderingType::NESDIS) {
|
||||
sfoption.orderMethod = SparseOrderMetis;
|
||||
}
|
||||
return SparseFactor(SparseFactorizationCholesky, A->structure, sfoption);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -190,7 +205,7 @@ AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() {
|
||||
template <typename Scalar>
|
||||
CompressedRowSparseMatrix::StorageType
|
||||
AppleAccelerateCholesky<Scalar>::StorageType() const {
|
||||
return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
|
||||
return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR;
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -199,7 +214,7 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
|
||||
CHECK_EQ(lhs->storage_type(), StorageType());
|
||||
if (lhs == nullptr) {
|
||||
*message = "Failure: Input lhs is nullptr.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs =
|
||||
as_.CreateSparseMatrixTransposeView(lhs);
|
||||
@@ -207,13 +222,14 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
|
||||
if (!symbolic_factor_) {
|
||||
symbolic_factor_ = std::make_unique<
|
||||
typename SparseTypesTrait<Scalar>::SymbolicFactorization>(
|
||||
as_.AnalyzeCholesky(&as_lhs));
|
||||
as_.AnalyzeCholesky(ordering_type_, &as_lhs));
|
||||
|
||||
if (symbolic_factor_->status != SparseStatusOK) {
|
||||
*message = StringPrintf(
|
||||
"Apple Accelerate Failure : Symbolic factorisation failed: %s",
|
||||
SparseStatusToString(symbolic_factor_->status));
|
||||
FreeSymbolicFactorization();
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,10 +246,10 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
|
||||
"Apple Accelerate Failure : Numeric factorisation failed: %s",
|
||||
SparseStatusToString(numeric_factor_->status));
|
||||
FreeNumericFactorization();
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -246,8 +262,8 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
|
||||
|
||||
typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution;
|
||||
as_rhs_and_solution.count = num_cols;
|
||||
if (std::is_same<Scalar, double>::value) {
|
||||
as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution);
|
||||
if constexpr (std::is_same_v<Scalar, double>) {
|
||||
as_rhs_and_solution.data = solution;
|
||||
std::copy_n(rhs, num_cols, solution);
|
||||
} else {
|
||||
scalar_rhs_and_solution_ =
|
||||
@@ -259,7 +275,7 @@ LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
|
||||
VectorRef(solution, num_cols) =
|
||||
scalar_rhs_and_solution_.template cast<double>();
|
||||
}
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
|
||||
21
extern/ceres/internal/ceres/accelerate_sparse.h
vendored
21
extern/ceres/internal/ceres/accelerate_sparse.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -55,18 +55,18 @@ struct SparseTypesTrait {};
|
||||
|
||||
template <>
|
||||
struct SparseTypesTrait<double> {
|
||||
typedef DenseVector_Double DenseVector;
|
||||
typedef SparseMatrix_Double SparseMatrix;
|
||||
typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
|
||||
typedef SparseOpaqueFactorization_Double NumericFactorization;
|
||||
using DenseVector = DenseVector_Double;
|
||||
using SparseMatrix = SparseMatrix_Double;
|
||||
using SymbolicFactorization = SparseOpaqueSymbolicFactorization;
|
||||
using NumericFactorization = SparseOpaqueFactorization_Double;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SparseTypesTrait<float> {
|
||||
typedef DenseVector_Float DenseVector;
|
||||
typedef SparseMatrix_Float SparseMatrix;
|
||||
typedef SparseOpaqueSymbolicFactorization SymbolicFactorization;
|
||||
typedef SparseOpaqueFactorization_Float NumericFactorization;
|
||||
using DenseVector = DenseVector_Float;
|
||||
using SparseMatrix = SparseMatrix_Float;
|
||||
using SymbolicFactorization = SparseOpaqueSymbolicFactorization;
|
||||
using NumericFactorization = SparseOpaqueFactorization_Float;
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
@@ -91,7 +91,8 @@ class AccelerateSparse {
|
||||
// objects internally).
|
||||
ASSparseMatrix CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A);
|
||||
// Computes a symbolic factorisation of A that can be used in Solve().
|
||||
SymbolicFactorization AnalyzeCholesky(ASSparseMatrix* A);
|
||||
SymbolicFactorization AnalyzeCholesky(OrderingType ordering_type,
|
||||
ASSparseMatrix* A);
|
||||
// Compute the numeric Cholesky factorization of A, given its
|
||||
// symbolic factorization.
|
||||
NumericFactorization Cholesky(ASSparseMatrix* A,
|
||||
|
||||
31
extern/ceres/internal/ceres/array_utils.cc
vendored
31
extern/ceres/internal/ceres/array_utils.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,14 +38,12 @@
|
||||
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/types.h"
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::string;
|
||||
namespace ceres::internal {
|
||||
|
||||
bool IsArrayValid(const int size, const double* x) {
|
||||
bool IsArrayValid(const int64_t size, const double* x) {
|
||||
if (x != nullptr) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) {
|
||||
return false;
|
||||
}
|
||||
@@ -54,12 +52,12 @@ bool IsArrayValid(const int size, const double* x) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int FindInvalidValue(const int size, const double* x) {
|
||||
int64_t FindInvalidValue(const int64_t size, const double* x) {
|
||||
if (x == nullptr) {
|
||||
return size;
|
||||
}
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) {
|
||||
return i;
|
||||
}
|
||||
@@ -68,16 +66,18 @@ int FindInvalidValue(const int size, const double* x) {
|
||||
return size;
|
||||
}
|
||||
|
||||
void InvalidateArray(const int size, double* x) {
|
||||
void InvalidateArray(const int64_t size, double* x) {
|
||||
if (x != nullptr) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
x[i] = kImpossibleValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AppendArrayToString(const int size, const double* x, string* result) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
void AppendArrayToString(const int64_t size,
|
||||
const double* x,
|
||||
std::string* result) {
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
if (x == nullptr) {
|
||||
StringAppendF(result, "Not Computed ");
|
||||
} else {
|
||||
@@ -90,18 +90,17 @@ void AppendArrayToString(const int size, const double* x, string* result) {
|
||||
}
|
||||
}
|
||||
|
||||
void MapValuesToContiguousRange(const int size, int* array) {
|
||||
void MapValuesToContiguousRange(const int64_t size, int* array) {
|
||||
std::vector<int> unique_values(array, array + size);
|
||||
std::sort(unique_values.begin(), unique_values.end());
|
||||
unique_values.erase(std::unique(unique_values.begin(), unique_values.end()),
|
||||
unique_values.end());
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int64_t i = 0; i < size; ++i) {
|
||||
array[i] =
|
||||
std::lower_bound(unique_values.begin(), unique_values.end(), array[i]) -
|
||||
unique_values.begin();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
19
extern/ceres/internal/ceres/array_utils.h
vendored
19
extern/ceres/internal/ceres/array_utils.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,30 +43,30 @@
|
||||
#ifndef CERES_INTERNAL_ARRAY_UTILS_H_
|
||||
#define CERES_INTERNAL_ARRAY_UTILS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Fill the array x with an impossible value that the user code is
|
||||
// never expected to compute.
|
||||
CERES_NO_EXPORT void InvalidateArray(int size, double* x);
|
||||
CERES_NO_EXPORT void InvalidateArray(const int64_t size, double* x);
|
||||
|
||||
// Check if all the entries of the array x are valid, i.e. all the
|
||||
// values in the array should be finite and none of them should be
|
||||
// equal to the "impossible" value used by InvalidateArray.
|
||||
CERES_NO_EXPORT bool IsArrayValid(int size, const double* x);
|
||||
CERES_NO_EXPORT bool IsArrayValid(const int64_t size, const double* x);
|
||||
|
||||
// If the array contains an invalid value, return the index for it,
|
||||
// otherwise return size.
|
||||
CERES_NO_EXPORT int FindInvalidValue(const int size, const double* x);
|
||||
CERES_NO_EXPORT int64_t FindInvalidValue(const int64_t size, const double* x);
|
||||
|
||||
// Utility routine to print an array of doubles to a string. If the
|
||||
// array pointer is nullptr, "Not Computed" is appended for each entry.
|
||||
CERES_NO_EXPORT void AppendArrayToString(const int size,
|
||||
CERES_NO_EXPORT void AppendArrayToString(const int64_t size,
|
||||
const double* x,
|
||||
std::string* result);
|
||||
|
||||
@@ -83,10 +83,9 @@ CERES_NO_EXPORT void AppendArrayToString(const int size,
|
||||
// gets mapped to
|
||||
//
|
||||
// [1 0 2 3 0 1 3]
|
||||
CERES_NO_EXPORT void MapValuesToContiguousRange(int size, int* array);
|
||||
CERES_NO_EXPORT void MapValuesToContiguousRange(const int64_t size, int* array);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/residual_block.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
void BlockEvaluatePreparer::Init(int const* const* jacobian_layout,
|
||||
int max_derivatives_per_residual_block) {
|
||||
@@ -78,5 +77,4 @@ void BlockEvaluatePreparer::Prepare(const ResidualBlock* residual_block,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/scratch_evaluate_preparer.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ResidualBlock;
|
||||
class SparseMatrix;
|
||||
@@ -72,7 +71,6 @@ class CERES_NO_EXPORT BlockEvaluatePreparer {
|
||||
ScratchEvaluatePreparer scratch_evaluate_preparer_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_BLOCK_EVALUATE_PREPARER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,71 +30,197 @@
|
||||
|
||||
#include "ceres/block_jacobi_preconditioner.h"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/block_random_access_diagonal_matrix.h"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/casts.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/small_blas.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
BlockJacobiPreconditioner::BlockJacobiPreconditioner(
|
||||
const BlockSparseMatrix& A) {
|
||||
const CompressedRowBlockStructure* bs = A.block_structure();
|
||||
std::vector<int> blocks(bs->cols.size());
|
||||
for (int i = 0; i < blocks.size(); ++i) {
|
||||
blocks[i] = bs->cols[i].size;
|
||||
}
|
||||
|
||||
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks);
|
||||
BlockSparseJacobiPreconditioner::BlockSparseJacobiPreconditioner(
|
||||
Preconditioner::Options options, const BlockSparseMatrix& A)
|
||||
: options_(std::move(options)) {
|
||||
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
|
||||
A.block_structure()->cols, options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
BlockJacobiPreconditioner::~BlockJacobiPreconditioner() = default;
|
||||
BlockSparseJacobiPreconditioner::~BlockSparseJacobiPreconditioner() = default;
|
||||
|
||||
bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
const double* D) {
|
||||
bool BlockSparseJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
const double* D) {
|
||||
const CompressedRowBlockStructure* bs = A.block_structure();
|
||||
const double* values = A.values();
|
||||
m_->SetZero();
|
||||
for (int i = 0; i < bs->rows.size(); ++i) {
|
||||
const int row_block_size = bs->rows[i].block.size;
|
||||
const std::vector<Cell>& cells = bs->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int block_id = cell.block_id;
|
||||
const int col_block_size = bs->cols[block_id].size;
|
||||
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info =
|
||||
m_->GetCell(block_id, block_id, &r, &c, &row_stride, &col_stride);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
ConstMatrixRef b(values + cell.position, row_block_size, col_block_size);
|
||||
m.block(r, c, col_block_size, col_block_size) += b.transpose() * b;
|
||||
}
|
||||
}
|
||||
ParallelFor(options_.context,
|
||||
0,
|
||||
bs->rows.size(),
|
||||
options_.num_threads,
|
||||
[this, bs, values](int i) {
|
||||
const int row_block_size = bs->rows[i].block.size;
|
||||
const std::vector<Cell>& cells = bs->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int block_id = cell.block_id;
|
||||
const int col_block_size = bs->cols[block_id].size;
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info = m_->GetCell(
|
||||
block_id, block_id, &r, &c, &row_stride, &col_stride);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
ConstMatrixRef b(
|
||||
values + cell.position, row_block_size, col_block_size);
|
||||
auto lock =
|
||||
MakeConditionalLock(options_.num_threads, cell_info->m);
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply<Eigen::Dynamic, Eigen::Dynamic,
|
||||
Eigen::Dynamic,Eigen::Dynamic, 1>(
|
||||
values + cell.position, row_block_size,col_block_size,
|
||||
values + cell.position, row_block_size,col_block_size,
|
||||
cell_info->values,r, c,row_stride,col_stride);
|
||||
// clang-format on
|
||||
}
|
||||
});
|
||||
|
||||
if (D != nullptr) {
|
||||
// Add the diagonal.
|
||||
int position = 0;
|
||||
for (int i = 0; i < bs->cols.size(); ++i) {
|
||||
const int block_size = bs->cols[i].size;
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info = m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block_size, block_size).diagonal() +=
|
||||
ConstVectorRef(D + position, block_size).array().square().matrix();
|
||||
position += block_size;
|
||||
}
|
||||
ParallelFor(options_.context,
|
||||
0,
|
||||
bs->cols.size(),
|
||||
options_.num_threads,
|
||||
[this, bs, D](int i) {
|
||||
const int block_size = bs->cols[i].size;
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info =
|
||||
m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block_size, block_size).diagonal() +=
|
||||
ConstVectorRef(D + bs->cols[i].position, block_size)
|
||||
.array()
|
||||
.square()
|
||||
.matrix();
|
||||
});
|
||||
}
|
||||
|
||||
m_->Invert();
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlockJacobiPreconditioner::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
m_->RightMultiply(x, y);
|
||||
BlockCRSJacobiPreconditioner::BlockCRSJacobiPreconditioner(
|
||||
Preconditioner::Options options, const CompressedRowSparseMatrix& A)
|
||||
: options_(std::move(options)), locks_(A.col_blocks().size()) {
|
||||
auto& col_blocks = A.col_blocks();
|
||||
|
||||
// Compute the number of non-zeros in the preconditioner. This is needed so
|
||||
// that we can construct the CompressedRowSparseMatrix.
|
||||
const int m_nnz = SumSquaredSizes(col_blocks);
|
||||
m_ = std::make_unique<CompressedRowSparseMatrix>(
|
||||
A.num_cols(), A.num_cols(), m_nnz);
|
||||
|
||||
const int num_col_blocks = col_blocks.size();
|
||||
|
||||
// Populate the sparsity structure of the preconditioner matrix.
|
||||
int* m_cols = m_->mutable_cols();
|
||||
int* m_rows = m_->mutable_rows();
|
||||
m_rows[0] = 0;
|
||||
for (int i = 0, idx = 0; i < num_col_blocks; ++i) {
|
||||
// For each column block populate a diagonal block in the preconditioner.
|
||||
// Note that because of the way the CompressedRowSparseMatrix format
|
||||
// works, the entire diagonal block is laid out contiguously in memory as a
|
||||
// row-major matrix. We will use this when updating the block.
|
||||
auto& block = col_blocks[i];
|
||||
for (int j = 0; j < block.size; ++j) {
|
||||
for (int k = 0; k < block.size; ++k, ++idx) {
|
||||
m_cols[idx] = block.position + k;
|
||||
}
|
||||
m_rows[block.position + j + 1] = idx;
|
||||
}
|
||||
}
|
||||
|
||||
// In reality we only need num_col_blocks locks, however that would require
|
||||
// that in UpdateImpl we are able to look up the column block from its
|
||||
// first column. To save ourselves this map we will instead spend a few extra
|
||||
// lock objects.
|
||||
std::vector<std::mutex> locks(A.num_cols());
|
||||
locks_.swap(locks);
|
||||
CHECK_EQ(m_rows[A.num_cols()], m_nnz);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
BlockCRSJacobiPreconditioner::~BlockCRSJacobiPreconditioner() = default;
|
||||
|
||||
bool BlockCRSJacobiPreconditioner::UpdateImpl(
|
||||
const CompressedRowSparseMatrix& A, const double* D) {
|
||||
const auto& col_blocks = A.col_blocks();
|
||||
const auto& row_blocks = A.row_blocks();
|
||||
const int num_col_blocks = col_blocks.size();
|
||||
const int num_row_blocks = row_blocks.size();
|
||||
|
||||
const int* a_rows = A.rows();
|
||||
const int* a_cols = A.cols();
|
||||
const double* a_values = A.values();
|
||||
double* m_values = m_->mutable_values();
|
||||
const int* m_rows = m_->rows();
|
||||
|
||||
m_->SetZero();
|
||||
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
0,
|
||||
num_row_blocks,
|
||||
options_.num_threads,
|
||||
[this, row_blocks, a_rows, a_cols, a_values, m_values, m_rows](int i) {
|
||||
const int row = row_blocks[i].position;
|
||||
const int row_block_size = row_blocks[i].size;
|
||||
const int row_nnz = a_rows[row + 1] - a_rows[row];
|
||||
ConstMatrixRef row_block(
|
||||
a_values + a_rows[row], row_block_size, row_nnz);
|
||||
int c = 0;
|
||||
while (c < row_nnz) {
|
||||
const int idx = a_rows[row] + c;
|
||||
const int col = a_cols[idx];
|
||||
const int col_block_size = m_rows[col + 1] - m_rows[col];
|
||||
|
||||
// We make use of the fact that the entire diagonal block is
|
||||
// stored contiguously in memory as a row-major matrix.
|
||||
MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
|
||||
// We do not have a row_stride version of
|
||||
// MatrixTransposeMatrixMultiply, otherwise we could use it
|
||||
// here to further speed up the following expression.
|
||||
auto b = row_block.middleCols(c, col_block_size);
|
||||
auto lock = MakeConditionalLock(options_.num_threads, locks_[col]);
|
||||
m.noalias() += b.transpose() * b;
|
||||
c += col_block_size;
|
||||
}
|
||||
});
|
||||
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
0,
|
||||
num_col_blocks,
|
||||
options_.num_threads,
|
||||
[col_blocks, m_rows, m_values, D](int i) {
|
||||
const int col = col_blocks[i].position;
|
||||
const int col_block_size = col_blocks[i].size;
|
||||
MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
|
||||
|
||||
if (D != nullptr) {
|
||||
m.diagonal() +=
|
||||
ConstVectorRef(D + col, col_block_size).array().square().matrix();
|
||||
}
|
||||
|
||||
// TODO(sameeragarwal): Deal with Cholesky inversion failure here and
|
||||
// elsewhere.
|
||||
m = m.llt().solve(Matrix::Identity(col_block_size, col_block_size));
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,34 +38,30 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
struct CompressedRowBlockStructure;
|
||||
class CompressedRowSparseMatrix;
|
||||
|
||||
// A block Jacobi preconditioner. This is intended for use with
|
||||
// conjugate gradients, or other iterative symmetric solvers. To use
|
||||
// the preconditioner, create one by passing a BlockSparseMatrix "A"
|
||||
// to the constructor. This fixes the sparsity pattern to the pattern
|
||||
// of the matrix A^TA.
|
||||
// conjugate gradients, or other iterative symmetric solvers.
|
||||
|
||||
// This version of the preconditioner is for use with BlockSparseMatrix
|
||||
// Jacobians.
|
||||
//
|
||||
// Before each use of the preconditioner in a solve with conjugate gradients,
|
||||
// update the matrix by running Update(A, D). The values of the matrix A are
|
||||
// inspected to construct the preconditioner. The vector D is applied as the
|
||||
// D^TD diagonal term.
|
||||
class CERES_NO_EXPORT BlockJacobiPreconditioner
|
||||
// TODO(https://github.com/ceres-solver/ceres-solver/issues/936):
|
||||
// BlockSparseJacobiPreconditioner::RightMultiply will benefit from
|
||||
// multithreading
|
||||
class CERES_NO_EXPORT BlockSparseJacobiPreconditioner
|
||||
: public BlockSparseMatrixPreconditioner {
|
||||
public:
|
||||
// A must remain valid while the BlockJacobiPreconditioner is.
|
||||
explicit BlockJacobiPreconditioner(const BlockSparseMatrix& A);
|
||||
BlockJacobiPreconditioner(const BlockJacobiPreconditioner&) = delete;
|
||||
void operator=(const BlockJacobiPreconditioner&) = delete;
|
||||
|
||||
~BlockJacobiPreconditioner() override;
|
||||
|
||||
// Preconditioner interface
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
explicit BlockSparseJacobiPreconditioner(Preconditioner::Options,
|
||||
const BlockSparseMatrix& A);
|
||||
~BlockSparseJacobiPreconditioner() override;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final {
|
||||
return m_->RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
int num_rows() const final { return m_->num_rows(); }
|
||||
int num_cols() const final { return m_->num_rows(); }
|
||||
const BlockRandomAccessDiagonalMatrix& matrix() const { return *m_; }
|
||||
@@ -73,11 +69,35 @@ class CERES_NO_EXPORT BlockJacobiPreconditioner
|
||||
private:
|
||||
bool UpdateImpl(const BlockSparseMatrix& A, const double* D) final;
|
||||
|
||||
Preconditioner::Options options_;
|
||||
std::unique_ptr<BlockRandomAccessDiagonalMatrix> m_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
// This version of the preconditioner is for use with CompressedRowSparseMatrix
|
||||
// Jacobians.
|
||||
class CERES_NO_EXPORT BlockCRSJacobiPreconditioner
|
||||
: public CompressedRowSparseMatrixPreconditioner {
|
||||
public:
|
||||
// A must remain valid while the BlockCRSJacobiPreconditioner is.
|
||||
explicit BlockCRSJacobiPreconditioner(Preconditioner::Options options,
|
||||
const CompressedRowSparseMatrix& A);
|
||||
~BlockCRSJacobiPreconditioner() override;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final {
|
||||
m_->RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
int num_rows() const final { return m_->num_rows(); }
|
||||
int num_cols() const final { return m_->num_rows(); }
|
||||
const CompressedRowSparseMatrix& matrix() const { return *m_; }
|
||||
|
||||
private:
|
||||
bool UpdateImpl(const CompressedRowSparseMatrix& A, const double* D) final;
|
||||
|
||||
Preconditioner::Options options_;
|
||||
std::vector<std::mutex> locks_;
|
||||
std::unique_ptr<CompressedRowSparseMatrix> m_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
110
extern/ceres/internal/ceres/block_jacobian_writer.cc
vendored
110
extern/ceres/internal/ceres/block_jacobian_writer.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,6 +32,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_evaluate_preparer.h"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
@@ -41,10 +42,7 @@
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/residual_block.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -56,19 +54,27 @@ namespace {
|
||||
// the first num_eliminate_blocks parameter blocks as indicated by the parameter
|
||||
// block ordering. The remaining parameter blocks are the F blocks.
|
||||
//
|
||||
// In order to simplify handling block-sparse to CRS conversion, cells within
|
||||
// the row-block of non-partitioned matrix are stored in memory sequentially in
|
||||
// the order of increasing column-block id. In case of partitioned matrices,
|
||||
// cells corresponding to F sub-matrix are stored sequentially in the order of
|
||||
// increasing column-block id (with cells corresponding to E sub-matrix stored
|
||||
// separately).
|
||||
//
|
||||
// TODO(keir): Consider if we should use a boolean for each parameter block
|
||||
// instead of num_eliminate_blocks.
|
||||
void BuildJacobianLayout(const Program& program,
|
||||
bool BuildJacobianLayout(const Program& program,
|
||||
int num_eliminate_blocks,
|
||||
vector<int*>* jacobian_layout,
|
||||
vector<int>* jacobian_layout_storage) {
|
||||
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
|
||||
std::vector<int*>* jacobian_layout,
|
||||
std::vector<int>* jacobian_layout_storage) {
|
||||
const std::vector<ResidualBlock*>& residual_blocks =
|
||||
program.residual_blocks();
|
||||
|
||||
// Iterate over all the active residual blocks and determine how many E blocks
|
||||
// are there. This will determine where the F blocks start in the jacobian
|
||||
// matrix. Also compute the number of jacobian blocks.
|
||||
int f_block_pos = 0;
|
||||
int num_jacobian_blocks = 0;
|
||||
unsigned int f_block_pos = 0;
|
||||
unsigned int num_jacobian_blocks = 0;
|
||||
for (auto* residual_block : residual_blocks) {
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
@@ -84,6 +90,11 @@ void BuildJacobianLayout(const Program& program,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_jacobian_blocks > std::numeric_limits<int>::max()) {
|
||||
LOG(ERROR) << "Overlow error. Too many blocks in the jacobian matrix : "
|
||||
<< num_jacobian_blocks;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// We now know that the E blocks are laid out starting at zero, and the F
|
||||
@@ -95,65 +106,103 @@ void BuildJacobianLayout(const Program& program,
|
||||
jacobian_layout_storage->resize(num_jacobian_blocks);
|
||||
|
||||
int e_block_pos = 0;
|
||||
int* jacobian_pos = &(*jacobian_layout_storage)[0];
|
||||
int* jacobian_pos = jacobian_layout_storage->data();
|
||||
std::vector<std::pair<int, int>> active_parameter_blocks;
|
||||
for (int i = 0; i < residual_blocks.size(); ++i) {
|
||||
const ResidualBlock* residual_block = residual_blocks[i];
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
|
||||
(*jacobian_layout)[i] = jacobian_pos;
|
||||
// Cells from F sub-matrix are to be stored sequentially with increasing
|
||||
// column block id. For each non-constant parameter block, a pair of indices
|
||||
// (index in the list of active parameter blocks and index in the list of
|
||||
// all parameter blocks) is computed, and index pairs are sorted by the
|
||||
// index of corresponding column block id.
|
||||
active_parameter_blocks.clear();
|
||||
active_parameter_blocks.reserve(num_parameter_blocks);
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
|
||||
const int parameter_block_index = parameter_block->index();
|
||||
if (parameter_block->IsConstant()) {
|
||||
continue;
|
||||
}
|
||||
const int k = active_parameter_blocks.size();
|
||||
active_parameter_blocks.emplace_back(k, j);
|
||||
}
|
||||
std::sort(active_parameter_blocks.begin(),
|
||||
active_parameter_blocks.end(),
|
||||
[&residual_block](const std::pair<int, int>& a,
|
||||
const std::pair<int, int>& b) {
|
||||
return residual_block->parameter_blocks()[a.second]->index() <
|
||||
residual_block->parameter_blocks()[b.second]->index();
|
||||
});
|
||||
// Cell positions for each active parameter block are filled in the order of
|
||||
// active parameter block indices sorted by column block index. This
|
||||
// guarantees that cells are laid out sequentially with increasing column
|
||||
// block indices.
|
||||
for (const auto& indices : active_parameter_blocks) {
|
||||
const auto [k, j] = indices;
|
||||
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
|
||||
const int parameter_block_index = parameter_block->index();
|
||||
const int jacobian_block_size =
|
||||
num_residuals * parameter_block->TangentSize();
|
||||
if (parameter_block_index < num_eliminate_blocks) {
|
||||
*jacobian_pos = e_block_pos;
|
||||
jacobian_pos[k] = e_block_pos;
|
||||
e_block_pos += jacobian_block_size;
|
||||
} else {
|
||||
*jacobian_pos = f_block_pos;
|
||||
jacobian_pos[k] = static_cast<int>(f_block_pos);
|
||||
f_block_pos += jacobian_block_size;
|
||||
if (f_block_pos > std::numeric_limits<int>::max()) {
|
||||
LOG(ERROR)
|
||||
<< "Overlow error. Too many entries in the Jacobian matrix.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
jacobian_pos++;
|
||||
}
|
||||
jacobian_pos += active_parameter_blocks.size();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
BlockJacobianWriter::BlockJacobianWriter(const Evaluator::Options& options,
|
||||
Program* program)
|
||||
: program_(program) {
|
||||
: options_(options), program_(program) {
|
||||
CHECK_GE(options.num_eliminate_blocks, 0)
|
||||
<< "num_eliminate_blocks must be greater than 0.";
|
||||
|
||||
BuildJacobianLayout(*program,
|
||||
options.num_eliminate_blocks,
|
||||
&jacobian_layout_,
|
||||
&jacobian_layout_storage_);
|
||||
jacobian_layout_is_valid_ = BuildJacobianLayout(*program,
|
||||
options.num_eliminate_blocks,
|
||||
&jacobian_layout_,
|
||||
&jacobian_layout_storage_);
|
||||
}
|
||||
|
||||
// Create evaluate preparers that point directly into the final jacobian. This
|
||||
// makes the final Write() a nop.
|
||||
std::unique_ptr<BlockEvaluatePreparer[]>
|
||||
BlockJacobianWriter::CreateEvaluatePreparers(int num_threads) {
|
||||
int max_derivatives_per_residual_block =
|
||||
BlockJacobianWriter::CreateEvaluatePreparers(unsigned num_threads) {
|
||||
const int max_derivatives_per_residual_block =
|
||||
program_->MaxDerivativesPerResidualBlock();
|
||||
|
||||
auto preparers = std::make_unique<BlockEvaluatePreparer[]>(num_threads);
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
preparers[i].Init(&jacobian_layout_[0], max_derivatives_per_residual_block);
|
||||
for (unsigned i = 0; i < num_threads; i++) {
|
||||
preparers[i].Init(jacobian_layout_.data(),
|
||||
max_derivatives_per_residual_block);
|
||||
}
|
||||
return preparers;
|
||||
}
|
||||
|
||||
std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
|
||||
if (!jacobian_layout_is_valid_) {
|
||||
LOG(ERROR) << "Unable to create Jacobian matrix. Too many entries in the "
|
||||
"Jacobian matrix.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
|
||||
const vector<ParameterBlock*>& parameter_blocks =
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program_->parameter_blocks();
|
||||
|
||||
// Construct the column blocks.
|
||||
@@ -167,7 +216,8 @@ std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
|
||||
}
|
||||
|
||||
// Construct the cells in each row.
|
||||
const vector<ResidualBlock*>& residual_blocks = program_->residual_blocks();
|
||||
const std::vector<ResidualBlock*>& residual_blocks =
|
||||
program_->residual_blocks();
|
||||
int row_block_position = 0;
|
||||
bs->rows.resize(residual_blocks.size());
|
||||
for (int i = 0; i < residual_blocks.size(); ++i) {
|
||||
@@ -206,8 +256,8 @@ std::unique_ptr<SparseMatrix> BlockJacobianWriter::CreateJacobian() const {
|
||||
std::sort(row->cells.begin(), row->cells.end(), CellLessThan);
|
||||
}
|
||||
|
||||
return std::make_unique<BlockSparseMatrix>(bs);
|
||||
return std::make_unique<BlockSparseMatrix>(
|
||||
bs, options_.sparse_linear_algebra_library_type == CUDA_SPARSE);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,16 +44,26 @@
|
||||
#include "ceres/evaluator.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockEvaluatePreparer;
|
||||
class Program;
|
||||
class SparseMatrix;
|
||||
|
||||
// TODO(sameeragarwal): This class needs documemtation.
|
||||
// TODO(sameeragarwal): This class needs documentation.
|
||||
class CERES_NO_EXPORT BlockJacobianWriter {
|
||||
public:
|
||||
// Pre-computes positions of cells in block-sparse jacobian.
|
||||
// Two possible memory layouts are implemented:
|
||||
// - Non-partitioned case
|
||||
// - Partitioned case (for Schur type linear solver)
|
||||
//
|
||||
// In non-partitioned case, cells are stored sequentially in the
|
||||
// lexicographic order of (row block id, column block id).
|
||||
//
|
||||
// In the case of a partitioned matrix, cells of each sub-matrix (E and F) are
|
||||
// stored sequentially in the lexicographic order of (row block id, column
|
||||
// block id) and cells from E sub-matrix precede cells from F sub-matrix.
|
||||
BlockJacobianWriter(const Evaluator::Options& options, Program* program);
|
||||
|
||||
// JacobianWriter interface.
|
||||
@@ -61,7 +71,7 @@ class CERES_NO_EXPORT BlockJacobianWriter {
|
||||
// Create evaluate preparers that point directly into the final jacobian.
|
||||
// This makes the final Write() a nop.
|
||||
std::unique_ptr<BlockEvaluatePreparer[]> CreateEvaluatePreparers(
|
||||
int num_threads);
|
||||
unsigned num_threads);
|
||||
|
||||
std::unique_ptr<SparseMatrix> CreateJacobian() const;
|
||||
|
||||
@@ -75,12 +85,13 @@ class CERES_NO_EXPORT BlockJacobianWriter {
|
||||
}
|
||||
|
||||
private:
|
||||
Evaluator::Options options_;
|
||||
Program* program_;
|
||||
|
||||
// Stores the position of each residual / parameter jacobian.
|
||||
//
|
||||
// The block sparse matrix that this writer writes to is stored as a set of
|
||||
// contiguos dense blocks, one after each other; see BlockSparseMatrix. The
|
||||
// contiguous dense blocks, one after each other; see BlockSparseMatrix. The
|
||||
// "double* values_" member of the block sparse matrix contains all of these
|
||||
// blocks. Given a pointer to the first element of a block and the size of
|
||||
// that block, it's possible to write to it.
|
||||
@@ -122,9 +133,14 @@ class CERES_NO_EXPORT BlockJacobianWriter {
|
||||
|
||||
// The pointers in jacobian_layout_ point directly into this vector.
|
||||
std::vector<int> jacobian_layout_storage_;
|
||||
|
||||
// The constructor computes the layout of the Jacobian, and this bool keeps
|
||||
// track of whether the computation of the layout completed successfully or
|
||||
// not. If it is false, then jacobian_layout_ and jacobian_layout_storage_ are
|
||||
// both in an invalid state.
|
||||
bool jacobian_layout_is_valid_ = false;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_BLOCK_JACOBIAN_WRITER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,26 +30,21 @@
|
||||
|
||||
#include "ceres/block_random_access_dense_matrix.h"
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
|
||||
const std::vector<int>& blocks) {
|
||||
const int num_blocks = blocks.size();
|
||||
block_layout_.resize(num_blocks, 0);
|
||||
num_rows_ = 0;
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
block_layout_[i] = num_rows_;
|
||||
num_rows_ += blocks[i];
|
||||
}
|
||||
|
||||
std::vector<Block> blocks, ContextImpl* context, int num_threads)
|
||||
: blocks_(std::move(blocks)), context_(context), num_threads_(num_threads) {
|
||||
const int num_blocks = blocks_.size();
|
||||
num_rows_ = NumScalarEntries(blocks_);
|
||||
values_ = std::make_unique<double[]>(num_rows_ * num_rows_);
|
||||
|
||||
cell_infos_ = std::make_unique<CellInfo[]>(num_blocks * num_blocks);
|
||||
for (int i = 0; i < num_blocks * num_blocks; ++i) {
|
||||
cell_infos_[i].values = values_.get();
|
||||
@@ -58,30 +53,23 @@ BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix(
|
||||
SetZero();
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
BlockRandomAccessDenseMatrix::~BlockRandomAccessDenseMatrix() = default;
|
||||
|
||||
CellInfo* BlockRandomAccessDenseMatrix::GetCell(const int row_block_id,
|
||||
const int col_block_id,
|
||||
int* row,
|
||||
int* col,
|
||||
int* row_stride,
|
||||
int* col_stride) {
|
||||
*row = block_layout_[row_block_id];
|
||||
*col = block_layout_[col_block_id];
|
||||
*row = blocks_[row_block_id].position;
|
||||
*col = blocks_[col_block_id].position;
|
||||
*row_stride = num_rows_;
|
||||
*col_stride = num_rows_;
|
||||
return &cell_infos_[row_block_id * block_layout_.size() + col_block_id];
|
||||
return &cell_infos_[row_block_id * blocks_.size() + col_block_id];
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
void BlockRandomAccessDenseMatrix::SetZero() {
|
||||
if (num_rows_) {
|
||||
VectorRef(values_.get(), num_rows_ * num_rows_).setZero();
|
||||
}
|
||||
ParallelSetZero(context_, num_threads_, values_.get(), num_rows_ * num_rows_);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,11 +35,12 @@
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_random_access_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A square block random accessible matrix with the same row and
|
||||
// column block structure. All cells are stored in the same single
|
||||
@@ -56,13 +57,11 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
|
||||
public:
|
||||
// blocks is a vector of block sizes. The resulting matrix has
|
||||
// blocks.size() * blocks.size() cells.
|
||||
explicit BlockRandomAccessDenseMatrix(const std::vector<int>& blocks);
|
||||
BlockRandomAccessDenseMatrix(const BlockRandomAccessDenseMatrix&) = delete;
|
||||
void operator=(const BlockRandomAccessDenseMatrix&) = delete;
|
||||
explicit BlockRandomAccessDenseMatrix(std::vector<Block> blocks,
|
||||
ContextImpl* context,
|
||||
int num_threads);
|
||||
|
||||
// The destructor is not thread safe. It assumes that no one is
|
||||
// modifying any cells when the matrix is being destroyed.
|
||||
~BlockRandomAccessDenseMatrix() override;
|
||||
~BlockRandomAccessDenseMatrix() override = default;
|
||||
|
||||
// BlockRandomAccessMatrix interface.
|
||||
CellInfo* GetCell(int row_block_id,
|
||||
@@ -72,8 +71,6 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
|
||||
int* row_stride,
|
||||
int* col_stride) final;
|
||||
|
||||
// This is not a thread safe method, it assumes that no cell is
|
||||
// locked.
|
||||
void SetZero() final;
|
||||
|
||||
// Since the matrix is square with the same row and column block
|
||||
@@ -86,14 +83,15 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix
|
||||
double* mutable_values() { return values_.get(); }
|
||||
|
||||
private:
|
||||
int num_rows_;
|
||||
std::vector<int> block_layout_;
|
||||
std::vector<Block> blocks_;
|
||||
ContextImpl* context_ = nullptr;
|
||||
int num_threads_ = -1;
|
||||
int num_rows_ = -1;
|
||||
std::unique_ptr<double[]> values_;
|
||||
std::unique_ptr<CellInfo[]> cell_infos_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,61 +37,26 @@
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/stl_util.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
|
||||
// TODO(sameeragarwal): Drop the dependence on TripletSparseMatrix.
|
||||
namespace ceres::internal {
|
||||
|
||||
BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix(
|
||||
const vector<int>& blocks)
|
||||
: blocks_(blocks) {
|
||||
// Build the row/column layout vector and count the number of scalar
|
||||
// rows/columns.
|
||||
int num_cols = 0;
|
||||
int num_nonzeros = 0;
|
||||
vector<int> block_positions;
|
||||
for (int block_size : blocks_) {
|
||||
block_positions.push_back(num_cols);
|
||||
num_cols += block_size;
|
||||
num_nonzeros += block_size * block_size;
|
||||
const std::vector<Block>& blocks, ContextImpl* context, int num_threads)
|
||||
: context_(context), num_threads_(num_threads) {
|
||||
m_ = CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(nullptr, blocks);
|
||||
double* values = m_->mutable_values();
|
||||
layout_.reserve(blocks.size());
|
||||
for (auto& block : blocks) {
|
||||
layout_.emplace_back(std::make_unique<CellInfo>(values));
|
||||
values += block.size * block.size;
|
||||
}
|
||||
|
||||
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
|
||||
<< num_nonzeros;
|
||||
|
||||
tsm_ =
|
||||
std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros);
|
||||
tsm_->set_num_nonzeros(num_nonzeros);
|
||||
int* rows = tsm_->mutable_rows();
|
||||
int* cols = tsm_->mutable_cols();
|
||||
double* values = tsm_->mutable_values();
|
||||
|
||||
int pos = 0;
|
||||
for (int i = 0; i < blocks_.size(); ++i) {
|
||||
const int block_size = blocks_[i];
|
||||
layout_.push_back(new CellInfo(values + pos));
|
||||
const int block_begin = block_positions[i];
|
||||
for (int r = 0; r < block_size; ++r) {
|
||||
for (int c = 0; c < block_size; ++c, ++pos) {
|
||||
rows[pos] = block_begin + r;
|
||||
cols[pos] = block_begin + c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
BlockRandomAccessDiagonalMatrix::~BlockRandomAccessDiagonalMatrix() {
|
||||
STLDeleteContainerPointers(layout_.begin(), layout_.end());
|
||||
}
|
||||
|
||||
CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id,
|
||||
@@ -103,47 +68,51 @@ CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id,
|
||||
if (row_block_id != col_block_id) {
|
||||
return nullptr;
|
||||
}
|
||||
const int stride = blocks_[row_block_id];
|
||||
|
||||
auto& blocks = m_->row_blocks();
|
||||
const int stride = blocks[row_block_id].size;
|
||||
|
||||
// Each cell is stored contiguously as its own little dense matrix.
|
||||
*row = 0;
|
||||
*col = 0;
|
||||
*row_stride = stride;
|
||||
*col_stride = stride;
|
||||
return layout_[row_block_id];
|
||||
return layout_[row_block_id].get();
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
void BlockRandomAccessDiagonalMatrix::SetZero() {
|
||||
if (tsm_->num_nonzeros()) {
|
||||
VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero();
|
||||
}
|
||||
ParallelSetZero(
|
||||
context_, num_threads_, m_->mutable_values(), m_->num_nonzeros());
|
||||
}
|
||||
|
||||
void BlockRandomAccessDiagonalMatrix::Invert() {
|
||||
double* values = tsm_->mutable_values();
|
||||
for (int block_size : blocks_) {
|
||||
MatrixRef block(values, block_size, block_size);
|
||||
block = block.selfadjointView<Eigen::Upper>().llt().solve(
|
||||
Matrix::Identity(block_size, block_size));
|
||||
values += block_size * block_size;
|
||||
}
|
||||
auto& blocks = m_->row_blocks();
|
||||
const int num_blocks = blocks.size();
|
||||
ParallelFor(context_, 0, num_blocks, num_threads_, [this, blocks](int i) {
|
||||
auto* cell_info = layout_[i].get();
|
||||
auto& block = blocks[i];
|
||||
MatrixRef b(cell_info->values, block.size, block.size);
|
||||
b = b.selfadjointView<Eigen::Upper>().llt().solve(
|
||||
Matrix::Identity(block.size, block.size));
|
||||
});
|
||||
}
|
||||
|
||||
void BlockRandomAccessDiagonalMatrix::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
void BlockRandomAccessDiagonalMatrix::RightMultiplyAndAccumulate(
|
||||
const double* x, double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
const double* values = tsm_->values();
|
||||
for (int block_size : blocks_) {
|
||||
ConstMatrixRef block(values, block_size, block_size);
|
||||
VectorRef(y, block_size).noalias() += block * ConstVectorRef(x, block_size);
|
||||
x += block_size;
|
||||
y += block_size;
|
||||
values += block_size * block_size;
|
||||
}
|
||||
auto& blocks = m_->row_blocks();
|
||||
const int num_blocks = blocks.size();
|
||||
ParallelFor(
|
||||
context_, 0, num_blocks, num_threads_, [this, blocks, x, y](int i) {
|
||||
auto* cell_info = layout_[i].get();
|
||||
auto& block = blocks[i];
|
||||
ConstMatrixRef b(cell_info->values, block.size, block.size);
|
||||
VectorRef(y + block.position, block.size).noalias() +=
|
||||
b * ConstVectorRef(x + block.position, block.size);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,33 +32,30 @@
|
||||
#define CERES_INTERNAL_BLOCK_RANDOM_ACCESS_DIAGONAL_MATRIX_H_
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_random_access_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A thread safe block diagonal matrix implementation of
|
||||
// BlockRandomAccessMatrix.
|
||||
// A BlockRandomAccessMatrix which only stores the block diagonal.
|
||||
// BlockRandomAccessSparseMatrix can also be used to do this, but this class is
|
||||
// more efficient in time and in space.
|
||||
class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix
|
||||
: public BlockRandomAccessMatrix {
|
||||
public:
|
||||
// blocks is an array of block sizes.
|
||||
explicit BlockRandomAccessDiagonalMatrix(const std::vector<int>& blocks);
|
||||
BlockRandomAccessDiagonalMatrix(const BlockRandomAccessDiagonalMatrix&) =
|
||||
delete;
|
||||
void operator=(const BlockRandomAccessDiagonalMatrix&) = delete;
|
||||
|
||||
// The destructor is not thread safe. It assumes that no one is
|
||||
// modifying any cells when the matrix is being destroyed.
|
||||
~BlockRandomAccessDiagonalMatrix() override;
|
||||
BlockRandomAccessDiagonalMatrix(const std::vector<Block>& blocks,
|
||||
ContextImpl* context,
|
||||
int num_threads);
|
||||
~BlockRandomAccessDiagonalMatrix() override = default;
|
||||
|
||||
// BlockRandomAccessMatrix Interface.
|
||||
CellInfo* GetCell(int row_block_id,
|
||||
@@ -68,36 +65,30 @@ class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix
|
||||
int* row_stride,
|
||||
int* col_stride) final;
|
||||
|
||||
// This is not a thread safe method, it assumes that no cell is
|
||||
// locked.
|
||||
// m = 0
|
||||
void SetZero() final;
|
||||
|
||||
// Invert the matrix assuming that each block is positive definite.
|
||||
// m = m^{-1}
|
||||
void Invert();
|
||||
|
||||
// y += S * x
|
||||
void RightMultiply(const double* x, double* y) const;
|
||||
// y += m * x
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const;
|
||||
|
||||
// Since the matrix is square, num_rows() == num_cols().
|
||||
int num_rows() const final { return tsm_->num_rows(); }
|
||||
int num_cols() const final { return tsm_->num_cols(); }
|
||||
int num_rows() const final { return m_->num_rows(); }
|
||||
int num_cols() const final { return m_->num_cols(); }
|
||||
|
||||
const TripletSparseMatrix* matrix() const { return tsm_.get(); }
|
||||
TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
|
||||
const CompressedRowSparseMatrix* matrix() const { return m_.get(); }
|
||||
CompressedRowSparseMatrix* mutable_matrix() { return m_.get(); }
|
||||
|
||||
private:
|
||||
// row/column block sizes.
|
||||
const std::vector<int> blocks_;
|
||||
std::vector<CellInfo*> layout_;
|
||||
|
||||
// The underlying matrix object which actually stores the cells.
|
||||
std::unique_ptr<TripletSparseMatrix> tsm_;
|
||||
|
||||
friend class BlockRandomAccessDiagonalMatrixTest;
|
||||
ContextImpl* context_ = nullptr;
|
||||
const int num_threads_ = 1;
|
||||
std::unique_ptr<CompressedRowSparseMatrix> m_;
|
||||
std::vector<std::unique_ptr<CellInfo>> layout_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,10 +30,8 @@
|
||||
|
||||
#include "ceres/block_random_access_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
BlockRandomAccessMatrix::~BlockRandomAccessMatrix() = default;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A matrix implementing the BlockRandomAccessMatrix interface is a
|
||||
// matrix whose rows and columns are divided into blocks. For example
|
||||
@@ -123,7 +122,6 @@ class CERES_NO_EXPORT BlockRandomAccessMatrix {
|
||||
virtual int num_cols() const = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_BLOCK_RANDOM_ACCESS_MATRIX_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,87 +37,63 @@
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::make_pair;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix(
|
||||
const vector<int>& blocks, const set<pair<int, int>>& block_pairs)
|
||||
: kMaxRowBlocks(10 * 1000 * 1000), blocks_(blocks) {
|
||||
CHECK_LT(blocks.size(), kMaxRowBlocks);
|
||||
const std::vector<Block>& blocks,
|
||||
const std::set<std::pair<int, int>>& block_pairs,
|
||||
ContextImpl* context,
|
||||
int num_threads)
|
||||
: blocks_(blocks), context_(context), num_threads_(num_threads) {
|
||||
CHECK_LE(blocks.size(), std::numeric_limits<std::int32_t>::max());
|
||||
|
||||
// Build the row/column layout vector and count the number of scalar
|
||||
// rows/columns.
|
||||
int num_cols = 0;
|
||||
block_positions_.reserve(blocks_.size());
|
||||
for (int block_size : blocks_) {
|
||||
block_positions_.push_back(num_cols);
|
||||
num_cols += block_size;
|
||||
const int num_cols = NumScalarEntries(blocks);
|
||||
const int num_blocks = blocks.size();
|
||||
|
||||
std::vector<int> num_cells_at_row(num_blocks);
|
||||
for (auto& p : block_pairs) {
|
||||
++num_cells_at_row[p.first];
|
||||
}
|
||||
|
||||
// Count the number of scalar non-zero entries and build the layout
|
||||
// object for looking into the values array of the
|
||||
// TripletSparseMatrix.
|
||||
auto block_structure_ = new CompressedRowBlockStructure;
|
||||
block_structure_->cols = blocks;
|
||||
block_structure_->rows.resize(num_blocks);
|
||||
auto p = block_pairs.begin();
|
||||
int num_nonzeros = 0;
|
||||
for (const auto& block_pair : block_pairs) {
|
||||
const int row_block_size = blocks_[block_pair.first];
|
||||
const int col_block_size = blocks_[block_pair.second];
|
||||
num_nonzeros += row_block_size * col_block_size;
|
||||
}
|
||||
|
||||
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
|
||||
<< num_nonzeros;
|
||||
|
||||
tsm_ =
|
||||
std::make_unique<TripletSparseMatrix>(num_cols, num_cols, num_nonzeros);
|
||||
tsm_->set_num_nonzeros(num_nonzeros);
|
||||
int* rows = tsm_->mutable_rows();
|
||||
int* cols = tsm_->mutable_cols();
|
||||
double* values = tsm_->mutable_values();
|
||||
|
||||
int pos = 0;
|
||||
for (const auto& block_pair : block_pairs) {
|
||||
const int row_block_size = blocks_[block_pair.first];
|
||||
const int col_block_size = blocks_[block_pair.second];
|
||||
cell_values_.emplace_back(block_pair, values + pos);
|
||||
layout_[IntPairToLong(block_pair.first, block_pair.second)] =
|
||||
new CellInfo(values + pos);
|
||||
pos += row_block_size * col_block_size;
|
||||
}
|
||||
|
||||
// Fill the sparsity pattern of the underlying matrix.
|
||||
for (const auto& block_pair : block_pairs) {
|
||||
const int row_block_id = block_pair.first;
|
||||
const int col_block_id = block_pair.second;
|
||||
const int row_block_size = blocks_[row_block_id];
|
||||
const int col_block_size = blocks_[col_block_id];
|
||||
int pos =
|
||||
layout_[IntPairToLong(row_block_id, col_block_id)]->values - values;
|
||||
for (int r = 0; r < row_block_size; ++r) {
|
||||
for (int c = 0; c < col_block_size; ++c, ++pos) {
|
||||
rows[pos] = block_positions_[row_block_id] + r;
|
||||
cols[pos] = block_positions_[col_block_id] + c;
|
||||
values[pos] = 1.0;
|
||||
DCHECK_LT(rows[pos], tsm_->num_rows());
|
||||
DCHECK_LT(cols[pos], tsm_->num_rows());
|
||||
}
|
||||
// Pairs of block indices are sorted lexicographically, thus pairs
|
||||
// corresponding to a single row-block are stored in segments of index pairs
|
||||
// with constant row-block index and increasing column-block index.
|
||||
// CompressedRowBlockStructure is created by traversing block_pairs set.
|
||||
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
|
||||
auto& row = block_structure_->rows[row_block_id];
|
||||
row.block = blocks[row_block_id];
|
||||
row.cells.reserve(num_cells_at_row[row_block_id]);
|
||||
const int row_block_size = blocks[row_block_id].size;
|
||||
// Process all index pairs corresponding to the current row block. Because
|
||||
// index pairs are sorted lexicographically, cells are being appended to the
|
||||
// current row-block till the first change in row-block index
|
||||
for (; p != block_pairs.end() && row_block_id == p->first; ++p) {
|
||||
const int col_block_id = p->second;
|
||||
row.cells.emplace_back(col_block_id, num_nonzeros);
|
||||
num_nonzeros += row_block_size * blocks[col_block_id].size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
BlockRandomAccessSparseMatrix::~BlockRandomAccessSparseMatrix() {
|
||||
for (const auto& entry : layout_) {
|
||||
delete entry.second;
|
||||
bsm_ = std::make_unique<BlockSparseMatrix>(block_structure_);
|
||||
VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] "
|
||||
<< num_nonzeros;
|
||||
double* values = bsm_->mutable_values();
|
||||
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
|
||||
const auto& cells = block_structure_->rows[row_block_id].cells;
|
||||
for (auto& c : cells) {
|
||||
const int col_block_id = c.block_id;
|
||||
double* const data = values + c.position;
|
||||
layout_[IntPairToInt64(row_block_id, col_block_id)] =
|
||||
std::make_unique<CellInfo>(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,8 +103,7 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id,
|
||||
int* col,
|
||||
int* row_stride,
|
||||
int* col_stride) {
|
||||
const LayoutType::iterator it =
|
||||
layout_.find(IntPairToLong(row_block_id, col_block_id));
|
||||
const auto it = layout_.find(IntPairToInt64(row_block_id, col_block_id));
|
||||
if (it == layout_.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -136,44 +111,49 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id,
|
||||
// Each cell is stored contiguously as its own little dense matrix.
|
||||
*row = 0;
|
||||
*col = 0;
|
||||
*row_stride = blocks_[row_block_id];
|
||||
*col_stride = blocks_[col_block_id];
|
||||
return it->second;
|
||||
*row_stride = blocks_[row_block_id].size;
|
||||
*col_stride = blocks_[col_block_id].size;
|
||||
return it->second.get();
|
||||
}
|
||||
|
||||
// Assume that the user does not hold any locks on any cell blocks
|
||||
// when they are calling SetZero.
|
||||
void BlockRandomAccessSparseMatrix::SetZero() {
|
||||
if (tsm_->num_nonzeros()) {
|
||||
VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero();
|
||||
}
|
||||
bsm_->SetZero(context_, num_threads_);
|
||||
}
|
||||
|
||||
void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
|
||||
double* y) const {
|
||||
for (const auto& cell_position_and_data : cell_values_) {
|
||||
const int row = cell_position_and_data.first.first;
|
||||
const int row_block_size = blocks_[row];
|
||||
const int row_block_pos = block_positions_[row];
|
||||
void BlockRandomAccessSparseMatrix::SymmetricRightMultiplyAndAccumulate(
|
||||
const double* x, double* y) const {
|
||||
const auto bs = bsm_->block_structure();
|
||||
const auto values = bsm_->values();
|
||||
const int num_blocks = blocks_.size();
|
||||
|
||||
const int col = cell_position_and_data.first.second;
|
||||
const int col_block_size = blocks_[col];
|
||||
const int col_block_pos = block_positions_[col];
|
||||
for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) {
|
||||
const auto& row_block = bs->rows[row_block_id];
|
||||
const int row_block_size = row_block.block.size;
|
||||
const int row_block_pos = row_block.block.position;
|
||||
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
cell_position_and_data.second,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
for (auto& c : row_block.cells) {
|
||||
const int col_block_id = c.block_id;
|
||||
const int col_block_size = blocks_[col_block_id].size;
|
||||
const int col_block_pos = blocks_[col_block_id].position;
|
||||
|
||||
// Since the matrix is symmetric, but only the upper triangular
|
||||
// part is stored, if the block being accessed is not a diagonal
|
||||
// block, then use the same block to do the corresponding lower
|
||||
// triangular multiply also.
|
||||
if (row != col) {
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + c.position,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
if (col_block_id == row_block_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Since the matrix is symmetric, but only the upper triangular
|
||||
// part is stored, if the block being accessed is not a diagonal
|
||||
// block, then use the same block to do the corresponding lower
|
||||
// triangular multiply also
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
cell_position_and_data.second,
|
||||
values + c.position,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + row_block_pos,
|
||||
@@ -182,5 +162,4 @@ void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,17 +39,18 @@
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_random_access_matrix.h"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/small_blas.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A thread safe square block sparse implementation of
|
||||
// BlockRandomAccessMatrix. Internally a TripletSparseMatrix is used
|
||||
// BlockRandomAccessMatrix. Internally a BlockSparseMatrix is used
|
||||
// for doing the actual storage. This class augments this matrix with
|
||||
// an unordered_map that allows random read/write access.
|
||||
class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
|
||||
@@ -59,14 +60,14 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
|
||||
// <row_block_id, col_block_id> pairs to identify the non-zero cells
|
||||
// of this matrix.
|
||||
BlockRandomAccessSparseMatrix(
|
||||
const std::vector<int>& blocks,
|
||||
const std::set<std::pair<int, int>>& block_pairs);
|
||||
BlockRandomAccessSparseMatrix(const BlockRandomAccessSparseMatrix&) = delete;
|
||||
void operator=(const BlockRandomAccessSparseMatrix&) = delete;
|
||||
const std::vector<Block>& blocks,
|
||||
const std::set<std::pair<int, int>>& block_pairs,
|
||||
ContextImpl* context,
|
||||
int num_threads);
|
||||
|
||||
// The destructor is not thread safe. It assumes that no one is
|
||||
// modifying any cells when the matrix is being destroyed.
|
||||
~BlockRandomAccessSparseMatrix() override;
|
||||
~BlockRandomAccessSparseMatrix() override = default;
|
||||
|
||||
// BlockRandomAccessMatrix Interface.
|
||||
CellInfo* GetCell(int row_block_id,
|
||||
@@ -80,53 +81,49 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix
|
||||
// locked.
|
||||
void SetZero() final;
|
||||
|
||||
// Assume that the matrix is symmetric and only one half of the
|
||||
// matrix is stored.
|
||||
// Assume that the matrix is symmetric and only one half of the matrix is
|
||||
// stored.
|
||||
//
|
||||
// y += S * x
|
||||
void SymmetricRightMultiply(const double* x, double* y) const;
|
||||
void SymmetricRightMultiplyAndAccumulate(const double* x, double* y) const;
|
||||
|
||||
// Since the matrix is square, num_rows() == num_cols().
|
||||
int num_rows() const final { return tsm_->num_rows(); }
|
||||
int num_cols() const final { return tsm_->num_cols(); }
|
||||
int num_rows() const final { return bsm_->num_rows(); }
|
||||
int num_cols() const final { return bsm_->num_cols(); }
|
||||
|
||||
// Access to the underlying matrix object.
|
||||
const TripletSparseMatrix* matrix() const { return tsm_.get(); }
|
||||
TripletSparseMatrix* mutable_matrix() { return tsm_.get(); }
|
||||
const BlockSparseMatrix* matrix() const { return bsm_.get(); }
|
||||
BlockSparseMatrix* mutable_matrix() { return bsm_.get(); }
|
||||
|
||||
private:
|
||||
int64_t IntPairToLong(int row, int col) const {
|
||||
return row * kMaxRowBlocks + col;
|
||||
int64_t IntPairToInt64(int row, int col) const {
|
||||
return row * kRowShift + col;
|
||||
}
|
||||
|
||||
void LongToIntPair(int64_t index, int* row, int* col) const {
|
||||
*row = index / kMaxRowBlocks;
|
||||
*col = index % kMaxRowBlocks;
|
||||
void Int64ToIntPair(int64_t index, int* row, int* col) const {
|
||||
*row = index / kRowShift;
|
||||
*col = index % kRowShift;
|
||||
}
|
||||
|
||||
const int64_t kMaxRowBlocks;
|
||||
constexpr static int64_t kRowShift{1ll << 32};
|
||||
|
||||
// row/column block sizes.
|
||||
const std::vector<int> blocks_;
|
||||
std::vector<int> block_positions_;
|
||||
const std::vector<Block> blocks_;
|
||||
ContextImpl* context_ = nullptr;
|
||||
const int num_threads_ = 1;
|
||||
|
||||
// A mapping from <row_block_id, col_block_id> to the position in
|
||||
// the values array of the underlying matrix where the block is stored.
|
||||
using LayoutType = std::unordered_map<long, CellInfo*>;
|
||||
using LayoutType = std::unordered_map<int64_t, std::unique_ptr<CellInfo>>;
|
||||
LayoutType layout_;
|
||||
|
||||
// In order traversal of contents of the matrix. This allows us to
|
||||
// implement a matrix-vector which is 20% faster than using the
|
||||
// iterator in the Layout object instead.
|
||||
std::vector<std::pair<std::pair<int, int>, double*>> cell_values_;
|
||||
// The underlying matrix object which actually stores the cells.
|
||||
std::unique_ptr<TripletSparseMatrix> tsm_;
|
||||
std::unique_ptr<BlockSparseMatrix> bsm_;
|
||||
|
||||
friend class BlockRandomAccessSparseMatrixTest;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
573
extern/ceres/internal/ceres/block_sparse_matrix.cc
vendored
573
extern/ceres/internal/ceres/block_sparse_matrix.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,23 +33,151 @@
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/crs_matrix.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/random.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/small_blas.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "cuda_runtime.h"
|
||||
#endif
|
||||
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
|
||||
if (rows.empty()) {
|
||||
return;
|
||||
}
|
||||
rows[0].cumulative_nnz = rows[0].nnz;
|
||||
for (int c = 1; c < rows.size(); ++c) {
|
||||
const int curr_nnz = rows[c].nnz;
|
||||
rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool transpose>
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
CreateStructureOfCompressedRowSparseMatrix(
|
||||
const double* values,
|
||||
int num_rows,
|
||||
int num_cols,
|
||||
int num_nonzeros,
|
||||
const CompressedRowBlockStructure* block_structure) {
|
||||
auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_rows, num_cols, num_nonzeros);
|
||||
auto crs_cols = crs_matrix->mutable_cols();
|
||||
auto crs_rows = crs_matrix->mutable_rows();
|
||||
int value_offset = 0;
|
||||
const int num_row_blocks = block_structure->rows.size();
|
||||
const auto& cols = block_structure->cols;
|
||||
*crs_rows++ = 0;
|
||||
for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
|
||||
const auto& row_block = block_structure->rows[row_block_id];
|
||||
// Empty row block: only requires setting row offsets
|
||||
if (row_block.cells.empty()) {
|
||||
std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
|
||||
crs_rows += row_block.block.size;
|
||||
continue;
|
||||
}
|
||||
|
||||
int row_nnz = 0;
|
||||
if constexpr (transpose) {
|
||||
// Transposed block structure comes with nnz in row-block filled-in
|
||||
row_nnz = row_block.nnz / row_block.block.size;
|
||||
} else {
|
||||
// Nnz field of non-transposed block structure is not filled and it can
|
||||
// have non-sequential structure (consider the case of jacobian for
|
||||
// Schur-complement solver: E and F blocks are stored separately).
|
||||
for (auto& c : row_block.cells) {
|
||||
row_nnz += cols[c.block_id].size;
|
||||
}
|
||||
}
|
||||
|
||||
// Row-wise setup of matrix structure
|
||||
for (int row = 0; row < row_block.block.size; ++row) {
|
||||
value_offset += row_nnz;
|
||||
*crs_rows++ = value_offset;
|
||||
for (auto& c : row_block.cells) {
|
||||
const int col_block_size = cols[c.block_id].size;
|
||||
const int col_position = cols[c.block_id].position;
|
||||
std::iota(crs_cols, crs_cols + col_block_size, col_position);
|
||||
crs_cols += col_block_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
return crs_matrix;
|
||||
}
|
||||
|
||||
template <bool transpose>
|
||||
void UpdateCompressedRowSparseMatrixImpl(
|
||||
CompressedRowSparseMatrix* crs_matrix,
|
||||
const double* values,
|
||||
const CompressedRowBlockStructure* block_structure) {
|
||||
auto crs_values = crs_matrix->mutable_values();
|
||||
auto crs_rows = crs_matrix->mutable_rows();
|
||||
const int num_row_blocks = block_structure->rows.size();
|
||||
const auto& cols = block_structure->cols;
|
||||
for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
|
||||
const auto& row_block = block_structure->rows[row_block_id];
|
||||
const int row_block_size = row_block.block.size;
|
||||
const int row_nnz = crs_rows[1] - crs_rows[0];
|
||||
crs_rows += row_block_size;
|
||||
|
||||
if (row_nnz == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
|
||||
int col_offset = 0;
|
||||
for (auto& c : row_block.cells) {
|
||||
const int col_block_size = cols[c.block_id].size;
|
||||
auto crs_cell =
|
||||
crs_row_block.block(0, col_offset, row_block_size, col_block_size);
|
||||
if constexpr (transpose) {
|
||||
// Transposed matrix is filled using transposed block-strucutre
|
||||
ConstMatrixRef cell(
|
||||
values + c.position, col_block_size, row_block_size);
|
||||
crs_cell = cell.transpose();
|
||||
} else {
|
||||
ConstMatrixRef cell(
|
||||
values + c.position, row_block_size, col_block_size);
|
||||
crs_cell = cell;
|
||||
}
|
||||
col_offset += col_block_size;
|
||||
}
|
||||
crs_values += row_nnz * row_block_size;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the row-block and column-block descriptions of block_structure into
// the row_blocks / col_blocks vectors of crs_matrix.
void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const auto& source_rows = block_structure->rows;

  // One block per row of the block structure.
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(source_rows.size());
  std::transform(source_rows.begin(),
                 source_rows.end(),
                 row_blocks.begin(),
                 [](const auto& row) { return row.block; });

  // Column blocks are stored as a plain vector already and are copied as is.
  *crs_matrix->mutable_col_blocks() = block_structure->cols;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
BlockSparseMatrix::BlockSparseMatrix(
|
||||
CompressedRowBlockStructure* block_structure)
|
||||
: num_rows_(0),
|
||||
CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
|
||||
: use_page_locked_memory_(use_page_locked_memory),
|
||||
num_rows_(0),
|
||||
num_cols_(0),
|
||||
num_nonzeros_(0),
|
||||
block_structure_(block_structure) {
|
||||
@@ -66,7 +194,7 @@ BlockSparseMatrix::BlockSparseMatrix(
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
num_rows_ += row_block_size;
|
||||
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
const std::vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
@@ -79,51 +207,138 @@ BlockSparseMatrix::BlockSparseMatrix(
|
||||
CHECK_GE(num_nonzeros_, 0);
|
||||
VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
|
||||
<< " bytes."; // NOLINT
|
||||
values_ = std::make_unique<double[]>(num_nonzeros_);
|
||||
|
||||
values_ = AllocateValues(num_nonzeros_);
|
||||
max_num_nonzeros_ = num_nonzeros_;
|
||||
CHECK(values_ != nullptr);
|
||||
AddTransposeBlockStructure();
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::SetZero() {
|
||||
std::fill(values_.get(), values_.get() + num_nonzeros_, 0.0);
|
||||
}
|
||||
// Hands values_ to FreeValues(). NOTE(review): presumably this releases the
// array obtained from AllocateValues() in the constructor -- confirm.
BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }
|
||||
|
||||
void BlockSparseMatrix::RightMultiply(const double* x, double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values_.get() + cell.position,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
void BlockSparseMatrix::AddTransposeBlockStructure() {
|
||||
if (transpose_block_structure_ == nullptr) {
|
||||
transpose_block_structure_ = CreateTranspose(*block_structure_);
|
||||
}
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
||||
// Overwrites the first num_nonzeros_ entries of values_ with 0.0.
void BlockSparseMatrix::SetZero() {
  std::fill_n(values_, num_nonzeros_, 0.0);
}
|
||||
|
||||
// Multi-threaded counterpart of SetZero(): delegates to ParallelSetZero()
// over the num_nonzeros_ entries starting at values_, using the supplied
// context and thread count.
void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context,
                  num_threads,
                  values_,
                  num_nonzeros_);
}
|
||||
|
||||
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
RightMultiplyAndAccumulate(x, y, nullptr, 1);
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
|
||||
const auto values = values_;
|
||||
const auto block_structure = block_structure_.get();
|
||||
const auto num_row_blocks = block_structure->rows.size();
|
||||
|
||||
ParallelFor(context,
|
||||
0,
|
||||
num_row_blocks,
|
||||
num_threads,
|
||||
[values, block_structure, x, y](int row_block_id) {
|
||||
const int row_block_pos =
|
||||
block_structure->rows[row_block_id].block.position;
|
||||
const int row_block_size =
|
||||
block_structure->rows[row_block_id].block.size;
|
||||
const auto& cells = block_structure->rows[row_block_id].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size =
|
||||
block_structure->cols[col_block_id].size;
|
||||
const int col_block_pos =
|
||||
block_structure->cols[col_block_id].position;
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
|
||||
// might benefit from caching column-block partition
|
||||
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
// While utilizing transposed structure allows to perform parallel
|
||||
// left-multiplication by dense vector, it makes access patterns to matrix
|
||||
// elements scattered. Thus, multiplication using transposed structure
|
||||
// is only useful for parallel execution
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
if (transpose_block_structure_ == nullptr || num_threads == 1) {
|
||||
LeftMultiplyAndAccumulate(x, y);
|
||||
return;
|
||||
}
|
||||
|
||||
auto transpose_bs = transpose_block_structure_.get();
|
||||
const auto values = values_;
|
||||
const int num_col_blocks = transpose_bs->rows.size();
|
||||
if (!num_col_blocks) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Use non-zero count as iteration cost for guided parallel-for loop
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
num_col_blocks,
|
||||
num_threads,
|
||||
[values, transpose_bs, x, y](int row_block_id) {
|
||||
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
|
||||
int row_block_size = transpose_bs->rows[row_block_id].block.size;
|
||||
auto& cells = transpose_bs->rows[row_block_id].cells;
|
||||
|
||||
for (auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size = transpose_bs->cols[col_block_id].size;
|
||||
const int col_block_pos = transpose_bs->cols[col_block_id].position;
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position,
|
||||
col_block_size,
|
||||
row_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
},
|
||||
transpose_bs->rows.data(),
|
||||
[](const CompressedRow& row) { return row.cumulative_nnz; });
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
// Single-threaded left products are always computed using a non-transpose
|
||||
// block structure, because it has linear access pattern to matrix elements
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
const auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values_.get() + cell.position,
|
||||
values_ + cell.position,
|
||||
row_block_size,
|
||||
col_block_size,
|
||||
x + row_block_pos,
|
||||
@@ -137,35 +352,144 @@ void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
|
||||
VectorRef(x, num_cols_).setZero();
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
const MatrixRef m(
|
||||
values_.get() + cell.position, row_block_size, col_block_size);
|
||||
values_ + cell.position, row_block_size, col_block_size);
|
||||
VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
|
||||
// might benefit from caching column-block partition
|
||||
void BlockSparseMatrix::SquaredColumnNorm(double* x,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
if (transpose_block_structure_ == nullptr || num_threads == 1) {
|
||||
SquaredColumnNorm(x);
|
||||
return;
|
||||
}
|
||||
|
||||
CHECK(x != nullptr);
|
||||
ParallelSetZero(context, num_threads, x, num_cols_);
|
||||
|
||||
auto transpose_bs = transpose_block_structure_.get();
|
||||
const auto values = values_;
|
||||
const int num_col_blocks = transpose_bs->rows.size();
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
num_col_blocks,
|
||||
num_threads,
|
||||
[values, transpose_bs, x](int row_block_id) {
|
||||
const auto& row = transpose_bs->rows[row_block_id];
|
||||
|
||||
for (auto& cell : row.cells) {
|
||||
const auto& col = transpose_bs->cols[cell.block_id];
|
||||
const MatrixRef m(values + cell.position, col.size, row.block.size);
|
||||
VectorRef(x + row.block.position, row.block.size) +=
|
||||
m.colwise().squaredNorm();
|
||||
}
|
||||
},
|
||||
transpose_bs->rows.data(),
|
||||
[](const CompressedRow& row) { return row.cumulative_nnz; });
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::ScaleColumns(const double* scale) {
|
||||
CHECK(scale != nullptr);
|
||||
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
MatrixRef m(
|
||||
values_.get() + cell.position, row_block_size, col_block_size);
|
||||
MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
|
||||
m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
|
||||
// might benefit from caching column-block partition
|
||||
void BlockSparseMatrix::ScaleColumns(const double* scale,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
if (transpose_block_structure_ == nullptr || num_threads == 1) {
|
||||
ScaleColumns(scale);
|
||||
return;
|
||||
}
|
||||
|
||||
CHECK(scale != nullptr);
|
||||
auto transpose_bs = transpose_block_structure_.get();
|
||||
auto values = values_;
|
||||
const int num_col_blocks = transpose_bs->rows.size();
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
num_col_blocks,
|
||||
num_threads,
|
||||
[values, transpose_bs, scale](int row_block_id) {
|
||||
const auto& row = transpose_bs->rows[row_block_id];
|
||||
|
||||
for (auto& cell : row.cells) {
|
||||
const auto& col = transpose_bs->cols[cell.block_id];
|
||||
MatrixRef m(values + cell.position, col.size, row.block.size);
|
||||
m *= ConstVectorRef(scale + row.block.position, row.block.size)
|
||||
.asDiagonal();
|
||||
}
|
||||
},
|
||||
transpose_bs->rows.data(),
|
||||
[](const CompressedRow& row) { return row.cumulative_nnz; });
|
||||
}
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
|
||||
auto bs = transpose_block_structure_.get();
|
||||
auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
|
||||
values(), num_cols_, num_rows_, num_nonzeros_, bs);
|
||||
|
||||
SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);
|
||||
|
||||
UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
|
||||
return crs_matrix;
|
||||
}
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
|
||||
auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
|
||||
values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get());
|
||||
|
||||
SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
|
||||
block_structure_.get());
|
||||
|
||||
UpdateCompressedRowSparseMatrix(crs_matrix.get());
|
||||
return crs_matrix;
|
||||
}
|
||||
|
||||
// Refreshes the values of crs_matrix from this matrix, going through the
// transposed block structure. crs_matrix must be non-null, have num_cols_
// rows, num_rows_ columns and num_nonzeros_ non-zeros; each requirement is
// enforced with a CHECK.
void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  // Dimensions are swapped relative to this matrix.
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);

  const auto* transposed_structure = transpose_block_structure_.get();
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transposed_structure);
}
|
||||
// Refreshes the values of crs_matrix from this matrix using block_structure_.
// crs_matrix must be non-null and match this matrix in rows, columns and
// number of non-zeros; each requirement is enforced with a CHECK.
void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);

  const auto* structure = block_structure_.get();
  UpdateCompressedRowSparseMatrixImpl<false>(crs_matrix, values(), structure);
}
|
||||
|
||||
void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
|
||||
CHECK(dense_matrix != nullptr);
|
||||
|
||||
@@ -176,14 +500,14 @@ void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
int jac_pos = cell.position;
|
||||
m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
|
||||
MatrixRef(values_.get() + jac_pos, row_block_size, col_block_size);
|
||||
MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -199,7 +523,7 @@ void BlockSparseMatrix::ToTripletSparseMatrix(
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
const auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
int col_block_id = cell.block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
@@ -223,12 +547,19 @@ const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
|
||||
return block_structure_.get();
|
||||
}
|
||||
|
||||
// Return a pointer to the block structure of matrix transpose. We continue to
|
||||
// hold ownership of the object though.
|
||||
const CompressedRowBlockStructure*
|
||||
BlockSparseMatrix::transpose_block_structure() const {
|
||||
return transpose_block_structure_.get();
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::ToTextFile(FILE* file) const {
|
||||
CHECK(file != nullptr);
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
const int row_block_pos = block_structure_->rows[i].block.position;
|
||||
const int row_block_size = block_structure_->rows[i].block.size;
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
const auto& cells = block_structure_->rows[i].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
@@ -293,34 +624,51 @@ void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
|
||||
|
||||
for (int i = 0; i < m_bs->rows.size(); ++i) {
|
||||
const CompressedRow& m_row = m_bs->rows[i];
|
||||
CompressedRow& row = block_structure_->rows[old_num_row_blocks + i];
|
||||
const int row_block_id = old_num_row_blocks + i;
|
||||
CompressedRow& row = block_structure_->rows[row_block_id];
|
||||
row.block.size = m_row.block.size;
|
||||
row.block.position = num_rows_;
|
||||
num_rows_ += m_row.block.size;
|
||||
row.cells.resize(m_row.cells.size());
|
||||
if (transpose_block_structure_) {
|
||||
transpose_block_structure_->cols.emplace_back(row.block);
|
||||
}
|
||||
for (int c = 0; c < m_row.cells.size(); ++c) {
|
||||
const int block_id = m_row.cells[c].block_id;
|
||||
row.cells[c].block_id = block_id;
|
||||
row.cells[c].position = num_nonzeros_;
|
||||
num_nonzeros_ += m_row.block.size * m_bs->cols[block_id].size;
|
||||
|
||||
const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
|
||||
if (transpose_block_structure_) {
|
||||
transpose_block_structure_->rows[block_id].cells.emplace_back(
|
||||
row_block_id, num_nonzeros_);
|
||||
transpose_block_structure_->rows[block_id].nnz += cell_nnz;
|
||||
}
|
||||
|
||||
num_nonzeros_ += cell_nnz;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_nonzeros_ > max_num_nonzeros_) {
|
||||
std::unique_ptr<double[]> new_values =
|
||||
std::make_unique<double[]>(num_nonzeros_);
|
||||
std::copy_n(values_.get(), old_num_nonzeros, new_values.get());
|
||||
values_ = std::move(new_values);
|
||||
double* old_values = values_;
|
||||
values_ = AllocateValues(num_nonzeros_);
|
||||
std::copy_n(old_values, old_num_nonzeros, values_);
|
||||
max_num_nonzeros_ = num_nonzeros_;
|
||||
FreeValues(old_values);
|
||||
}
|
||||
|
||||
std::copy(m.values(),
|
||||
m.values() + m.num_nonzeros(),
|
||||
values_.get() + old_num_nonzeros);
|
||||
std::copy(
|
||||
m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);
|
||||
|
||||
if (transpose_block_structure_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
|
||||
}
|
||||
|
||||
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
|
||||
const int num_row_blocks = block_structure_->rows.size();
|
||||
const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
|
||||
int delta_num_nonzeros = 0;
|
||||
int delta_num_rows = 0;
|
||||
const std::vector<Block>& column_blocks = block_structure_->cols;
|
||||
@@ -330,15 +678,40 @@ void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
|
||||
for (int c = 0; c < row.cells.size(); ++c) {
|
||||
const Cell& cell = row.cells[c];
|
||||
delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;
|
||||
|
||||
if (transpose_block_structure_) {
|
||||
auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
|
||||
while (!col_cells.empty() &&
|
||||
col_cells.back().block_id >= new_num_row_blocks) {
|
||||
const int del_block_id = col_cells.back().block_id;
|
||||
const int del_block_rows =
|
||||
block_structure_->rows[del_block_id].block.size;
|
||||
const int del_block_cols = column_blocks[cell.block_id].size;
|
||||
const int del_cell_nnz = del_block_rows * del_block_cols;
|
||||
transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
|
||||
col_cells.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
num_nonzeros_ -= delta_num_nonzeros;
|
||||
num_rows_ -= delta_num_rows;
|
||||
block_structure_->rows.resize(num_row_blocks - delta_row_blocks);
|
||||
block_structure_->rows.resize(new_num_row_blocks);
|
||||
|
||||
if (transpose_block_structure_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < delta_row_blocks; ++i) {
|
||||
transpose_block_structure_->cols.pop_back();
|
||||
}
|
||||
|
||||
ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
|
||||
const BlockSparseMatrix::RandomMatrixOptions& options) {
|
||||
const BlockSparseMatrix::RandomMatrixOptions& options,
|
||||
std::mt19937& prng,
|
||||
bool use_page_locked_memory) {
|
||||
CHECK_GT(options.num_row_blocks, 0);
|
||||
CHECK_GT(options.min_row_block_size, 0);
|
||||
CHECK_GT(options.max_row_block_size, 0);
|
||||
@@ -346,7 +719,11 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
|
||||
CHECK_GT(options.block_density, 0.0);
|
||||
CHECK_LE(options.block_density, 1.0);
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure();
|
||||
std::uniform_int_distribution<int> col_distribution(
|
||||
options.min_col_block_size, options.max_col_block_size);
|
||||
std::uniform_int_distribution<int> row_distribution(
|
||||
options.min_row_block_size, options.max_row_block_size);
|
||||
auto bs = std::make_unique<CompressedRowBlockStructure>();
|
||||
if (options.col_blocks.empty()) {
|
||||
CHECK_GT(options.num_col_blocks, 0);
|
||||
CHECK_GT(options.min_col_block_size, 0);
|
||||
@@ -356,10 +733,7 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
|
||||
// Generate the col block structure.
|
||||
int col_block_position = 0;
|
||||
for (int i = 0; i < options.num_col_blocks; ++i) {
|
||||
// Generate a random integer in [min_col_block_size, max_col_block_size]
|
||||
const int delta_block_size =
|
||||
Uniform(options.max_col_block_size - options.min_col_block_size);
|
||||
const int col_block_size = options.min_col_block_size + delta_block_size;
|
||||
const int col_block_size = col_distribution(prng);
|
||||
bs->cols.emplace_back(col_block_size, col_block_position);
|
||||
col_block_position += col_block_size;
|
||||
}
|
||||
@@ -368,22 +742,21 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
|
||||
}
|
||||
|
||||
bool matrix_has_blocks = false;
|
||||
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
|
||||
while (!matrix_has_blocks) {
|
||||
VLOG(1) << "Clearing";
|
||||
bs->rows.clear();
|
||||
int row_block_position = 0;
|
||||
int value_position = 0;
|
||||
for (int r = 0; r < options.num_row_blocks; ++r) {
|
||||
const int delta_block_size =
|
||||
Uniform(options.max_row_block_size - options.min_row_block_size);
|
||||
const int row_block_size = options.min_row_block_size + delta_block_size;
|
||||
const int row_block_size = row_distribution(prng);
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = row_block_size;
|
||||
row.block.position = row_block_position;
|
||||
row_block_position += row_block_size;
|
||||
for (int c = 0; c < bs->cols.size(); ++c) {
|
||||
if (RandDouble() > options.block_density) continue;
|
||||
if (uniform01(prng) > options.block_density) continue;
|
||||
|
||||
row.cells.emplace_back();
|
||||
Cell& cell = row.cells.back();
|
||||
@@ -395,14 +768,76 @@ std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
|
||||
}
|
||||
}
|
||||
|
||||
auto matrix = std::make_unique<BlockSparseMatrix>(bs);
|
||||
auto matrix =
|
||||
std::make_unique<BlockSparseMatrix>(bs.release(), use_page_locked_memory);
|
||||
double* values = matrix->mutable_values();
|
||||
for (int i = 0; i < matrix->num_nonzeros(); ++i) {
|
||||
values[i] = RandNormal();
|
||||
}
|
||||
std::normal_distribution<double> standard_normal_distribution;
|
||||
std::generate_n(
|
||||
values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
|
||||
return standard_normal_distribution(prng);
|
||||
});
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
|
||||
const CompressedRowBlockStructure& bs) {
|
||||
auto transpose = std::make_unique<CompressedRowBlockStructure>();
|
||||
|
||||
transpose->rows.resize(bs.cols.size());
|
||||
for (int i = 0; i < bs.cols.size(); ++i) {
|
||||
transpose->rows[i].block = bs.cols[i];
|
||||
transpose->rows[i].nnz = 0;
|
||||
}
|
||||
|
||||
transpose->cols.resize(bs.rows.size());
|
||||
for (int i = 0; i < bs.rows.size(); ++i) {
|
||||
auto& row = bs.rows[i];
|
||||
transpose->cols[i] = row.block;
|
||||
|
||||
const int nrows = row.block.size;
|
||||
for (auto& cell : row.cells) {
|
||||
transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
|
||||
const int ncols = transpose->rows[cell.block_id].block.size;
|
||||
transpose->rows[cell.block_id].nnz += nrows * ncols;
|
||||
}
|
||||
}
|
||||
ComputeCumulativeNumberOfNonZeros(transpose->rows);
|
||||
return transpose;
|
||||
}
|
||||
|
||||
double* BlockSparseMatrix::AllocateValues(int size) {
|
||||
if (!use_page_locked_memory_) {
|
||||
return new double[size];
|
||||
}
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
double* values = nullptr;
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
|
||||
return values;
|
||||
#else
|
||||
LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
|
||||
<< "This is a Ceres bug; please contact the developers!";
|
||||
return nullptr;
|
||||
#endif
|
||||
};
|
||||
|
||||
void BlockSparseMatrix::FreeValues(double*& values) {
|
||||
if (!use_page_locked_memory_) {
|
||||
delete[] values;
|
||||
values = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
CHECK_EQ(cudaSuccess, cudaFreeHost(values));
|
||||
values = nullptr;
|
||||
#else
|
||||
LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
|
||||
<< "This is a Ceres bug; please contact the developers!";
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,15 +35,17 @@
|
||||
#define CERES_INTERNAL_BLOCK_SPARSE_MATRIX_H_
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class TripletSparseMatrix;
|
||||
|
||||
@@ -63,31 +65,64 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix {
|
||||
//
|
||||
// TODO(sameeragarwal): Add a function which will validate legal
|
||||
// CompressedRowBlockStructure objects.
|
||||
explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure);
|
||||
explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure,
|
||||
bool use_page_locked_memory = false);
|
||||
~BlockSparseMatrix();
|
||||
|
||||
BlockSparseMatrix();
|
||||
BlockSparseMatrix(const BlockSparseMatrix&) = delete;
|
||||
void operator=(const BlockSparseMatrix&) = delete;
|
||||
|
||||
// Implementation of SparseMatrix interface.
|
||||
void SetZero() final;
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void LeftMultiply(const double* x, double* y) const final;
|
||||
void SetZero() override final;
|
||||
void SetZero(ContextImpl* context, int num_threads) override final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const final;
|
||||
void SquaredColumnNorm(double* x) const final;
|
||||
void SquaredColumnNorm(double* x,
|
||||
ContextImpl* context,
|
||||
int num_threads) const final;
|
||||
void ScaleColumns(const double* scale) final;
|
||||
void ScaleColumns(const double* scale,
|
||||
ContextImpl* context,
|
||||
int num_threads) final;
|
||||
|
||||
// Convert to CompressedRowSparseMatrix
|
||||
std::unique_ptr<CompressedRowSparseMatrix> ToCompressedRowSparseMatrix()
|
||||
const;
|
||||
// Create CompressedRowSparseMatrix corresponding to transposed matrix
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
ToCompressedRowSparseMatrixTranspose() const;
|
||||
// Copy values to CompressedRowSparseMatrix that has compatible structure
|
||||
void UpdateCompressedRowSparseMatrix(
|
||||
CompressedRowSparseMatrix* crs_matrix) const;
|
||||
// Copy values to CompressedRowSparseMatrix that has structure of transposed
|
||||
// matrix
|
||||
void UpdateCompressedRowSparseMatrixTranspose(
|
||||
CompressedRowSparseMatrix* crs_matrix) const;
|
||||
void ToDenseMatrix(Matrix* dense_matrix) const final;
|
||||
void ToTextFile(FILE* file) const final;
|
||||
|
||||
void AddTransposeBlockStructure();
|
||||
|
||||
// clang-format off
|
||||
int num_rows() const final { return num_rows_; }
|
||||
int num_cols() const final { return num_cols_; }
|
||||
int num_nonzeros() const final { return num_nonzeros_; }
|
||||
const double* values() const final { return values_.get(); }
|
||||
double* mutable_values() final { return values_.get(); }
|
||||
const double* values() const final { return values_; }
|
||||
double* mutable_values() final { return values_; }
|
||||
// clang-format on
|
||||
|
||||
void ToTripletSparseMatrix(TripletSparseMatrix* matrix) const;
|
||||
const CompressedRowBlockStructure* block_structure() const;
|
||||
const CompressedRowBlockStructure* transpose_block_structure() const;
|
||||
|
||||
// Append the contents of m to the bottom of this matrix. m must
|
||||
// have the same column blocks structure as this matrix.
|
||||
@@ -122,15 +157,22 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix {
|
||||
// distributed and whose structure is determined by
|
||||
// RandomMatrixOptions.
|
||||
static std::unique_ptr<BlockSparseMatrix> CreateRandomMatrix(
|
||||
const RandomMatrixOptions& options);
|
||||
const RandomMatrixOptions& options,
|
||||
std::mt19937& prng,
|
||||
bool use_page_locked_memory = false);
|
||||
|
||||
private:
|
||||
double* AllocateValues(int size);
|
||||
void FreeValues(double*& values);
|
||||
|
||||
const bool use_page_locked_memory_;
|
||||
int num_rows_;
|
||||
int num_cols_;
|
||||
int num_nonzeros_;
|
||||
int max_num_nonzeros_;
|
||||
std::unique_ptr<double[]> values_;
|
||||
double* values_;
|
||||
std::unique_ptr<CompressedRowBlockStructure> block_structure_;
|
||||
std::unique_ptr<CompressedRowBlockStructure> transpose_block_structure_;
|
||||
};
|
||||
|
||||
// A number of algorithms like the SchurEliminator do not need
|
||||
@@ -158,8 +200,10 @@ class CERES_NO_EXPORT BlockSparseMatrixData {
|
||||
const double* values_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
|
||||
const CompressedRowBlockStructure& bs);
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
36
extern/ceres/internal/ceres/block_structure.cc
vendored
36
extern/ceres/internal/ceres/block_structure.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,8 +30,11 @@
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
#include <vector>
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
bool CellLessThan(const Cell& lhs, const Cell& rhs) {
|
||||
if (lhs.block_id == rhs.block_id) {
|
||||
@@ -40,5 +43,28 @@ bool CellLessThan(const Cell& lhs, const Cell& rhs) {
|
||||
return (lhs.block_id < rhs.block_id);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
std::vector<Block> Tail(const std::vector<Block>& blocks, int n) {
|
||||
CHECK_LE(n, blocks.size());
|
||||
std::vector<Block> tail;
|
||||
const int num_blocks = blocks.size();
|
||||
const int start = num_blocks - n;
|
||||
|
||||
int position = 0;
|
||||
tail.reserve(n);
|
||||
for (int i = start; i < num_blocks; ++i) {
|
||||
tail.emplace_back(blocks[i].size, position);
|
||||
position += blocks[i].size;
|
||||
}
|
||||
|
||||
return tail;
|
||||
}
|
||||
|
||||
int SumSquaredSizes(const std::vector<Block>& blocks) {
|
||||
int sum = 0;
|
||||
for (const auto& b : blocks) {
|
||||
sum += b.size * b.size;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
108
extern/ceres/internal/ceres/block_structure.h
vendored
108
extern/ceres/internal/ceres/block_structure.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,9 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
// This file is being included into source files that are compiled with nvcc.
|
||||
// nvcc shipped with ubuntu 20.04 does not support some features of c++17,
|
||||
// including nested namespace definitions
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
@@ -50,15 +53,19 @@ using BlockSize = int32_t;
|
||||
|
||||
struct CERES_NO_EXPORT Block {
|
||||
Block() = default;
|
||||
Block(int size_, int position_) : size(size_), position(position_) {}
|
||||
Block(int size_, int position_) noexcept : size(size_), position(position_) {}
|
||||
|
||||
BlockSize size{-1};
|
||||
int position{-1}; // Position along the row/column.
|
||||
};
|
||||
|
||||
// Two blocks compare equal when both their size and their position match.
inline bool operator==(const Block& left, const Block& right) noexcept {
  if (left.size != right.size) {
    return false;
  }
  return left.position == right.position;
}
|
||||
|
||||
struct CERES_NO_EXPORT Cell {
|
||||
Cell() = default;
|
||||
Cell(int block_id_, int position_)
|
||||
Cell(int block_id_, int position_) noexcept
|
||||
: block_id(block_id_), position(position_) {}
|
||||
|
||||
// Column or row block id, as the case may be.
|
||||
@@ -75,14 +82,95 @@ struct CERES_NO_EXPORT CompressedList {
|
||||
|
||||
// Construct a CompressedList with the cells containing num_cells
|
||||
// entries.
|
||||
explicit CompressedList(int num_cells) : cells(num_cells) {}
|
||||
explicit CompressedList(int num_cells) noexcept : cells(num_cells) {}
|
||||
Block block;
|
||||
std::vector<Cell> cells;
|
||||
// Number of non-zeros in cells of this row block
|
||||
int nnz{-1};
|
||||
// Number of non-zeros in cells of this and every preceding row block in
|
||||
// block-sparse matrix
|
||||
int cumulative_nnz{-1};
|
||||
};
|
||||
|
||||
using CompressedRow = CompressedList;
|
||||
using CompressedColumn = CompressedList;
|
||||
|
||||
// CompressedRowBlockStructure specifies the storage structure of a row block
|
||||
// sparse matrix.
|
||||
//
|
||||
// Consider the following matrix A:
|
||||
// A = [A_11 A_12 ...
|
||||
// A_21 A_22 ...
|
||||
// ...
|
||||
// A_m1 A_m2 ... ]
|
||||
//
|
||||
// A row block sparse matrix is a matrix where the following properties hold:
|
||||
// 1. The number of rows in every block A_ij and A_ik are the same.
|
||||
// 2. The number of columns in every block A_ij and A_kj are the same.
|
||||
// 3. The number of rows in A_ij and A_kj may be different (i != k).
|
||||
// 4. The number of columns in A_ij and A_ik may be different (j != k).
|
||||
// 5. Any block A_ij may be all 0s, in which case the block is not stored.
|
||||
//
|
||||
// The structure of the matrix is stored as follows:
|
||||
//
|
||||
// The `rows' array contains the following information for each row block:
|
||||
// - rows[i].block.size: The number of rows in each block A_ij in the row block.
|
||||
// - rows[i].block.position: The starting row in the full matrix A of the
|
||||
// row block i.
|
||||
// - rows[i].cells[j].block_id: The index into the `cols' array corresponding to
|
||||
// the non-zero blocks A_ij.
|
||||
// - rows[i].cells[j].position: The index in the `values' array for the contents
|
||||
// of block A_ij.
|
||||
//
|
||||
// The `cols' array contains the following information for each column block:
|
||||
// - cols[.].size: The number of columns spanned by the block.
|
||||
// - cols[.].position: The starting column in the full matrix A of the block.
|
||||
//
|
||||
//
|
||||
// Example of a row block sparse matrix:
|
||||
// block_id: | 0 |1|2 |3 |
|
||||
// rows[0]: [ 1 2 0 3 4 0 ]
|
||||
// [ 5 6 0 7 8 0 ]
|
||||
// rows[1]: [ 0 0 9 0 0 0 ]
|
||||
//
|
||||
// This matrix is stored as follows:
|
||||
//
|
||||
// There are four column blocks:
|
||||
// cols[0].size = 2
|
||||
// cols[0].position = 0
|
||||
// cols[1].size = 1
|
||||
// cols[1].position = 2
|
||||
// cols[2].size = 2
|
||||
// cols[2].position = 3
|
||||
// cols[3].size = 1
|
||||
// cols[3].position = 5
|
||||
|
||||
// The first row block spans two rows, starting at row 0:
|
||||
// rows[0].block.size = 2 // This row block spans two rows.
|
||||
// rows[0].block.position = 0 // It starts at row 0.
|
||||
// rows[0] has two cells, at column blocks 0 and 2:
|
||||
// rows[0].cells[0].block_id = 0 // This cell is in column block 0.
|
||||
// rows[0].cells[0].position = 0 // See below for an explanation of this.
|
||||
// rows[0].cells[1].block_id = 2 // This cell is in column block 2.
|
||||
// rows[0].cells[1].position = 4 // See below for an explanation of this.
|
||||
//
|
||||
// The second row block spans one row, starting at row 2:
|
||||
// rows[1].block.size = 1 // This row block spans one row.
|
||||
// rows[1].block.position = 2 // It starts at row 2.
|
||||
// rows[1] has one cell at column block 1:
|
||||
// rows[1].cells[0].block_id = 1 // This cell is in column block 1.
|
||||
// rows[1].cells[0].position = 8 // See below for an explanation of this.
|
||||
//
|
||||
// The values in each block are stored contiguously in row major order.
|
||||
// However, there is no unique way to order the blocks -- it is usually
|
||||
// optimized to promote cache coherent access, e.g. ordering it so that
|
||||
// Jacobian blocks of parameters of the same type are stored nearby.
|
||||
// This is one possible way to store the values of the blocks in a values array:
|
||||
// values = { 1, 2, 5, 6, 3, 4, 7, 8, 9 }
|
||||
// | | | | // The three blocks.
|
||||
// ^ rows[0].cells[0].position = 0
|
||||
// ^ rows[0].cells[1].position = 4
|
||||
// ^ rows[1].cells[0].position = 8
|
||||
struct CERES_NO_EXPORT CompressedRowBlockStructure {
|
||||
std::vector<Block> cols;
|
||||
std::vector<CompressedRow> rows;
|
||||
@@ -93,6 +181,18 @@ struct CERES_NO_EXPORT CompressedColumnBlockStructure {
|
||||
std::vector<CompressedColumn> cols;
|
||||
};
|
||||
|
||||
inline int NumScalarEntries(const std::vector<Block>& blocks) {
|
||||
if (blocks.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto& block = blocks.back();
|
||||
return block.position + block.size;
|
||||
}
|
||||
|
||||
std::vector<Block> Tail(const std::vector<Block>& blocks, int n);
|
||||
int SumSquaredSizes(const std::vector<Block>& blocks);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
|
||||
2
extern/ceres/internal/ceres/c_api.cc
vendored
2
extern/ceres/internal/ceres/c_api.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
15
extern/ceres/internal/ceres/callbacks.cc
vendored
15
extern/ceres/internal/ceres/callbacks.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,15 +32,13 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream> // NO LINT
|
||||
#include <string>
|
||||
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::string;
|
||||
namespace ceres::internal {
|
||||
|
||||
StateUpdatingCallback::StateUpdatingCallback(Program* program,
|
||||
double* parameters)
|
||||
@@ -49,7 +47,7 @@ StateUpdatingCallback::StateUpdatingCallback(Program* program,
|
||||
StateUpdatingCallback::~StateUpdatingCallback() = default;
|
||||
|
||||
CallbackReturnType StateUpdatingCallback::operator()(
|
||||
const IterationSummary& summary) {
|
||||
const IterationSummary& /*summary*/) {
|
||||
program_->StateVectorToParameterBlocks(parameters_);
|
||||
program_->CopyParameterBlockStateToUserState();
|
||||
return SOLVER_CONTINUE;
|
||||
@@ -83,7 +81,7 @@ LoggingCallback::~LoggingCallback() = default;
|
||||
|
||||
CallbackReturnType LoggingCallback::operator()(
|
||||
const IterationSummary& summary) {
|
||||
string output;
|
||||
std::string output;
|
||||
if (minimizer_type == LINE_SEARCH) {
|
||||
output = StringPrintf(
|
||||
"% 4d: f:% 8e d:% 3.2e g:% 3.2e h:% 3.2e s:% 3.2e e:% 3d it:% 3.2e "
|
||||
@@ -127,5 +125,4 @@ CallbackReturnType LoggingCallback::operator()(
|
||||
return SOLVER_CONTINUE;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/callbacks.h
vendored
8
extern/ceres/internal/ceres/callbacks.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/iteration_callback.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
|
||||
@@ -84,7 +83,6 @@ class CERES_NO_EXPORT LoggingCallback final : public IterationCallback {
|
||||
const bool log_to_stdout_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_CALLBACKS_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,16 +33,14 @@
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/graph.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/map_util.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
using IntMap = std::unordered_map<int, int>;
|
||||
using IntSet = std::unordered_set<int>;
|
||||
@@ -59,15 +57,15 @@ class CERES_NO_EXPORT CanonicalViewsClustering {
|
||||
// are assigned to a cluster with id = kInvalidClusterId.
|
||||
void ComputeClustering(const CanonicalViewsClusteringOptions& options,
|
||||
const WeightedGraph<int>& graph,
|
||||
vector<int>* centers,
|
||||
std::vector<int>* centers,
|
||||
IntMap* membership);
|
||||
|
||||
private:
|
||||
void FindValidViews(IntSet* valid_views) const;
|
||||
double ComputeClusteringQualityDifference(const int candidate,
|
||||
const vector<int>& centers) const;
|
||||
double ComputeClusteringQualityDifference(
|
||||
int candidate, const std::vector<int>& centers) const;
|
||||
void UpdateCanonicalViewAssignments(const int canonical_view);
|
||||
void ComputeClusterMembership(const vector<int>& centers,
|
||||
void ComputeClusterMembership(const std::vector<int>& centers,
|
||||
IntMap* membership) const;
|
||||
|
||||
CanonicalViewsClusteringOptions options_;
|
||||
@@ -82,7 +80,7 @@ class CERES_NO_EXPORT CanonicalViewsClustering {
|
||||
void ComputeCanonicalViewsClustering(
|
||||
const CanonicalViewsClusteringOptions& options,
|
||||
const WeightedGraph<int>& graph,
|
||||
vector<int>* centers,
|
||||
std::vector<int>* centers,
|
||||
IntMap* membership) {
|
||||
time_t start_time = time(nullptr);
|
||||
CanonicalViewsClustering cv;
|
||||
@@ -95,7 +93,7 @@ void ComputeCanonicalViewsClustering(
|
||||
void CanonicalViewsClustering::ComputeClustering(
|
||||
const CanonicalViewsClusteringOptions& options,
|
||||
const WeightedGraph<int>& graph,
|
||||
vector<int>* centers,
|
||||
std::vector<int>* centers,
|
||||
IntMap* membership) {
|
||||
options_ = options;
|
||||
CHECK(centers != nullptr);
|
||||
@@ -151,7 +149,7 @@ void CanonicalViewsClustering::FindValidViews(IntSet* valid_views) const {
|
||||
// Computes the difference in the quality score if 'candidate' were
|
||||
// added to the set of canonical views.
|
||||
double CanonicalViewsClustering::ComputeClusteringQualityDifference(
|
||||
const int candidate, const vector<int>& centers) const {
|
||||
const int candidate, const std::vector<int>& centers) const {
|
||||
// View score.
|
||||
double difference =
|
||||
options_.view_score_weight * graph_->VertexWeight(candidate);
|
||||
@@ -198,7 +196,7 @@ void CanonicalViewsClustering::UpdateCanonicalViewAssignments(
|
||||
|
||||
// Assign a cluster id to each view.
|
||||
void CanonicalViewsClustering::ComputeClusterMembership(
|
||||
const vector<int>& centers, IntMap* membership) const {
|
||||
const std::vector<int>& centers, IntMap* membership) const {
|
||||
CHECK(membership != nullptr);
|
||||
membership->clear();
|
||||
|
||||
@@ -222,5 +220,4 @@ void CanonicalViewsClustering::ComputeClusterMembership(
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -48,8 +48,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct CanonicalViewsClusteringOptions;
|
||||
|
||||
@@ -120,8 +119,7 @@ struct CERES_NO_EXPORT CanonicalViewsClusteringOptions {
|
||||
double view_score_weight = 0.0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
2
extern/ceres/internal/ceres/casts.h
vendored
2
extern/ceres/internal/ceres/casts.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
123
extern/ceres/internal/ceres/cgnr_linear_operator.h
vendored
123
extern/ceres/internal/ceres/cgnr_linear_operator.h
vendored
@@ -1,123 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: keir@google.com (Keir Mierle)
|
||||
|
||||
#ifndef CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_
|
||||
#define CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_operator.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
class SparseMatrix;
|
||||
|
||||
// A linear operator which takes a matrix A and a diagonal vector D and
|
||||
// performs products of the form
|
||||
//
|
||||
// (A^T A + D^T D)x
|
||||
//
|
||||
// This is used to implement iterative general sparse linear solving with
|
||||
// conjugate gradients, where A is the Jacobian and D is a regularizing
|
||||
// parameter. A brief proof that D^T D is the correct regularizer:
|
||||
//
|
||||
// Given a regularized least squares problem:
|
||||
//
|
||||
// min ||Ax - b||^2 + ||Dx||^2
|
||||
// x
|
||||
//
|
||||
// First expand into matrix notation:
|
||||
//
|
||||
// (Ax - b)^T (Ax - b) + xD^TDx
|
||||
//
|
||||
// Then multiply out to get:
|
||||
//
|
||||
// = xA^TAx - 2b^T Ax + b^Tb + xD^TDx
|
||||
//
|
||||
// Take the derivative:
|
||||
//
|
||||
// 0 = 2A^TAx - 2A^T b + 2 D^TDx
|
||||
// 0 = A^TAx - A^T b + D^TDx
|
||||
// 0 = (A^TA + D^TD)x - A^T b
|
||||
//
|
||||
// Thus, the symmetric system we need to solve for CGNR is
|
||||
//
|
||||
// Sx = z
|
||||
//
|
||||
// with S = A^TA + D^TD
|
||||
// and z = A^T b
|
||||
//
|
||||
// Note: This class is not thread safe, since it uses some temporary storage.
|
||||
class CERES_NO_EXPORT CgnrLinearOperator final : public LinearOperator {
|
||||
public:
|
||||
CgnrLinearOperator(const LinearOperator& A, const double* D)
|
||||
: A_(A), D_(D), z_(new double[A.num_rows()]) {}
|
||||
|
||||
void RightMultiply(const double* x, double* y) const final {
|
||||
std::fill(z_.get(), z_.get() + A_.num_rows(), 0.0);
|
||||
|
||||
// z = Ax
|
||||
A_.RightMultiply(x, z_.get());
|
||||
|
||||
// y = y + Atz
|
||||
A_.LeftMultiply(z_.get(), y);
|
||||
|
||||
// y = y + DtDx
|
||||
if (D_ != nullptr) {
|
||||
int n = A_.num_cols();
|
||||
VectorRef(y, n).array() +=
|
||||
ConstVectorRef(D_, n).array().square() * ConstVectorRef(x, n).array();
|
||||
}
|
||||
}
|
||||
|
||||
void LeftMultiply(const double* x, double* y) const final {
|
||||
RightMultiply(x, y);
|
||||
}
|
||||
|
||||
int num_rows() const final { return A_.num_cols(); }
|
||||
int num_cols() const final { return A_.num_cols(); }
|
||||
|
||||
private:
|
||||
const LinearOperator& A_;
|
||||
const double* D_;
|
||||
std::unique_ptr<double[]> z_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
#endif // CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_
|
||||
349
extern/ceres/internal/ceres/cgnr_solver.cc
vendored
349
extern/ceres/internal/ceres/cgnr_solver.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,16 +34,92 @@
|
||||
#include <utility>
|
||||
|
||||
#include "ceres/block_jacobi_preconditioner.h"
|
||||
#include "ceres/cgnr_linear_operator.h"
|
||||
#include "ceres/conjugate_gradients_solver.h"
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/subset_preconditioner.h"
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A linear operator which takes a matrix A and a diagonal vector D and
|
||||
// performs products of the form
|
||||
//
|
||||
// (A^T A + D^T D)x
|
||||
//
|
||||
// This is used to implement iterative general sparse linear solving with
|
||||
// conjugate gradients, where A is the Jacobian and D is a regularizing
|
||||
// parameter. A brief proof that D^T D is the correct regularizer:
|
||||
//
|
||||
// Given a regularized least squares problem:
|
||||
//
|
||||
// min ||Ax - b||^2 + ||Dx||^2
|
||||
// x
|
||||
//
|
||||
// First expand into matrix notation:
|
||||
//
|
||||
// (Ax - b)^T (Ax - b) + x^T D^T D x
|
||||
//
|
||||
// Then multiply out to get:
|
||||
//
|
||||
// = x^T A^T A x - 2 b^T A x + b^T b + x^T D^T D x
|
||||
//
|
||||
// Take the derivative:
|
||||
//
|
||||
// 0 = 2A^TAx - 2A^T b + 2 D^TDx
|
||||
// 0 = A^TAx - A^T b + D^TDx
|
||||
// 0 = (A^TA + D^TD)x - A^T b
|
||||
//
|
||||
// Thus, the symmetric system we need to solve for CGNR is
|
||||
//
|
||||
// Sx = z
|
||||
//
|
||||
// with S = A^TA + D^TD
|
||||
// and z = A^T b
|
||||
//
|
||||
// Note: This class is not thread safe, since it uses some temporary storage.
|
||||
class CERES_NO_EXPORT CgnrLinearOperator final
|
||||
: public ConjugateGradientsLinearOperator<Vector> {
|
||||
public:
|
||||
CgnrLinearOperator(const LinearOperator& A,
|
||||
const double* D,
|
||||
ContextImpl* context,
|
||||
int num_threads)
|
||||
: A_(A),
|
||||
D_(D),
|
||||
z_(Vector::Zero(A.num_rows())),
|
||||
context_(context),
|
||||
num_threads_(num_threads) {}
|
||||
|
||||
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
|
||||
// z = Ax
|
||||
// y = y + Atz
|
||||
z_.setZero();
|
||||
A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_);
|
||||
A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_);
|
||||
|
||||
// y = y + DtDx
|
||||
if (D_ != nullptr) {
|
||||
int n = A_.num_cols();
|
||||
ParallelAssign(
|
||||
context_,
|
||||
num_threads_,
|
||||
y,
|
||||
y.array() + ConstVectorRef(D_, n).array().square() * x.array());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const LinearOperator& A_;
|
||||
const double* D_;
|
||||
Vector z_;
|
||||
|
||||
ContextImpl* context_;
|
||||
int num_threads_;
|
||||
};
|
||||
|
||||
CgnrSolver::CgnrSolver(LinearSolver::Options options)
|
||||
: options_(std::move(options)) {
|
||||
@@ -57,7 +133,14 @@ CgnrSolver::CgnrSolver(LinearSolver::Options options)
|
||||
}
|
||||
}
|
||||
|
||||
CgnrSolver::~CgnrSolver() = default;
|
||||
CgnrSolver::~CgnrSolver() {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (scratch_[i]) {
|
||||
delete scratch_[i];
|
||||
scratch_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LinearSolver::Summary CgnrSolver::SolveImpl(
|
||||
BlockSparseMatrix* A,
|
||||
@@ -65,48 +148,244 @@ LinearSolver::Summary CgnrSolver::SolveImpl(
|
||||
const LinearSolver::PerSolveOptions& per_solve_options,
|
||||
double* x) {
|
||||
EventLogger event_logger("CgnrSolver::Solve");
|
||||
|
||||
// Form z = Atb.
|
||||
Vector z(A->num_cols());
|
||||
z.setZero();
|
||||
A->LeftMultiply(b, z.data());
|
||||
|
||||
if (!preconditioner_) {
|
||||
Preconditioner::Options preconditioner_options;
|
||||
preconditioner_options.type = options_.preconditioner_type;
|
||||
preconditioner_options.subset_preconditioner_start_row_block =
|
||||
options_.subset_preconditioner_start_row_block;
|
||||
preconditioner_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
preconditioner_options.ordering_type = options_.ordering_type;
|
||||
preconditioner_options.num_threads = options_.num_threads;
|
||||
preconditioner_options.context = options_.context;
|
||||
|
||||
if (options_.preconditioner_type == JACOBI) {
|
||||
preconditioner_ = std::make_unique<BlockJacobiPreconditioner>(*A);
|
||||
preconditioner_ = std::make_unique<BlockSparseJacobiPreconditioner>(
|
||||
preconditioner_options, *A);
|
||||
} else if (options_.preconditioner_type == SUBSET) {
|
||||
Preconditioner::Options preconditioner_options;
|
||||
preconditioner_options.type = SUBSET;
|
||||
preconditioner_options.subset_preconditioner_start_row_block =
|
||||
options_.subset_preconditioner_start_row_block;
|
||||
preconditioner_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
preconditioner_options.use_postordering = options_.use_postordering;
|
||||
preconditioner_options.num_threads = options_.num_threads;
|
||||
preconditioner_options.context = options_.context;
|
||||
preconditioner_ =
|
||||
std::make_unique<SubsetPreconditioner>(preconditioner_options, *A);
|
||||
} else {
|
||||
preconditioner_ = std::make_unique<IdentityPreconditioner>(A->num_cols());
|
||||
}
|
||||
}
|
||||
preconditioner_->Update(*A, per_solve_options.D);
|
||||
|
||||
if (preconditioner_) {
|
||||
preconditioner_->Update(*A, per_solve_options.D);
|
||||
ConjugateGradientsSolverOptions cg_options;
|
||||
cg_options.min_num_iterations = options_.min_num_iterations;
|
||||
cg_options.max_num_iterations = options_.max_num_iterations;
|
||||
cg_options.residual_reset_period = options_.residual_reset_period;
|
||||
cg_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
cg_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
cg_options.context = options_.context;
|
||||
cg_options.num_threads = options_.num_threads;
|
||||
|
||||
// lhs = AtA + DtD
|
||||
CgnrLinearOperator lhs(
|
||||
*A, per_solve_options.D, options_.context, options_.num_threads);
|
||||
// rhs = Atb.
|
||||
Vector rhs(A->num_cols());
|
||||
rhs.setZero();
|
||||
A->LeftMultiplyAndAccumulate(
|
||||
b, rhs.data(), options_.context, options_.num_threads);
|
||||
|
||||
cg_solution_ = Vector::Zero(A->num_cols());
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (scratch_[i] == nullptr) {
|
||||
scratch_[i] = new Vector(A->num_cols());
|
||||
}
|
||||
}
|
||||
|
||||
LinearSolver::PerSolveOptions cg_per_solve_options = per_solve_options;
|
||||
cg_per_solve_options.preconditioner = preconditioner_.get();
|
||||
|
||||
// Solve (AtA + DtD)x = z (= Atb).
|
||||
VectorRef(x, A->num_cols()).setZero();
|
||||
CgnrLinearOperator lhs(*A, per_solve_options.D);
|
||||
event_logger.AddEvent("Setup");
|
||||
|
||||
ConjugateGradientsSolver conjugate_gradient_solver(options_);
|
||||
LinearSolver::Summary summary =
|
||||
conjugate_gradient_solver.Solve(&lhs, z.data(), cg_per_solve_options, x);
|
||||
LinearOperatorAdapter preconditioner(*preconditioner_);
|
||||
auto summary = ConjugateGradientsSolver(
|
||||
cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_);
|
||||
VectorRef(x, A->num_cols()) = cg_solution_;
|
||||
event_logger.AddEvent("Solve");
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
// A linear operator which takes a matrix A and a diagonal vector D and
|
||||
// performs products of the form
|
||||
//
|
||||
// (A^T A + D^T D)x
|
||||
//
|
||||
// This is used to implement iterative general sparse linear solving with
|
||||
// conjugate gradients, where A is the Jacobian and D is a regularizing
|
||||
// parameter. A brief proof is included in the comment on CgnrLinearOperator.
|
||||
class CERES_NO_EXPORT CudaCgnrLinearOperator final
|
||||
: public ConjugateGradientsLinearOperator<CudaVector> {
|
||||
public:
|
||||
CudaCgnrLinearOperator(CudaSparseMatrix& A,
|
||||
const CudaVector& D,
|
||||
CudaVector* z)
|
||||
: A_(A), D_(D), z_(z) {}
|
||||
|
||||
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
|
||||
// z = Ax
|
||||
z_->SetZero();
|
||||
A_.RightMultiplyAndAccumulate(x, z_);
|
||||
|
||||
// y = y + Atz
|
||||
// = y + AtAx
|
||||
A_.LeftMultiplyAndAccumulate(*z_, &y);
|
||||
|
||||
// y = y + DtDx
|
||||
y.DtDxpy(D_, x);
|
||||
}
|
||||
|
||||
private:
|
||||
CudaSparseMatrix& A_;
|
||||
const CudaVector& D_;
|
||||
CudaVector* z_ = nullptr;
|
||||
};
|
||||
|
||||
class CERES_NO_EXPORT CudaIdentityPreconditioner final
|
||||
: public CudaPreconditioner {
|
||||
public:
|
||||
void Update(const CompressedRowSparseMatrix& A, const double* D) final {}
|
||||
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
|
||||
y.Axpby(1.0, x, 1.0);
|
||||
}
|
||||
};
|
||||
|
||||
// This class wraps the existing CPU Jacobi preconditioner, caches the structure
|
||||
// of the block diagonal, and for each CGNR solve updates the values on the CPU
|
||||
// and then copies them over to the GPU.
|
||||
class CERES_NO_EXPORT CudaJacobiPreconditioner final
|
||||
: public CudaPreconditioner {
|
||||
public:
|
||||
explicit CudaJacobiPreconditioner(Preconditioner::Options options,
|
||||
const CompressedRowSparseMatrix& A)
|
||||
: options_(std::move(options)),
|
||||
cpu_preconditioner_(options_, A),
|
||||
m_(options_.context, cpu_preconditioner_.matrix()) {}
|
||||
~CudaJacobiPreconditioner() = default;
|
||||
|
||||
void Update(const CompressedRowSparseMatrix& A, const double* D) final {
|
||||
cpu_preconditioner_.Update(A, D);
|
||||
m_.CopyValuesFromCpu(cpu_preconditioner_.matrix());
|
||||
}
|
||||
|
||||
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final {
|
||||
m_.RightMultiplyAndAccumulate(x, &y);
|
||||
}
|
||||
|
||||
private:
|
||||
Preconditioner::Options options_;
|
||||
BlockCRSJacobiPreconditioner cpu_preconditioner_;
|
||||
CudaSparseMatrix m_;
|
||||
};
|
||||
|
||||
CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options)
|
||||
: options_(std::move(options)) {}
|
||||
|
||||
CudaCgnrSolver::~CudaCgnrSolver() {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (scratch_[i]) {
|
||||
delete scratch_[i];
|
||||
scratch_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<CudaCgnrSolver> CudaCgnrSolver::Create(
|
||||
LinearSolver::Options options, std::string* error) {
|
||||
CHECK(error != nullptr);
|
||||
if (options.preconditioner_type != IDENTITY &&
|
||||
options.preconditioner_type != JACOBI) {
|
||||
*error =
|
||||
"CudaCgnrSolver does not support preconditioner type " +
|
||||
std::string(PreconditionerTypeToString(options.preconditioner_type)) +
|
||||
". ";
|
||||
return nullptr;
|
||||
}
|
||||
CHECK(options.context->IsCudaInitialized())
|
||||
<< "CudaCgnrSolver requires CUDA initialization.";
|
||||
auto solver = std::make_unique<CudaCgnrSolver>(options);
|
||||
return solver;
|
||||
}
|
||||
|
||||
void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
|
||||
const double* b,
|
||||
const double* D) {
|
||||
if (A_ == nullptr) {
|
||||
// Assume structure is not cached, do an initialization and structural copy.
|
||||
A_ = std::make_unique<CudaSparseMatrix>(options_.context, A);
|
||||
b_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
|
||||
x_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
|
||||
Atb_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
|
||||
Ax_ = std::make_unique<CudaVector>(options_.context, A.num_rows());
|
||||
D_ = std::make_unique<CudaVector>(options_.context, A.num_cols());
|
||||
|
||||
Preconditioner::Options preconditioner_options;
|
||||
preconditioner_options.type = options_.preconditioner_type;
|
||||
preconditioner_options.subset_preconditioner_start_row_block =
|
||||
options_.subset_preconditioner_start_row_block;
|
||||
preconditioner_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
preconditioner_options.ordering_type = options_.ordering_type;
|
||||
preconditioner_options.num_threads = options_.num_threads;
|
||||
preconditioner_options.context = options_.context;
|
||||
|
||||
if (options_.preconditioner_type == JACOBI) {
|
||||
preconditioner_ =
|
||||
std::make_unique<CudaJacobiPreconditioner>(preconditioner_options, A);
|
||||
} else {
|
||||
preconditioner_ = std::make_unique<CudaIdentityPreconditioner>();
|
||||
}
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
scratch_[i] = new CudaVector(options_.context, A.num_cols());
|
||||
}
|
||||
} else {
|
||||
// Assume structure is cached, do a value copy.
|
||||
A_->CopyValuesFromCpu(A);
|
||||
}
|
||||
b_->CopyFromCpu(ConstVectorRef(b, A.num_rows()));
|
||||
D_->CopyFromCpu(ConstVectorRef(D, A.num_cols()));
|
||||
}
|
||||
|
||||
LinearSolver::Summary CudaCgnrSolver::SolveImpl(
|
||||
CompressedRowSparseMatrix* A,
|
||||
const double* b,
|
||||
const LinearSolver::PerSolveOptions& per_solve_options,
|
||||
double* x) {
|
||||
EventLogger event_logger("CudaCgnrSolver::Solve");
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
|
||||
|
||||
CpuToGpuTransfer(*A, b, per_solve_options.D);
|
||||
event_logger.AddEvent("CPU to GPU Transfer");
|
||||
preconditioner_->Update(*A, per_solve_options.D);
|
||||
event_logger.AddEvent("Preconditioner Update");
|
||||
|
||||
// Form z = Atb.
|
||||
Atb_->SetZero();
|
||||
A_->LeftMultiplyAndAccumulate(*b_, Atb_.get());
|
||||
|
||||
// Solve (AtA + DtD)x = z (= Atb).
|
||||
x_->SetZero();
|
||||
CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get());
|
||||
|
||||
event_logger.AddEvent("Setup");
|
||||
|
||||
ConjugateGradientsSolverOptions cg_options;
|
||||
cg_options.min_num_iterations = options_.min_num_iterations;
|
||||
cg_options.max_num_iterations = options_.max_num_iterations;
|
||||
cg_options.residual_reset_period = options_.residual_reset_period;
|
||||
cg_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
cg_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
|
||||
summary = ConjugateGradientsSolver(
|
||||
cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_);
|
||||
x_->CopyTo(x);
|
||||
event_logger.AddEvent("Solve");
|
||||
return summary;
|
||||
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
53
extern/ceres/internal/ceres/cgnr_solver.h
vendored
53
extern/ceres/internal/ceres/cgnr_solver.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,11 +33,13 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/conjugate_gradients_solver.h"
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Preconditioner;
|
||||
|
||||
@@ -65,9 +67,50 @@ class CERES_NO_EXPORT CgnrSolver final : public BlockSparseMatrixSolver {
|
||||
private:
|
||||
const LinearSolver::Options options_;
|
||||
std::unique_ptr<Preconditioner> preconditioner_;
|
||||
Vector cg_solution_;
|
||||
Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
#ifndef CERES_NO_CUDA
|
||||
class CudaPreconditioner : public ConjugateGradientsLinearOperator<CudaVector> {
|
||||
public:
|
||||
virtual void Update(const CompressedRowSparseMatrix& A, const double* D) = 0;
|
||||
virtual ~CudaPreconditioner() = default;
|
||||
};
|
||||
|
||||
// A Cuda-accelerated version of CgnrSolver.
|
||||
// This solver assumes that the sparsity structure of A remains constant for its
|
||||
// lifetime.
|
||||
class CERES_NO_EXPORT CudaCgnrSolver final
|
||||
: public CompressedRowSparseMatrixSolver {
|
||||
public:
|
||||
explicit CudaCgnrSolver(LinearSolver::Options options);
|
||||
static std::unique_ptr<CudaCgnrSolver> Create(LinearSolver::Options options,
|
||||
std::string* error);
|
||||
~CudaCgnrSolver() override;
|
||||
|
||||
Summary SolveImpl(CompressedRowSparseMatrix* A,
|
||||
const double* b,
|
||||
const LinearSolver::PerSolveOptions& per_solve_options,
|
||||
double* x) final;
|
||||
|
||||
private:
|
||||
void CpuToGpuTransfer(const CompressedRowSparseMatrix& A,
|
||||
const double* b,
|
||||
const double* D);
|
||||
|
||||
LinearSolver::Options options_;
|
||||
std::unique_ptr<CudaSparseMatrix> A_;
|
||||
std::unique_ptr<CudaVector> b_;
|
||||
std::unique_ptr<CudaVector> x_;
|
||||
std::unique_ptr<CudaVector> Atb_;
|
||||
std::unique_ptr<CudaVector> Ax_;
|
||||
std::unique_ptr<CudaVector> D_;
|
||||
std::unique_ptr<CudaPreconditioner> preconditioner_;
|
||||
CudaVector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
|
||||
};
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_CGNR_SOLVER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,30 +36,21 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
using std::vector;
|
||||
|
||||
void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows,
|
||||
const int* scalar_cols,
|
||||
const vector<int>& row_blocks,
|
||||
const vector<int>& col_blocks,
|
||||
vector<int>* block_rows,
|
||||
vector<int>* block_cols) {
|
||||
void CompressedColumnScalarMatrixToBlockMatrix(
|
||||
const int* scalar_rows,
|
||||
const int* scalar_cols,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::vector<int>* block_rows,
|
||||
std::vector<int>* block_cols) {
|
||||
CHECK(block_rows != nullptr);
|
||||
CHECK(block_cols != nullptr);
|
||||
block_rows->clear();
|
||||
block_cols->clear();
|
||||
const int num_row_blocks = row_blocks.size();
|
||||
const int num_col_blocks = col_blocks.size();
|
||||
|
||||
vector<int> row_block_starts(num_row_blocks);
|
||||
for (int i = 0, cursor = 0; i < num_row_blocks; ++i) {
|
||||
row_block_starts[i] = cursor;
|
||||
cursor += row_blocks[i];
|
||||
}
|
||||
|
||||
// This loop extracts the block sparsity of the scalar sparse matrix
|
||||
// It does so by iterating over the columns, but only considering
|
||||
// the columns corresponding to the first element of each column
|
||||
@@ -71,52 +62,46 @@ void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows,
|
||||
for (int col_block = 0; col_block < num_col_blocks; ++col_block) {
|
||||
int column_size = 0;
|
||||
for (int idx = scalar_cols[c]; idx < scalar_cols[c + 1]; ++idx) {
|
||||
vector<int>::const_iterator it = std::lower_bound(
|
||||
row_block_starts.begin(), row_block_starts.end(), scalar_rows[idx]);
|
||||
// Since we are using lower_bound, it will return the row id
|
||||
// where the row block starts. For everything but the first row
|
||||
// of the block, where these values will be the same, we can
|
||||
// skip, as we only need the first row to detect the presence of
|
||||
// the block.
|
||||
auto it = std::lower_bound(row_blocks.begin(),
|
||||
row_blocks.end(),
|
||||
scalar_rows[idx],
|
||||
[](const Block& block, double value) {
|
||||
return block.position < value;
|
||||
});
|
||||
// Since we are using lower_bound, it will return the row id where the row
|
||||
// block starts. For everything but the first row of the block, where
|
||||
// these values will be the same, we can skip, as we only need the first
|
||||
// row to detect the presence of the block.
|
||||
//
|
||||
// For rows all but the first row in the last row block,
|
||||
// lower_bound will return row_block_starts.end(), but those can
|
||||
// be skipped like the rows in other row blocks too.
|
||||
if (it == row_block_starts.end() || *it != scalar_rows[idx]) {
|
||||
// For all rows but the first row in the last row block, lower_bound will
|
||||
// return row_blocks.end(), but those can be skipped like the rows in
|
||||
// other row blocks too.
|
||||
if (it == row_blocks.end() || it->position != scalar_rows[idx]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
block_rows->push_back(it - row_block_starts.begin());
|
||||
block_rows->push_back(it - row_blocks.begin());
|
||||
++column_size;
|
||||
}
|
||||
block_cols->push_back(block_cols->back() + column_size);
|
||||
c += col_blocks[col_block];
|
||||
c += col_blocks[col_block].size;
|
||||
}
|
||||
}
|
||||
|
||||
void BlockOrderingToScalarOrdering(const vector<int>& blocks,
|
||||
const vector<int>& block_ordering,
|
||||
vector<int>* scalar_ordering) {
|
||||
void BlockOrderingToScalarOrdering(const std::vector<Block>& blocks,
|
||||
const std::vector<int>& block_ordering,
|
||||
std::vector<int>* scalar_ordering) {
|
||||
CHECK_EQ(blocks.size(), block_ordering.size());
|
||||
const int num_blocks = blocks.size();
|
||||
|
||||
// block_starts = [0, block1, block1 + block2 ..]
|
||||
vector<int> block_starts(num_blocks);
|
||||
for (int i = 0, cursor = 0; i < num_blocks; ++i) {
|
||||
block_starts[i] = cursor;
|
||||
cursor += blocks[i];
|
||||
}
|
||||
|
||||
scalar_ordering->resize(block_starts.back() + blocks.back());
|
||||
scalar_ordering->resize(NumScalarEntries(blocks));
|
||||
int cursor = 0;
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
const int block_id = block_ordering[i];
|
||||
const int block_size = blocks[block_id];
|
||||
int block_position = block_starts[block_id];
|
||||
const int block_size = blocks[block_id].size;
|
||||
int block_position = blocks[block_id].position;
|
||||
for (int j = 0; j < block_size; ++j) {
|
||||
(*scalar_ordering)[cursor++] = block_position++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,11 +34,11 @@
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Extract the block sparsity pattern of the scalar compressed columns
|
||||
// matrix and return it in compressed column form. The compressed
|
||||
@@ -53,8 +53,8 @@ namespace internal {
|
||||
CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix(
|
||||
const int* scalar_rows,
|
||||
const int* scalar_cols,
|
||||
const std::vector<int>& row_blocks,
|
||||
const std::vector<int>& col_blocks,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::vector<int>* block_rows,
|
||||
std::vector<int>* block_cols);
|
||||
|
||||
@@ -62,7 +62,7 @@ CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix(
|
||||
// the corresponding "scalar" ordering, where the scalar ordering is of
|
||||
// size sum(blocks).
|
||||
CERES_NO_EXPORT void BlockOrderingToScalarOrdering(
|
||||
const std::vector<int>& blocks,
|
||||
const std::vector<Block>& blocks,
|
||||
const std::vector<int>& block_ordering,
|
||||
std::vector<int>* scalar_ordering);
|
||||
|
||||
@@ -141,8 +141,7 @@ void SolveRTRWithSparseRHS(IntegerType num_cols,
|
||||
SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,44 +44,42 @@
|
||||
#include "ceres/residual_block.h"
|
||||
#include "ceres/scratch_evaluate_preparer.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::adjacent_find;
|
||||
using std::make_pair;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
|
||||
namespace ceres::internal {
|
||||
void CompressedRowJacobianWriter::PopulateJacobianRowAndColumnBlockVectors(
|
||||
const Program* program, CompressedRowSparseMatrix* jacobian) {
|
||||
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
|
||||
vector<int>& col_blocks = *(jacobian->mutable_col_blocks());
|
||||
const auto& parameter_blocks = program->parameter_blocks();
|
||||
auto& col_blocks = *(jacobian->mutable_col_blocks());
|
||||
col_blocks.resize(parameter_blocks.size());
|
||||
int col_pos = 0;
|
||||
for (int i = 0; i < parameter_blocks.size(); ++i) {
|
||||
col_blocks[i] = parameter_blocks[i]->TangentSize();
|
||||
col_blocks[i].size = parameter_blocks[i]->TangentSize();
|
||||
col_blocks[i].position = col_pos;
|
||||
col_pos += col_blocks[i].size;
|
||||
}
|
||||
|
||||
const vector<ResidualBlock*>& residual_blocks = program->residual_blocks();
|
||||
vector<int>& row_blocks = *(jacobian->mutable_row_blocks());
|
||||
const auto& residual_blocks = program->residual_blocks();
|
||||
auto& row_blocks = *(jacobian->mutable_row_blocks());
|
||||
row_blocks.resize(residual_blocks.size());
|
||||
int row_pos = 0;
|
||||
for (int i = 0; i < residual_blocks.size(); ++i) {
|
||||
row_blocks[i] = residual_blocks[i]->NumResiduals();
|
||||
row_blocks[i].size = residual_blocks[i]->NumResiduals();
|
||||
row_blocks[i].position = row_pos;
|
||||
row_pos += row_blocks[i].size;
|
||||
}
|
||||
}
|
||||
|
||||
void CompressedRowJacobianWriter::GetOrderedParameterBlocks(
|
||||
const Program* program,
|
||||
int residual_id,
|
||||
vector<pair<int, int>>* evaluated_jacobian_blocks) {
|
||||
const ResidualBlock* residual_block = program->residual_blocks()[residual_id];
|
||||
std::vector<std::pair<int, int>>* evaluated_jacobian_blocks) {
|
||||
auto residual_block = program->residual_blocks()[residual_id];
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
const ParameterBlock* parameter_block =
|
||||
residual_block->parameter_blocks()[j];
|
||||
auto parameter_block = residual_block->parameter_blocks()[j];
|
||||
if (!parameter_block->IsConstant()) {
|
||||
evaluated_jacobian_blocks->push_back(
|
||||
make_pair(parameter_block->index(), j));
|
||||
std::make_pair(parameter_block->index(), j));
|
||||
}
|
||||
}
|
||||
std::sort(evaluated_jacobian_blocks->begin(),
|
||||
@@ -90,20 +88,29 @@ void CompressedRowJacobianWriter::GetOrderedParameterBlocks(
|
||||
|
||||
std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
const {
|
||||
const vector<ResidualBlock*>& residual_blocks = program_->residual_blocks();
|
||||
const auto& residual_blocks = program_->residual_blocks();
|
||||
|
||||
int total_num_residuals = program_->NumResiduals();
|
||||
int total_num_effective_parameters = program_->NumEffectiveParameters();
|
||||
const int total_num_residuals = program_->NumResiduals();
|
||||
const int total_num_effective_parameters = program_->NumEffectiveParameters();
|
||||
|
||||
// Count the number of jacobian nonzeros.
|
||||
int num_jacobian_nonzeros = 0;
|
||||
//
|
||||
// We use an unsigned int here, so that we can compare it to INT_MAX without
|
||||
// triggering overflow behaviour.
|
||||
unsigned int num_jacobian_nonzeros = total_num_effective_parameters;
|
||||
for (auto* residual_block : residual_blocks) {
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
|
||||
auto parameter_block = residual_block->parameter_blocks()[j];
|
||||
if (!parameter_block->IsConstant()) {
|
||||
num_jacobian_nonzeros += num_residuals * parameter_block->TangentSize();
|
||||
if (num_jacobian_nonzeros > std::numeric_limits<int>::max()) {
|
||||
LOG(ERROR) << "Unable to create Jacobian matrix: Too many entries in "
|
||||
"the Jacobian matrix. num_jacobian_nonzeros = "
|
||||
<< num_jacobian_nonzeros;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -112,14 +119,14 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
// Allocate more space than needed to store the jacobian so that when the LM
|
||||
// algorithm adds the diagonal, no reallocation is necessary. This reduces
|
||||
// peak memory usage significantly.
|
||||
std::unique_ptr<CompressedRowSparseMatrix> jacobian =
|
||||
std::make_unique<CompressedRowSparseMatrix>(
|
||||
total_num_residuals,
|
||||
total_num_effective_parameters,
|
||||
num_jacobian_nonzeros + total_num_effective_parameters);
|
||||
auto jacobian = std::make_unique<CompressedRowSparseMatrix>(
|
||||
total_num_residuals,
|
||||
total_num_effective_parameters,
|
||||
static_cast<int>(num_jacobian_nonzeros));
|
||||
|
||||
// At this stage, the CompressedRowSparseMatrix is in an invalid state. But this
|
||||
// seems to be the only way to construct it without doing a memory copy.
|
||||
// At this stage, the CompressedRowSparseMatrix is in an invalid state. But
|
||||
// this seems to be the only way to construct it without doing a memory
|
||||
// copy.
|
||||
int* rows = jacobian->mutable_rows();
|
||||
int* cols = jacobian->mutable_cols();
|
||||
|
||||
@@ -131,9 +138,9 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
// Count the number of derivatives for a row of this residual block and
|
||||
// build a list of active parameter block indices.
|
||||
int num_derivatives = 0;
|
||||
vector<int> parameter_indices;
|
||||
std::vector<int> parameter_indices;
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
|
||||
auto parameter_block = residual_block->parameter_blocks()[j];
|
||||
if (!parameter_block->IsConstant()) {
|
||||
parameter_indices.push_back(parameter_block->index());
|
||||
num_derivatives += parameter_block->TangentSize();
|
||||
@@ -141,12 +148,12 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
}
|
||||
|
||||
// Sort the parameters by their position in the state vector.
|
||||
sort(parameter_indices.begin(), parameter_indices.end());
|
||||
std::sort(parameter_indices.begin(), parameter_indices.end());
|
||||
if (adjacent_find(parameter_indices.begin(), parameter_indices.end()) !=
|
||||
parameter_indices.end()) {
|
||||
std::string parameter_block_description;
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
ParameterBlock* parameter_block = residual_block->parameter_blocks()[j];
|
||||
auto parameter_block = residual_block->parameter_blocks()[j];
|
||||
parameter_block_description += parameter_block->ToString() + "\n";
|
||||
}
|
||||
LOG(FATAL) << "Ceres internal error: "
|
||||
@@ -168,15 +175,13 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
// values are updated.
|
||||
int col_pos = 0;
|
||||
for (int parameter_index : parameter_indices) {
|
||||
ParameterBlock* parameter_block =
|
||||
program_->parameter_blocks()[parameter_index];
|
||||
auto parameter_block = program_->parameter_blocks()[parameter_index];
|
||||
const int parameter_block_size = parameter_block->TangentSize();
|
||||
|
||||
for (int r = 0; r < num_residuals; ++r) {
|
||||
// This is the position in the values array of the jacobian where this
|
||||
// row of the jacobian block should go.
|
||||
const int column_block_begin = rows[row_pos + r] + col_pos;
|
||||
|
||||
for (int c = 0; c < parameter_block_size; ++c) {
|
||||
cols[column_block_begin + c] = parameter_block->delta_offset() + c;
|
||||
}
|
||||
@@ -185,7 +190,8 @@ std::unique_ptr<SparseMatrix> CompressedRowJacobianWriter::CreateJacobian()
|
||||
}
|
||||
row_pos += num_residuals;
|
||||
}
|
||||
CHECK_EQ(num_jacobian_nonzeros, rows[total_num_residuals]);
|
||||
CHECK_EQ(num_jacobian_nonzeros - total_num_effective_parameters,
|
||||
rows[total_num_residuals]);
|
||||
|
||||
PopulateJacobianRowAndColumnBlockVectors(program_, jacobian.get());
|
||||
|
||||
@@ -201,11 +207,10 @@ void CompressedRowJacobianWriter::Write(int residual_id,
|
||||
double* jacobian_values = jacobian->mutable_values();
|
||||
const int* jacobian_rows = jacobian->rows();
|
||||
|
||||
const ResidualBlock* residual_block =
|
||||
program_->residual_blocks()[residual_id];
|
||||
auto residual_block = program_->residual_blocks()[residual_id];
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
|
||||
vector<pair<int, int>> evaluated_jacobian_blocks;
|
||||
std::vector<std::pair<int, int>> evaluated_jacobian_blocks;
|
||||
GetOrderedParameterBlocks(program_, residual_id, &evaluated_jacobian_blocks);
|
||||
|
||||
// Where in the current row does the jacobian for a parameter block begin.
|
||||
@@ -214,7 +219,7 @@ void CompressedRowJacobianWriter::Write(int residual_id,
|
||||
// Iterate over the jacobian blocks in increasing order of their
|
||||
// positions in the reduced parameter vector.
|
||||
for (auto& evaluated_jacobian_block : evaluated_jacobian_blocks) {
|
||||
const ParameterBlock* parameter_block =
|
||||
auto parameter_block =
|
||||
program_->parameter_blocks()[evaluated_jacobian_block.first];
|
||||
const int argument = evaluated_jacobian_block.second;
|
||||
const int parameter_block_size = parameter_block->TangentSize();
|
||||
@@ -238,5 +243,4 @@ void CompressedRowJacobianWriter::Write(int residual_id,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,8 +41,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/scratch_evaluate_preparer.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
class Program;
|
||||
@@ -107,7 +106,6 @@ class CERES_NO_EXPORT CompressedRowJacobianWriter {
|
||||
Program* program_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_COMPRESSED_ROW_JACOBIAN_WRITER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,25 +31,24 @@
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/crs_matrix.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/random.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
|
||||
// Helper functor used by the constructor for reordering the contents
|
||||
// of a TripletSparseMatrix. This comparator assumes thay there are no
|
||||
// of a TripletSparseMatrix. This comparator assumes that there are no
|
||||
// duplicates in the pair of arrays rows and cols, i.e., there are no
|
||||
// indices i and j (not equal to each other) s.t.
|
||||
//
|
||||
@@ -119,10 +118,12 @@ void TransposeForCompressedRowSparseStructure(const int num_rows,
|
||||
transpose_rows[0] = 0;
|
||||
}
|
||||
|
||||
template <class RandomNormalFunctor>
|
||||
void AddRandomBlock(const int num_rows,
|
||||
const int num_cols,
|
||||
const int row_block_begin,
|
||||
const int col_block_begin,
|
||||
RandomNormalFunctor&& randn,
|
||||
std::vector<int>* rows,
|
||||
std::vector<int>* cols,
|
||||
std::vector<double>* values) {
|
||||
@@ -130,19 +131,21 @@ void AddRandomBlock(const int num_rows,
|
||||
for (int c = 0; c < num_cols; ++c) {
|
||||
rows->push_back(row_block_begin + r);
|
||||
cols->push_back(col_block_begin + c);
|
||||
values->push_back(RandNormal());
|
||||
values->push_back(randn());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class RandomNormalFunctor>
|
||||
void AddSymmetricRandomBlock(const int num_rows,
|
||||
const int row_block_begin,
|
||||
RandomNormalFunctor&& randn,
|
||||
std::vector<int>* rows,
|
||||
std::vector<int>* cols,
|
||||
std::vector<double>* values) {
|
||||
for (int r = 0; r < num_rows; ++r) {
|
||||
for (int c = r; c < num_rows; ++c) {
|
||||
const double v = RandNormal();
|
||||
const double v = randn();
|
||||
rows->push_back(row_block_begin + r);
|
||||
cols->push_back(row_block_begin + c);
|
||||
values->push_back(v);
|
||||
@@ -163,7 +166,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(int num_rows,
|
||||
int max_num_nonzeros) {
|
||||
num_rows_ = num_rows;
|
||||
num_cols_ = num_cols;
|
||||
storage_type_ = UNSYMMETRIC;
|
||||
storage_type_ = StorageType::UNSYMMETRIC;
|
||||
rows_.resize(num_rows + 1, 0);
|
||||
cols_.resize(max_num_nonzeros, 0);
|
||||
values_.resize(max_num_nonzeros, 0.0);
|
||||
@@ -202,7 +205,7 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix(
|
||||
}
|
||||
|
||||
// index is the list of indices into the TripletSparseMatrix input.
|
||||
vector<int> index(input.num_nonzeros(), 0);
|
||||
std::vector<int> index(input.num_nonzeros(), 0);
|
||||
for (int i = 0; i < input.num_nonzeros(); ++i) {
|
||||
index[i] = i;
|
||||
}
|
||||
@@ -217,9 +220,8 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix(
|
||||
input.num_nonzeros() * sizeof(int) + // NOLINT
|
||||
input.num_nonzeros() * sizeof(double)); // NOLINT
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix> output =
|
||||
std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_rows, num_cols, input.num_nonzeros());
|
||||
auto output = std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_rows, num_cols, input.num_nonzeros());
|
||||
|
||||
if (num_rows == 0) {
|
||||
// No data to copy.
|
||||
@@ -255,7 +257,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(const double* diagonal,
|
||||
|
||||
num_rows_ = num_rows;
|
||||
num_cols_ = num_rows;
|
||||
storage_type_ = UNSYMMETRIC;
|
||||
storage_type_ = StorageType::UNSYMMETRIC;
|
||||
rows_.resize(num_rows + 1);
|
||||
cols_.resize(num_rows);
|
||||
values_.resize(num_rows);
|
||||
@@ -276,22 +278,37 @@ void CompressedRowSparseMatrix::SetZero() {
|
||||
std::fill(values_.begin(), values_.end(), 0);
|
||||
}
|
||||
|
||||
// TODO(sameeragarwal): Make RightMultiply and LeftMultiply
|
||||
// block-aware for higher performance.
|
||||
void CompressedRowSparseMatrix::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
// TODO(sameeragarwal): Make RightMultiplyAndAccumulate and
|
||||
// LeftMultiplyAndAccumulate block-aware for higher performance.
|
||||
void CompressedRowSparseMatrix::RightMultiplyAndAccumulate(
|
||||
const double* x, double* y, ContextImpl* context, int num_threads) const {
|
||||
if (storage_type_ != StorageType::UNSYMMETRIC) {
|
||||
RightMultiplyAndAccumulate(x, y);
|
||||
return;
|
||||
}
|
||||
|
||||
auto values = values_.data();
|
||||
auto rows = rows_.data();
|
||||
auto cols = cols_.data();
|
||||
|
||||
ParallelFor(
|
||||
context, 0, num_rows_, num_threads, [values, rows, cols, x, y](int row) {
|
||||
for (int idx = rows[row]; idx < rows[row + 1]; ++idx) {
|
||||
const int c = cols[idx];
|
||||
const double v = values[idx];
|
||||
y[row] += v * x[c];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void CompressedRowSparseMatrix::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
|
||||
if (storage_type_ == UNSYMMETRIC) {
|
||||
for (int r = 0; r < num_rows_; ++r) {
|
||||
for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) {
|
||||
const int c = cols_[idx];
|
||||
const double v = values_[idx];
|
||||
y[r] += v * x[c];
|
||||
}
|
||||
}
|
||||
} else if (storage_type_ == UPPER_TRIANGULAR) {
|
||||
if (storage_type_ == StorageType::UNSYMMETRIC) {
|
||||
RightMultiplyAndAccumulate(x, y, nullptr, 1);
|
||||
} else if (storage_type_ == StorageType::UPPER_TRIANGULAR) {
|
||||
// Because of their block structure, we will have entries that lie
|
||||
// above (below) the diagonal for lower (upper) triangular matrices,
|
||||
// so the loops below need to account for this.
|
||||
@@ -317,7 +334,7 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x,
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (storage_type_ == LOWER_TRIANGULAR) {
|
||||
} else if (storage_type_ == StorageType::LOWER_TRIANGULAR) {
|
||||
for (int r = 0; r < num_rows_; ++r) {
|
||||
int idx = rows_[r];
|
||||
const int idx_end = rows_[r + 1];
|
||||
@@ -340,19 +357,21 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x,
|
||||
}
|
||||
}
|
||||
|
||||
void CompressedRowSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
||||
void CompressedRowSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
|
||||
if (storage_type_ == UNSYMMETRIC) {
|
||||
if (storage_type_ == StorageType::UNSYMMETRIC) {
|
||||
for (int r = 0; r < num_rows_; ++r) {
|
||||
for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) {
|
||||
y[cols_[idx]] += values_[idx] * x[r];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Since the matrix is symmetric, LeftMultiply = RightMultiply.
|
||||
RightMultiply(x, y);
|
||||
// Since the matrix is symmetric, LeftMultiplyAndAccumulate =
|
||||
// RightMultiplyAndAccumulate.
|
||||
RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -360,11 +379,11 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const {
|
||||
CHECK(x != nullptr);
|
||||
|
||||
std::fill(x, x + num_cols_, 0.0);
|
||||
if (storage_type_ == UNSYMMETRIC) {
|
||||
if (storage_type_ == StorageType::UNSYMMETRIC) {
|
||||
for (int idx = 0; idx < rows_[num_rows_]; ++idx) {
|
||||
x[cols_[idx]] += values_[idx] * values_[idx];
|
||||
}
|
||||
} else if (storage_type_ == UPPER_TRIANGULAR) {
|
||||
} else if (storage_type_ == StorageType::UPPER_TRIANGULAR) {
|
||||
// Because of their block structure, we will have entries that lie
|
||||
// above (below) the diagonal for lower (upper) triangular
|
||||
// matrices, so the loops below need to account for this.
|
||||
@@ -390,7 +409,7 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (storage_type_ == LOWER_TRIANGULAR) {
|
||||
} else if (storage_type_ == StorageType::LOWER_TRIANGULAR) {
|
||||
for (int r = 0; r < num_rows_; ++r) {
|
||||
int idx = rows_[r];
|
||||
const int idx_end = rows_[r + 1];
|
||||
@@ -435,7 +454,7 @@ void CompressedRowSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
|
||||
void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
|
||||
CHECK_GE(delta_rows, 0);
|
||||
CHECK_LE(delta_rows, num_rows_);
|
||||
CHECK_EQ(storage_type_, UNSYMMETRIC);
|
||||
CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC);
|
||||
|
||||
num_rows_ -= delta_rows;
|
||||
rows_.resize(num_rows_ + 1);
|
||||
@@ -451,7 +470,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
|
||||
int num_row_blocks = 0;
|
||||
int num_rows = 0;
|
||||
while (num_row_blocks < row_blocks_.size() && num_rows < num_rows_) {
|
||||
num_rows += row_blocks_[num_row_blocks];
|
||||
num_rows += row_blocks_[num_row_blocks].size;
|
||||
++num_row_blocks;
|
||||
}
|
||||
|
||||
@@ -459,7 +478,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
|
||||
}
|
||||
|
||||
void CompressedRowSparseMatrix::AppendRows(const CompressedRowSparseMatrix& m) {
|
||||
CHECK_EQ(storage_type_, UNSYMMETRIC);
|
||||
CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC);
|
||||
CHECK_EQ(m.num_cols(), num_cols_);
|
||||
|
||||
CHECK((row_blocks_.empty() && m.row_blocks().empty()) ||
|
||||
@@ -539,17 +558,15 @@ void CompressedRowSparseMatrix::SetMaxNumNonZeros(int num_nonzeros) {
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
|
||||
const double* diagonal, const vector<int>& blocks) {
|
||||
int num_rows = 0;
|
||||
const double* diagonal, const std::vector<Block>& blocks) {
|
||||
const int num_rows = NumScalarEntries(blocks);
|
||||
int num_nonzeros = 0;
|
||||
for (int block_size : blocks) {
|
||||
num_rows += block_size;
|
||||
num_nonzeros += block_size * block_size;
|
||||
for (auto& block : blocks) {
|
||||
num_nonzeros += block.size * block.size;
|
||||
}
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix> matrix =
|
||||
std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_rows, num_rows, num_nonzeros);
|
||||
auto matrix = std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_rows, num_rows, num_nonzeros);
|
||||
|
||||
int* rows = matrix->mutable_rows();
|
||||
int* cols = matrix->mutable_cols();
|
||||
@@ -558,15 +575,17 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
|
||||
|
||||
int idx_cursor = 0;
|
||||
int col_cursor = 0;
|
||||
for (int block_size : blocks) {
|
||||
for (int r = 0; r < block_size; ++r) {
|
||||
for (auto& block : blocks) {
|
||||
for (int r = 0; r < block.size; ++r) {
|
||||
*(rows++) = idx_cursor;
|
||||
values[idx_cursor + r] = diagonal[col_cursor + r];
|
||||
for (int c = 0; c < block_size; ++c, ++idx_cursor) {
|
||||
if (diagonal != nullptr) {
|
||||
values[idx_cursor + r] = diagonal[col_cursor + r];
|
||||
}
|
||||
for (int c = 0; c < block.size; ++c, ++idx_cursor) {
|
||||
*(cols++) = col_cursor + c;
|
||||
}
|
||||
}
|
||||
col_cursor += block_size;
|
||||
col_cursor += block.size;
|
||||
}
|
||||
*rows = idx_cursor;
|
||||
|
||||
@@ -580,19 +599,18 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
CompressedRowSparseMatrix::Transpose() const {
|
||||
std::unique_ptr<CompressedRowSparseMatrix> transpose =
|
||||
std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_cols_, num_rows_, num_nonzeros());
|
||||
auto transpose = std::make_unique<CompressedRowSparseMatrix>(
|
||||
num_cols_, num_rows_, num_nonzeros());
|
||||
|
||||
switch (storage_type_) {
|
||||
case UNSYMMETRIC:
|
||||
transpose->set_storage_type(UNSYMMETRIC);
|
||||
case StorageType::UNSYMMETRIC:
|
||||
transpose->set_storage_type(StorageType::UNSYMMETRIC);
|
||||
break;
|
||||
case LOWER_TRIANGULAR:
|
||||
transpose->set_storage_type(UPPER_TRIANGULAR);
|
||||
case StorageType::LOWER_TRIANGULAR:
|
||||
transpose->set_storage_type(StorageType::UPPER_TRIANGULAR);
|
||||
break;
|
||||
case UPPER_TRIANGULAR:
|
||||
transpose->set_storage_type(LOWER_TRIANGULAR);
|
||||
case StorageType::UPPER_TRIANGULAR:
|
||||
transpose->set_storage_type(StorageType::LOWER_TRIANGULAR);
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown storage type: " << storage_type_;
|
||||
@@ -621,13 +639,14 @@ CompressedRowSparseMatrix::Transpose() const {
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix>
|
||||
CompressedRowSparseMatrix::CreateRandomMatrix(
|
||||
CompressedRowSparseMatrix::RandomMatrixOptions options) {
|
||||
CompressedRowSparseMatrix::RandomMatrixOptions options,
|
||||
std::mt19937& prng) {
|
||||
CHECK_GT(options.num_row_blocks, 0);
|
||||
CHECK_GT(options.min_row_block_size, 0);
|
||||
CHECK_GT(options.max_row_block_size, 0);
|
||||
CHECK_LE(options.min_row_block_size, options.max_row_block_size);
|
||||
|
||||
if (options.storage_type == UNSYMMETRIC) {
|
||||
if (options.storage_type == StorageType::UNSYMMETRIC) {
|
||||
CHECK_GT(options.num_col_blocks, 0);
|
||||
CHECK_GT(options.min_col_block_size, 0);
|
||||
CHECK_GT(options.max_col_block_size, 0);
|
||||
@@ -642,33 +661,42 @@ CompressedRowSparseMatrix::CreateRandomMatrix(
|
||||
CHECK_GT(options.block_density, 0.0);
|
||||
CHECK_LE(options.block_density, 1.0);
|
||||
|
||||
vector<int> row_blocks;
|
||||
vector<int> col_blocks;
|
||||
std::vector<Block> row_blocks;
|
||||
row_blocks.reserve(options.num_row_blocks);
|
||||
std::vector<Block> col_blocks;
|
||||
col_blocks.reserve(options.num_col_blocks);
|
||||
|
||||
std::uniform_int_distribution<int> col_distribution(
|
||||
options.min_col_block_size, options.max_col_block_size);
|
||||
std::uniform_int_distribution<int> row_distribution(
|
||||
options.min_row_block_size, options.max_row_block_size);
|
||||
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
|
||||
std::normal_distribution<double> standard_normal_distribution;
|
||||
|
||||
// Generate the row block structure.
|
||||
int row_pos = 0;
|
||||
for (int i = 0; i < options.num_row_blocks; ++i) {
|
||||
// Generate a random integer in [min_row_block_size, max_row_block_size]
|
||||
const int delta_block_size =
|
||||
Uniform(options.max_row_block_size - options.min_row_block_size);
|
||||
row_blocks.push_back(options.min_row_block_size + delta_block_size);
|
||||
row_blocks.emplace_back(row_distribution(prng), row_pos);
|
||||
row_pos += row_blocks.back().size;
|
||||
}
|
||||
|
||||
if (options.storage_type == UNSYMMETRIC) {
|
||||
if (options.storage_type == StorageType::UNSYMMETRIC) {
|
||||
// Generate the col block structure.
|
||||
int col_pos = 0;
|
||||
for (int i = 0; i < options.num_col_blocks; ++i) {
|
||||
// Generate a random integer in [min_col_block_size, max_col_block_size]
|
||||
const int delta_block_size =
|
||||
Uniform(options.max_col_block_size - options.min_col_block_size);
|
||||
col_blocks.push_back(options.min_col_block_size + delta_block_size);
|
||||
col_blocks.emplace_back(col_distribution(prng), col_pos);
|
||||
col_pos += col_blocks.back().size;
|
||||
}
|
||||
} else {
|
||||
// Symmetric matrices (LOWER_TRIANGULAR or UPPER_TRIANGULAR);
|
||||
col_blocks = row_blocks;
|
||||
}
|
||||
|
||||
vector<int> tsm_rows;
|
||||
vector<int> tsm_cols;
|
||||
vector<double> tsm_values;
|
||||
std::vector<int> tsm_rows;
|
||||
std::vector<int> tsm_cols;
|
||||
std::vector<double> tsm_values;
|
||||
|
||||
// For ease of construction, we are going to generate the
|
||||
// CompressedRowSparseMatrix by generating it as a
|
||||
@@ -687,51 +715,55 @@ CompressedRowSparseMatrix::CreateRandomMatrix(
|
||||
for (int r = 0; r < options.num_row_blocks; ++r) {
|
||||
int col_block_begin = 0;
|
||||
for (int c = 0; c < options.num_col_blocks; ++c) {
|
||||
if (((options.storage_type == UPPER_TRIANGULAR) && (r > c)) ||
|
||||
((options.storage_type == LOWER_TRIANGULAR) && (r < c))) {
|
||||
col_block_begin += col_blocks[c];
|
||||
if (((options.storage_type == StorageType::UPPER_TRIANGULAR) &&
|
||||
(r > c)) ||
|
||||
((options.storage_type == StorageType::LOWER_TRIANGULAR) &&
|
||||
(r < c))) {
|
||||
col_block_begin += col_blocks[c].size;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Randomly determine if this block is present or not.
|
||||
if (RandDouble() <= options.block_density) {
|
||||
if (uniform01(prng) <= options.block_density) {
|
||||
auto randn = [&standard_normal_distribution, &prng] {
|
||||
return standard_normal_distribution(prng);
|
||||
};
|
||||
// If the matrix is symmetric, then we take care to generate
|
||||
// symmetric diagonal blocks.
|
||||
if (options.storage_type == UNSYMMETRIC || r != c) {
|
||||
AddRandomBlock(row_blocks[r],
|
||||
col_blocks[c],
|
||||
if (options.storage_type == StorageType::UNSYMMETRIC || r != c) {
|
||||
AddRandomBlock(row_blocks[r].size,
|
||||
col_blocks[c].size,
|
||||
row_block_begin,
|
||||
col_block_begin,
|
||||
randn,
|
||||
&tsm_rows,
|
||||
&tsm_cols,
|
||||
&tsm_values);
|
||||
} else {
|
||||
AddSymmetricRandomBlock(row_blocks[r],
|
||||
AddSymmetricRandomBlock(row_blocks[r].size,
|
||||
row_block_begin,
|
||||
randn,
|
||||
&tsm_rows,
|
||||
&tsm_cols,
|
||||
&tsm_values);
|
||||
}
|
||||
}
|
||||
col_block_begin += col_blocks[c];
|
||||
col_block_begin += col_blocks[c].size;
|
||||
}
|
||||
row_block_begin += row_blocks[r];
|
||||
row_block_begin += row_blocks[r].size;
|
||||
}
|
||||
}
|
||||
|
||||
const int num_rows = std::accumulate(row_blocks.begin(), row_blocks.end(), 0);
|
||||
const int num_cols = std::accumulate(col_blocks.begin(), col_blocks.end(), 0);
|
||||
const int num_rows = NumScalarEntries(row_blocks);
|
||||
const int num_cols = NumScalarEntries(col_blocks);
|
||||
const bool kDoNotTranspose = false;
|
||||
std::unique_ptr<CompressedRowSparseMatrix> matrix =
|
||||
CompressedRowSparseMatrix::FromTripletSparseMatrix(
|
||||
TripletSparseMatrix(
|
||||
num_rows, num_cols, tsm_rows, tsm_cols, tsm_values),
|
||||
kDoNotTranspose);
|
||||
auto matrix = CompressedRowSparseMatrix::FromTripletSparseMatrix(
|
||||
TripletSparseMatrix(num_rows, num_cols, tsm_rows, tsm_cols, tsm_values),
|
||||
kDoNotTranspose);
|
||||
(*matrix->mutable_row_blocks()) = row_blocks;
|
||||
(*matrix->mutable_col_blocks()) = col_blocks;
|
||||
matrix->set_storage_type(options.storage_type);
|
||||
return matrix;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,10 @@
|
||||
#define CERES_INTERNAL_COMPRESSED_ROW_SPARSE_MATRIX_H_
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
@@ -46,11 +48,12 @@ struct CRSMatrix;
|
||||
|
||||
namespace internal {
|
||||
|
||||
class ContextImpl;
|
||||
class TripletSparseMatrix;
|
||||
|
||||
class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
public:
|
||||
enum StorageType {
|
||||
enum class StorageType {
|
||||
UNSYMMETRIC,
|
||||
// Matrix is assumed to be symmetric but only the lower triangular
|
||||
// part of the matrix is stored.
|
||||
@@ -100,8 +103,12 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
// SparseMatrix interface.
|
||||
~CompressedRowSparseMatrix() override;
|
||||
void SetZero() final;
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void LeftMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void SquaredColumnNorm(double* x) const final;
|
||||
void ScaleColumns(const double* scale) final;
|
||||
void ToDenseMatrix(Matrix* dense_matrix) const final;
|
||||
@@ -109,8 +116,8 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
int num_rows() const final { return num_rows_; }
|
||||
int num_cols() const final { return num_cols_; }
|
||||
int num_nonzeros() const final { return rows_[num_rows_]; }
|
||||
const double* values() const final { return &values_[0]; }
|
||||
double* mutable_values() final { return &values_[0]; }
|
||||
const double* values() const final { return values_.data(); }
|
||||
double* mutable_values() final { return values_.data(); }
|
||||
|
||||
// Delete the bottom delta_rows.
|
||||
// num_rows -= delta_rows
|
||||
@@ -132,28 +139,28 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
void set_num_cols(const int num_cols) { num_cols_ = num_cols; }
|
||||
|
||||
// Low level access methods that expose the structure of the matrix.
|
||||
const int* cols() const { return &cols_[0]; }
|
||||
int* mutable_cols() { return &cols_[0]; }
|
||||
const int* cols() const { return cols_.data(); }
|
||||
int* mutable_cols() { return cols_.data(); }
|
||||
|
||||
const int* rows() const { return &rows_[0]; }
|
||||
int* mutable_rows() { return &rows_[0]; }
|
||||
const int* rows() const { return rows_.data(); }
|
||||
int* mutable_rows() { return rows_.data(); }
|
||||
|
||||
StorageType storage_type() const { return storage_type_; }
|
||||
void set_storage_type(const StorageType storage_type) {
|
||||
storage_type_ = storage_type;
|
||||
}
|
||||
|
||||
const std::vector<int>& row_blocks() const { return row_blocks_; }
|
||||
std::vector<int>* mutable_row_blocks() { return &row_blocks_; }
|
||||
const std::vector<Block>& row_blocks() const { return row_blocks_; }
|
||||
std::vector<Block>* mutable_row_blocks() { return &row_blocks_; }
|
||||
|
||||
const std::vector<int>& col_blocks() const { return col_blocks_; }
|
||||
std::vector<int>* mutable_col_blocks() { return &col_blocks_; }
|
||||
const std::vector<Block>& col_blocks() const { return col_blocks_; }
|
||||
std::vector<Block>* mutable_col_blocks() { return &col_blocks_; }
|
||||
|
||||
// Create a block diagonal CompressedRowSparseMatrix with the given
|
||||
// block structure. The individual blocks are assumed to be laid out
|
||||
// contiguously in the diagonal array, one block at a time.
|
||||
static std::unique_ptr<CompressedRowSparseMatrix> CreateBlockDiagonalMatrix(
|
||||
const double* diagonal, const std::vector<int>& blocks);
|
||||
const double* diagonal, const std::vector<Block>& blocks);
|
||||
|
||||
// Options struct to control the generation of random block sparse
|
||||
// matrices in compressed row sparse format.
|
||||
@@ -165,7 +172,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
// given bounds.
|
||||
//
|
||||
// Then we walk the block structure of the resulting matrix, and with
|
||||
// probability block_density detemine whether they are structurally
|
||||
// probability block_density determine whether they are structurally
|
||||
// zero or not. If the answer is no, then we generate entries for the
|
||||
// block which are distributed normally.
|
||||
struct RandomMatrixOptions {
|
||||
@@ -176,7 +183,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
// (lower triangular) part. In this case, num_col_blocks,
|
||||
// min_col_block_size and max_col_block_size will be ignored and
|
||||
// assumed to be equal to the corresponding row settings.
|
||||
StorageType storage_type = UNSYMMETRIC;
|
||||
StorageType storage_type = StorageType::UNSYMMETRIC;
|
||||
|
||||
int num_row_blocks = 0;
|
||||
int min_row_block_size = 0;
|
||||
@@ -195,7 +202,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
// normally distributed and whose structure is determined by
|
||||
// RandomMatrixOptions.
|
||||
static std::unique_ptr<CompressedRowSparseMatrix> CreateRandomMatrix(
|
||||
RandomMatrixOptions options);
|
||||
RandomMatrixOptions options, std::mt19937& prng);
|
||||
|
||||
private:
|
||||
static std::unique_ptr<CompressedRowSparseMatrix> FromTripletSparseMatrix(
|
||||
@@ -209,14 +216,31 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix {
|
||||
StorageType storage_type_;
|
||||
|
||||
// If the matrix has an underlying block structure, then it can also
|
||||
// carry with it row and column block sizes. This is auxilliary and
|
||||
// carry with it row and column block sizes. This is auxiliary and
|
||||
// optional information for use by algorithms operating on the
|
||||
// matrix. The class itself does not make use of this information in
|
||||
// any way.
|
||||
std::vector<int> row_blocks_;
|
||||
std::vector<int> col_blocks_;
|
||||
std::vector<Block> row_blocks_;
|
||||
std::vector<Block> col_blocks_;
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& s,
|
||||
CompressedRowSparseMatrix::StorageType type) {
|
||||
switch (type) {
|
||||
case CompressedRowSparseMatrix::StorageType::UNSYMMETRIC:
|
||||
s << "UNSYMMETRIC";
|
||||
break;
|
||||
case CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR:
|
||||
s << "UPPER_TRIANGULAR";
|
||||
break;
|
||||
case CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR:
|
||||
s << "LOWER_TRIANGULAR";
|
||||
break;
|
||||
default:
|
||||
s << "UNKNOWN CompressedRowSparseMatrix::StorageType";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A thread-safe multi-producer, multi-consumer queue for queueing items that
|
||||
// are typically handled asynchronously by multiple threads. The ConcurrentQueue
|
||||
@@ -152,7 +151,6 @@ class ConcurrentQueue {
|
||||
bool wait_{true};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_CONCURRENT_QUEUE_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,253 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
//
|
||||
// A preconditioned conjugate gradients solver
|
||||
// (ConjugateGradientsSolver) for positive semidefinite linear
|
||||
// systems.
|
||||
//
|
||||
// We have also augmented the termination criterion used by this
|
||||
// solver to support not just residual based termination but also
|
||||
// termination based on decrease in the value of the quadratic model
|
||||
// that CG optimizes.
|
||||
|
||||
#include "ceres/conjugate_gradients_solver.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_operator.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace {
|
||||
|
||||
// Returns true when x is exactly zero (either sign) or is +/- infinity.
// NaN and every other finite value yield false.
bool IsZeroOrInfinity(double x) {
  if (x == 0.0) {
    return true;
  }
  return std::isinf(x);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs the solver. `options` is taken by value and moved into the
// options_ member, which Solve() later reads for min_num_iterations,
// max_num_iterations and residual_reset_period.
ConjugateGradientsSolver::ConjugateGradientsSolver(
|
||||
LinearSolver::Options options)
|
||||
: options_(std::move(options)) {}
|
||||
|
||||
// Runs (optionally preconditioned) Conjugate Gradients on A * x = b.
//
//   A                  square linear operator (checked); applied through
//                      RightMultiply.
//   b                  right hand side, read as A->num_cols() doubles.
//   per_solve_options  supplies r_tolerance, q_tolerance and an optional
//                      preconditioner (nullptr means none, i.e. z = r).
//   x                  A->num_cols() doubles; the initial guess on entry,
//                      updated in place with the current iterate.
//
// Returns a summary with the termination type, a human readable message and
// the number of iterations performed. The summary starts out as
// LINEAR_SOLVER_NO_CONVERGENCE ("Maximum number of iterations reached.") and
// is overwritten on convergence or numerical failure.
LinearSolver::Summary ConjugateGradientsSolver::Solve(
    LinearOperator* A,
    const double* b,
    const LinearSolver::PerSolveOptions& per_solve_options,
    double* x) {
  CHECK(A != nullptr);
  CHECK(x != nullptr);
  CHECK(b != nullptr);
  CHECK_EQ(A->num_rows(), A->num_cols());

  LinearSolver::Summary summary;
  summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE;
  summary.message = "Maximum number of iterations reached.";
  summary.num_iterations = 0;

  const int num_cols = A->num_cols();
  VectorRef xref(x, num_cols);
  ConstVectorRef bref(b, num_cols);

  // A zero right hand side has the trivial solution x = 0.
  const double norm_b = bref.norm();
  if (norm_b == 0.0) {
    xref.setZero();
    summary.termination_type = LINEAR_SOLVER_SUCCESS;
    summary.message = "Convergence. |b| = 0.";
    return summary;
  }

  Vector r(num_cols);
  Vector p(num_cols);
  Vector z(num_cols);
  Vector tmp(num_cols);

  const double tol_r = per_solve_options.r_tolerance * norm_b;

  // Initial residual r = b - A * x.
  tmp.setZero();
  A->RightMultiply(x, tmp.data());
  r = bref - tmp;
  double norm_r = r.norm();
  if (options_.min_num_iterations == 0 && norm_r <= tol_r) {
    summary.termination_type = LINEAR_SOLVER_SUCCESS;
    summary.message =
        StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r);
    return summary;
  }

  double rho = 1.0;

  // Initial value of the quadratic model Q = x'Ax - 2 * b'x.
  double Q0 = -1.0 * xref.dot(bref + r);

  for (summary.num_iterations = 1;; ++summary.num_iterations) {
    // Apply preconditioner
    if (per_solve_options.preconditioner != nullptr) {
      z.setZero();
      per_solve_options.preconditioner->RightMultiply(r.data(), z.data());
    } else {
      z = r;
    }

    const double last_rho = rho;
    rho = r.dot(z);
    if (IsZeroOrInfinity(rho)) {
      summary.termination_type = LINEAR_SOLVER_FAILURE;
      summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho);
      break;
    }

    if (summary.num_iterations == 1) {
      p = z;
    } else {
      const double beta = rho / last_rho;
      if (IsZeroOrInfinity(beta)) {
        summary.termination_type = LINEAR_SOLVER_FAILURE;
        summary.message = StringPrintf(
            "Numerical failure. beta = rho_n / rho_{n-1} = %e, "
            "rho_n = %e, rho_{n-1} = %e",
            beta,
            rho,
            last_rho);
        break;
      }
      p = z + beta * p;
    }

    // z is no longer needed in this iteration, so its storage is reused for
    // q = A * p.
    Vector& q = z;
    q.setZero();
    A->RightMultiply(p.data(), q.data());
    const double pq = p.dot(q);
    if ((pq <= 0) || std::isinf(pq)) {
      summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE;
      summary.message = StringPrintf(
          "Matrix is indefinite, no more progress can be made. "
          "p'q = %e. |p| = %e, |q| = %e",
          pq,
          p.norm(),
          q.norm());
      break;
    }

    const double alpha = rho / pq;
    if (std::isinf(alpha)) {
      summary.termination_type = LINEAR_SOLVER_FAILURE;
      summary.message = StringPrintf(
          "Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.",
          alpha,
          rho,
          pq);
      break;
    }

    xref = xref + alpha * p;

    // Ideally we would just use the update r = r - alpha*q to keep
    // track of the residual vector. However this estimate tends to
    // drift over time due to round off errors. Thus every
    // residual_reset_period iterations, we calculate the residual as
    // r = b - Ax. We do not do this every iteration because this
    // requires an additional matrix vector multiply which would
    // double the complexity of the CG algorithm.
    //
    // A period of zero would make the modulo undefined (integer division by
    // zero), so it is treated as "never recompute".
    const bool recompute_residual =
        options_.residual_reset_period != 0 &&
        summary.num_iterations % options_.residual_reset_period == 0;
    if (recompute_residual) {
      tmp.setZero();
      A->RightMultiply(x, tmp.data());
      r = bref - tmp;
    } else {
      r = r - alpha * q;
    }

    // Quadratic model based termination.
    //   Q1 = x'Ax - 2 * b' x.
    const double Q1 = -1.0 * xref.dot(bref + r);

    // For PSD matrices A, let
    //
    //   Q(x) = x'Ax - 2b'x
    //
    // be the cost of the quadratic function defined by A and b. Then,
    // the solver terminates at iteration i if
    //
    //   i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance.
    //
    // This termination criterion is more useful when using CG to
    // solve the Newton step. This particular convergence test comes
    // from Stephen Nash's work on truncated Newton
    // methods. References:
    //
    //   1. Stephen G. Nash & Ariela Sofer, Assessing A Search
    //   Direction Within A Truncated Newton Method, Operation
    //   Research Letters 9(1990) 219-221.
    //
    //   2. Stephen G. Nash, A Survey of Truncated Newton Methods,
    //   Journal of Computational and Applied Mathematics,
    //   124(1-2), 45-59, 2000.
    //
    const double zeta = summary.num_iterations * (Q1 - Q0) / Q1;
    if (zeta < per_solve_options.q_tolerance &&
        summary.num_iterations >= options_.min_num_iterations) {
      summary.termination_type = LINEAR_SOLVER_SUCCESS;
      summary.message =
          StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e",
                       summary.num_iterations,
                       zeta,
                       per_solve_options.q_tolerance,
                       r.norm());
      break;
    }
    Q0 = Q1;

    // Residual based termination.
    norm_r = r.norm();
    if (norm_r <= tol_r &&
        summary.num_iterations >= options_.min_num_iterations) {
      summary.termination_type = LINEAR_SOLVER_SUCCESS;
      summary.message =
          StringPrintf("Iteration: %d Convergence. |r| = %e <= %e.",
                       summary.num_iterations,
                       norm_r,
                       tol_r);
      break;
    }

    if (summary.num_iterations >= options_.max_num_iterations) {
      break;
    }
  }

  return summary;
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,42 +34,277 @@
|
||||
#ifndef CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_
|
||||
#define CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
|
||||
#include "ceres/eigen_vector_ops.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_operator.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class LinearOperator;
|
||||
|
||||
// This class implements the now classical Conjugate Gradients
|
||||
// algorithm of Hestenes & Stiefel for solving postive semidefinite
|
||||
// linear sytems. Optionally it can use a preconditioner also to
|
||||
// reduce the condition number of the linear system and improve the
|
||||
// convergence rate. Modern references for Conjugate Gradients are the
|
||||
// books by Yousef Saad and Trefethen & Bau. This implementation of CG
|
||||
// has been augmented with additional termination tests that are
|
||||
// needed for forcing early termination when used as part of an
|
||||
// inexact Newton solver.
|
||||
//
|
||||
// For more details see the documentation for
|
||||
// LinearSolver::PerSolveOptions::r_tolerance and
|
||||
// LinearSolver::PerSolveOptions::q_tolerance in linear_solver.h.
|
||||
class CERES_NO_EXPORT ConjugateGradientsSolver final : public LinearSolver {
|
||||
// Interface for the linear operator used by ConjugateGradientsSolver.
|
||||
template <typename DenseVectorType>
|
||||
class ConjugateGradientsLinearOperator {
|
||||
public:
|
||||
explicit ConjugateGradientsSolver(LinearSolver::Options options);
|
||||
Summary Solve(LinearOperator* A,
|
||||
const double* b,
|
||||
const LinearSolver::PerSolveOptions& per_solve_options,
|
||||
double* x) final;
|
||||
|
||||
private:
|
||||
const LinearSolver::Options options_;
|
||||
~ConjugateGradientsLinearOperator() = default;
|
||||
virtual void RightMultiplyAndAccumulate(const DenseVectorType& x,
|
||||
DenseVectorType& y) = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
// Adapter class that makes LinearOperator appear like an instance of
|
||||
// ConjugateGradientsLinearOperator.
|
||||
// Wraps a LinearOperator so it can be passed where a
// ConjugateGradientsLinearOperator<Vector> is expected. Only a reference to
// the wrapped operator is stored, so the operator has to outlive the adapter.
class LinearOperatorAdapter : public ConjugateGradientsLinearOperator<Vector> {
|
||||
public:
|
||||
// Binds the adapter to linear_operator; no copy is made.
LinearOperatorAdapter(LinearOperator& linear_operator)
|
||||
: linear_operator_(linear_operator) {}
|
||||
|
||||
// Forwards the call unchanged to the wrapped operator.
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
|
||||
linear_operator_.RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
private:
|
||||
// The wrapped operator (held by reference, not owned).
LinearOperator& linear_operator_;
|
||||
};
|
||||
|
||||
// Options to control the ConjugateGradientsSolver. For detailed documentation
|
||||
// for each of these options see linear_solver.h
|
||||
// Plain settings bundle read by the ConjugateGradientsSolver function
// template. The per-field notes describe how that function uses each value.
struct ConjugateGradientsSolverOptions {
|
||||
// Convergence is only reported once at least this many iterations have run;
// a value of 0 also lets the initial residual be accepted before iterating.
int min_num_iterations = 1;
|
||||
// The iteration loop stops once this many iterations have been performed.
int max_num_iterations = 1;
|
||||
// Every residual_reset_period iterations the residual is recomputed as
// rhs - lhs * solution instead of being updated incrementally.
int residual_reset_period = 10;
|
||||
// Residual based termination: success when |r| <= r_tolerance * |rhs|.
double r_tolerance = 0.0;
|
||||
// Quadratic model based termination: success when zeta < q_tolerance.
double q_tolerance = 0.0;
|
||||
// Passed, together with num_threads, to the vector helpers (Norm, Dot,
// Axpby, SetZero, Copy) used by the solver.
ContextImpl* context = nullptr;
|
||||
int num_threads = 1;
|
||||
};
|
||||
|
||||
// This function implements the now classical Conjugate Gradients algorithm of
|
||||
// Hestenes & Stiefel for solving positive semidefinite linear systems.
|
||||
// Optionally it can use a preconditioner also to reduce the condition number of
|
||||
// the linear system and improve the convergence rate. Modern references for
|
||||
// Conjugate Gradients are the books by Yousef Saad and Trefethen & Bau. This
|
||||
// implementation of CG has been augmented with additional termination tests
|
||||
// that are needed for forcing early termination when used as part of an inexact
|
||||
// Newton solver.
|
||||
//
|
||||
// This implementation is templated over DenseVectorType and then in turn on
|
||||
// ConjugateGradientsLinearOperator, which allows us to write an abstract
|
||||
// implementation of the Conjugate Gradients algorithm without worrying about how
|
||||
// these objects are implemented or where they are stored. In particular it
|
||||
// allows us to have a single implementation that works on CPU and GPU based
|
||||
// matrices and vectors.
|
||||
//
|
||||
// scratch must contain pointers to four DenseVector objects of the same size as
|
||||
// rhs and solution. By asking the user for scratch space, we guarantee that we
|
||||
// will not perform any allocations inside this function.
|
||||
template <typename DenseVectorType>
|
||||
LinearSolver::Summary ConjugateGradientsSolver(
|
||||
const ConjugateGradientsSolverOptions options,
|
||||
ConjugateGradientsLinearOperator<DenseVectorType>& lhs,
|
||||
const DenseVectorType& rhs,
|
||||
ConjugateGradientsLinearOperator<DenseVectorType>& preconditioner,
|
||||
DenseVectorType* scratch[4],
|
||||
DenseVectorType& solution) {
|
||||
auto IsZeroOrInfinity = [](double x) {
|
||||
return ((x == 0.0) || std::isinf(x));
|
||||
};
|
||||
|
||||
DenseVectorType& p = *scratch[0];
|
||||
DenseVectorType& r = *scratch[1];
|
||||
DenseVectorType& z = *scratch[2];
|
||||
DenseVectorType& tmp = *scratch[3];
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE;
|
||||
summary.message = "Maximum number of iterations reached.";
|
||||
summary.num_iterations = 0;
|
||||
|
||||
const double norm_rhs = Norm(rhs, options.context, options.num_threads);
|
||||
if (norm_rhs == 0.0) {
|
||||
SetZero(solution, options.context, options.num_threads);
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Convergence. |b| = 0.";
|
||||
return summary;
|
||||
}
|
||||
|
||||
const double tol_r = options.r_tolerance * norm_rhs;
|
||||
|
||||
SetZero(tmp, options.context, options.num_threads);
|
||||
lhs.RightMultiplyAndAccumulate(solution, tmp);
|
||||
|
||||
// r = rhs - tmp
|
||||
Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads);
|
||||
|
||||
double norm_r = Norm(r, options.context, options.num_threads);
|
||||
if (options.min_num_iterations == 0 && norm_r <= tol_r) {
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message =
|
||||
StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r);
|
||||
return summary;
|
||||
}
|
||||
|
||||
double rho = 1.0;
|
||||
|
||||
// Initial value of the quadratic model Q = x'Ax - 2 * b'x.
|
||||
// double Q0 = -1.0 * solution.dot(rhs + r);
|
||||
Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads);
|
||||
double Q0 = -Dot(solution, tmp, options.context, options.num_threads);
|
||||
|
||||
for (summary.num_iterations = 1;; ++summary.num_iterations) {
|
||||
SetZero(z, options.context, options.num_threads);
|
||||
preconditioner.RightMultiplyAndAccumulate(r, z);
|
||||
|
||||
const double last_rho = rho;
|
||||
// rho = r.dot(z);
|
||||
rho = Dot(r, z, options.context, options.num_threads);
|
||||
if (IsZeroOrInfinity(rho)) {
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho);
|
||||
break;
|
||||
}
|
||||
|
||||
if (summary.num_iterations == 1) {
|
||||
Copy(z, p, options.context, options.num_threads);
|
||||
} else {
|
||||
const double beta = rho / last_rho;
|
||||
if (IsZeroOrInfinity(beta)) {
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = StringPrintf(
|
||||
"Numerical failure. beta = rho_n / rho_{n-1} = %e, "
|
||||
"rho_n = %e, rho_{n-1} = %e",
|
||||
beta,
|
||||
rho,
|
||||
last_rho);
|
||||
break;
|
||||
}
|
||||
// p = z + beta * p;
|
||||
Axpby(1.0, z, beta, p, p, options.context, options.num_threads);
|
||||
}
|
||||
|
||||
DenseVectorType& q = z;
|
||||
SetZero(q, options.context, options.num_threads);
|
||||
lhs.RightMultiplyAndAccumulate(p, q);
|
||||
const double pq = Dot(p, q, options.context, options.num_threads);
|
||||
if ((pq <= 0) || std::isinf(pq)) {
|
||||
summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE;
|
||||
summary.message = StringPrintf(
|
||||
"Matrix is indefinite, no more progress can be made. "
|
||||
"p'q = %e. |p| = %e, |q| = %e",
|
||||
pq,
|
||||
Norm(p, options.context, options.num_threads),
|
||||
Norm(q, options.context, options.num_threads));
|
||||
break;
|
||||
}
|
||||
|
||||
const double alpha = rho / pq;
|
||||
if (std::isinf(alpha)) {
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = StringPrintf(
|
||||
"Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.",
|
||||
alpha,
|
||||
rho,
|
||||
pq);
|
||||
break;
|
||||
}
|
||||
|
||||
// solution = solution + alpha * p;
|
||||
Axpby(1.0,
|
||||
solution,
|
||||
alpha,
|
||||
p,
|
||||
solution,
|
||||
options.context,
|
||||
options.num_threads);
|
||||
|
||||
// Ideally we would just use the update r = r - alpha*q to keep
|
||||
// track of the residual vector. However this estimate tends to
|
||||
// drift over time due to round off errors. Thus every
|
||||
// residual_reset_period iterations, we calculate the residual as
|
||||
// r = b - Ax. We do not do this every iteration because this
|
||||
// requires an additional matrix vector multiply which would
|
||||
// double the complexity of the CG algorithm.
|
||||
if (summary.num_iterations % options.residual_reset_period == 0) {
|
||||
SetZero(tmp, options.context, options.num_threads);
|
||||
lhs.RightMultiplyAndAccumulate(solution, tmp);
|
||||
Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads);
|
||||
// r = rhs - tmp;
|
||||
} else {
|
||||
Axpby(1.0, r, -alpha, q, r, options.context, options.num_threads);
|
||||
// r = r - alpha * q;
|
||||
}
|
||||
|
||||
// Quadratic model based termination.
|
||||
// Q1 = x'Ax - 2 * b' x.
|
||||
// const double Q1 = -1.0 * solution.dot(rhs + r);
|
||||
Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads);
|
||||
const double Q1 = -Dot(solution, tmp, options.context, options.num_threads);
|
||||
|
||||
// For PSD matrices A, let
|
||||
//
|
||||
// Q(x) = x'Ax - 2b'x
|
||||
//
|
||||
// be the cost of the quadratic function defined by A and b. Then,
|
||||
// the solver terminates at iteration i if
|
||||
//
|
||||
// i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance.
|
||||
//
|
||||
// This termination criterion is more useful when using CG to
|
||||
// solve the Newton step. This particular convergence test comes
|
||||
// from Stephen Nash's work on truncated Newton
|
||||
// methods. References:
|
||||
//
|
||||
// 1. Stephen G. Nash & Ariela Sofer, Assessing A Search
|
||||
// Direction Within A Truncated Newton Method, Operation
|
||||
// Research Letters 9(1990) 219-221.
|
||||
//
|
||||
// 2. Stephen G. Nash, A Survey of Truncated Newton Methods,
|
||||
// Journal of Computational and Applied Mathematics,
|
||||
// 124(1-2), 45-59, 2000.
|
||||
//
|
||||
const double zeta = summary.num_iterations * (Q1 - Q0) / Q1;
|
||||
if (zeta < options.q_tolerance &&
|
||||
summary.num_iterations >= options.min_num_iterations) {
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message =
|
||||
StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e",
|
||||
summary.num_iterations,
|
||||
zeta,
|
||||
options.q_tolerance,
|
||||
Norm(r, options.context, options.num_threads));
|
||||
break;
|
||||
}
|
||||
Q0 = Q1;
|
||||
|
||||
// Residual based termination.
|
||||
norm_r = Norm(r, options.context, options.num_threads);
|
||||
if (norm_r <= tol_r &&
|
||||
summary.num_iterations >= options.min_num_iterations) {
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message =
|
||||
StringPrintf("Iteration: %d Convergence. |r| = %e <= %e.",
|
||||
summary.num_iterations,
|
||||
norm_r,
|
||||
tol_r);
|
||||
break;
|
||||
}
|
||||
|
||||
if (summary.num_iterations >= options.max_num_iterations) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
2
extern/ceres/internal/ceres/context.cc
vendored
2
extern/ceres/internal/ceres/context.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
178
extern/ceres/internal/ceres/context_impl.cc
vendored
178
extern/ceres/internal/ceres/context_impl.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,6 +33,8 @@
|
||||
#include <string>
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "cublas_v2.h"
|
||||
@@ -40,69 +42,155 @@
|
||||
#include "cusolverDn.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
ContextImpl::ContextImpl() = default;
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
bool ContextImpl::InitCUDA(std::string* message) {
|
||||
if (cuda_initialized_) {
|
||||
// Releases the CUDA resources held by this context: the cuSOLVER, cuBLAS and
// cuSPARSE handles and every stream in streams_. Each resource is destroyed
// only if it is non-null and is reset to nullptr afterwards; the status codes
// returned by the destroy calls are not inspected. Finally the context is
// marked as not CUDA-initialized.
void ContextImpl::TearDown() {
|
||||
if (cusolver_handle_ != nullptr) {
|
||||
cusolverDnDestroy(cusolver_handle_);
|
||||
cusolver_handle_ = nullptr;
|
||||
}
|
||||
if (cublas_handle_ != nullptr) {
|
||||
cublasDestroy(cublas_handle_);
|
||||
cublas_handle_ = nullptr;
|
||||
}
|
||||
if (cusparse_handle_ != nullptr) {
|
||||
cusparseDestroy(cusparse_handle_);
|
||||
cusparse_handle_ = nullptr;
|
||||
}
|
||||
// Streams are iterated by reference so each slot can be nulled in place.
for (auto& s : streams_) {
|
||||
if (s != nullptr) {
|
||||
cudaStreamDestroy(s);
|
||||
s = nullptr;
|
||||
}
|
||||
}
|
||||
is_cuda_initialized_ = false;
|
||||
}
|
||||
|
||||
std::string ContextImpl::CudaConfigAsString() const {
|
||||
return ceres::internal::StringPrintf(
|
||||
"======================= CUDA Device Properties ======================\n"
|
||||
"Cuda version : %d.%d\n"
|
||||
"Device ID : %d\n"
|
||||
"Device name : %s\n"
|
||||
"Total GPU memory : %6.f MiB\n"
|
||||
"GPU memory available : %6.f MiB\n"
|
||||
"Compute capability : %d.%d\n"
|
||||
"Warp size : %d\n"
|
||||
"Max threads per block : %d\n"
|
||||
"Max threads per dim : %d %d %d\n"
|
||||
"Max grid size : %d %d %d\n"
|
||||
"Multiprocessor count : %d\n"
|
||||
"cudaMallocAsync supported : %s\n"
|
||||
"====================================================================",
|
||||
cuda_version_major_,
|
||||
cuda_version_minor_,
|
||||
gpu_device_id_in_use_,
|
||||
gpu_device_properties_.name,
|
||||
gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
|
||||
GpuMemoryAvailable() / 1024.0 / 1024.0,
|
||||
gpu_device_properties_.major,
|
||||
gpu_device_properties_.minor,
|
||||
gpu_device_properties_.warpSize,
|
||||
gpu_device_properties_.maxThreadsPerBlock,
|
||||
gpu_device_properties_.maxThreadsDim[0],
|
||||
gpu_device_properties_.maxThreadsDim[1],
|
||||
gpu_device_properties_.maxThreadsDim[2],
|
||||
gpu_device_properties_.maxGridSize[0],
|
||||
gpu_device_properties_.maxGridSize[1],
|
||||
gpu_device_properties_.maxGridSize[2],
|
||||
gpu_device_properties_.multiProcessorCount,
|
||||
// In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it
|
||||
// is not available in older versions
|
||||
is_cuda_memory_pools_supported_ ? "Yes" : "No");
|
||||
}
|
||||
|
||||
size_t ContextImpl::GpuMemoryAvailable() const {
|
||||
size_t free, total;
|
||||
cudaMemGetInfo(&free, &total);
|
||||
return free;
|
||||
}
|
||||
|
||||
bool ContextImpl::InitCuda(std::string* message) {
|
||||
if (is_cuda_initialized_) {
|
||||
return true;
|
||||
}
|
||||
CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
|
||||
int cuda_version;
|
||||
CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
|
||||
cuda_version_major_ = cuda_version / 1000;
|
||||
cuda_version_minor_ = (cuda_version % 1000) / 10;
|
||||
CHECK_EQ(
|
||||
cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_),
|
||||
cudaSuccess);
|
||||
#if CUDART_VERSION >= 11020
|
||||
int is_cuda_memory_pools_supported;
|
||||
CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported,
|
||||
cudaDevAttrMemoryPoolsSupported,
|
||||
gpu_device_id_in_use_),
|
||||
cudaSuccess);
|
||||
is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1;
|
||||
#endif
|
||||
VLOG(3) << "\n" << CudaConfigAsString();
|
||||
EventLogger event_logger("InitCuda");
|
||||
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
|
||||
*message = "cuBLAS::cublasCreate failed.";
|
||||
cublas_handle_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnCreate failed.";
|
||||
cusolver_handle_ = nullptr;
|
||||
cublasDestroy(cublas_handle_);
|
||||
cublas_handle_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) !=
|
||||
cudaSuccess) {
|
||||
*message = "CUDA::cudaStreamCreateWithFlags failed.";
|
||||
cusolverDnDestroy(cusolver_handle_);
|
||||
cublasDestroy(cublas_handle_);
|
||||
cusolver_handle_ = nullptr;
|
||||
cublas_handle_ = nullptr;
|
||||
stream_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
if (cusolverDnSetStream(cusolver_handle_, stream_) !=
|
||||
CUSOLVER_STATUS_SUCCESS ||
|
||||
cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) {
|
||||
*message =
|
||||
"cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed.";
|
||||
cusolverDnDestroy(cusolver_handle_);
|
||||
cublasDestroy(cublas_handle_);
|
||||
cudaStreamDestroy(stream_);
|
||||
cusolver_handle_ = nullptr;
|
||||
"CUDA initialization failed because cuBLAS::cublasCreate failed.";
|
||||
cublas_handle_ = nullptr;
|
||||
stream_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
cuda_initialized_ = true;
|
||||
event_logger.AddEvent("cublasCreate");
|
||||
if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message =
|
||||
"CUDA initialization failed because cuSolverDN::cusolverDnCreate "
|
||||
"failed.";
|
||||
TearDown();
|
||||
return false;
|
||||
}
|
||||
event_logger.AddEvent("cusolverDnCreate");
|
||||
if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
|
||||
*message =
|
||||
"CUDA initialization failed because cuSPARSE::cusparseCreate failed.";
|
||||
TearDown();
|
||||
return false;
|
||||
}
|
||||
event_logger.AddEvent("cusparseCreate");
|
||||
for (auto& s : streams_) {
|
||||
if (cudaStreamCreateWithFlags(&s, cudaStreamNonBlocking) != cudaSuccess) {
|
||||
*message =
|
||||
"CUDA initialization failed because CUDA::cudaStreamCreateWithFlags "
|
||||
"failed.";
|
||||
TearDown();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
event_logger.AddEvent("cudaStreamCreateWithFlags");
|
||||
if (cusolverDnSetStream(cusolver_handle_, DefaultStream()) !=
|
||||
CUSOLVER_STATUS_SUCCESS ||
|
||||
cublasSetStream(cublas_handle_, DefaultStream()) !=
|
||||
CUBLAS_STATUS_SUCCESS ||
|
||||
cusparseSetStream(cusparse_handle_, DefaultStream()) !=
|
||||
CUSPARSE_STATUS_SUCCESS) {
|
||||
*message = "CUDA initialization failed because SetStream failed.";
|
||||
TearDown();
|
||||
return false;
|
||||
}
|
||||
event_logger.AddEvent("SetStream");
|
||||
is_cuda_initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
ContextImpl::~ContextImpl() {
|
||||
#ifndef CERES_NO_CUDA
|
||||
if (cuda_initialized_) {
|
||||
cusolverDnDestroy(cusolver_handle_);
|
||||
cublasDestroy(cublas_handle_);
|
||||
cudaStreamDestroy(stream_);
|
||||
}
|
||||
TearDown();
|
||||
#endif // CERES_NO_CUDA
|
||||
}
|
||||
|
||||
void ContextImpl::EnsureMinimumThreads(int num_threads) {
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
thread_pool.Resize(num_threads);
|
||||
#endif // CERES_USE_CXX_THREADS
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
90
extern/ceres/internal/ceres/context_impl.h
vendored
90
extern/ceres/internal/ceres/context_impl.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,14 +46,12 @@
|
||||
#include "cublas_v2.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "cusolverDn.h"
|
||||
#include "cusparse.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
#include "ceres/thread_pool.h"
|
||||
#endif // CERES_USE_CXX_THREADS
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT ContextImpl final : public Context {
|
||||
public:
|
||||
@@ -67,30 +65,82 @@ class CERES_NO_EXPORT ContextImpl final : public Context {
|
||||
// defined by the hardware. Otherwise this call is a no-op.
|
||||
void EnsureMinimumThreads(int num_threads);
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
ThreadPool thread_pool;
|
||||
#endif // CERES_USE_CXX_THREADS
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
// Initializes the cuSolverDN context, creates an asynchronous stream, and
|
||||
// associates the stream with cuSolverDN. Returns true iff initialization was
|
||||
// successful, else it returns false and a human-readable error message is
|
||||
// returned.
|
||||
bool InitCUDA(std::string* message);
|
||||
// Note on Ceres' use of CUDA Devices on multi-GPU systems:
|
||||
// 1. On a multi-GPU system, if nothing special is done, the "default" CUDA
|
||||
// device will be used, which is device 0.
|
||||
// 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment
|
||||
// variable, Ceres will still use device 0 visible to the program, but
|
||||
// device 0 will be the first GPU indicated in the environment variable.
|
||||
// 3. If the user explicitly selects a GPU in the host process before calling
|
||||
// Ceres, Ceres will use that GPU.
|
||||
|
||||
// Note on Ceres' use of CUDA Streams:
|
||||
// Most operations on the GPU are performed using a single stream. In
|
||||
// those cases DefaultStream() should be used. This ensures that operations
|
||||
// are stream-ordered, and might be concurrent with cpu processing with no
|
||||
// additional effort.
|
||||
//
|
||||
// a. Single-stream workloads
|
||||
// - Only use default stream
|
||||
// - Return control to the caller without synchronization whenever possible
|
||||
// - Stream synchronization occurs only after GPU to CPU transfers, and is
|
||||
// handled by CudaBuffer
|
||||
//
|
||||
// b. Multi-stream workloads
|
||||
// Multi-stream workloads are more restricted in order to make it harder to
|
||||
// get a race-condition.
|
||||
// - Should always synchronize the default stream on entry
|
||||
// - Should always synchronize all utilized streams on exit
|
||||
// - Should not make any assumptions on one of streams_[] being default
|
||||
//
|
||||
// With those rules in place
|
||||
// - All single-stream asynchronous workloads are serialized using default
|
||||
// stream
|
||||
// - Multiple-stream workloads always wait for single-stream workloads to finish
|
||||
// and leave no running computations on exit.
|
||||
// This slightly penalizes multi-stream workloads, but makes it easier to
|
||||
// avoid race conditions when a multiple-stream workload depends on the results of
|
||||
// any preceding gpu computations.
|
||||
|
||||
// Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates an
|
||||
// asynchronous CUDA stream, and associates the stream with the contexts.
|
||||
// Returns true iff initialization was successful, else it returns false and a
|
||||
// human-readable error message is returned.
|
||||
bool InitCuda(std::string* message);
|
||||
void TearDown();
|
||||
inline bool IsCudaInitialized() const { return is_cuda_initialized_; }
|
||||
// Returns a human-readable string describing the capabilities of the current
|
||||
// CUDA device. CudaConfigAsString can only be called after InitCuda has been
|
||||
// called.
|
||||
std::string CudaConfigAsString() const;
|
||||
// Returns the number of bytes of available global memory on the current CUDA
|
||||
// device. If it is called before InitCuda, it returns 0.
|
||||
size_t GpuMemoryAvailable() const;
|
||||
|
||||
// Handle to the cuSOLVER context.
|
||||
cusolverDnHandle_t cusolver_handle_ = nullptr;
|
||||
// Handle to cuBLAS context.
|
||||
cublasHandle_t cublas_handle_ = nullptr;
|
||||
// CUDA device stream.
|
||||
cudaStream_t stream_ = nullptr;
|
||||
// Indicates whether all the CUDA resources have been initialized.
|
||||
bool cuda_initialized_ = false;
|
||||
|
||||
// Default stream.
|
||||
// Kernel invocations and memory copies on this stream can be left without
|
||||
// synchronization.
|
||||
cudaStream_t DefaultStream() { return streams_[0]; }
|
||||
static constexpr int kNumCudaStreams = 2;
|
||||
cudaStream_t streams_[kNumCudaStreams] = {0};
|
||||
|
||||
cusparseHandle_t cusparse_handle_ = nullptr;
|
||||
bool is_cuda_initialized_ = false;
|
||||
int gpu_device_id_in_use_ = -1;
|
||||
cudaDeviceProp gpu_device_properties_;
|
||||
bool is_cuda_memory_pools_supported_ = false;
|
||||
int cuda_version_major_ = 0;
|
||||
int cuda_version_minor_ = 0;
|
||||
#endif // CERES_NO_CUDA
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,11 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/evaluator.h"
|
||||
@@ -49,15 +52,7 @@
|
||||
#include "ceres/trust_region_minimizer.h"
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::map;
|
||||
using std::max;
|
||||
using std::min;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
CoordinateDescentMinimizer::CoordinateDescentMinimizer(ContextImpl* context)
|
||||
: context_(context) {
|
||||
@@ -70,15 +65,19 @@ bool CoordinateDescentMinimizer::Init(
|
||||
const Program& program,
|
||||
const ProblemImpl::ParameterMap& parameter_map,
|
||||
const ParameterBlockOrdering& ordering,
|
||||
string* error) {
|
||||
std::string* /*error*/) {
|
||||
parameter_blocks_.clear();
|
||||
independent_set_offsets_.clear();
|
||||
independent_set_offsets_.push_back(0);
|
||||
|
||||
// Serialize the OrderedGroups into a vector of parameter block
|
||||
// offsets for parallel access.
|
||||
map<ParameterBlock*, int> parameter_block_index;
|
||||
map<int, set<double*>> group_to_elements = ordering.group_to_elements();
|
||||
|
||||
// TODO(sameeragarwal): Investigate if parameter_block_index should be an
|
||||
// ordered or an unordered container.
|
||||
std::map<ParameterBlock*, int> parameter_block_index;
|
||||
std::map<int, std::set<double*>> group_to_elements =
|
||||
ordering.group_to_elements();
|
||||
for (const auto& g_t_e : group_to_elements) {
|
||||
const auto& elements = g_t_e.second;
|
||||
for (double* parameter_block : elements) {
|
||||
@@ -93,7 +92,8 @@ bool CoordinateDescentMinimizer::Init(
|
||||
// The ordering does not have to contain all parameter blocks, so
|
||||
// assign zero offsets/empty independent sets to these parameter
|
||||
// blocks.
|
||||
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
if (!ordering.IsMember(parameter_block->mutable_user_state())) {
|
||||
parameter_blocks_.push_back(parameter_block);
|
||||
@@ -104,7 +104,8 @@ bool CoordinateDescentMinimizer::Init(
|
||||
// Compute the set of residual blocks that depend on each parameter
|
||||
// block.
|
||||
residual_blocks_.resize(parameter_block_index.size());
|
||||
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
|
||||
const std::vector<ResidualBlock*>& residual_blocks =
|
||||
program.residual_blocks();
|
||||
for (auto* residual_block : residual_blocks) {
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
@@ -126,7 +127,7 @@ bool CoordinateDescentMinimizer::Init(
|
||||
|
||||
void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
double* parameters,
|
||||
Solver::Summary* summary) {
|
||||
Solver::Summary* /*summary*/) {
|
||||
// Set the state and mark all parameter blocks constant.
|
||||
for (auto* parameter_block : parameter_blocks_) {
|
||||
parameter_block->SetState(parameters + parameter_block->state_offset());
|
||||
@@ -135,8 +136,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
|
||||
std::vector<std::unique_ptr<LinearSolver>> linear_solvers(
|
||||
options.num_threads);
|
||||
// std::unique_ptr<LinearSolver*[]> linear_solvers(
|
||||
// new LinearSolver*[options.num_threads]);
|
||||
|
||||
LinearSolver::Options linear_solver_options;
|
||||
linear_solver_options.type = DENSE_QR;
|
||||
@@ -155,9 +154,9 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
}
|
||||
|
||||
const int num_inner_iteration_threads =
|
||||
min(options.num_threads, num_problems);
|
||||
std::min(options.num_threads, num_problems);
|
||||
evaluator_options_.num_threads =
|
||||
max(1, options.num_threads / num_inner_iteration_threads);
|
||||
std::max(1, options.num_threads / num_inner_iteration_threads);
|
||||
|
||||
// The parameter blocks in each independent set can be optimized
|
||||
// in parallel, since they do not co-occur in any residual block.
|
||||
@@ -170,9 +169,11 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
ParameterBlock* parameter_block = parameter_blocks_[j];
|
||||
const int old_index = parameter_block->index();
|
||||
const int old_delta_offset = parameter_block->delta_offset();
|
||||
const int old_state_offset = parameter_block->state_offset();
|
||||
parameter_block->SetVarying();
|
||||
parameter_block->set_index(0);
|
||||
parameter_block->set_delta_offset(0);
|
||||
parameter_block->set_state_offset(0);
|
||||
|
||||
Program inner_program;
|
||||
inner_program.mutable_parameter_blocks()->push_back(parameter_block);
|
||||
@@ -189,11 +190,12 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
Solver::Summary inner_summary;
|
||||
Solve(&inner_program,
|
||||
linear_solvers[thread_id].get(),
|
||||
parameters + parameter_block->state_offset(),
|
||||
parameters + old_state_offset,
|
||||
&inner_summary);
|
||||
|
||||
parameter_block->set_index(old_index);
|
||||
parameter_block->set_delta_offset(old_delta_offset);
|
||||
parameter_block->set_state_offset(old_state_offset);
|
||||
parameter_block->SetState(parameters +
|
||||
parameter_block->state_offset());
|
||||
parameter_block->SetConstant();
|
||||
@@ -203,10 +205,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options,
|
||||
for (auto* parameter_block : parameter_blocks_) {
|
||||
parameter_block->SetVarying();
|
||||
}
|
||||
|
||||
// for (int i = 0; i < options.num_threads; ++i) {
|
||||
// delete linear_solvers[i];
|
||||
//}
|
||||
}
|
||||
|
||||
// Solve the optimization problem for one parameter block.
|
||||
@@ -218,7 +216,7 @@ void CoordinateDescentMinimizer::Solve(Program* program,
|
||||
summary->initial_cost = 0.0;
|
||||
summary->fixed_cost = 0.0;
|
||||
summary->final_cost = 0.0;
|
||||
string error;
|
||||
std::string error;
|
||||
|
||||
Minimizer::Options minimizer_options;
|
||||
minimizer_options.evaluator =
|
||||
@@ -241,8 +239,10 @@ void CoordinateDescentMinimizer::Solve(Program* program,
|
||||
bool CoordinateDescentMinimizer::IsOrderingValid(
|
||||
const Program& program,
|
||||
const ParameterBlockOrdering& ordering,
|
||||
string* message) {
|
||||
const map<int, set<double*>>& group_to_elements =
|
||||
std::string* message) {
|
||||
// TODO(sameeragarwal): Investigate if this should be an ordered or an
|
||||
// unordered group.
|
||||
const std::map<int, std::set<double*>>& group_to_elements =
|
||||
ordering.group_to_elements();
|
||||
|
||||
// Verify that each group is an independent set
|
||||
@@ -270,5 +270,4 @@ CoordinateDescentMinimizer::CreateOrdering(const Program& program) {
|
||||
return ordering;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,6 +31,7 @@
|
||||
#ifndef CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_
|
||||
#define CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@@ -40,8 +41,7 @@
|
||||
#include "ceres/problem_impl.h"
|
||||
#include "ceres/solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
class LinearSolver;
|
||||
@@ -103,7 +103,6 @@ class CERES_NO_EXPORT CoordinateDescentMinimizer final : public Minimizer {
|
||||
ContextImpl* context_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_
|
||||
|
||||
14
extern/ceres/internal/ceres/corrector.cc
vendored
14
extern/ceres/internal/ceres/corrector.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
Corrector::Corrector(const double sq_norm, const double rho[3]) {
|
||||
CHECK_GE(sq_norm, 0.0);
|
||||
@@ -88,7 +87,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) {
|
||||
// We now require that the first derivative of the loss function be
|
||||
// positive only if the second derivative is positive. This is
|
||||
// because when the second derivative is non-positive, we do not use
|
||||
// the second order correction suggested by BANS and instead use a
|
||||
// the second order correction suggested by BAMS and instead use a
|
||||
// simpler first order strategy which does not use a division by the
|
||||
// gradient of the loss function.
|
||||
CHECK_GT(rho[1], 0.0);
|
||||
@@ -112,7 +111,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) {
|
||||
|
||||
void Corrector::CorrectResiduals(const int num_rows, double* residuals) {
|
||||
DCHECK(residuals != nullptr);
|
||||
// Equation 11 in BANS.
|
||||
// Equation 11 in BAMS.
|
||||
VectorRef(residuals, num_rows) *= residual_scaling_;
|
||||
}
|
||||
|
||||
@@ -129,7 +128,7 @@ void Corrector::CorrectJacobian(const int num_rows,
|
||||
return;
|
||||
}
|
||||
|
||||
// Equation 11 in BANS.
|
||||
// Equation 11 in BAMS.
|
||||
//
|
||||
// J = sqrt(rho) * (J - alpha^2 r * r' J)
|
||||
//
|
||||
@@ -155,5 +154,4 @@ void Corrector::CorrectJacobian(const int num_rows,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
12
extern/ceres/internal/ceres/corrector.h
vendored
12
extern/ceres/internal/ceres/corrector.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,7 +30,7 @@
|
||||
//
|
||||
// Class definition for the object that is responsible for applying a
|
||||
// second order correction to the Gauss-Newton based on the ideas in
|
||||
// BANS by Triggs et al.
|
||||
// BAMS by Triggs et al.
|
||||
|
||||
#ifndef CERES_INTERNAL_CORRECTOR_H_
|
||||
#define CERES_INTERNAL_CORRECTOR_H_
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Corrector is responsible for applying the second order correction
|
||||
// to the residual and jacobian of a least squares problem based on a
|
||||
@@ -48,7 +47,7 @@ namespace internal {
|
||||
// The key idea here is to look at the expressions for the robustified
|
||||
// gauss newton approximation and then take its square root to get the
|
||||
// corresponding corrections to the residual and jacobian. For the
|
||||
// full expressions see Eq. 10 and 11 in BANS by Triggs et al.
|
||||
// full expressions see Eq. 10 and 11 in BAMS by Triggs et al.
|
||||
class CERES_NO_EXPORT Corrector {
|
||||
public:
|
||||
// The constructor takes the squared norm, the value, the first and
|
||||
@@ -87,8 +86,7 @@ class CERES_NO_EXPORT Corrector {
|
||||
double residual_scaling_;
|
||||
double alpha_sq_norm_;
|
||||
};
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
2
extern/ceres/internal/ceres/cost_function.cc
vendored
2
extern/ceres/internal/ceres/cost_function.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
16
extern/ceres/internal/ceres/covariance.cc
vendored
16
extern/ceres/internal/ceres/covariance.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,9 +39,6 @@
|
||||
|
||||
namespace ceres {
|
||||
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
|
||||
Covariance::Covariance(const Covariance::Options& options) {
|
||||
impl_ = std::make_unique<internal::CovarianceImpl>(options);
|
||||
}
|
||||
@@ -49,14 +46,15 @@ Covariance::Covariance(const Covariance::Options& options) {
|
||||
Covariance::~Covariance() = default;
|
||||
|
||||
bool Covariance::Compute(
|
||||
const vector<pair<const double*, const double*>>& covariance_blocks,
|
||||
const std::vector<std::pair<const double*, const double*>>&
|
||||
covariance_blocks,
|
||||
Problem* problem) {
|
||||
return impl_->Compute(covariance_blocks, problem->impl_.get());
|
||||
return impl_->Compute(covariance_blocks, problem->mutable_impl());
|
||||
}
|
||||
|
||||
bool Covariance::Compute(const vector<const double*>& parameter_blocks,
|
||||
bool Covariance::Compute(const std::vector<const double*>& parameter_blocks,
|
||||
Problem* problem) {
|
||||
return impl_->Compute(parameter_blocks, problem->impl_.get());
|
||||
return impl_->Compute(parameter_blocks, problem->mutable_impl());
|
||||
}
|
||||
|
||||
bool Covariance::GetCovarianceBlock(const double* parameter_block1,
|
||||
@@ -79,7 +77,7 @@ bool Covariance::GetCovarianceBlockInTangentSpace(
|
||||
}
|
||||
|
||||
bool Covariance::GetCovarianceMatrix(
|
||||
const vector<const double*>& parameter_blocks,
|
||||
const std::vector<const double*>& parameter_blocks,
|
||||
double* covariance_matrix) const {
|
||||
return impl_->GetCovarianceMatrixInTangentOrAmbientSpace(parameter_blocks,
|
||||
true, // ambient
|
||||
|
||||
78
extern/ceres/internal/ceres/covariance_impl.cc
vendored
78
extern/ceres/internal/ceres/covariance_impl.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -57,24 +57,12 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::swap;
|
||||
namespace ceres::internal {
|
||||
|
||||
using CovarianceBlocks = std::vector<std::pair<const double*, const double*>>;
|
||||
|
||||
CovarianceImpl::CovarianceImpl(const Covariance::Options& options)
|
||||
: options_(options), is_computed_(false), is_valid_(false) {
|
||||
#ifdef CERES_NO_THREADS
|
||||
if (options_.num_threads > 1) {
|
||||
LOG(WARNING) << "No threading support is compiled into this binary; "
|
||||
<< "only options.num_threads = 1 is supported. Switching "
|
||||
<< "to single threaded mode.";
|
||||
options_.num_threads = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
evaluate_options_.num_threads = options_.num_threads;
|
||||
evaluate_options_.apply_loss_function = options_.apply_loss_function;
|
||||
}
|
||||
@@ -176,7 +164,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace(
|
||||
const double* parameter_block2 = original_parameter_block2;
|
||||
const bool transpose = parameter_block1 > parameter_block2;
|
||||
if (transpose) {
|
||||
swap(parameter_block1, parameter_block2);
|
||||
std::swap(parameter_block1, parameter_block2);
|
||||
}
|
||||
|
||||
// Find where in the covariance matrix the block is located.
|
||||
@@ -190,7 +178,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace(
|
||||
const int* cols_begin = cols + rows[row_begin];
|
||||
|
||||
// The only part that requires work is walking the compressed column
|
||||
// vector to determine where the set of columns correspnding to the
|
||||
// vector to determine where the set of columns corresponding to the
|
||||
// covariance block begin.
|
||||
int offset = 0;
|
||||
while (cols_begin[offset] != col_begin && offset < row_size) {
|
||||
@@ -322,9 +310,8 @@ bool CovarianceImpl::GetCovarianceMatrixInTangentOrAmbientSpace(
|
||||
// Assemble the blocks in the covariance matrix.
|
||||
MatrixRef covariance(covariance_matrix, covariance_size, covariance_size);
|
||||
const int num_threads = options_.num_threads;
|
||||
std::unique_ptr<double[]> workspace(
|
||||
new double[num_threads * max_covariance_block_size *
|
||||
max_covariance_block_size]);
|
||||
auto workspace = std::make_unique<double[]>(
|
||||
num_threads * max_covariance_block_size * max_covariance_block_size);
|
||||
|
||||
bool success = true;
|
||||
|
||||
@@ -481,14 +468,12 @@ bool CovarianceImpl::ComputeCovarianceSparsity(
|
||||
// Iterate over the covariance blocks contained in this row block
|
||||
// and count the number of columns in this row block.
|
||||
int num_col_blocks = 0;
|
||||
int num_columns = 0;
|
||||
for (int j = i; j < covariance_blocks.size(); ++j, ++num_col_blocks) {
|
||||
const std::pair<const double*, const double*>& block_pair =
|
||||
covariance_blocks[j];
|
||||
if (block_pair.first != row_block) {
|
||||
break;
|
||||
}
|
||||
num_columns += problem->ParameterBlockTangentSize(block_pair.second);
|
||||
}
|
||||
|
||||
// Fill out all the compressed rows for this parameter block.
|
||||
@@ -598,9 +583,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
|
||||
cholmod_jacobian.ncol = num_cols;
|
||||
cholmod_jacobian.nzmax = num_nonzeros;
|
||||
cholmod_jacobian.nz = nullptr;
|
||||
cholmod_jacobian.p = reinterpret_cast<void*>(&transpose_rows[0]);
|
||||
cholmod_jacobian.i = reinterpret_cast<void*>(&transpose_cols[0]);
|
||||
cholmod_jacobian.x = reinterpret_cast<void*>(&transpose_values[0]);
|
||||
cholmod_jacobian.p = reinterpret_cast<void*>(transpose_rows.data());
|
||||
cholmod_jacobian.i = reinterpret_cast<void*>(transpose_cols.data());
|
||||
cholmod_jacobian.x = reinterpret_cast<void*>(transpose_values.data());
|
||||
cholmod_jacobian.z = nullptr;
|
||||
cholmod_jacobian.stype = 0; // Matrix is not symmetric.
|
||||
cholmod_jacobian.itype = CHOLMOD_LONG;
|
||||
@@ -628,13 +613,15 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
|
||||
// more efficient, both in runtime as well as the quality of
|
||||
// ordering computed. So, it may be worth doing that analysis
|
||||
// separately.
|
||||
const SuiteSparse_long rank = SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD,
|
||||
SPQR_DEFAULT_TOL,
|
||||
cholmod_jacobian.ncol,
|
||||
&cholmod_jacobian,
|
||||
&R,
|
||||
&permutation,
|
||||
&cc);
|
||||
const SuiteSparse_long rank = SuiteSparseQR<double>(
|
||||
SPQR_ORDERING_BESTAMD,
|
||||
options_.column_pivot_threshold < 0 ? SPQR_DEFAULT_TOL
|
||||
: options_.column_pivot_threshold,
|
||||
static_cast<int64_t>(cholmod_jacobian.ncol),
|
||||
&cholmod_jacobian,
|
||||
&R,
|
||||
&permutation,
|
||||
&cc);
|
||||
event_logger.AddEvent("Numeric Factorization");
|
||||
if (R == nullptr) {
|
||||
LOG(ERROR) << "Something is wrong. SuiteSparseQR returned R = nullptr.";
|
||||
@@ -678,7 +665,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() {
|
||||
// Since the covariance matrix is symmetric, the i^th row and column
|
||||
// are equal.
|
||||
const int num_threads = options_.num_threads;
|
||||
std::unique_ptr<double[]> workspace(new double[num_threads * num_cols]);
|
||||
auto workspace = std::make_unique<double[]>(num_threads * num_cols);
|
||||
|
||||
problem_->context()->EnsureMinimumThreads(num_threads);
|
||||
ParallelFor(
|
||||
@@ -830,19 +817,23 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
|
||||
jacobian.values.data());
|
||||
event_logger.AddEvent("ConvertToSparseMatrix");
|
||||
|
||||
Eigen::SparseQR<EigenSparseMatrix, Eigen::COLAMDOrdering<int>> qr_solver(
|
||||
sparse_jacobian);
|
||||
Eigen::SparseQR<EigenSparseMatrix, Eigen::COLAMDOrdering<int>> qr;
|
||||
if (options_.column_pivot_threshold > 0) {
|
||||
qr.setPivotThreshold(options_.column_pivot_threshold);
|
||||
}
|
||||
|
||||
qr.compute(sparse_jacobian);
|
||||
event_logger.AddEvent("QRDecomposition");
|
||||
|
||||
if (qr_solver.info() != Eigen::Success) {
|
||||
if (qr.info() != Eigen::Success) {
|
||||
LOG(ERROR) << "Eigen::SparseQR decomposition failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (qr_solver.rank() < jacobian.num_cols) {
|
||||
if (qr.rank() < jacobian.num_cols) {
|
||||
LOG(ERROR) << "Jacobian matrix is rank deficient. "
|
||||
<< "Number of columns: " << jacobian.num_cols
|
||||
<< " rank: " << qr_solver.rank();
|
||||
<< " rank: " << qr.rank();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -852,7 +843,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
|
||||
|
||||
// Compute the inverse column permutation used by QR factorization.
|
||||
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> inverse_permutation =
|
||||
qr_solver.colsPermutation().inverse();
|
||||
qr.colsPermutation().inverse();
|
||||
|
||||
// The following loop exploits the fact that the i^th column of A^{-1}
|
||||
// is given by the solution to the linear system
|
||||
@@ -865,7 +856,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
|
||||
// are equal.
|
||||
const int num_cols = jacobian.num_cols;
|
||||
const int num_threads = options_.num_threads;
|
||||
std::unique_ptr<double[]> workspace(new double[num_threads * num_cols]);
|
||||
auto workspace = std::make_unique<double[]>(num_threads * num_cols);
|
||||
|
||||
problem_->context()->EnsureMinimumThreads(num_threads);
|
||||
ParallelFor(
|
||||
@@ -875,9 +866,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
|
||||
if (row_end != row_begin) {
|
||||
double* solution = workspace.get() + thread_id * num_cols;
|
||||
SolveRTRWithSparseRHS<int>(num_cols,
|
||||
qr_solver.matrixR().innerIndexPtr(),
|
||||
qr_solver.matrixR().outerIndexPtr(),
|
||||
&qr_solver.matrixR().data().value(0),
|
||||
qr.matrixR().innerIndexPtr(),
|
||||
qr.matrixR().outerIndexPtr(),
|
||||
&qr.matrixR().data().value(0),
|
||||
inverse_permutation.indices().coeff(r),
|
||||
solution);
|
||||
|
||||
@@ -895,5 +886,4 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/problem_impl.h"
|
||||
#include "ceres/suitesparse.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
|
||||
@@ -96,8 +95,7 @@ class CERES_NO_EXPORT CovarianceImpl {
|
||||
std::unique_ptr<CompressedRowSparseMatrix> covariance_matrix_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
103
extern/ceres/internal/ceres/cuda_block_sparse_crs_view.cc
vendored
Normal file
103
extern/ceres/internal/ceres/cuda_block_sparse_crs_view.cc
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_block_sparse_crs_view.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/cuda_kernels_bsm_to_crs.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Creates a CRS matrix that mirrors `bsm`.
//
// The block-sparse structure of `bsm` is first wrapped into a
// CudaBlockSparseStructure, from which FillCRSStructure populates the CRS
// row/column index buffers. Depending on whether the value layout of `bsm`
// already matches CRS, either the block structure is discarded, or a streamed
// buffer is created so that values can be permuted on upload. Finally the
// values of `bsm` are uploaded via UpdateValues().
CudaBlockSparseCRSView::CudaBlockSparseCRSView(const BlockSparseMatrix& bsm,
                                               ContextImpl* context)
    : context_(context) {
  block_structure_ = std::make_unique<CudaBlockSparseStructure>(
      *bsm.block_structure(), context);
  CudaBlockSparseStructure& structure = *block_structure_;

  // CRS index arrays: one entry per row plus a terminator, and one column
  // index per non-zero value.
  CudaBuffer<int32_t> crs_rows(context, bsm.num_rows() + 1);
  CudaBuffer<int32_t> crs_cols(context, bsm.num_nonzeros());
  FillCRSStructure(structure.num_row_blocks(),
                   bsm.num_rows(),
                   structure.first_cell_in_row_block(),
                   structure.cells(),
                   structure.row_blocks(),
                   structure.col_blocks(),
                   crs_rows.data(),
                   crs_cols.data(),
                   context->DefaultStream(),
                   context->is_cuda_memory_pools_supported_);

  is_crs_compatible_ = structure.IsCrsCompatible();
  if (!is_crs_compatible_) {
    // Values have to be permuted while being transferred, which requires both
    // the block structure and a staging buffer.
    streamed_buffer_ = std::make_unique<CudaStreamedBuffer<double>>(
        context_, kMaxTemporaryArraySize);
  } else {
    // Values can be copied verbatim, so neither the block structure nor
    // streamed_buffer_ is needed. `structure` must not be used past this
    // point.
    VLOG(3) << "Block-sparse matrix is compatible with CRS, discarding "
               "block-structure";
    block_structure_.reset();
  }

  crs_matrix_ = std::make_unique<CudaSparseMatrix>(
      bsm.num_cols(), std::move(crs_rows), std::move(crs_cols), context);
  UpdateValues(bsm);
}
|
||||
|
||||
void CudaBlockSparseCRSView::UpdateValues(const BlockSparseMatrix& bsm) {
|
||||
if (is_crs_compatible_) {
|
||||
// Values of CRS-compatible matrices can be copied as-is
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaMemcpyAsync(crs_matrix_->mutable_values(),
|
||||
bsm.values(),
|
||||
bsm.num_nonzeros() * sizeof(double),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()));
|
||||
return;
|
||||
}
|
||||
streamed_buffer_->CopyToGpu(
|
||||
bsm.values(),
|
||||
bsm.num_nonzeros(),
|
||||
[bs = block_structure_.get(), crs = crs_matrix_.get()](
|
||||
const double* values, int num_values, int offset, auto stream) {
|
||||
PermuteToCRS(offset,
|
||||
num_values,
|
||||
bs->num_row_blocks(),
|
||||
bs->first_cell_in_row_block(),
|
||||
bs->cells(),
|
||||
bs->row_blocks(),
|
||||
bs->col_blocks(),
|
||||
crs->rows(),
|
||||
values,
|
||||
crs->mutable_values(),
|
||||
stream);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
#endif // CERES_NO_CUDA
|
||||
108
extern/ceres/internal/ceres/cuda_block_sparse_crs_view.h
vendored
Normal file
108
extern/ceres/internal/ceres/cuda_block_sparse_crs_view.h
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
//
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
#define CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/cuda_block_structure.h"
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
#include "ceres/cuda_streamed_buffer.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
// We use cuSPARSE library for SpMV operations. However, it does not support
|
||||
// block-sparse format with varying size of the blocks. Thus, we perform the
|
||||
// following operations in order to compute products of block-sparse matrices
|
||||
// and dense vectors on gpu:
|
||||
// - Once per block-sparse structure update:
|
||||
// - Compute CRS structure from block-sparse structure and check if values of
|
||||
// block-sparse matrix would have the same order as values of CRS matrix
|
||||
// - Once per block-sparse values update:
|
||||
// - Update values in CRS matrix with values of block-sparse matrix
|
||||
//
|
||||
// Only block-sparse matrices with sequential order of cells are supported.
|
||||
//
|
||||
// UpdateValues method updates values:
|
||||
// - In a single host-to-device copy for matrices with CRS-compatible value
|
||||
// layout
|
||||
// - Simultaneously transferring and permuting values using CudaStreamedBuffer
|
||||
// otherwise
|
||||
class CERES_NO_EXPORT CudaBlockSparseCRSView {
|
||||
public:
|
||||
// Initializes internal CRS matrix using structure and values of block-sparse
|
||||
// matrix For block-sparse matrices that have value layout different from CRS
|
||||
// block-sparse structure will be stored/
|
||||
CudaBlockSparseCRSView(const BlockSparseMatrix& bsm, ContextImpl* context);
|
||||
|
||||
const CudaSparseMatrix* crs_matrix() const { return crs_matrix_.get(); }
|
||||
CudaSparseMatrix* mutable_crs_matrix() { return crs_matrix_.get(); }
|
||||
|
||||
// Update values of crs_matrix_ using values of block-sparse matrix.
|
||||
// Assumes that bsm has the same block-sparse structure as matrix that was
|
||||
// used for construction.
|
||||
void UpdateValues(const BlockSparseMatrix& bsm);
|
||||
|
||||
// Returns true if block-sparse matrix had CRS-compatible value layout
|
||||
bool IsCrsCompatible() const { return is_crs_compatible_; }
|
||||
|
||||
void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const {
|
||||
crs_matrix()->LeftMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const {
|
||||
crs_matrix()->RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
private:
|
||||
// Value permutation kernel performs a single element-wise operation per
|
||||
// thread, thus performing permutation in blocks of 8 megabytes of
|
||||
// block-sparse values seems reasonable
|
||||
static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024;
|
||||
std::unique_ptr<CudaSparseMatrix> crs_matrix_;
|
||||
// Only created if block-sparse matrix has non-CRS value layout
|
||||
std::unique_ptr<CudaStreamedBuffer<double>> streamed_buffer_;
|
||||
// Only stored if block-sparse matrix has non-CRS value layout
|
||||
std::unique_ptr<CudaBlockSparseStructure> block_structure_;
|
||||
bool is_crs_compatible_;
|
||||
ContextImpl* context_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
164
extern/ceres/internal/ceres/cuda_block_sparse_crs_view_test.cc
vendored
Normal file
164
extern/ceres/internal/ceres/cuda_block_sparse_crs_view_test.cc
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_block_sparse_crs_view.h"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
namespace ceres::internal {
|
||||
class CudaBlockSparseCRSViewTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() final {
|
||||
std::string message;
|
||||
CHECK(context_.InitCuda(&message))
|
||||
<< "InitCuda() failed because: " << message;
|
||||
|
||||
BlockSparseMatrix::RandomMatrixOptions options;
|
||||
options.num_row_blocks = 1234;
|
||||
options.min_row_block_size = 1;
|
||||
options.max_row_block_size = 10;
|
||||
options.num_col_blocks = 567;
|
||||
options.min_col_block_size = 1;
|
||||
options.max_col_block_size = 10;
|
||||
options.block_density = 0.2;
|
||||
std::mt19937 rng;
|
||||
|
||||
// Block-sparse matrix with order of values different from CRS
|
||||
block_sparse_non_crs_compatible_ =
|
||||
BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
|
||||
std::iota(block_sparse_non_crs_compatible_->mutable_values(),
|
||||
block_sparse_non_crs_compatible_->mutable_values() +
|
||||
block_sparse_non_crs_compatible_->num_nonzeros(),
|
||||
1);
|
||||
|
||||
options.max_row_block_size = 1;
|
||||
// Block-sparse matrix with CRS order of values (row-blocks are rows)
|
||||
block_sparse_crs_compatible_rows_ =
|
||||
BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
|
||||
std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
|
||||
block_sparse_crs_compatible_rows_->mutable_values() +
|
||||
block_sparse_crs_compatible_rows_->num_nonzeros(),
|
||||
1);
|
||||
// Block-sparse matrix with CRS order of values (single cell per row-block)
|
||||
auto bs = std::make_unique<CompressedRowBlockStructure>(
|
||||
*block_sparse_non_crs_compatible_->block_structure());
|
||||
|
||||
int num_nonzeros = 0;
|
||||
for (auto& r : bs->rows) {
|
||||
const int num_cells = r.cells.size();
|
||||
if (num_cells > 1) {
|
||||
std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
|
||||
const int selected_cell = uniform_cell(rng);
|
||||
std::swap(r.cells[0], r.cells[selected_cell]);
|
||||
r.cells.resize(1);
|
||||
}
|
||||
const int row_block_size = r.block.size;
|
||||
for (auto& c : r.cells) {
|
||||
c.position = num_nonzeros;
|
||||
const int col_block_size = bs->cols[c.block_id].size;
|
||||
num_nonzeros += col_block_size * row_block_size;
|
||||
}
|
||||
}
|
||||
block_sparse_crs_compatible_single_cell_ =
|
||||
std::make_unique<BlockSparseMatrix>(bs.release());
|
||||
std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
|
||||
block_sparse_crs_compatible_single_cell_->mutable_values() +
|
||||
block_sparse_crs_compatible_single_cell_->num_nonzeros(),
|
||||
1);
|
||||
}
|
||||
|
||||
void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
|
||||
ASSERT_EQ(csm.num_cols(), bsm.num_cols());
|
||||
ASSERT_EQ(csm.num_rows(), bsm.num_rows());
|
||||
ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
|
||||
const int num_rows = bsm.num_rows();
|
||||
const int num_cols = bsm.num_cols();
|
||||
Vector x(num_cols);
|
||||
Vector y(num_rows);
|
||||
CudaVector x_cuda(&context_, num_cols);
|
||||
CudaVector y_cuda(&context_, num_rows);
|
||||
Vector y_cuda_host(num_rows);
|
||||
|
||||
for (int i = 0; i < num_cols; ++i) {
|
||||
x.setZero();
|
||||
y.setZero();
|
||||
y_cuda.SetZero();
|
||||
x[i] = 1.;
|
||||
x_cuda.CopyFromCpu(x);
|
||||
csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
|
||||
bsm.RightMultiplyAndAccumulate(
|
||||
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
|
||||
y_cuda.CopyTo(&y_cuda_host);
|
||||
// There will be up to 1 non-zero product per row, thus we expect an exact
|
||||
// match on 32-bit integer indices
|
||||
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
|
||||
std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
|
||||
std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
|
||||
ContextImpl context_;
|
||||
};
|
||||
|
||||
// A matrix whose value order differs from CRS has to be reported as
// non-compatible, and its CRS view has to match the block-sparse matrix.
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
  CudaBlockSparseCRSView view(*block_sparse_non_crs_compatible_, &context_);
  ASSERT_EQ(view.IsCrsCompatible(), false);

  const CudaSparseMatrix* crs = view.crs_matrix();
  Compare(*block_sparse_non_crs_compatible_, *crs);
}
|
||||
|
||||
// A matrix whose row-blocks are single rows has to be reported as
// CRS-compatible, and its CRS view has to match the block-sparse matrix.
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
  CudaBlockSparseCRSView view(*block_sparse_crs_compatible_rows_, &context_);
  ASSERT_EQ(view.IsCrsCompatible(), true);

  const CudaSparseMatrix* crs = view.crs_matrix();
  Compare(*block_sparse_crs_compatible_rows_, *crs);
}
|
||||
|
||||
// A matrix with a single cell per row-block has to be reported as
// CRS-compatible, and its CRS view has to match the block-sparse matrix.
TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
  CudaBlockSparseCRSView view(*block_sparse_crs_compatible_single_cell_,
                              &context_);
  ASSERT_EQ(view.IsCrsCompatible(), true);

  const CudaSparseMatrix* crs = view.crs_matrix();
  Compare(*block_sparse_crs_compatible_single_cell_, *crs);
}
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
234
extern/ceres/internal/ceres/cuda_block_structure.cc
vendored
Normal file
234
extern/ceres/internal/ceres/cuda_block_structure.cc
vendored
Normal file
@@ -0,0 +1,234 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_block_structure.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
// Dimension of a sorted array of blocks
|
||||
inline int Dimension(const std::vector<Block>& blocks) {
|
||||
if (blocks.empty()) {
|
||||
return 0;
|
||||
}
|
||||
const auto& last = blocks.back();
|
||||
return last.size + last.position;
|
||||
}
|
||||
} // namespace
|
||||
// Overload without an E/F partition: forwards to the partitioned constructor
// with zero column blocks assigned to the E sub-matrix.
CudaBlockSparseStructure::CudaBlockSparseStructure(
    const CompressedRowBlockStructure& block_structure, ContextImpl* context)
    : CudaBlockSparseStructure(
          block_structure, /*num_col_blocks_e=*/0, context) {}
|
||||
|
||||
// Flattens `block_structure` into contiguous CPU arrays (row blocks, column
// blocks, cells, and per-row-block offsets) and copies them into GPU buffers
// owned by this object.
//
// block_structure:  block-sparse structure to mirror.
// num_col_blocks_e: number of leading column blocks forming the E sub-matrix
//                   of a partitioned matrix. With 0 no row-block is treated as
//                   belonging to E.
// context:          context used to construct the GPU buffers.
//
// Row-blocks are processed in two passes. The first pass consumes the leading
// row-blocks whose first cell lies in E; the first cell of each such row-block
// is accounted to E and the remaining ones to F. The second pass consumes all
// remaining row-blocks, which must only contain F cells.
//
// CHECK-fails if the values of F do not start right after the values of E, or
// if the F cells are not laid out sequentially.
CudaBlockSparseStructure::CudaBlockSparseStructure(
    const CompressedRowBlockStructure& block_structure,
    const int num_col_blocks_e,
    ContextImpl* context)
    : first_cell_in_row_block_(context),
      value_offset_row_block_f_(context),
      cells_(context),
      row_blocks_(context),
      col_blocks_(context) {
  // Row blocks extracted from CompressedRowBlockStructure::rows
  std::vector<Block> row_blocks;
  // Column blocks can be reused as-is
  const auto& col_blocks = block_structure.cols;

  // Row block offset is an index of the first cell corresponding to row block
  std::vector<int> first_cell_in_row_block;
  // Offset of the first value in the first non-empty row-block of F sub-matrix
  std::vector<int> value_offset_row_block_f;
  // Flat array of all cells from all row-blocks
  std::vector<Cell> cells;

  // Row-blocks without cells in F are recorded as -1 in
  // value_offset_row_block_f until the offset of the next F value becomes
  // known. This replaces the trailing run of such placeholders with `offset`,
  // walking backwards from the end of the array and stopping at the first
  // entry that has already been resolved.
  const auto resolve_pending_offsets = [&value_offset_row_block_f](
                                           const int offset) {
    for (auto it = value_offset_row_block_f.rbegin();
         it != value_offset_row_block_f.rend() && *it == -1;
         ++it) {
      *it = offset;
    }
  };

  int f_values_offset = -1;
  num_nonzeros_e_ = 0;
  is_crs_compatible_ = true;
  num_row_blocks_ = block_structure.rows.size();
  num_col_blocks_ = col_blocks.size();

  row_blocks.reserve(num_row_blocks_);
  first_cell_in_row_block.reserve(num_row_blocks_ + 1);
  value_offset_row_block_f.reserve(num_row_blocks_ + 1);
  num_nonzeros_ = 0;
  // Block-sparse matrices arising from block-jacobian writer are expected to
  // have sequential layout (for partitioned matrices - it is expected that both
  // E and F sub-matrices have sequential layout).
  bool sequential_layout = true;
  int row_block_id = 0;
  num_row_blocks_e_ = 0;

  // First pass: row-blocks having a cell in E.
  for (; row_block_id < num_row_blocks_; ++row_block_id) {
    const auto& r = block_structure.rows[row_block_id];
    const int row_block_size = r.block.size;
    const int num_cells = r.cells.size();

    if (num_col_blocks_e == 0 || r.cells.empty() ||
        r.cells[0].block_id >= num_col_blocks_e) {
      break;
    }
    num_row_blocks_e_ = row_block_id + 1;
    // In E sub-matrix there is exactly a single E cell in the row
    // since E cells are stored separately from F cells, crs-compatibility of
    // F sub-matrix only breaks if there are more than 2 cells in row (that
    // is, more than 1 cell in F sub-matrix)
    if (num_cells > 2 && row_block_size > 1) {
      is_crs_compatible_ = false;
    }
    row_blocks.emplace_back(r.block);
    first_cell_in_row_block.push_back(cells.size());

    for (int cell_id = 0; cell_id < num_cells; ++cell_id) {
      const auto& c = r.cells[cell_id];
      const int col_block_size = col_blocks[c.block_id].size;
      const int cell_size = col_block_size * row_block_size;
      cells.push_back(c);
      if (cell_id == 0) {
        // The first cell of the row-block belongs to E.
        DCHECK(c.position == num_nonzeros_e_);
        num_nonzeros_e_ += cell_size;
      } else {
        if (f_values_offset == -1) {
          // Very first F cell: remember where the values of F begin.
          num_nonzeros_ = c.position;
          f_values_offset = c.position;
        }
        sequential_layout &= c.position == num_nonzeros_;
        num_nonzeros_ += cell_size;
        if (cell_id == 1) {
          // Correct value_offset_row_block_f for empty row-blocks of F
          // preceding this one
          resolve_pending_offsets(c.position);
          value_offset_row_block_f.push_back(c.position);
        }
      }
    }
    if (num_cells == 1) {
      // Only an E cell in this row-block; the F offset is resolved later.
      value_offset_row_block_f.push_back(-1);
    }
  }

  // Second pass: row-blocks containing F cells only.
  for (; row_block_id < num_row_blocks_; ++row_block_id) {
    const auto& r = block_structure.rows[row_block_id];
    const int row_block_size = r.block.size;
    const int num_cells = r.cells.size();
    // After num_row_blocks_e_ row-blocks, there should be no cells in E
    // sub-matrix. Thus crs-compatibility of F sub-matrix breaks if there are
    // more than one cells in the row-block
    if (num_cells > 1 && row_block_size > 1) {
      is_crs_compatible_ = false;
    }
    row_blocks.emplace_back(r.block);
    first_cell_in_row_block.push_back(cells.size());

    if (r.cells.empty()) {
      value_offset_row_block_f.push_back(-1);
    } else {
      // Empty row-blocks preceding this one start at the first value of this
      // row-block. The offset has to come from the current row-block rather
      // than from the flat `cells` array, which may still be empty here and
      // whose first entry belongs to an unrelated row-block otherwise.
      const int first_value_offset = r.cells[0].position;
      resolve_pending_offsets(first_value_offset);
      value_offset_row_block_f.push_back(first_value_offset);
    }
    for (const auto& c : r.cells) {
      const int col_block_size = col_blocks[c.block_id].size;
      const int cell_size = col_block_size * row_block_size;
      cells.push_back(c);
      DCHECK(c.block_id >= num_col_blocks_e);
      if (f_values_offset == -1) {
        num_nonzeros_ = c.position;
        f_values_offset = c.position;
      }
      sequential_layout &= c.position == num_nonzeros_;
      num_nonzeros_ += cell_size;
    }
  }

  if (f_values_offset == -1) {
    // There are no F cells at all: F starts (and ends) right after E.
    f_values_offset = num_nonzeros_e_;
    num_nonzeros_ = num_nonzeros_e_;
  }
  // Fill non-zero offsets for the last rows of F submatrix
  resolve_pending_offsets(num_nonzeros_);
  value_offset_row_block_f.push_back(num_nonzeros_);
  CHECK_EQ(num_nonzeros_e_, f_values_offset);
  first_cell_in_row_block.push_back(cells.size());
  num_cells_ = cells.size();

  num_rows_ = Dimension(row_blocks);
  num_cols_ = Dimension(col_blocks);

  CHECK(sequential_layout);

  if (VLOG_IS_ON(3)) {
    const size_t first_cell_in_row_block_size =
        first_cell_in_row_block.size() * sizeof(int);
    const size_t cells_size = cells.size() * sizeof(Cell);
    const size_t row_blocks_size = row_blocks.size() * sizeof(Block);
    const size_t col_blocks_size = col_blocks.size() * sizeof(Block);
    const size_t total_size = first_cell_in_row_block_size + cells_size +
                              col_blocks_size + row_blocks_size;
    // Size of the block structure relative to the index and value arrays of
    // an equivalent CRS matrix.
    const double ratio =
        (100. * total_size) / (num_nonzeros_ * (sizeof(int) + sizeof(double)) +
                               num_rows_ * sizeof(int));
    VLOG(3) << "\nCudaBlockSparseStructure:\n"
               "\tRow block offsets: "
            << first_cell_in_row_block_size
            << " bytes\n"
               "\tColumn blocks: "
            << col_blocks_size
            << " bytes\n"
               "\tRow blocks: "
            << row_blocks_size
            << " bytes\n"
               "\tCells: "
            << cells_size << " bytes\n\tTotal: " << total_size
            << " bytes of GPU memory (" << ratio << "% of CRS matrix size)";
  }

  first_cell_in_row_block_.CopyFromCpuVector(first_cell_in_row_block);
  cells_.CopyFromCpuVector(cells);
  row_blocks_.CopyFromCpuVector(row_blocks);
  col_blocks_.CopyFromCpuVector(col_blocks);
  // Per-row-block F offsets are only uploaded for partitioned matrices.
  if (num_col_blocks_e || num_row_blocks_e_) {
    value_offset_row_block_f_.CopyFromCpuVector(value_offset_row_block_f);
  }
}
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
120
extern/ceres/internal/ceres/cuda_block_structure.h
vendored
Normal file
120
extern/ceres/internal/ceres/cuda_block_structure.h
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
|
||||
#define CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/cuda_buffer.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
class CudaBlockStructureTest;
|
||||
|
||||
// This class stores a read-only block-sparse structure in gpu memory.
|
||||
// Invariants are the same as those of CompressedRowBlockStructure.
|
||||
// In order to simplify allocation and copying data to gpu, cells from all
|
||||
// row-blocks are stored in a single array sequentially. Array
|
||||
// first_cell_in_row_block of size num_row_blocks + 1 allows to identify range
|
||||
// of cells corresponding to a row-block. Cells corresponding to i-th row-block
|
||||
// are stored in sub-array cells[first_cell_in_row_block[i]; ...
|
||||
// first_cell_in_row_block[i + 1] - 1], and their order is preserved.
|
||||
class CERES_NO_EXPORT CudaBlockSparseStructure {
|
||||
public:
|
||||
// CompressedRowBlockStructure is contains a vector of CompressedLists, with
|
||||
// each CompressedList containing a vector of Cells. We precompute a flat
|
||||
// array of cells on cpu and transfer it to the gpu.
|
||||
CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure,
|
||||
ContextImpl* context);
|
||||
// In the case of partitioned matrices, number of non-zeros in E and layout of
|
||||
// F are computed
|
||||
CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure,
|
||||
const int num_col_blocks_e,
|
||||
ContextImpl* context);
|
||||
|
||||
int num_rows() const { return num_rows_; }
|
||||
int num_cols() const { return num_cols_; }
|
||||
int num_cells() const { return num_cells_; }
|
||||
int num_nonzeros() const { return num_nonzeros_; }
|
||||
// When partitioned matrix constructor was used, returns number of non-zeros
|
||||
// in E sub-matrix
|
||||
int num_nonzeros_e() const { return num_nonzeros_e_; }
|
||||
int num_row_blocks() const { return num_row_blocks_; }
|
||||
int num_row_blocks_e() const { return num_row_blocks_e_; }
|
||||
int num_col_blocks() const { return num_col_blocks_; }
|
||||
|
||||
// Returns true if values from block-sparse matrix (F sub-matrix in
|
||||
// partitioned case) can be copied to CRS matrix as-is. This is possible if
|
||||
// each row-block is stored in CRS order:
|
||||
// - Row-block consists of a single row
|
||||
// - Row-block contains a single cell
|
||||
bool IsCrsCompatible() const { return is_crs_compatible_; }
|
||||
|
||||
// Device pointer to array of num_row_blocks + 1 indices of the first cell of
|
||||
// row block
|
||||
const int* first_cell_in_row_block() const {
|
||||
return first_cell_in_row_block_.data();
|
||||
}
|
||||
// Device pointer to array of num_row_blocks + 1 indices of the first value in
|
||||
// this or subsequent row-blocks of submatrix F
|
||||
const int* value_offset_row_block_f() const {
|
||||
return value_offset_row_block_f_.data();
|
||||
}
|
||||
// Device pointer to array of num_cells cells, sorted by row-block
|
||||
const Cell* cells() const { return cells_.data(); }
|
||||
// Device pointer to array of row blocks
|
||||
const Block* row_blocks() const { return row_blocks_.data(); }
|
||||
// Device pointer to array of column blocks
|
||||
const Block* col_blocks() const { return col_blocks_.data(); }
|
||||
|
||||
private:
|
||||
int num_rows_;
|
||||
int num_cols_;
|
||||
int num_cells_;
|
||||
int num_nonzeros_;
|
||||
int num_nonzeros_e_;
|
||||
int num_row_blocks_;
|
||||
int num_row_blocks_e_;
|
||||
int num_col_blocks_;
|
||||
bool is_crs_compatible_;
|
||||
CudaBuffer<int> first_cell_in_row_block_;
|
||||
CudaBuffer<int> value_offset_row_block_f_;
|
||||
CudaBuffer<Cell> cells_;
|
||||
CudaBuffer<Block> row_blocks_;
|
||||
CudaBuffer<Block> col_blocks_;
|
||||
friend class CudaBlockStructureTest;
|
||||
};
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_
|
||||
144
extern/ceres/internal/ceres/cuda_block_structure_test.cc
vendored
Normal file
144
extern/ceres/internal/ceres/cuda_block_structure_test.cc
vendored
Normal file
@@ -0,0 +1,144 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/cuda_block_structure.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
class CudaBlockStructureTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() final {
|
||||
std::string message;
|
||||
CHECK(context_.InitCuda(&message))
|
||||
<< "InitCuda() failed because: " << message;
|
||||
|
||||
BlockSparseMatrix::RandomMatrixOptions options;
|
||||
options.num_row_blocks = 1234;
|
||||
options.min_row_block_size = 1;
|
||||
options.max_row_block_size = 10;
|
||||
options.num_col_blocks = 567;
|
||||
options.min_col_block_size = 1;
|
||||
options.max_col_block_size = 10;
|
||||
options.block_density = 0.2;
|
||||
std::mt19937 rng;
|
||||
A_ = BlockSparseMatrix::CreateRandomMatrix(options, rng);
|
||||
std::iota(
|
||||
A_->mutable_values(), A_->mutable_values() + A_->num_nonzeros(), 1);
|
||||
}
|
||||
|
||||
std::vector<Cell> GetCells(const CudaBlockSparseStructure& structure) {
|
||||
const auto& cuda_buffer = structure.cells_;
|
||||
std::vector<Cell> cells(cuda_buffer.size());
|
||||
cuda_buffer.CopyToCpu(cells.data(), cells.size());
|
||||
return cells;
|
||||
}
|
||||
std::vector<Block> GetRowBlocks(const CudaBlockSparseStructure& structure) {
|
||||
const auto& cuda_buffer = structure.row_blocks_;
|
||||
std::vector<Block> blocks(cuda_buffer.size());
|
||||
cuda_buffer.CopyToCpu(blocks.data(), blocks.size());
|
||||
return blocks;
|
||||
}
|
||||
std::vector<Block> GetColBlocks(const CudaBlockSparseStructure& structure) {
|
||||
const auto& cuda_buffer = structure.col_blocks_;
|
||||
std::vector<Block> blocks(cuda_buffer.size());
|
||||
cuda_buffer.CopyToCpu(blocks.data(), blocks.size());
|
||||
return blocks;
|
||||
}
|
||||
std::vector<int> GetRowBlockOffsets(
|
||||
const CudaBlockSparseStructure& structure) {
|
||||
const auto& cuda_buffer = structure.first_cell_in_row_block_;
|
||||
std::vector<int> first_cell_in_row_block(cuda_buffer.size());
|
||||
cuda_buffer.CopyToCpu(first_cell_in_row_block.data(),
|
||||
first_cell_in_row_block.size());
|
||||
return first_cell_in_row_block;
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> A_;
|
||||
ContextImpl context_;
|
||||
};
|
||||
|
||||
// Uploads the block structure of A_ to the GPU, reads every array back and
// checks that it matches the CPU-side structure element by element.
TEST_F(CudaBlockStructureTest, StructureIdentity) {
  const auto* cpu_structure = A_->block_structure();
  const int num_row_blocks = cpu_structure->rows.size();
  const int num_col_blocks = cpu_structure->cols.size();

  CudaBlockSparseStructure gpu_structure(*cpu_structure, &context_);

  // Scalar dimensions.
  ASSERT_EQ(gpu_structure.num_rows(), A_->num_rows());
  ASSERT_EQ(gpu_structure.num_cols(), A_->num_cols());
  ASSERT_EQ(gpu_structure.num_nonzeros(), A_->num_nonzeros());
  ASSERT_EQ(gpu_structure.num_row_blocks(), num_row_blocks);
  ASSERT_EQ(gpu_structure.num_col_blocks(), num_col_blocks);

  // Column blocks.
  const std::vector<Block> col_blocks = GetColBlocks(gpu_structure);
  ASSERT_EQ(col_blocks.size(), num_col_blocks);
  for (int c = 0; c < num_col_blocks; ++c) {
    const auto& expected_block = cpu_structure->cols[c];
    EXPECT_EQ(expected_block.position, col_blocks[c].position);
    EXPECT_EQ(expected_block.size, col_blocks[c].size);
  }

  // Row blocks and the flattened cell array.
  const std::vector<Cell> cells = GetCells(gpu_structure);
  const std::vector<int> first_cell_in_row_block =
      GetRowBlockOffsets(gpu_structure);
  const std::vector<Block> row_blocks = GetRowBlocks(gpu_structure);

  ASSERT_EQ(row_blocks.size(), num_row_blocks);
  ASSERT_EQ(first_cell_in_row_block.size(), num_row_blocks + 1);
  ASSERT_EQ(first_cell_in_row_block.back(), cells.size());

  for (int r = 0; r < num_row_blocks; ++r) {
    const auto& expected_row = cpu_structure->rows[r];
    const int num_cells = expected_row.cells.size();
    EXPECT_EQ(row_blocks[r].position, expected_row.block.position);
    EXPECT_EQ(row_blocks[r].size, expected_row.block.size);

    // Cells of row-block r occupy [range_begin, range_end) in the flat array.
    const int range_begin = first_cell_in_row_block[r];
    const int range_end = first_cell_in_row_block[r + 1];
    ASSERT_EQ(range_end - range_begin, num_cells);
    for (int j = 0; j < num_cells; ++j) {
      const auto& expected_cell = expected_row.cells[j];
      const auto& actual_cell = cells[range_begin + j];
      EXPECT_EQ(actual_cell.block_id, expected_cell.block_id);
      EXPECT_EQ(actual_cell.position, expected_cell.position);
    }
  }
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
105
extern/ceres/internal/ceres/cuda_buffer.h
vendored
105
extern/ceres/internal/ceres/cuda_buffer.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,6 +31,7 @@
|
||||
#ifndef CERES_INTERNAL_CUDA_BUFFER_H_
|
||||
#define CERES_INTERNAL_CUDA_BUFFER_H_
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
@@ -40,17 +41,27 @@
|
||||
#include "cuda_runtime.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
// An encapsulated buffer to maintain GPU memory, and handle transfers between
|
||||
// GPU and system memory. It is the responsibility of the user to ensure that
|
||||
// the appropriate GPU device is selected before each subroutine is called. This
|
||||
// is particularly important when using multiple GPU devices on different CPU
|
||||
// threads, since active Cuda devices are determined by the cuda runtime on a
|
||||
// per-thread basis. Note that unless otherwise specified, all methods use the
|
||||
// default stream, and are synchronous.
|
||||
// per-thread basis.
|
||||
template <typename T>
|
||||
class CudaBuffer {
|
||||
public:
|
||||
CudaBuffer() = default;
|
||||
explicit CudaBuffer(ContextImpl* context) : context_(context) {}
|
||||
CudaBuffer(ContextImpl* context, int size) : context_(context) {
|
||||
Reserve(size);
|
||||
}
|
||||
|
||||
// Move constructor: takes over the GPU allocation and context of `other` and
// leaves `other` empty (null data pointer, zero size), so the allocation is
// only ever referenced by one buffer. It only reassigns pointers and
// integers and therefore cannot throw; it is marked noexcept so that
// standard containers and algorithms can rely on that.
CudaBuffer(CudaBuffer&& other) noexcept
    : data_(other.data_), size_(other.size_), context_(other.context_) {
  other.data_ = nullptr;
  other.size_ = 0;
}
|
||||
|
||||
CudaBuffer(const CudaBuffer&) = delete;
|
||||
CudaBuffer& operator=(const CudaBuffer&) = delete;
|
||||
|
||||
@@ -67,41 +78,95 @@ class CudaBuffer {
|
||||
if (data_ != nullptr) {
|
||||
CHECK_EQ(cudaFree(data_), cudaSuccess);
|
||||
}
|
||||
CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess);
|
||||
CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess)
|
||||
<< "Failed to allocate " << size * sizeof(T)
|
||||
<< " bytes of GPU memory";
|
||||
size_ = size;
|
||||
}
|
||||
}
|
||||
|
||||
// Perform an asynchronous copy from CPU memory to GPU memory using the stream
|
||||
// provided.
|
||||
void CopyToGpuAsync(const T* data, const size_t size, cudaStream_t stream) {
|
||||
// Perform an asynchronous copy from CPU memory to GPU memory managed by this
|
||||
// CudaBuffer instance using the context's default stream.
|
||||
void CopyFromCpu(const T* data, const size_t size) {
|
||||
Reserve(size);
|
||||
CHECK_EQ(cudaMemcpyAsync(
|
||||
data_, data, size * sizeof(T), cudaMemcpyHostToDevice, stream),
|
||||
CHECK_EQ(cudaMemcpyAsync(data_,
|
||||
data,
|
||||
size * sizeof(T),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()),
|
||||
cudaSuccess);
|
||||
}
|
||||
|
||||
// Copy data from the GPU to CPU memory. This is necessarily synchronous since
|
||||
// any potential GPU kernels that may be writing to the buffer must finish
|
||||
// before the transfer happens.
|
||||
void CopyToHost(T* data, const size_t size) {
|
||||
// Perform an asynchronous copy from a vector in CPU memory to GPU memory
|
||||
// managed by this CudaBuffer instance.
|
||||
void CopyFromCpuVector(const std::vector<T>& data) {
|
||||
Reserve(data.size());
|
||||
CHECK_EQ(cudaMemcpyAsync(data_,
|
||||
data.data(),
|
||||
data.size() * sizeof(T),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()),
|
||||
cudaSuccess);
|
||||
}
|
||||
|
||||
// Perform an asynchronous copy from another GPU memory array to the GPU
|
||||
// memory managed by this CudaBuffer instance using the stream provided.
|
||||
void CopyFromGPUArray(const T* data, const size_t size) {
|
||||
Reserve(size);
|
||||
CHECK_EQ(cudaMemcpyAsync(data_,
|
||||
data,
|
||||
size * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice,
|
||||
context_->DefaultStream()),
|
||||
cudaSuccess);
|
||||
}
|
||||
|
||||
// Copy data from the GPU memory managed by this CudaBuffer instance to CPU
|
||||
// memory. It is the caller's responsibility to ensure that the CPU memory
|
||||
// pointer is valid, i.e. it is not null, and that it points to memory of
|
||||
// at least this->size() size. This method ensures all previously dispatched
|
||||
// GPU operations on the context's default stream have completed before copying the
|
||||
// data to CPU memory.
|
||||
void CopyToCpu(T* data, const size_t size) const {
|
||||
CHECK(data_ != nullptr);
|
||||
CHECK_EQ(cudaMemcpy(data, data_, size * sizeof(T), cudaMemcpyDeviceToHost),
|
||||
CHECK_EQ(cudaMemcpyAsync(data,
|
||||
data_,
|
||||
size * sizeof(T),
|
||||
cudaMemcpyDeviceToHost,
|
||||
context_->DefaultStream()),
|
||||
cudaSuccess);
|
||||
CHECK_EQ(cudaStreamSynchronize(context_->DefaultStream()), cudaSuccess);
|
||||
}
|
||||
|
||||
// Copy N items from another GPU memory array to the GPU memory managed by
|
||||
// this CudaBuffer instance, growing this buffer's size if needed. This copy
|
||||
// is asynchronous, and operates on the stream provided.
|
||||
void CopyNItemsFrom(int n, const CudaBuffer<T>& other) {
|
||||
Reserve(n);
|
||||
CHECK(other.data_ != nullptr);
|
||||
CHECK(data_ != nullptr);
|
||||
CHECK_EQ(cudaMemcpyAsync(data_,
|
||||
other.data_,
|
||||
size_ * sizeof(T),
|
||||
cudaMemcpyDeviceToDevice,
|
||||
context_->DefaultStream()),
|
||||
cudaSuccess);
|
||||
}
|
||||
|
||||
void CopyToGpu(const std::vector<T>& data) {
|
||||
CopyToGpu(data.data(), data.size());
|
||||
}
|
||||
|
||||
// Return a pointer to the GPU memory managed by this CudaBuffer instance.
|
||||
T* data() { return data_; }
|
||||
const T* data() const { return data_; }
|
||||
// Return the number of items of type T that can fit in the GPU memory
|
||||
// allocated so far by this CudaBuffer instance.
|
||||
size_t size() const { return size_; }
|
||||
|
||||
private:
|
||||
T* data_ = nullptr;
|
||||
size_t size_ = 0;
|
||||
ContextImpl* context_ = nullptr;
|
||||
};
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
#endif // CERES_INTERNAL_CUDA_BUFFER_H_
|
||||
#endif // CERES_INTERNAL_CUDA_BUFFER_H_
|
||||
|
||||
332
extern/ceres/internal/ceres/cuda_dense_cholesky_test.cc
vendored
Normal file
332
extern/ceres/internal/ceres/cuda_dense_cholesky_test.cc
vendored
Normal file
@@ -0,0 +1,332 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ceres/dense_cholesky.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
// Create() is called with options that never select CUDA as the dense linear
// algebra library, and is expected to return no solver.
TEST(CUDADenseCholesky, InvalidOptionOnCreate) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;

  LinearSolver::Options options;
  options.context = &context;

  auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  EXPECT_EQ(dense_cuda_solver, nullptr);
}
|
||||
|
||||
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
|
||||
TEST(CUDADenseCholesky, Cholesky4x4Matrix) {
|
||||
Eigen::Matrix4d A;
|
||||
// clang-format off
|
||||
A << 4, 12, -16, 0,
|
||||
12, 37, -43, 0,
|
||||
-16, -43, 98, 0,
|
||||
0, 0, 0, 1;
|
||||
// clang-format on
|
||||
|
||||
Vector b = Eigen::Vector4d::Ones();
|
||||
LinearSolver::Options options;
|
||||
ContextImpl context;
|
||||
options.context = &context;
|
||||
std::string error;
|
||||
EXPECT_TRUE(context.InitCuda(&error)) << error;
|
||||
options.dense_linear_algebra_library_type = CUDA;
|
||||
auto dense_cuda_solver = CUDADenseCholesky::Create(options);
|
||||
ASSERT_NE(dense_cuda_solver, nullptr);
|
||||
std::string error_string;
|
||||
ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
Eigen::Vector4d x = Eigen::Vector4d::Zero();
|
||||
ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 10;
|
||||
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
|
||||
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
|
||||
}
|
||||
|
||||
// Factorizing diag(1, 1, 0) is expected to report FAILURE.
TEST(CUDADenseCholesky, SingularMatrix) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;

  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;
  auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  // Identity with the last diagonal entry zeroed out.
  Eigen::Matrix3d A = Eigen::Matrix3d::Identity();
  A(2, 2) = 0;

  std::string error_string;
  const auto termination_type =
      dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string);
  ASSERT_EQ(termination_type, LinearSolverTerminationType::FAILURE);
}
|
||||
|
||||
// Factorizing diag(1, 1, -1) is expected to report FAILURE.
TEST(CUDADenseCholesky, NegativeMatrix) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;

  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;
  auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  // Identity with the last diagonal entry negated.
  Eigen::Matrix3d A = Eigen::Matrix3d::Identity();
  A(2, 2) = -1;

  std::string error_string;
  const auto termination_type =
      dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string);
  ASSERT_EQ(termination_type, LinearSolverTerminationType::FAILURE);
}
|
||||
|
||||
// Calling Solve() on a solver that has never factorized a matrix is expected
// to report FATAL_ERROR. The solution pointer is deliberately null.
TEST(CUDADenseCholesky, MustFactorizeBeforeSolve) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;

  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;
  auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  const Eigen::Vector3d b = Eigen::Vector3d::Ones();
  std::string error_string;
  const auto termination_type =
      dense_cuda_solver->Solve(b.data(), nullptr, &error_string);
  ASSERT_EQ(termination_type, LinearSolverTerminationType::FATAL_ERROR);
}
|
||||
|
||||
// Solves 20 random 1600x1600 systems lhs * x = rhs, where lhs is built as
// M^T * M + 1e-3 * I, and checks the relative error of the computed solution.
TEST(CUDADenseCholesky, Randomized1600x1600Tests) {
  const int kNumCols = 1600;
  using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
  using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;

  LinearSolver::Options options;
  ContextImpl context;
  options.context = &context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;
  options.dense_linear_algebra_library_type = ceres::CUDA;
  std::unique_ptr<DenseCholesky> dense_cholesky =
      CUDADenseCholesky::Create(options);
  // Create() can return null (see InvalidOptionOnCreate); stop here with a
  // test failure instead of dereferencing a null solver in the loop below.
  ASSERT_NE(dense_cholesky, nullptr);

  const int kNumTrials = 20;
  for (int i = 0; i < kNumTrials; ++i) {
    LhsType lhs = LhsType::Random(kNumCols, kNumCols);
    lhs = lhs.transpose() * lhs;
    lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
    SolutionType x_expected = SolutionType::Random(kNumCols);
    RhsType rhs = lhs * x_expected;
    SolutionType x_computed = SolutionType::Zero(kNumCols);
    // Sanity check the random matrix sizes.
    EXPECT_EQ(lhs.rows(), kNumCols);
    EXPECT_EQ(lhs.cols(), kNumCols);
    EXPECT_EQ(rhs.rows(), kNumCols);
    EXPECT_EQ(rhs.cols(), 1);
    EXPECT_EQ(x_expected.rows(), kNumCols);
    EXPECT_EQ(x_expected.cols(), 1);
    EXPECT_EQ(x_computed.rows(), kNumCols);
    EXPECT_EQ(x_computed.cols(), 1);
    LinearSolver::Summary summary;
    summary.termination_type = dense_cholesky->FactorAndSolve(
        kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
    ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
    static const double kEpsilon = std::numeric_limits<double>::epsilon() * 3e5;
    ASSERT_NEAR(
        (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
  }
}
|
||||
|
||||
// The mixed precision solver must not be created unless mixed precision
// solves were requested; both option sets below are expected to yield null.
TEST(CUDADenseCholeskyMixedPrecision, InvalidOptionsOnCreate) {
  {
    // Did not ask for CUDA, and did not ask for mixed precision.
    ContextImpl context;
    std::string error;
    EXPECT_TRUE(context.InitCuda(&error)) << error;

    LinearSolver::Options options;
    options.context = &context;
    auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
    ASSERT_EQ(solver, nullptr);
  }
  {
    // Asked for CUDA, but did not ask for mixed precision.
    ContextImpl context;
    std::string error;
    EXPECT_TRUE(context.InitCuda(&error)) << error;

    LinearSolver::Options options;
    options.context = &context;
    options.dense_linear_algebra_library_type = ceres::CUDA;
    auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
    ASSERT_EQ(solver, nullptr);
  }
}
|
||||
|
||||
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
|
||||
TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix1Step) {
|
||||
Eigen::Matrix4d A;
|
||||
// clang-format off
|
||||
// A common test Cholesky decomposition test matrix, see :
|
||||
// https://en.wikipedia.org/w/index.php?title=Cholesky_decomposition&oldid=1080607368#Example
|
||||
A << 4, 12, -16, 0,
|
||||
12, 37, -43, 0,
|
||||
-16, -43, 98, 0,
|
||||
0, 0, 0, 1;
|
||||
// clang-format on
|
||||
|
||||
const Eigen::Vector4d b = Eigen::Vector4d::Ones();
|
||||
LinearSolver::Options options;
|
||||
options.max_num_refinement_iterations = 0;
|
||||
ContextImpl context;
|
||||
options.context = &context;
|
||||
std::string error;
|
||||
EXPECT_TRUE(context.InitCuda(&error)) << error;
|
||||
options.dense_linear_algebra_library_type = CUDA;
|
||||
options.use_mixed_precision_solves = true;
|
||||
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
|
||||
ASSERT_NE(solver, nullptr);
|
||||
std::string error_string;
|
||||
ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
Eigen::Vector4d x = Eigen::Vector4d::Zero();
|
||||
ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
// A single step of the mixed precision solver will be equivalent to solving
|
||||
// in low precision (FP32). Hence the tolerance is defined w.r.t. FP32 epsilon
|
||||
// instead of FP64 epsilon.
|
||||
static const double kEpsilon = std::numeric_limits<float>::epsilon() * 10;
|
||||
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
|
||||
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
|
||||
}
|
||||
|
||||
// Tests the CUDA Cholesky solver with a simple 4x4 matrix.
|
||||
TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix4Steps) {
|
||||
Eigen::Matrix4d A;
|
||||
// clang-format off
|
||||
A << 4, 12, -16, 0,
|
||||
12, 37, -43, 0,
|
||||
-16, -43, 98, 0,
|
||||
0, 0, 0, 1;
|
||||
// clang-format on
|
||||
|
||||
const Eigen::Vector4d b = Eigen::Vector4d::Ones();
|
||||
LinearSolver::Options options;
|
||||
options.max_num_refinement_iterations = 3;
|
||||
ContextImpl context;
|
||||
options.context = &context;
|
||||
std::string error;
|
||||
EXPECT_TRUE(context.InitCuda(&error)) << error;
|
||||
options.dense_linear_algebra_library_type = CUDA;
|
||||
options.use_mixed_precision_solves = true;
|
||||
auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
|
||||
ASSERT_NE(solver, nullptr);
|
||||
std::string error_string;
|
||||
ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
Eigen::Vector4d x = Eigen::Vector4d::Zero();
|
||||
ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
|
||||
LinearSolverTerminationType::SUCCESS);
|
||||
// The error does not reduce beyond four iterations, and stagnates at this
|
||||
// level of precision.
|
||||
static const double kEpsilon = std::numeric_limits<double>::epsilon() * 100;
|
||||
const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
|
||||
EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
|
||||
EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
|
||||
}
|
||||
|
||||
// Solves 20 random 1600x1600 systems lhs * x = rhs with the mixed precision
// solver (up to 20 refinement iterations), where lhs is built as
// M^T * M + 1e-3 * I, and checks the relative error of the computed solution.
TEST(CUDADenseCholeskyMixedPrecision, Randomized1600x1600Tests) {
  const int kNumCols = 1600;
  using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
  using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;

  LinearSolver::Options options;
  ContextImpl context;
  options.context = &context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;
  options.dense_linear_algebra_library_type = ceres::CUDA;
  options.use_mixed_precision_solves = true;
  options.max_num_refinement_iterations = 20;
  std::unique_ptr<CUDADenseCholeskyMixedPrecision> dense_cholesky =
      CUDADenseCholeskyMixedPrecision::Create(options);
  // Create() can return null (see InvalidOptionsOnCreate); stop here with a
  // test failure instead of dereferencing a null solver in the loop below.
  ASSERT_NE(dense_cholesky, nullptr);

  const int kNumTrials = 20;
  for (int i = 0; i < kNumTrials; ++i) {
    LhsType lhs = LhsType::Random(kNumCols, kNumCols);
    lhs = lhs.transpose() * lhs;
    lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
    SolutionType x_expected = SolutionType::Random(kNumCols);
    RhsType rhs = lhs * x_expected;
    SolutionType x_computed = SolutionType::Zero(kNumCols);
    // Sanity check the random matrix sizes.
    EXPECT_EQ(lhs.rows(), kNumCols);
    EXPECT_EQ(lhs.cols(), kNumCols);
    EXPECT_EQ(rhs.rows(), kNumCols);
    EXPECT_EQ(rhs.cols(), 1);
    EXPECT_EQ(x_expected.rows(), kNumCols);
    EXPECT_EQ(x_expected.cols(), 1);
    EXPECT_EQ(x_computed.rows(), kNumCols);
    EXPECT_EQ(x_computed.cols(), 1);
    LinearSolver::Summary summary;
    summary.termination_type = dense_cholesky->FactorAndSolve(
        kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
    ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
    static const double kEpsilon = std::numeric_limits<double>::epsilon() * 1e6;
    ASSERT_NEAR(
        (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
  }
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace ceres::internal
|
||||
177
extern/ceres/internal/ceres/cuda_dense_qr_test.cc
vendored
Normal file
177
extern/ceres/internal/ceres/cuda_dense_qr_test.cc
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ceres/dense_qr.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
// Create() is expected to return null when the options do not request the
// CUDA dense linear algebra library (the library type is left untouched here).
TEST(CUDADenseQR, InvalidOptionOnCreate) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;

  LinearSolver::Options options;
  options.context = &context;
  EXPECT_EQ(CUDADenseQR::Create(options), nullptr);
}
|
||||
|
||||
// Tests the CUDA QR solver with a simple 4x4 matrix.
|
||||
TEST(CUDADenseQR, QR4x4Matrix) {
  // Bring up the CUDA context and request the CUDA dense backend.
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;
  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;

  auto dense_cuda_solver = CUDADenseQR::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  // Square system A * x = b with an all-ones right hand side.
  Eigen::Matrix4d A;
  // clang-format off
  A <<  4,  12, -16, 0,
       12,  37, -43, 0,
      -16, -43,  98, 0,
        0,   0,   0, 1;
  // clang-format on
  const Eigen::Vector4d b = Eigen::Vector4d::Ones();

  std::string error_string;
  const auto factorize_status = dense_cuda_solver->Factorize(
      A.rows(), A.cols(), A.data(), &error_string);
  ASSERT_EQ(factorize_status, LinearSolverTerminationType::SUCCESS);

  Eigen::Vector4d x = Eigen::Vector4d::Zero();
  const auto solve_status =
      dense_cuda_solver->Solve(b.data(), x.data(), &error_string);
  ASSERT_EQ(solve_status, LinearSolverTerminationType::SUCCESS);

  // Empirically observed accuracy of cuSolverDN's QR solver.
  const double kEpsilon = std::numeric_limits<double>::epsilon() * 1500;
  const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
  const double relative_error = (x - x_expected).norm() / x_expected.norm();
  EXPECT_NEAR(relative_error, 0.0, kEpsilon);
}
|
||||
|
||||
// Tests the CUDA QR solver with a simple 4x2 (overdetermined) matrix.
|
||||
TEST(CUDADenseQR, QR4x2Matrix) {
  // Bring up the CUDA context and request the CUDA dense backend.
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;
  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;

  auto dense_cuda_solver = CUDADenseQR::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  // Tall 4x2 system with an all-ones right hand side.
  Eigen::Matrix<double, 4, 2> A;
  // clang-format off
  A <<  4,  12,
       12,  37,
      -16, -43,
        0,   0;
  // clang-format on
  const std::vector<double> b(4, 1.0);

  std::string error_string;
  const auto factorize_status = dense_cuda_solver->Factorize(
      A.rows(), A.cols(), A.data(), &error_string);
  ASSERT_EQ(factorize_status, LinearSolverTerminationType::SUCCESS);

  std::vector<double> x(2, 0);
  const auto solve_status =
      dense_cuda_solver->Solve(b.data(), x.data(), &error_string);
  ASSERT_EQ(solve_status, LinearSolverTerminationType::SUCCESS);

  // Empirically observed accuracy of cuSolverDN's QR solver.
  const double kEpsilon = std::numeric_limits<double>::epsilon() * 10;
  // Solution values computed with Octave.
  const Eigen::Vector2d x_expected(-1.143410852713177, 0.4031007751937981);
  // Compare every component by its relative error.
  for (int i = 0; i < 2; ++i) {
    EXPECT_NEAR((x[i] - x_expected[i]) / x_expected[i], 0.0, kEpsilon);
  }
}
|
||||
|
||||
// Calling Solve() on a solver that has never factorized a matrix is expected
// to report FATAL_ERROR.
TEST(CUDADenseQR, MustFactorizeBeforeSolve) {
  ContextImpl context;
  std::string error;
  EXPECT_TRUE(context.InitCuda(&error)) << error;
  LinearSolver::Options options;
  options.context = &context;
  options.dense_linear_algebra_library_type = CUDA;

  auto dense_cuda_solver = CUDADenseQR::Create(options);
  ASSERT_NE(dense_cuda_solver, nullptr);

  const Eigen::Vector3d b = Eigen::Vector3d::Ones();
  std::string error_string;
  // No Factorize() call precedes this Solve(); the output pointer is null.
  const auto solve_status =
      dense_cuda_solver->Solve(b.data(), nullptr, &error_string);
  ASSERT_EQ(solve_status, LinearSolverTerminationType::FATAL_ERROR);
}
|
||||
|
||||
// Solves randomly generated 1600x100 least squares problems whose right hand
// side is built from a known solution, and checks the relative error of the
// solution recovered by the CUDA QR solver.
TEST(CUDADenseQR, Randomized1600x100Tests) {
  const int kNumRows = 1600;
  const int kNumCols = 100;
  using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
  using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;

  LinearSolver::Options options;
  ContextImpl context;
  options.context = &context;
  std::string error;
  // Everything below needs an initialized CUDA context; abort the test right
  // away instead of carrying on with a context that failed to initialize.
  ASSERT_TRUE(context.InitCuda(&error)) << error;
  options.dense_linear_algebra_library_type = ceres::CUDA;
  std::unique_ptr<DenseQR> dense_qr = CUDADenseQR::Create(options);
  // Same guard as the other tests in this file: the solver is dereferenced in
  // every trial, so a null result must fail the test rather than crash it.
  ASSERT_NE(dense_qr, nullptr);

  const int kNumTrials = 20;
  for (int i = 0; i < kNumTrials; ++i) {
    LhsType lhs = LhsType::Random(kNumRows, kNumCols);
    SolutionType x_expected = SolutionType::Random(kNumCols);
    RhsType rhs = lhs * x_expected;
    SolutionType x_computed = SolutionType::Zero(kNumCols);
    // Sanity check the random matrix sizes.
    EXPECT_EQ(lhs.rows(), kNumRows);
    EXPECT_EQ(lhs.cols(), kNumCols);
    EXPECT_EQ(rhs.rows(), kNumRows);
    EXPECT_EQ(rhs.cols(), 1);
    EXPECT_EQ(x_expected.rows(), kNumCols);
    EXPECT_EQ(x_expected.cols(), 1);
    EXPECT_EQ(x_computed.rows(), kNumCols);
    EXPECT_EQ(x_computed.cols(), 1);
    LinearSolver::Summary summary;
    summary.termination_type = dense_qr->FactorAndSolve(kNumRows,
                                                        kNumCols,
                                                        lhs.data(),
                                                        rhs.data(),
                                                        x_computed.data(),
                                                        &summary.message);
    ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
    ASSERT_NEAR((x_computed - x_expected).norm() / x_expected.norm(),
                0.0,
                std::numeric_limits<double>::epsilon() * 400);
  }
}
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace ceres::internal
|
||||
477
extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc
vendored
Normal file
477
extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc
vendored
Normal file
@@ -0,0 +1,477 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_kernels_bsm_to_crs.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/scan.h>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/cuda_kernels_utils.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
// Returns the Thrust execution policy used to run algorithms on `stream`.
inline auto ThrustCudaStreamExecutionPolicy(cudaStream_t stream) {
  // par_nosync execution policy was added in Thrust 1.16
  // https://github.com/NVIDIA/thrust/blob/main/CHANGELOG.md#thrust-1160
  // NOTE(review): the guard below only selects par_nosync from version
  // 101700 on; older versions use the plain `par` policy.
#if THRUST_VERSION >= 101700
  return thrust::cuda::par_nosync.on(stream);
#else
  return thrust::cuda::par.on(stream);
#endif
}
|
||||
|
||||
void* CudaMalloc(size_t size,
|
||||
cudaStream_t stream,
|
||||
bool memory_pools_supported) {
|
||||
void* data = nullptr;
|
||||
// Stream-ordered alloaction API is available since CUDA 11.2, but might be
|
||||
// not implemented by particular device
|
||||
#if CUDART_VERSION < 11020
|
||||
#warning \
|
||||
"Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+"
|
||||
cudaMalloc(&data, size);
|
||||
#else
|
||||
if (memory_pools_supported) {
|
||||
cudaMallocAsync(&data, size, stream);
|
||||
} else {
|
||||
cudaMalloc(&data, size);
|
||||
}
|
||||
#endif
|
||||
return data;
|
||||
}
|
||||
|
||||
// Releases device memory, mirroring the allocation strategy of CudaMalloc:
// the stream-ordered cudaFreeAsync on `stream` when the toolkit provides it
// and `memory_pools_supported` is true, plain cudaFree otherwise. The status
// code of the CUDA call is not inspected.
void CudaFree(void* data, cudaStream_t stream, bool memory_pools_supported) {
  // Stream-ordered allocation API is available since CUDA 11.2, but might be
  // not implemented by particular device
#if CUDART_VERSION < 11020
#warning \
    "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+"
  // The original statement was `cudaSuccess, cudaFree(data);`: a comma
  // expression whose left operand had no effect. Only the call is kept.
  cudaFree(data);
#else
  if (memory_pools_supported) {
    cudaFreeAsync(data, stream);
  } else {
    cudaFree(data);
  }
#endif
}
|
||||
template <typename T>
|
||||
T* CudaAllocate(size_t num_elements,
|
||||
cudaStream_t stream,
|
||||
bool memory_pools_supported) {
|
||||
T* data = static_cast<T*>(
|
||||
CudaMalloc(num_elements * sizeof(T), stream, memory_pools_supported));
|
||||
return data;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Fill row block id and nnz for each row using block-sparse structure
|
||||
// represented by a set of flat arrays.
|
||||
// Inputs:
|
||||
// - num_row_blocks: number of row-blocks in block-sparse structure
|
||||
// - first_cell_in_row_block: index of the first cell of the row-block; size:
|
||||
// num_row_blocks + 1
|
||||
// - cells: cells of block-sparse structure as a continuous array
|
||||
// - row_blocks: row blocks of block-sparse structure stored sequentially
|
||||
// - col_blocks: column blocks of block-sparse structure stored sequentially
|
||||
// Outputs:
|
||||
// - rows: rows[i + 1] will contain number of non-zeros in i-th row, rows[0]
|
||||
// will be set to 0; rows are filled with a shift by one element in order
|
||||
// to obtain row-index array of CRS matrix with an inclusive scan afterwards
|
||||
// - row_block_ids: row_block_ids[i] will be set to index of row-block that
|
||||
// contains i-th row.
|
||||
// Computation is performed row-block-wise
|
||||
template <bool partitioned = false>
|
||||
__global__ void RowBlockIdAndNNZ(
|
||||
const int num_row_blocks,
|
||||
const int num_col_blocks_e,
|
||||
const int num_row_blocks_e,
|
||||
const int* __restrict__ first_cell_in_row_block,
|
||||
const Cell* __restrict__ cells,
|
||||
const Block* __restrict__ row_blocks,
|
||||
const Block* __restrict__ col_blocks,
|
||||
int* __restrict__ rows_e,
|
||||
int* __restrict__ rows_f,
|
||||
int* __restrict__ row_block_ids) {
|
||||
const int row_block_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (row_block_id > num_row_blocks) {
|
||||
// No synchronization is performed in this kernel, thus it is safe to return
|
||||
return;
|
||||
}
|
||||
if (row_block_id == num_row_blocks) {
|
||||
// one extra thread sets the first element
|
||||
rows_f[0] = 0;
|
||||
if constexpr (partitioned) {
|
||||
rows_e[0] = 0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
const auto& row_block = row_blocks[row_block_id];
|
||||
auto first_cell = cells + first_cell_in_row_block[row_block_id];
|
||||
const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
|
||||
int row_nnz_e = 0;
|
||||
if (partitioned && row_block_id < num_row_blocks_e) {
|
||||
// First cell is a cell from E
|
||||
row_nnz_e = col_blocks[first_cell->block_id].size;
|
||||
++first_cell;
|
||||
}
|
||||
int row_nnz_f = 0;
|
||||
for (auto cell = first_cell; cell < last_cell; ++cell) {
|
||||
row_nnz_f += col_blocks[cell->block_id].size;
|
||||
}
|
||||
const int first_row = row_block.position;
|
||||
const int last_row = first_row + row_block.size;
|
||||
for (int i = first_row; i < last_row; ++i) {
|
||||
if constexpr (partitioned) {
|
||||
rows_e[i + 1] = row_nnz_e;
|
||||
}
|
||||
rows_f[i + 1] = row_nnz_f;
|
||||
row_block_ids[i] = row_block_id;
|
||||
}
|
||||
}
|
||||
|
||||
// Row-wise creation of CRS structure
|
||||
// Inputs:
|
||||
// - num_rows: number of rows in matrix
|
||||
// - first_cell_in_row_block: index of the first cell of the row-block; size:
|
||||
// num_row_blocks + 1
|
||||
// - cells: cells of block-sparse structure as a continuous array
|
||||
// - row_blocks: row blocks of block-sparse structure stored sequentially
|
||||
// - col_blocks: column blocks of block-sparse structure stored sequentially
|
||||
// - row_block_ids: index of row-block that corresponds to row
|
||||
// - rows: row-index array of CRS structure
|
||||
// Outputs:
|
||||
// - cols: column-index array of CRS structure
|
||||
// Computation is performed row-wise
|
||||
template <bool partitioned>
|
||||
__global__ void ComputeColumns(const int num_rows,
|
||||
const int num_row_blocks_e,
|
||||
const int num_col_blocks_e,
|
||||
const int* __restrict__ first_cell_in_row_block,
|
||||
const Cell* __restrict__ cells,
|
||||
const Block* __restrict__ row_blocks,
|
||||
const Block* __restrict__ col_blocks,
|
||||
const int* __restrict__ row_block_ids,
|
||||
const int* __restrict__ rows_e,
|
||||
int* __restrict__ cols_e,
|
||||
const int* __restrict__ rows_f,
|
||||
int* __restrict__ cols_f) {
|
||||
const int row = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (row >= num_rows) {
|
||||
// No synchronization is performed in this kernel, thus it is safe to return
|
||||
return;
|
||||
}
|
||||
const int row_block_id = row_block_ids[row];
|
||||
// position in crs matrix
|
||||
auto first_cell = cells + first_cell_in_row_block[row_block_id];
|
||||
const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
|
||||
const int num_cols_e = col_blocks[num_col_blocks_e].position;
|
||||
// For reach cell of row-block only current row is being filled
|
||||
if (partitioned && row_block_id < num_row_blocks_e) {
|
||||
// The first cell is cell from E
|
||||
const auto& col_block = col_blocks[first_cell->block_id];
|
||||
const int col_block_size = col_block.size;
|
||||
int column_idx = col_block.position;
|
||||
int crs_position_e = rows_e[row];
|
||||
// Column indices for each element of row_in_block row of current cell
|
||||
for (int i = 0; i < col_block_size; ++i, ++crs_position_e) {
|
||||
cols_e[crs_position_e] = column_idx++;
|
||||
}
|
||||
++first_cell;
|
||||
}
|
||||
int crs_position_f = rows_f[row];
|
||||
for (auto cell = first_cell; cell < last_cell; ++cell) {
|
||||
const auto& col_block = col_blocks[cell->block_id];
|
||||
const int col_block_size = col_block.size;
|
||||
int column_idx = col_block.position - num_cols_e;
|
||||
// Column indices for each element of row_in_block row of current cell
|
||||
for (int i = 0; i < col_block_size; ++i, ++crs_position_f) {
|
||||
cols_f[crs_position_f] = column_idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FillCRSStructure(const int num_row_blocks,
|
||||
const int num_rows,
|
||||
const int* first_cell_in_row_block,
|
||||
const Cell* cells,
|
||||
const Block* row_blocks,
|
||||
const Block* col_blocks,
|
||||
int* rows,
|
||||
int* cols,
|
||||
cudaStream_t stream,
|
||||
bool memory_pools_supported) {
|
||||
// Set number of non-zeros per row in rows array and row to row-block map in
|
||||
// row_block_ids array
|
||||
int* row_block_ids =
|
||||
CudaAllocate<int>(num_rows, stream, memory_pools_supported);
|
||||
const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1);
|
||||
RowBlockIdAndNNZ<false><<<num_blocks_blockwise, kCudaBlockSize, 0, stream>>>(
|
||||
num_row_blocks,
|
||||
0,
|
||||
0,
|
||||
first_cell_in_row_block,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
nullptr,
|
||||
rows,
|
||||
row_block_ids);
|
||||
// Finalize row-index array of CRS strucure by computing prefix sum
|
||||
thrust::inclusive_scan(
|
||||
ThrustCudaStreamExecutionPolicy(stream), rows, rows + num_rows + 1, rows);
|
||||
|
||||
// Fill cols array of CRS structure
|
||||
const int num_blocks_rowwise = NumBlocksInGrid(num_rows);
|
||||
ComputeColumns<false><<<num_blocks_rowwise, kCudaBlockSize, 0, stream>>>(
|
||||
num_rows,
|
||||
0,
|
||||
0,
|
||||
first_cell_in_row_block,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
row_block_ids,
|
||||
nullptr,
|
||||
nullptr,
|
||||
rows,
|
||||
cols);
|
||||
CudaFree(row_block_ids, stream, memory_pools_supported);
|
||||
}
|
||||
|
||||
void FillCRSStructurePartitioned(const int num_row_blocks,
|
||||
const int num_rows,
|
||||
const int num_row_blocks_e,
|
||||
const int num_col_blocks_e,
|
||||
const int num_nonzeros_e,
|
||||
const int* first_cell_in_row_block,
|
||||
const Cell* cells,
|
||||
const Block* row_blocks,
|
||||
const Block* col_blocks,
|
||||
int* rows_e,
|
||||
int* cols_e,
|
||||
int* rows_f,
|
||||
int* cols_f,
|
||||
cudaStream_t stream,
|
||||
bool memory_pools_supported) {
|
||||
// Set number of non-zeros per row in rows array and row to row-block map in
|
||||
// row_block_ids array
|
||||
int* row_block_ids =
|
||||
CudaAllocate<int>(num_rows, stream, memory_pools_supported);
|
||||
const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1);
|
||||
RowBlockIdAndNNZ<true><<<num_blocks_blockwise, kCudaBlockSize, 0, stream>>>(
|
||||
num_row_blocks,
|
||||
num_col_blocks_e,
|
||||
num_row_blocks_e,
|
||||
first_cell_in_row_block,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
rows_e,
|
||||
rows_f,
|
||||
row_block_ids);
|
||||
// Finalize row-index array of CRS strucure by computing prefix sum
|
||||
thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream),
|
||||
rows_e,
|
||||
rows_e + num_rows + 1,
|
||||
rows_e);
|
||||
thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream),
|
||||
rows_f,
|
||||
rows_f + num_rows + 1,
|
||||
rows_f);
|
||||
|
||||
// Fill cols array of CRS structure
|
||||
const int num_blocks_rowwise = NumBlocksInGrid(num_rows);
|
||||
ComputeColumns<true><<<num_blocks_rowwise, kCudaBlockSize, 0, stream>>>(
|
||||
num_rows,
|
||||
num_row_blocks_e,
|
||||
num_col_blocks_e,
|
||||
first_cell_in_row_block,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
row_block_ids,
|
||||
rows_e,
|
||||
cols_e,
|
||||
rows_f,
|
||||
cols_f);
|
||||
CudaFree(row_block_ids, stream, memory_pools_supported);
|
||||
}
|
||||
|
||||
template <typename T, typename Predicate>
|
||||
__device__ int PartitionPoint(const T* data,
|
||||
int first,
|
||||
int last,
|
||||
Predicate&& predicate) {
|
||||
if (!predicate(data[first])) {
|
||||
return first;
|
||||
}
|
||||
while (last - first > 1) {
|
||||
const auto midpoint = first + (last - first) / 2;
|
||||
if (predicate(data[midpoint])) {
|
||||
first = midpoint;
|
||||
} else {
|
||||
last = midpoint;
|
||||
}
|
||||
}
|
||||
return last;
|
||||
}
|
||||
|
||||
// Element-wise reordering of block-sparse values
|
||||
// - first_cell_in_row_block - position of the first cell of row-block
|
||||
// - block_sparse_values - segment of block-sparse values starting from
|
||||
// block_sparse_offset, containing num_values
|
||||
// One thread per value of the segment: thread `value_id` copies
// block_sparse_values[value_id] to its position in crs_values.
// - partitioned: when true, the row-block is located through
//   value_offset_row_block_f and the first cell of each of the first
//   num_row_blocks_e row-blocks is skipped; when false it is located through
//   the position of the first cell of every row-block.
// - crs_rows: row-index array of the destination CRS matrix.
template <bool partitioned>
__global__ void PermuteToCrsKernel(
    const int block_sparse_offset,
    const int num_values,
    const int num_row_blocks,
    const int num_row_blocks_e,
    const int* __restrict__ first_cell_in_row_block,
    const int* __restrict__ value_offset_row_block_f,
    const Cell* __restrict__ cells,
    const Block* __restrict__ row_blocks,
    const Block* __restrict__ col_blocks,
    const int* __restrict__ crs_rows,
    const double* __restrict__ block_sparse_values,
    double* __restrict__ crs_values) {
  const int value_id = blockIdx.x * blockDim.x + threadIdx.x;
  if (value_id >= num_values) {
    return;
  }
  // Index of the value in the whole block-sparse value array; value_id itself
  // is relative to the start of the segment.
  const int block_sparse_value_id = value_id + block_sparse_offset;
  // Find the corresponding row-block with a binary search
  // PartitionPoint returns the first row-block whose offset is greater than
  // the value index, hence the trailing "- 1".
  // NOTE(review): the ternary on the template constant (rather than
  // `if constexpr`) is presumably due to nvcc restrictions on extended
  // __device__ lambdas - confirm before restructuring.
  const int row_block_id =
      (partitioned
           ? PartitionPoint(value_offset_row_block_f,
                            0,
                            num_row_blocks,
                            [block_sparse_value_id] __device__(
                                const int row_block_offset) {
                              return row_block_offset <= block_sparse_value_id;
                            })
           : PartitionPoint(first_cell_in_row_block,
                            0,
                            num_row_blocks,
                            [cells, block_sparse_value_id] __device__(
                                const int row_block_offset) {
                              return cells[row_block_offset].position <=
                                     block_sparse_value_id;
                            })) -
      1;
  // Find cell and calculate offset within the row with a linear scan
  const auto& row_block = row_blocks[row_block_id];
  auto first_cell = cells + first_cell_in_row_block[row_block_id];
  const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1];
  const int row_block_size = row_block.size;
  // Number of columns of the current row occupied by the cells already
  // scanned, i.e. the offset of the current cell within the CRS row.
  int num_cols_before = 0;
  if (partitioned && row_block_id < num_row_blocks_e) {
    // Skip the leading cell of this row-block; it is not part of the scan.
    ++first_cell;
  }
  for (const Cell* cell = first_cell; cell < last_cell; ++cell) {
    const auto& col_block = col_blocks[cell->block_id];
    const int col_block_size = col_block.size;
    const int cell_size = row_block_size * col_block_size;
    // The first cell whose value range extends past the value index is the
    // cell containing the value.
    if (cell->position + cell_size > block_sparse_value_id) {
      // Values inside a cell are addressed row by row.
      const int pos_in_cell = block_sparse_value_id - cell->position;
      const int row_in_cell = pos_in_cell / col_block_size;
      const int col_in_cell = pos_in_cell % col_block_size;
      const int row = row_in_cell + row_block.position;
      crs_values[crs_rows[row] + num_cols_before + col_in_cell] =
          block_sparse_values[value_id];
      break;
    }
    num_cols_before += col_block_size;
  }
}
|
||||
|
||||
void PermuteToCRS(const int block_sparse_offset,
|
||||
const int num_values,
|
||||
const int num_row_blocks,
|
||||
const int* first_cell_in_row_block,
|
||||
const Cell* cells,
|
||||
const Block* row_blocks,
|
||||
const Block* col_blocks,
|
||||
const int* crs_rows,
|
||||
const double* block_sparse_values,
|
||||
double* crs_values,
|
||||
cudaStream_t stream) {
|
||||
const int num_blocks_valuewise = NumBlocksInGrid(num_values);
|
||||
PermuteToCrsKernel<false>
|
||||
<<<num_blocks_valuewise, kCudaBlockSize, 0, stream>>>(
|
||||
block_sparse_offset,
|
||||
num_values,
|
||||
num_row_blocks,
|
||||
0,
|
||||
first_cell_in_row_block,
|
||||
nullptr,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
crs_rows,
|
||||
block_sparse_values,
|
||||
crs_values);
|
||||
}
|
||||
|
||||
void PermuteToCRSPartitionedF(const int block_sparse_offset,
|
||||
const int num_values,
|
||||
const int num_row_blocks,
|
||||
const int num_row_blocks_e,
|
||||
const int* first_cell_in_row_block,
|
||||
const int* value_offset_row_block_f,
|
||||
const Cell* cells,
|
||||
const Block* row_blocks,
|
||||
const Block* col_blocks,
|
||||
const int* crs_rows,
|
||||
const double* block_sparse_values,
|
||||
double* crs_values,
|
||||
cudaStream_t stream) {
|
||||
const int num_blocks_valuewise = NumBlocksInGrid(num_values);
|
||||
PermuteToCrsKernel<true><<<num_blocks_valuewise, kCudaBlockSize, 0, stream>>>(
|
||||
block_sparse_offset,
|
||||
num_values,
|
||||
num_row_blocks,
|
||||
num_row_blocks_e,
|
||||
first_cell_in_row_block,
|
||||
value_offset_row_block_f,
|
||||
cells,
|
||||
row_blocks,
|
||||
col_blocks,
|
||||
crs_rows,
|
||||
block_sparse_values,
|
||||
crs_values);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
113
extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.h
vendored
Normal file
113
extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.h
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
|
||||
#define CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
struct Block;
|
||||
struct Cell;
|
||||
|
||||
// Compute structure of CRS matrix using block-sparse structure.
// Arrays corresponding to CRS matrix are to be allocated by caller
// - num_row_blocks: number of row-blocks in block-sparse structure
// - num_rows: number of rows in matrix
// - first_cell_in_row_block: index of the first cell of each row-block; size:
//   num_row_blocks + 1
// - cells, row_blocks, col_blocks: block-sparse structure as flat arrays
// - rows: output row-index array; num_rows + 1 elements are written
// - cols: output column-index array
// - stream: CUDA stream on which all work is issued
// - memory_pools_supported: whether stream-ordered allocation may be used for
//   the temporary buffer allocated inside
// All array arguments are dereferenced by CUDA kernels.
void FillCRSStructure(const int num_row_blocks,
                      const int num_rows,
                      const int* first_cell_in_row_block,
                      const Cell* cells,
                      const Block* row_blocks,
                      const Block* col_blocks,
                      int* rows,
                      int* cols,
                      cudaStream_t stream,
                      bool memory_pools_supported);
|
||||
|
||||
// Compute structure of partitioned CRS matrix using block-sparse structure.
// Arrays corresponding to CRS matrices are to be allocated by caller
// - num_row_blocks, num_rows: number of row-blocks / rows in the matrix
// - num_row_blocks_e: number of leading row-blocks whose first cell belongs
//   to sub-matrix E
// - num_col_blocks_e: index of the first column block that does not belong to
//   E; column indices of F are stored relative to its position
// - num_nonzeros_e: number of non-zeros in E
//   (NOTE(review): not read by the current implementation)
// - first_cell_in_row_block: index of the first cell of each row-block; size:
//   num_row_blocks + 1
// - cells, row_blocks, col_blocks: block-sparse structure as flat arrays
// - rows_e, cols_e: output row-index / column-index arrays of E
// - rows_f, cols_f: output row-index / column-index arrays of F
//   (num_rows + 1 elements are written to each row-index array)
// - stream: CUDA stream on which all work is issued
// - memory_pools_supported: whether stream-ordered allocation may be used for
//   the temporary buffer allocated inside
void FillCRSStructurePartitioned(const int num_row_blocks,
                                 const int num_rows,
                                 const int num_row_blocks_e,
                                 const int num_col_blocks_e,
                                 const int num_nonzeros_e,
                                 const int* first_cell_in_row_block,
                                 const Cell* cells,
                                 const Block* row_blocks,
                                 const Block* col_blocks,
                                 int* rows_e,
                                 int* cols_e,
                                 int* rows_f,
                                 int* cols_f,
                                 cudaStream_t stream,
                                 bool memory_pools_supported);
|
||||
|
||||
// Permute segment of values from block-sparse matrix with sequential layout to
// CRS order. Segment starts at block_sparse_offset and has length of num_values
// - first_cell_in_row_block, cells, row_blocks, col_blocks: block-sparse
//   structure as flat arrays
// - crs_rows: row-index array of the destination CRS matrix
// - block_sparse_values: the values of the segment; element i corresponds to
//   block-sparse value block_sparse_offset + i
// - crs_values: value array of the destination CRS matrix
// - stream: CUDA stream on which the kernel is launched
void PermuteToCRS(const int block_sparse_offset,
                  const int num_values,
                  const int num_row_blocks,
                  const int* first_cell_in_row_block,
                  const Cell* cells,
                  const Block* row_blocks,
                  const Block* col_blocks,
                  const int* crs_rows,
                  const double* block_sparse_values,
                  double* crs_values,
                  cudaStream_t stream);
|
||||
|
||||
// Permute segment of values from F sub-matrix of block-sparse partitioned
// matrix with sequential layout to CRS order. Segment starts at
// block_sparse_offset (including the offset induced by values of E submatrix)
// and has length of num_values
// - num_row_blocks_e: number of leading row-blocks whose first cell is
//   skipped when locating a value
// - value_offset_row_block_f: per-row-block value offsets, searched to find
//   the row-block that contains a given value
// - crs_rows: row-index array of the destination CRS matrix
// - block_sparse_values: the values of the segment; element i corresponds to
//   block-sparse value block_sparse_offset + i
// - crs_values: value array of the destination CRS matrix
// - stream: CUDA stream on which the kernel is launched
void PermuteToCRSPartitionedF(const int block_sparse_offset,
                              const int num_values,
                              const int num_row_blocks,
                              const int num_row_blocks_e,
                              const int* first_cell_in_row_block,
                              const int* value_offset_row_block_f,
                              const Cell* cells,
                              const Block* row_blocks,
                              const Block* col_blocks,
                              const int* crs_rows,
                              const double* block_sparse_values,
                              double* crs_values,
                              cudaStream_t stream);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
#endif // CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,53 +26,31 @@
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: alexs.mac@gmail.com (Alex Stewart)
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifdef CERES_NO_THREADS
|
||||
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "glog/logging.h"
|
||||
#ifndef CERES_INTERNAL_CUDA_KERNELS_UTILS_H_
|
||||
#define CERES_INTERNAL_CUDA_KERNELS_UTILS_H_
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Always reports exactly one available thread.
int MaxNumThreadsAvailable() { return 1; }
|
||||
// Parallel execution on CUDA device requires splitting job into blocks of a
|
||||
// fixed size. We use block-size of kCudaBlockSize for all kernels that do not
|
||||
// require any specific block size. As the CUDA Toolkit documentation says,
|
||||
// "although arbitrary in this case, is a common choice". This is determined by
|
||||
// the warp size, max block size, and multiprocessor sizes of recent GPUs. For
|
||||
// complex kernels with significant register usage and unusual memory patterns,
|
||||
// the occupancy calculator API might provide better performance. See "Occupancy
|
||||
// Calculator" under the CUDA toolkit documentation.
|
||||
constexpr int kCudaBlockSize = 256;  // block size shared by all generic kernels
|
||||
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
const std::function<void(int)>& function) {
|
||||
CHECK_GT(num_threads, 0);
|
||||
CHECK(context != nullptr);
|
||||
if (end <= start) {
|
||||
return;
|
||||
}
|
||||
for (int i = start; i < end; ++i) {
|
||||
function(i);
|
||||
}
|
||||
// Compute number of blocks of kCudaBlockSize that span over 1-d grid with
|
||||
// dimension size. Note that 1-d grid dimension is limited by 2^31-1 in CUDA,
|
||||
// thus a signed int is used as an argument.
|
||||
inline int NumBlocksInGrid(int size) {
|
||||
return (size + kCudaBlockSize - 1) / kCudaBlockSize;
|
||||
}
|
||||
|
||||
// Serial implementation of the ParallelFor overload whose callback receives a
// thread id: after validating num_threads and context, it calls
// function(0, i) for every i in [start, end), in increasing order, on the
// calling thread. Nothing is called when end <= start.
void ParallelFor(ContextImpl* context,
                 int start,
                 int end,
                 int num_threads,
                 const std::function<void(int thread_id, int i)>& function) {
  CHECK_GT(num_threads, 0);
  CHECK(context != nullptr);
  // Everything runs on a single thread, which is always reported as id 0.
  constexpr int kOnlyThreadId = 0;
  // The loop condition already handles end <= start, so no extra guard is
  // needed.
  for (int index = start; index < end; ++index) {
    function(kOnlyThreadId, index);
  }
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_NO_THREADS
|
||||
#endif // CERES_INTERNAL_CUDA_KERNELS_UTILS_H_
|
||||
123
extern/ceres/internal/ceres/cuda_kernels_vector_ops.cu.cc
vendored
Normal file
123
extern/ceres/internal/ceres/cuda_kernels_vector_ops.cu.cc
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "ceres/cuda_kernels_utils.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
__global__ void TypeConversionKernel(const SrcType* __restrict__ input,
|
||||
DstType* __restrict__ output,
|
||||
const int size) {
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (i < size) {
|
||||
output[i] = static_cast<DstType>(input[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void CudaFP64ToFP32(const double* input,
|
||||
float* output,
|
||||
const int size,
|
||||
cudaStream_t stream) {
|
||||
const int num_blocks = NumBlocksInGrid(size);
|
||||
TypeConversionKernel<double, float>
|
||||
<<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size);
|
||||
}
|
||||
|
||||
void CudaFP32ToFP64(const float* input,
|
||||
double* output,
|
||||
const int size,
|
||||
cudaStream_t stream) {
|
||||
const int num_blocks = NumBlocksInGrid(size);
|
||||
TypeConversionKernel<float, double>
|
||||
<<<num_blocks, kCudaBlockSize, 0, stream>>>(input, output, size);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void SetZeroKernel(T* __restrict__ output, const int size) {
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (i < size) {
|
||||
output[i] = T(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueues zeroing of `size` float elements of `output` on `stream`.
void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream) {
  SetZeroKernel<float>
      <<<NumBlocksInGrid(size), kCudaBlockSize, 0, stream>>>(output, size);
}
|
||||
|
||||
// Enqueues zeroing of `size` double elements of `output` on `stream`.
void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream) {
  SetZeroKernel<double>
      <<<NumBlocksInGrid(size), kCudaBlockSize, 0, stream>>>(output, size);
}
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
__global__ void XPlusEqualsYKernel(DstType* __restrict__ x,
|
||||
const SrcType* __restrict__ y,
|
||||
const int size) {
|
||||
const int i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (i < size) {
|
||||
x[i] = x[i] + DstType(y[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueues x[i] = x[i] + double(y[i]) for `size` elements on `stream`; x is
// double precision, y is single precision.
void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream) {
  XPlusEqualsYKernel<float, double>
      <<<NumBlocksInGrid(size), kCudaBlockSize, 0, stream>>>(x, y, size);
}
|
||||
|
||||
// The thread with global index idx computes
// y[idx] = y[idx] + D[idx] * D[idx] * x[idx]. Threads whose index is not below
// `size` do nothing.
__global__ void CudaDtDxpyKernel(double* __restrict__ y,
                                 const double* D,
                                 const double* __restrict__ x,
                                 const int size) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= size) {
    return;
  }
  y[idx] = y[idx] + D[idx] * D[idx] * x[idx];
}
|
||||
|
||||
void CudaDtDxpy(double* y,
|
||||
const double* D,
|
||||
const double* x,
|
||||
const int size,
|
||||
cudaStream_t stream) {
|
||||
const int num_blocks = NumBlocksInGrid(size);
|
||||
CudaDtDxpyKernel<<<num_blocks, kCudaBlockSize, 0, stream>>>(y, D, x, size);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
83
extern/ceres/internal/ceres/cuda_kernels_vector_ops.h
vendored
Normal file
83
extern/ceres/internal/ceres/cuda_kernels_vector_ops.h
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_
|
||||
#define CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
class Block;
|
||||
class Cell;
|
||||
|
||||
// Convert an array of double (FP64) values to float (FP32). Both arrays must
|
||||
// already be on GPU memory.
|
||||
void CudaFP64ToFP32(const double* input,
|
||||
float* output,
|
||||
const int size,
|
||||
cudaStream_t stream);
|
||||
|
||||
// Convert an array of float (FP32) values to double (FP64). Both arrays must
|
||||
// already be on GPU memory.
|
||||
void CudaFP32ToFP64(const float* input,
|
||||
double* output,
|
||||
const int size,
|
||||
cudaStream_t stream);
|
||||
|
||||
// Set all elements of the array to the FP32 value 0. The array must be in GPU
|
||||
// memory.
|
||||
void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream);
|
||||
|
||||
// Set all elements of the array to the FP64 value 0. The array must be in GPU
|
||||
// memory.
|
||||
void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream);
|
||||
|
||||
// Compute x = x + double(y). Input array is float (FP32), output array is
|
||||
// double (FP64). Both arrays must already be on GPU memory.
|
||||
void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream);
|
||||
|
||||
// Compute y[i] = y[i] + d[i]^2 x[i]. All arrays must already be on GPU memory.
|
||||
void CudaDtDxpy(double* y,
|
||||
const double* D,
|
||||
const double* x,
|
||||
const int size,
|
||||
cudaStream_t stream);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
#endif // CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_
|
||||
198
extern/ceres/internal/ceres/cuda_kernels_vector_ops_test.cc
vendored
Normal file
198
extern/ceres/internal/ceres/cuda_kernels_vector_ops_test.cc
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
// Converts a small vector from FP64 to FP32 on the GPU and checks every
// element against the host-side static_cast<float>.
TEST(CudaFP64ToFP32, SimpleConversions) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  CudaBuffer<double> fp64_gpu(&context);
  fp64_gpu.CopyFromCpuVector(fp64_cpu);
  CudaBuffer<float> fp32_gpu(&context);
  fp32_gpu.Reserve(fp64_cpu.size());
  CudaFP64ToFP32(fp64_gpu.data(),
                 fp32_gpu.data(),
                 fp64_cpu.size(),
                 context.DefaultStream());
  std::vector<float> fp32_cpu(fp64_cpu.size());
  fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
  // Signed element count avoids comparing an int index with unsigned size().
  const int num_values = static_cast<int>(fp32_cpu.size());
  for (int i = 0; i < num_values; ++i) {
    EXPECT_EQ(fp32_cpu[i], static_cast<float>(fp64_cpu[i]));
  }
}
|
||||
|
||||
// Converts doubles at the extremes of the FP64 range to FP32 on the GPU. The
// test expects the two smallest inputs to become 0 and the two largest to
// become +infinity.
TEST(CudaFP64ToFP32, NumericallyExtremeValues) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  // std::numeric_limits (from <limits>, which this file includes) is used
  // instead of the DBL_MIN / DBL_MAX macros, whose header <cfloat> is not
  // included here. The values are identical.
  constexpr double kMinDouble = std::numeric_limits<double>::min();
  constexpr double kMaxDouble = std::numeric_limits<double>::max();
  std::vector<double> fp64_cpu = {
      kMinDouble, 10.0 * kMinDouble, kMaxDouble, 0.1 * kMaxDouble};
  // First just make sure that the compiler has represented these values
  // accurately as fp64.
  EXPECT_GT(fp64_cpu[0], 0.0);
  EXPECT_GT(fp64_cpu[1], 0.0);
  EXPECT_TRUE(std::isfinite(fp64_cpu[2]));
  EXPECT_TRUE(std::isfinite(fp64_cpu[3]));
  CudaBuffer<double> fp64_gpu(&context);
  fp64_gpu.CopyFromCpuVector(fp64_cpu);
  CudaBuffer<float> fp32_gpu(&context);
  fp32_gpu.Reserve(fp64_cpu.size());
  CudaFP64ToFP32(fp64_gpu.data(),
                 fp32_gpu.data(),
                 fp64_cpu.size(),
                 context.DefaultStream());
  std::vector<float> fp32_cpu(fp64_cpu.size());
  fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
  EXPECT_EQ(fp32_cpu[0], 0.0f);
  EXPECT_EQ(fp32_cpu[1], 0.0f);
  EXPECT_EQ(fp32_cpu[2], std::numeric_limits<float>::infinity());
  EXPECT_EQ(fp32_cpu[3], std::numeric_limits<float>::infinity());
}
|
||||
|
||||
// Converts a small vector from FP32 to FP64 on the GPU and checks every
// element against the host-side static_cast<double>.
TEST(CudaFP32ToFP64, SimpleConversions) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  CudaBuffer<float> fp32_gpu(&context);
  fp32_gpu.CopyFromCpuVector(fp32_cpu);
  CudaBuffer<double> fp64_gpu(&context);
  fp64_gpu.Reserve(fp32_cpu.size());
  CudaFP32ToFP64(fp32_gpu.data(),
                 fp64_gpu.data(),
                 fp32_cpu.size(),
                 context.DefaultStream());
  std::vector<double> fp64_cpu(fp32_cpu.size());
  fp64_gpu.CopyToCpu(fp64_cpu.data(), fp64_cpu.size());
  // Signed element count avoids comparing an int index with unsigned size().
  const int num_values = static_cast<int>(fp64_cpu.size());
  for (int i = 0; i < num_values; ++i) {
    EXPECT_EQ(fp64_cpu[i], static_cast<double>(fp32_cpu[i]));
  }
}
|
||||
|
||||
// Uploads non-zero floats, zeroes them on the GPU and checks that every
// element read back equals 0.
TEST(CudaSetZeroFP32, NonZeroInput) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  CudaBuffer<float> fp32_gpu(&context);
  fp32_gpu.CopyFromCpuVector(fp32_cpu);
  CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), context.DefaultStream());
  std::vector<float> fp32_cpu_zero(fp32_cpu.size());
  fp32_gpu.CopyToCpu(fp32_cpu_zero.data(), fp32_cpu_zero.size());
  // Signed element count avoids comparing an int index with unsigned size().
  const int num_values = static_cast<int>(fp32_cpu_zero.size());
  for (int i = 0; i < num_values; ++i) {
    EXPECT_EQ(fp32_cpu_zero[i], 0.0f);
  }
}
|
||||
|
||||
// Uploads non-zero doubles, zeroes them on the GPU and checks that every
// element read back equals 0.
TEST(CudaSetZeroFP64, NonZeroInput) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  CudaBuffer<double> fp64_gpu(&context);
  fp64_gpu.CopyFromCpuVector(fp64_cpu);
  CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), context.DefaultStream());
  std::vector<double> fp64_cpu_zero(fp64_cpu.size());
  fp64_gpu.CopyToCpu(fp64_cpu_zero.data(), fp64_cpu_zero.size());
  // Signed element count avoids comparing an int index with unsigned size().
  const int num_values = static_cast<int>(fp64_cpu_zero.size());
  for (int i = 0; i < num_values; ++i) {
    EXPECT_EQ(fp64_cpu_zero[i], 0.0);
  }
}
|
||||
|
||||
// Adds a float vector to a double vector holding the same values on the GPU
// and checks that every result equals twice the float input.
TEST(CudaDsxpy, DoubleValues) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<float> fp32_cpu_a = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  std::vector<double> fp64_cpu_b = {
      1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  CudaBuffer<float> fp32_gpu_a(&context);
  fp32_gpu_a.CopyFromCpuVector(fp32_cpu_a);
  CudaBuffer<double> fp64_gpu_b(&context);
  fp64_gpu_b.CopyFromCpuVector(fp64_cpu_b);
  // b = b + double(a)
  CudaDsxpy(fp64_gpu_b.data(),
            fp32_gpu_a.data(),
            fp32_gpu_a.size(),
            context.DefaultStream());
  fp64_gpu_b.CopyToCpu(fp64_cpu_b.data(), fp64_cpu_b.size());
  // Signed element count avoids comparing an int index with unsigned size().
  const int num_values = static_cast<int>(fp64_cpu_b.size());
  for (int i = 0; i < num_values; ++i) {
    EXPECT_DOUBLE_EQ(fp64_cpu_b[i], 2.0 * fp32_cpu_a[i]);
  }
}
|
||||
|
||||
// Runs y = y + D^2 * x on four elements on the GPU and compares each result
// with the value computed by hand.
TEST(CudaDtDxpy, ComputeFourItems) {
  ContextImpl context;
  std::string cuda_error;
  // CUDA initialization is a precondition for everything below, so a failure
  // must stop the test (ASSERT) instead of only being recorded (EXPECT).
  ASSERT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  std::vector<double> x_cpu = {1, 2, 3, 4};
  std::vector<double> y_cpu = {4, 3, 2, 1};
  std::vector<double> d_cpu = {10, 20, 30, 40};
  CudaBuffer<double> x_gpu(&context);
  x_gpu.CopyFromCpuVector(x_cpu);
  CudaBuffer<double> y_gpu(&context);
  y_gpu.CopyFromCpuVector(y_cpu);
  CudaBuffer<double> d_gpu(&context);
  d_gpu.CopyFromCpuVector(d_cpu);
  CudaDtDxpy(y_gpu.data(),
             d_gpu.data(),
             x_gpu.data(),
             y_gpu.size(),
             context.DefaultStream());
  y_gpu.CopyToCpu(y_cpu.data(), y_cpu.size());
  EXPECT_DOUBLE_EQ(y_cpu[0], 4.0 + 10.0 * 10.0 * 1.0);
  EXPECT_DOUBLE_EQ(y_cpu[1], 3.0 + 20.0 * 20.0 * 2.0);
  EXPECT_DOUBLE_EQ(y_cpu[2], 2.0 + 30.0 * 30.0 * 3.0);
  EXPECT_DOUBLE_EQ(y_cpu[3], 1.0 + 40.0 * 40.0 * 4.0);
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
152
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc
vendored
Normal file
152
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_partitioned_block_sparse_crs_view.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/cuda_block_structure.h"
|
||||
#include "ceres/cuda_kernels_bsm_to_crs.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Builds the CRS structures of the E and F sub-matrices of `bsm` on the GPU
// and uploads the current values of `bsm`.
//
// num_col_blocks_e is the number of leading column blocks that form E; the
// remaining column blocks form F.
CudaPartitionedBlockSparseCRSView::CudaPartitionedBlockSparseCRSView(
    const BlockSparseMatrix& bsm,
    const int num_col_blocks_e,
    ContextImpl* context)
    : context_(context) {
  const auto& host_structure = *bsm.block_structure();
  block_structure_ = std::make_unique<CudaBlockSparseStructure>(
      host_structure, num_col_blocks_e, context);

  // Sizes of the two sub-matrices: both share the row count of bsm, while
  // non-zeros and columns are split between E and F.
  const int num_rows = bsm.num_rows();
  const int nnz_e = block_structure_->num_nonzeros_e();
  const int nnz_f = bsm.num_nonzeros() - nnz_e;

  // E ends where the first column block of F starts. If there is no such
  // column block, E takes all columns of bsm.
  int num_cols_e;
  if (num_col_blocks_e < host_structure.cols.size()) {
    num_cols_e = host_structure.cols[num_col_blocks_e].position;
  } else {
    num_cols_e = bsm.num_cols();
  }
  const int num_cols_f = bsm.num_cols() - num_cols_e;

  // CRS structure arrays; ownership is handed over to matrix_e_ / matrix_f_
  // further below.
  CudaBuffer<int32_t> crs_rows_e(context, num_rows + 1);
  CudaBuffer<int32_t> crs_cols_e(context, nnz_e);
  CudaBuffer<int32_t> crs_rows_f(context, num_rows + 1);
  CudaBuffer<int32_t> crs_cols_f(context, nnz_f);

  num_row_blocks_e_ = block_structure_->num_row_blocks_e();
  FillCRSStructurePartitioned(block_structure_->num_row_blocks(),
                              num_rows,
                              num_row_blocks_e_,
                              num_col_blocks_e,
                              nnz_e,
                              block_structure_->first_cell_in_row_block(),
                              block_structure_->cells(),
                              block_structure_->row_blocks(),
                              block_structure_->col_blocks(),
                              crs_rows_e.data(),
                              crs_cols_e.data(),
                              crs_rows_f.data(),
                              crs_cols_f.data(),
                              context->DefaultStream(),
                              context->is_cuda_memory_pools_supported_);

  // When F is CRS-compatible its values are uploaded with a plain copy, so the
  // GPU copy of the block structure is dropped. Otherwise a streamed buffer is
  // created for the permuting upload in UpdateValues.
  f_is_crs_compatible_ = block_structure_->IsCrsCompatible();
  if (!f_is_crs_compatible_) {
    streamed_buffer_ = std::make_unique<CudaStreamedBuffer<double>>(
        context, kMaxTemporaryArraySize);
  } else {
    block_structure_ = nullptr;
  }

  matrix_e_ = std::make_unique<CudaSparseMatrix>(
      num_cols_e, std::move(crs_rows_e), std::move(crs_cols_e), context);
  matrix_f_ = std::make_unique<CudaSparseMatrix>(
      num_cols_f, std::move(crs_rows_f), std::move(crs_cols_f), context);

  CHECK_EQ(bsm.num_nonzeros(),
           matrix_e_->num_nonzeros() + matrix_f_->num_nonzeros());

  UpdateValues(bsm);
}
|
||||
|
||||
void CudaPartitionedBlockSparseCRSView::UpdateValues(
|
||||
const BlockSparseMatrix& bsm) {
|
||||
if (f_is_crs_compatible_) {
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaMemcpyAsync(matrix_e_->mutable_values(),
|
||||
bsm.values(),
|
||||
matrix_e_->num_nonzeros() * sizeof(double),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()));
|
||||
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaMemcpyAsync(matrix_f_->mutable_values(),
|
||||
bsm.values() + matrix_e_->num_nonzeros(),
|
||||
matrix_f_->num_nonzeros() * sizeof(double),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()));
|
||||
return;
|
||||
}
|
||||
streamed_buffer_->CopyToGpu(
|
||||
bsm.values(),
|
||||
bsm.num_nonzeros(),
|
||||
[block_structure = block_structure_.get(),
|
||||
num_nonzeros_e = matrix_e_->num_nonzeros(),
|
||||
num_row_blocks_e = num_row_blocks_e_,
|
||||
values_f = matrix_f_->mutable_values(),
|
||||
rows_f = matrix_f_->rows()](
|
||||
const double* values, int num_values, int offset, auto stream) {
|
||||
PermuteToCRSPartitionedF(num_nonzeros_e + offset,
|
||||
num_values,
|
||||
block_structure->num_row_blocks(),
|
||||
num_row_blocks_e,
|
||||
block_structure->first_cell_in_row_block(),
|
||||
block_structure->value_offset_row_block_f(),
|
||||
block_structure->cells(),
|
||||
block_structure->row_blocks(),
|
||||
block_structure->col_blocks(),
|
||||
rows_f,
|
||||
values,
|
||||
values_f,
|
||||
stream);
|
||||
});
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaMemcpyAsync(matrix_e_->mutable_values(),
|
||||
bsm.values(),
|
||||
matrix_e_->num_nonzeros() * sizeof(double),
|
||||
cudaMemcpyHostToDevice,
|
||||
context_->DefaultStream()));
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
#endif // CERES_NO_CUDA
|
||||
111
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.h
vendored
Normal file
111
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.h
vendored
Normal file
@@ -0,0 +1,111 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
//
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
#define CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/cuda_block_structure.h"
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
#include "ceres/cuda_streamed_buffer.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
// We use cuSPARSE library for SpMV operations. However, it supports
|
||||
// neither block-sparse format with varying size of the blocks nor
|
||||
// submatrix-vector products. Thus, we perform the following operations in order
|
||||
// to compute products of partitioned block-sparse matrices and dense vectors on
|
||||
// gpu:
|
||||
// - Once per block-sparse structure update:
|
||||
// - Compute CRS structures of left and right submatrices from block-sparse
|
||||
// structure
|
||||
// - Check if values of F sub-matrix can be copied without permutation
|
||||
// matrices
|
||||
// - Once per block-sparse values update:
|
||||
// - Copy values of E sub-matrix
|
||||
// - Permute or copy values of F sub-matrix
|
||||
//
|
||||
// It is assumed that cells of block-sparse matrix are laid out sequentially in
|
||||
// both of sub-matrices and there is exactly one cell in row-block of E
|
||||
// sub-matrix in the first num_row_blocks_e_ row blocks, and no cells in E
|
||||
// sub-matrix below num_row_blocks_e_ row blocks.
|
||||
//
|
||||
// This class avoids storing both CRS and block-sparse values in GPU memory.
|
||||
// Instead, block-sparse values are transferred to gpu memory as a disjoint set
|
||||
// of small continuous segments with simultaneous permutation of the values into
|
||||
// correct order using block-structure.
|
||||
class CERES_NO_EXPORT CudaPartitionedBlockSparseCRSView {
|
||||
public:
|
||||
// Initializes internal CRS matrix and block-sparse structure on GPU side
|
||||
// values. The following objects are stored in gpu memory for the whole
|
||||
// lifetime of the object
|
||||
// - matrix_e_: left CRS submatrix
|
||||
// - matrix_f_: right CRS submatrix
|
||||
// - block_structure_: copy of block-sparse structure on GPU
|
||||
// - streamed_buffer_: helper for value updating
|
||||
CudaPartitionedBlockSparseCRSView(const BlockSparseMatrix& bsm,
|
||||
const int num_col_blocks_e,
|
||||
ContextImpl* context);
|
||||
|
||||
// Update values of CRS submatrices using values of block-sparse matrix.
|
||||
// Assumes that bsm has the same block-sparse structure as matrix that was
|
||||
// used for construction.
|
||||
void UpdateValues(const BlockSparseMatrix& bsm);
|
||||
|
||||
const CudaSparseMatrix* matrix_e() const { return matrix_e_.get(); }
|
||||
const CudaSparseMatrix* matrix_f() const { return matrix_f_.get(); }
|
||||
CudaSparseMatrix* mutable_matrix_e() { return matrix_e_.get(); }
|
||||
CudaSparseMatrix* mutable_matrix_f() { return matrix_f_.get(); }
|
||||
|
||||
private:
|
||||
// Value permutation kernel performs a single element-wise operation per
|
||||
// thread, thus performing permutation in blocks of 8 megabytes of
|
||||
// block-sparse values seems reasonable
|
||||
static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024;
|
||||
std::unique_ptr<CudaSparseMatrix> matrix_e_;
|
||||
std::unique_ptr<CudaSparseMatrix> matrix_f_;
|
||||
std::unique_ptr<CudaStreamedBuffer<double>> streamed_buffer_;
|
||||
std::unique_ptr<CudaBlockSparseStructure> block_structure_;
|
||||
bool f_is_crs_compatible_;
|
||||
int num_row_blocks_e_;
|
||||
ContextImpl* context_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
|
||||
279
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view_test.cc
vendored
Normal file
279
extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view_test.cc
vendored
Normal file
@@ -0,0 +1,279 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/cuda_partitioned_block_sparse_crs_view.h"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
// Parameters of the random partitioned block-sparse matrices generated by
// CreateRandomPartitionedMatrix().
struct RandomPartitionedMatrixOptions {
  // Number of leading row blocks that receive exactly one cell in E.
  int num_row_blocks_e;
  // Number of leading row blocks that may receive cells in F.
  int num_row_blocks_f;
  // Number of column blocks in the E (left) sub-matrix.
  int num_col_blocks_e;
  // Number of column blocks in the F (right) sub-matrix.
  int num_col_blocks_f;
  // Inclusive bounds of the uniformly distributed row block sizes.
  int min_row_block_size;
  int max_row_block_size;
  // Inclusive bounds of the uniformly distributed column block sizes.
  int min_col_block_size;
  int max_col_block_size;
  // Probability that a row block holding an E cell gets no F cells at all.
  double empty_f_probability;
  // Probability of keeping each candidate cell of F.
  double cell_probability_f;
  // Upper bound on the number of F cells in a single row block.
  int max_cells_f;
};
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> CreateRandomPartitionedMatrix(
|
||||
const RandomPartitionedMatrixOptions& options, std::mt19937& rng) {
|
||||
const int num_row_blocks =
|
||||
std::max(options.num_row_blocks_e, options.num_row_blocks_f);
|
||||
const int num_col_blocks =
|
||||
options.num_col_blocks_e + options.num_col_blocks_f;
|
||||
|
||||
CompressedRowBlockStructure* block_structure =
|
||||
new CompressedRowBlockStructure;
|
||||
block_structure->cols.reserve(num_col_blocks);
|
||||
block_structure->rows.reserve(num_row_blocks);
|
||||
|
||||
// Create column blocks
|
||||
std::uniform_int_distribution<int> col_size(options.min_col_block_size,
|
||||
options.max_col_block_size);
|
||||
int num_cols = 0;
|
||||
for (int i = 0; i < num_col_blocks; ++i) {
|
||||
const int size = col_size(rng);
|
||||
block_structure->cols.emplace_back(size, num_cols);
|
||||
num_cols += size;
|
||||
}
|
||||
|
||||
// Prepare column-block indices of E cells
|
||||
std::vector<int> e_col_block_idx;
|
||||
e_col_block_idx.reserve(options.num_row_blocks_e);
|
||||
std::uniform_int_distribution<int> col_e(0, options.num_col_blocks_e - 1);
|
||||
for (int i = 0; i < options.num_row_blocks_e; ++i) {
|
||||
e_col_block_idx.emplace_back(col_e(rng));
|
||||
}
|
||||
std::sort(e_col_block_idx.begin(), e_col_block_idx.end());
|
||||
|
||||
// Prepare cell structure
|
||||
std::uniform_int_distribution<int> row_size(options.min_row_block_size,
|
||||
options.max_row_block_size);
|
||||
std::uniform_real_distribution<double> uniform;
|
||||
int num_rows = 0;
|
||||
for (int i = 0; i < num_row_blocks; ++i) {
|
||||
const int size = row_size(rng);
|
||||
block_structure->rows.emplace_back();
|
||||
auto& row = block_structure->rows.back();
|
||||
row.block.size = size;
|
||||
row.block.position = num_rows;
|
||||
num_rows += size;
|
||||
if (i < options.num_row_blocks_e) {
|
||||
row.cells.emplace_back(e_col_block_idx[i], -1);
|
||||
if (uniform(rng) < options.empty_f_probability) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (i >= options.num_row_blocks_f) continue;
|
||||
const int cells_before = row.cells.size();
|
||||
for (int j = options.num_col_blocks_e; j < num_col_blocks; ++j) {
|
||||
if (uniform(rng) > options.cell_probability_f) {
|
||||
continue;
|
||||
}
|
||||
row.cells.emplace_back(j, -1);
|
||||
}
|
||||
if (row.cells.size() > cells_before + options.max_cells_f) {
|
||||
std::shuffle(row.cells.begin() + cells_before, row.cells.end(), rng);
|
||||
row.cells.resize(cells_before + options.max_cells_f);
|
||||
std::sort(
|
||||
row.cells.begin(), row.cells.end(), [](const auto& a, const auto& b) {
|
||||
return a.block_id < b.block_id;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Fill positions in E sub-matrix
|
||||
int num_nonzeros = 0;
|
||||
for (int i = 0; i < options.num_row_blocks_e; ++i) {
|
||||
CHECK_GE(block_structure->rows[i].cells.size(), 1);
|
||||
block_structure->rows[i].cells[0].position = num_nonzeros;
|
||||
const int col_block_size =
|
||||
block_structure->cols[block_structure->rows[i].cells[0].block_id].size;
|
||||
const int row_block_size = block_structure->rows[i].block.size;
|
||||
num_nonzeros += row_block_size * col_block_size;
|
||||
CHECK_GE(num_nonzeros, 0);
|
||||
}
|
||||
// Fill positions in F sub-matrix
|
||||
for (int i = 0; i < options.num_row_blocks_f; ++i) {
|
||||
const int row_block_size = block_structure->rows[i].block.size;
|
||||
for (auto& cell : block_structure->rows[i].cells) {
|
||||
if (cell.position >= 0) continue;
|
||||
cell.position = num_nonzeros;
|
||||
const int col_block_size = block_structure->cols[cell.block_id].size;
|
||||
num_nonzeros += row_block_size * col_block_size;
|
||||
CHECK_GE(num_nonzeros, 0);
|
||||
}
|
||||
}
|
||||
// Populate values
|
||||
auto bsm = std::make_unique<BlockSparseMatrix>(block_structure, true);
|
||||
for (int i = 0; i < num_nonzeros; ++i) {
|
||||
bsm->mutable_values()[i] = i + 1;
|
||||
}
|
||||
return bsm;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
class CudaPartitionedBlockSparseCRSViewTest : public ::testing::Test {
|
||||
static constexpr int kNumColBlocksE = 456;
|
||||
|
||||
protected:
|
||||
void SetUp() final {
|
||||
std::string message;
|
||||
CHECK(context_.InitCuda(&message))
|
||||
<< "InitCuda() failed because: " << message;
|
||||
|
||||
RandomPartitionedMatrixOptions options;
|
||||
options.num_row_blocks_f = 123;
|
||||
options.num_row_blocks_e = 456;
|
||||
options.num_col_blocks_f = 123;
|
||||
options.num_col_blocks_e = kNumColBlocksE;
|
||||
options.min_row_block_size = 1;
|
||||
options.max_row_block_size = 4;
|
||||
options.min_col_block_size = 1;
|
||||
options.max_col_block_size = 4;
|
||||
options.empty_f_probability = .1;
|
||||
options.cell_probability_f = .2;
|
||||
options.max_cells_f = options.num_col_blocks_f;
|
||||
|
||||
std::mt19937 rng;
|
||||
short_f_ = CreateRandomPartitionedMatrix(options, rng);
|
||||
|
||||
options.num_row_blocks_e = 123;
|
||||
options.num_row_blocks_f = 456;
|
||||
short_e_ = CreateRandomPartitionedMatrix(options, rng);
|
||||
|
||||
options.max_cells_f = 1;
|
||||
options.num_row_blocks_e = options.num_row_blocks_f;
|
||||
options.num_row_blocks_e = options.num_row_blocks_f;
|
||||
f_crs_compatible_ = CreateRandomPartitionedMatrix(options, rng);
|
||||
}
|
||||
|
||||
void TestMatrix(const BlockSparseMatrix& A_) {
|
||||
const int num_col_blocks_e = 456;
|
||||
CudaPartitionedBlockSparseCRSView view(A_, kNumColBlocksE, &context_);
|
||||
|
||||
const int num_rows = A_.num_rows();
|
||||
const int num_cols = A_.num_cols();
|
||||
|
||||
const auto& bs = *A_.block_structure();
|
||||
const int num_cols_e = bs.cols[num_col_blocks_e].position;
|
||||
const int num_cols_f = num_cols - num_cols_e;
|
||||
|
||||
auto matrix_e = view.matrix_e();
|
||||
auto matrix_f = view.matrix_f();
|
||||
ASSERT_EQ(matrix_e->num_cols(), num_cols_e);
|
||||
ASSERT_EQ(matrix_e->num_rows(), num_rows);
|
||||
ASSERT_EQ(matrix_f->num_cols(), num_cols_f);
|
||||
ASSERT_EQ(matrix_f->num_rows(), num_rows);
|
||||
|
||||
Vector x(num_cols);
|
||||
Vector x_left(num_cols_e);
|
||||
Vector x_right(num_cols_f);
|
||||
Vector y(num_rows);
|
||||
CudaVector x_cuda(&context_, num_cols);
|
||||
CudaVector x_left_cuda(&context_, num_cols_e);
|
||||
CudaVector x_right_cuda(&context_, num_cols_f);
|
||||
CudaVector y_cuda(&context_, num_rows);
|
||||
Vector y_cuda_host(num_rows);
|
||||
|
||||
for (int i = 0; i < num_cols_e; ++i) {
|
||||
x.setZero();
|
||||
x_left.setZero();
|
||||
y.setZero();
|
||||
y_cuda.SetZero();
|
||||
x[i] = 1.;
|
||||
x_left[i] = 1.;
|
||||
x_left_cuda.CopyFromCpu(x_left);
|
||||
A_.RightMultiplyAndAccumulate(
|
||||
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
|
||||
matrix_e->RightMultiplyAndAccumulate(x_left_cuda, &y_cuda);
|
||||
y_cuda.CopyTo(&y_cuda_host);
|
||||
// There will be up to 1 non-zero product per row, thus we expect an exact
|
||||
// match on 32-bit integer indices
|
||||
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
|
||||
}
|
||||
for (int i = num_cols_e; i < num_cols_f; ++i) {
|
||||
x.setZero();
|
||||
x_right.setZero();
|
||||
y.setZero();
|
||||
y_cuda.SetZero();
|
||||
x[i] = 1.;
|
||||
x_right[i - num_cols_e] = 1.;
|
||||
x_right_cuda.CopyFromCpu(x_right);
|
||||
A_.RightMultiplyAndAccumulate(
|
||||
x.data(), y.data(), &context_, std::thread::hardware_concurrency());
|
||||
matrix_f->RightMultiplyAndAccumulate(x_right_cuda, &y_cuda);
|
||||
y_cuda.CopyTo(&y_cuda_host);
|
||||
// There will be up to 1 non-zero product per row, thus we expect an exact
|
||||
// match on 32-bit integer indices
|
||||
EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
|
||||
}
|
||||
}
|
||||
|
||||
// E sub-matrix might have less row-blocks with cells than F sub-matrix. This
|
||||
// test matrix checks if this case is handled properly
|
||||
std::unique_ptr<BlockSparseMatrix> short_e_;
|
||||
// In case of non-crs compatible F matrix, permuting values from block-order
|
||||
// to crs order involves binary search over row-blocks of F. Having lots of
|
||||
// row-blocks with no F cells is an edge case for this algorithm.
|
||||
std::unique_ptr<BlockSparseMatrix> short_f_;
|
||||
// With F matrix being CRS-compatible, update of the values of partitioned
|
||||
// matrix view reduces to two host->device memcopies, and uses a separate code
|
||||
// path
|
||||
std::unique_ptr<BlockSparseMatrix> f_crs_compatible_;
|
||||
|
||||
ContextImpl context_;
|
||||
};
|
||||
|
||||
// Matrix in which E has cells in fewer row blocks than F.
TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortE) {
  const BlockSparseMatrix& matrix = *short_e_;
  TestMatrix(matrix);
}
|
||||
|
||||
// Matrix in which F has cells in fewer row blocks than E.
TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortF) {
  const BlockSparseMatrix& matrix = *short_f_;
  TestMatrix(matrix);
}
|
||||
|
||||
// Matrix generated with at most one F cell per row block.
TEST_F(CudaPartitionedBlockSparseCRSViewTest,
       CreateUpdateValuesCrsCompatibleF) {
  const BlockSparseMatrix& matrix = *f_crs_compatible_;
  TestMatrix(matrix);
}
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
226
extern/ceres/internal/ceres/cuda_sparse_matrix.cc
vendored
Normal file
226
extern/ceres/internal/ceres/cuda_sparse_matrix.cc
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
//
|
||||
// A CUDA sparse matrix linear operator.
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
// clang-format off
|
||||
#include "ceres/internal/config.h"
|
||||
// clang-format on
|
||||
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/crs_matrix.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "cuda_runtime_api.h"
|
||||
#include "cusparse.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
// Starting in CUDA 11.2.1, CUSPARSE_MV_ALG_DEFAULT was deprecated in favor of
|
||||
// CUSPARSE_SPMV_ALG_DEFAULT.
|
||||
#if CUDART_VERSION >= 11021
|
||||
const auto kSpMVAlgorithm = CUSPARSE_SPMV_ALG_DEFAULT;
|
||||
#else // CUDART_VERSION >= 11021
|
||||
const auto kSpMVAlgorithm = CUSPARSE_MV_ALG_DEFAULT;
|
||||
#endif // CUDART_VERSION >= 11021
|
||||
size_t GetTempBufferSizeForOp(const cusparseHandle_t& handle,
|
||||
const cusparseOperation_t op,
|
||||
const cusparseDnVecDescr_t& x,
|
||||
const cusparseDnVecDescr_t& y,
|
||||
const cusparseSpMatDescr_t& A) {
|
||||
size_t buffer_size;
|
||||
const double alpha = 1.0;
|
||||
const double beta = 1.0;
|
||||
CHECK_NE(A, nullptr);
|
||||
CHECK_EQ(cusparseSpMV_bufferSize(handle,
|
||||
op,
|
||||
&alpha,
|
||||
A,
|
||||
x,
|
||||
&beta,
|
||||
y,
|
||||
CUDA_R_64F,
|
||||
kSpMVAlgorithm,
|
||||
&buffer_size),
|
||||
CUSPARSE_STATUS_SUCCESS);
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
size_t GetTempBufferSize(const cusparseHandle_t& handle,
|
||||
const cusparseDnVecDescr_t& left,
|
||||
const cusparseDnVecDescr_t& right,
|
||||
const cusparseSpMatDescr_t& A) {
|
||||
CHECK_NE(A, nullptr);
|
||||
return std::max(GetTempBufferSizeForOp(
|
||||
handle, CUSPARSE_OPERATION_NON_TRANSPOSE, right, left, A),
|
||||
GetTempBufferSizeForOp(
|
||||
handle, CUSPARSE_OPERATION_TRANSPOSE, left, right, A));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Takes over already populated CSR row and column index buffers. The number
// of rows is derived from the length of the row index buffer and the number
// of non-zeros from the length of the column index buffer; storage for the
// values is allocated here but not filled.
CudaSparseMatrix::CudaSparseMatrix(int num_cols,
                                   CudaBuffer<int32_t>&& rows,
                                   CudaBuffer<int32_t>&& cols,
                                   ContextImpl* context)
    // The sizes are read from the arguments before the buffers are moved into
    // rows_ / cols_ further down the list.
    : num_rows_(static_cast<int>(rows.size() - 1)),
      num_cols_(num_cols),
      num_nonzeros_(static_cast<int>(cols.size())),
      context_(context),
      rows_(std::move(rows)),
      cols_(std::move(cols)),
      values_(context, num_nonzeros_),
      spmv_buffer_(context) {
  Initialize();
}
|
||||
|
||||
// Creates a GPU copy of crs_matrix: buffers matching its dimensions are
// allocated, then the row indices, column indices and values are copied from
// the CPU before the cuSPARSE descriptors are set up.
CudaSparseMatrix::CudaSparseMatrix(ContextImpl* context,
                                   const CompressedRowSparseMatrix& crs_matrix)
    : num_rows_(crs_matrix.num_rows()),
      num_cols_(crs_matrix.num_cols()),
      num_nonzeros_(crs_matrix.num_nonzeros()),
      context_(context),
      rows_(context, num_rows_ + 1),
      cols_(context, num_nonzeros_),
      values_(context, num_nonzeros_),
      spmv_buffer_(context) {
  // CSR row index array has one entry more than there are rows.
  const int num_row_offsets = num_rows_ + 1;
  rows_.CopyFromCpu(crs_matrix.rows(), num_row_offsets);
  cols_.CopyFromCpu(crs_matrix.cols(), num_nonzeros_);
  values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_);
  Initialize();
}
|
||||
|
||||
// Releases the cuSPARSE descriptors owned by the matrix.
CudaSparseMatrix::~CudaSparseMatrix() {
  CHECK_EQ(cusparseDestroySpMat(descr_), CUSPARSE_STATUS_SUCCESS);
  descr_ = nullptr;
  // Initialize() returns before creating the dense vector descriptors when
  // the matrix has no non-zeros, so they can still be null here; only destroy
  // descriptors that were actually created.
  if (descr_vec_left_ != nullptr) {
    CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_left_));
    descr_vec_left_ = nullptr;
  }
  if (descr_vec_right_ != nullptr) {
    CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_right_));
    descr_vec_right_ = nullptr;
  }
}
|
||||
|
||||
void CudaSparseMatrix::CopyValuesFromCpu(
|
||||
const CompressedRowSparseMatrix& crs_matrix) {
|
||||
// There is no quick and easy way to verify that the structure is unchanged,
|
||||
// but at least we can check that the size of the matrix and the number of
|
||||
// nonzeros is unchanged.
|
||||
CHECK_EQ(num_rows_, crs_matrix.num_rows());
|
||||
CHECK_EQ(num_cols_, crs_matrix.num_cols());
|
||||
CHECK_EQ(num_nonzeros_, crs_matrix.num_nonzeros());
|
||||
values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_);
|
||||
}
|
||||
|
||||
void CudaSparseMatrix::Initialize() {
|
||||
CHECK(context_->IsCudaInitialized());
|
||||
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
|
||||
cusparseCreateCsr(&descr_,
|
||||
num_rows_,
|
||||
num_cols_,
|
||||
num_nonzeros_,
|
||||
rows_.data(),
|
||||
cols_.data(),
|
||||
values_.data(),
|
||||
CUSPARSE_INDEX_32I,
|
||||
CUSPARSE_INDEX_32I,
|
||||
CUSPARSE_INDEX_BASE_ZERO,
|
||||
CUDA_R_64F));
|
||||
|
||||
// Note: values_.data() is used as non-zero pointer to device memory
|
||||
// When there is no non-zero values, data-pointer of values_ array will be a
|
||||
// nullptr; but in this case left/right products are trivial and temporary
|
||||
// buffer (and vector descriptors) is not required
|
||||
if (!num_nonzeros_) return;
|
||||
|
||||
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
|
||||
cusparseCreateDnVec(
|
||||
&descr_vec_left_, num_rows_, values_.data(), CUDA_R_64F));
|
||||
CHECK_EQ(CUSPARSE_STATUS_SUCCESS,
|
||||
cusparseCreateDnVec(
|
||||
&descr_vec_right_, num_cols_, values_.data(), CUDA_R_64F));
|
||||
size_t buffer_size = GetTempBufferSize(
|
||||
context_->cusparse_handle_, descr_vec_left_, descr_vec_right_, descr_);
|
||||
spmv_buffer_.Reserve(buffer_size);
|
||||
}
|
||||
|
||||
// Computes y = y + op(M) * x with cuSPARSE using the internal temporary
// buffer; any cuSPARSE failure is fatal.
void CudaSparseMatrix::SpMv(cusparseOperation_t op,
                            const cusparseDnVecDescr_t& x,
                            const cusparseDnVecDescr_t& y) const {
  // cusparseSpMV computes y = alpha * op(M) * x + beta * y; both factors are
  // one so that the product is accumulated into y.
  const double alpha = 1.0;
  const double beta = 1.0;

  const auto status = cusparseSpMV(context_->cusparse_handle_,
                                   op,
                                   &alpha,
                                   descr_,
                                   x,
                                   &beta,
                                   y,
                                   CUDA_R_64F,
                                   kSpMVAlgorithm,
                                   spmv_buffer_.data());
  CHECK_EQ(status, CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x,
|
||||
CudaVector* y) const {
|
||||
DCHECK(GetTempBufferSize(
|
||||
context_->cusparse_handle_, y->descr(), x.descr(), descr_) <=
|
||||
spmv_buffer_.size());
|
||||
SpMv(CUSPARSE_OPERATION_NON_TRANSPOSE, x.descr(), y->descr());
|
||||
}
|
||||
|
||||
void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x,
|
||||
CudaVector* y) const {
|
||||
// TODO(Joydeep Biswas): We should consider storing a transposed copy of the
|
||||
// matrix by converting CSR to CSC. From the cuSPARSE documentation:
|
||||
// "In general, opA == CUSPARSE_OPERATION_NON_TRANSPOSE is 3x faster than opA
|
||||
// != CUSPARSE_OPERATION_NON_TRANSPOSE"
|
||||
DCHECK(GetTempBufferSize(
|
||||
context_->cusparse_handle_, x.descr(), y->descr(), descr_) <=
|
||||
spmv_buffer_.size());
|
||||
SpMv(CUSPARSE_OPERATION_TRANSPOSE, x.descr(), y->descr());
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
143
extern/ceres/internal/ceres/cuda_sparse_matrix.h
vendored
Normal file
143
extern/ceres/internal/ceres/cuda_sparse_matrix.h
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
//
|
||||
// A CUDA sparse matrix linear operator.
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
|
||||
#define CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
// clang-format off
|
||||
#include "ceres/internal/config.h"
|
||||
// clang-format on
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "cusparse.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// A sparse matrix hosted on the GPU in compressed row sparse format, with
|
||||
// CUDA-accelerated operations.
|
||||
// The user of the class must ensure that ContextImpl::InitCuda() has already
|
||||
// been successfully called before using this class.
|
||||
class CERES_NO_EXPORT CudaSparseMatrix {
|
||||
public:
|
||||
// Create a GPU copy of the matrix provided.
|
||||
CudaSparseMatrix(ContextImpl* context,
|
||||
const CompressedRowSparseMatrix& crs_matrix);
|
||||
|
||||
// Create matrix from existing row and column index buffers.
|
||||
// Values are left uninitialized.
|
||||
CudaSparseMatrix(int num_cols,
|
||||
CudaBuffer<int32_t>&& rows,
|
||||
CudaBuffer<int32_t>&& cols,
|
||||
ContextImpl* context);
|
||||
|
||||
~CudaSparseMatrix();
|
||||
|
||||
// Left/right products are using internal buffer and are not thread-safe
|
||||
// y = y + Ax;
|
||||
void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const;
|
||||
// y = y + A'x;
|
||||
void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const;
|
||||
|
||||
int num_rows() const { return num_rows_; }
|
||||
int num_cols() const { return num_cols_; }
|
||||
int num_nonzeros() const { return num_nonzeros_; }
|
||||
|
||||
const int32_t* rows() const { return rows_.data(); }
|
||||
const int32_t* cols() const { return cols_.data(); }
|
||||
const double* values() const { return values_.data(); }
|
||||
|
||||
int32_t* mutable_rows() { return rows_.data(); }
|
||||
int32_t* mutable_cols() { return cols_.data(); }
|
||||
double* mutable_values() { return values_.data(); }
|
||||
|
||||
// If subsequent uses of this matrix involve only numerical changes and no
|
||||
// structural changes, then this method can be used to copy the updated
|
||||
// non-zero values -- the row and column index arrays are kept the same. It
|
||||
// is the caller's responsibility to ensure that the sparsity structure of the
|
||||
// matrix is unchanged.
|
||||
void CopyValuesFromCpu(const CompressedRowSparseMatrix& crs_matrix);
|
||||
|
||||
const cusparseSpMatDescr_t& descr() const { return descr_; }
|
||||
|
||||
private:
|
||||
// Disable copy and assignment.
|
||||
CudaSparseMatrix(const CudaSparseMatrix&) = delete;
|
||||
CudaSparseMatrix& operator=(const CudaSparseMatrix&) = delete;
|
||||
|
||||
// Allocate temporary buffer for left/right products, create cuSPARSE
|
||||
// descriptors
|
||||
void Initialize();
|
||||
|
||||
// y = y + op(M)x. op must be either CUSPARSE_OPERATION_NON_TRANSPOSE or
|
||||
// CUSPARSE_OPERATION_TRANSPOSE.
|
||||
void SpMv(cusparseOperation_t op,
|
||||
const cusparseDnVecDescr_t& x,
|
||||
const cusparseDnVecDescr_t& y) const;
|
||||
|
||||
int num_rows_ = 0;
|
||||
int num_cols_ = 0;
|
||||
int num_nonzeros_ = 0;
|
||||
|
||||
ContextImpl* context_ = nullptr;
|
||||
// CSR row indices.
|
||||
CudaBuffer<int32_t> rows_;
|
||||
// CSR column indices.
|
||||
CudaBuffer<int32_t> cols_;
|
||||
// CSR values.
|
||||
CudaBuffer<double> values_;
|
||||
|
||||
// CuSparse object that describes this matrix.
|
||||
cusparseSpMatDescr_t descr_ = nullptr;
|
||||
|
||||
// Dense vector descriptors for pointer interface
|
||||
cusparseDnVecDescr_t descr_vec_left_ = nullptr;
|
||||
cusparseDnVecDescr_t descr_vec_right_ = nullptr;
|
||||
|
||||
mutable CudaBuffer<uint8_t> spmv_buffer_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_
|
||||
286
extern/ceres/internal/ceres/cuda_sparse_matrix_test.cc
vendored
Normal file
286
extern/ceres/internal/ceres/cuda_sparse_matrix_test.cc
vendored
Normal file
@@ -0,0 +1,286 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include "ceres/cuda_sparse_matrix.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/casts.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_least_squares_problems.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
class CudaSparseMatrixTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() final {
|
||||
std::string message;
|
||||
CHECK(context_.InitCuda(&message))
|
||||
<< "InitCuda() failed because: " << message;
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
CreateLinearLeastSquaresProblemFromId(2);
|
||||
CHECK(problem != nullptr);
|
||||
A_.reset(down_cast<BlockSparseMatrix*>(problem->A.release()));
|
||||
CHECK(A_ != nullptr);
|
||||
CHECK(problem->b != nullptr);
|
||||
CHECK(problem->x != nullptr);
|
||||
b_.resize(A_->num_rows());
|
||||
for (int i = 0; i < A_->num_rows(); ++i) {
|
||||
b_[i] = problem->b[i];
|
||||
}
|
||||
x_.resize(A_->num_cols());
|
||||
for (int i = 0; i < A_->num_cols(); ++i) {
|
||||
x_[i] = problem->x[i];
|
||||
}
|
||||
CHECK_EQ(A_->num_rows(), b_.rows());
|
||||
CHECK_EQ(A_->num_cols(), x_.rows());
|
||||
}
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> A_;
|
||||
Vector x_;
|
||||
Vector b_;
|
||||
ContextImpl context_;
|
||||
};
|
||||
|
||||
// Evaluates res = A * x - b on the GPU and compares it against the same
// expression evaluated on the CPU with the block sparse matrix of the fixture.
TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) {
  auto A_crs = A_->ToCompressedRowSparseMatrix();
  CudaSparseMatrix A_gpu(&context_, *A_crs);
  CudaVector x_gpu(&context_, A_gpu.num_cols());
  CudaVector res_gpu(&context_, A_gpu.num_rows());
  x_gpu.CopyFromCpu(x_);

  const Vector minus_b = -b_;
  // res = -b
  res_gpu.CopyFromCpu(minus_b);
  // res += A * x
  A_gpu.RightMultiplyAndAccumulate(x_gpu, &res_gpu);

  Vector res;
  res_gpu.CopyTo(&res);

  // CPU reference, accumulated into a copy of -b in the same way.
  Vector res_expected = minus_b;
  A_->RightMultiplyAndAccumulate(x_.data(), res_expected.data());

  // The two results are compared up to a small multiple of machine epsilon
  // rather than for exact equality.
  EXPECT_LE((res - res_expected).norm(),
            std::numeric_limits<double>::epsilon() * 1e3);
}
|
||||
|
||||
// Builds a GPU matrix from A1, multiplies it with b, then overwrites its
// values with those of A2 (same sparsity pattern) via CopyValuesFromCpu() and
// multiplies again. Both products are compared with hand-computed results.
TEST(CudaSparseMatrix, CopyValuesFromCpu) {
  // A1:
  // [ 1 1 0 0
  //   0 1 1 0]
  // A2:
  // [ 1 2 0 0
  //   0 3 4 0]
  // b: [1 2 3 4]'
  // A1 * b = [3 5]'
  // A2 * b = [5 18]'
  TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1});
  TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  Vector b(4);
  b << 1, 2, 3, 4;

  ContextImpl context;
  std::string error;
  CHECK(context.InitCuda(&error)) << "InitCuda() failed because: " << error;

  auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
  CudaSparseMatrix A_gpu(&context, *A1_crs);
  CudaVector b_gpu(&context, A1.num_cols());
  CudaVector x_gpu(&context, A1.num_rows());
  b_gpu.CopyFromCpu(b);

  Vector x_expected(2);
  Vector x_computed;

  // First pass: the matrix holds the values of A1.
  x_expected << 3, 5;
  x_gpu.SetZero();
  A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
  x_gpu.CopyTo(&x_computed);
  EXPECT_EQ(x_computed, x_expected);

  // Second pass: only the values are replaced by those of A2.
  auto A2_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A2);
  A_gpu.CopyValuesFromCpu(*A2_crs);
  x_expected << 5, 18;
  x_gpu.SetZero();
  A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
  x_gpu.CopyTo(&x_computed);
  EXPECT_EQ(x_computed, x_expected);
}
|
||||
|
||||
// Multiplies a small 2x4 matrix with a vector on the GPU and compares the
// product with the hand-computed result.
TEST(CudaSparseMatrix, RightMultiplyAndAccumulate) {
  // A:
  // [ 1 2 0 0
  //   0 3 4 0]
  // b: [1 2 3 4]'
  // A * b = [5 18]'
  TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  Vector b(4);
  b << 1, 2, 3, 4;
  Vector x_expected(2);
  x_expected << 5, 18;

  ContextImpl context;
  std::string error;
  CHECK(context.InitCuda(&error)) << "InitCuda() failed because: " << error;

  // Upload the matrix and the input vector; the output starts from zero so
  // that the accumulated result equals the plain product.
  auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
  CudaSparseMatrix A_gpu(&context, *A_crs);
  CudaVector b_gpu(&context, A.num_cols());
  CudaVector x_gpu(&context, A.num_rows());
  b_gpu.CopyFromCpu(b);
  x_gpu.SetZero();

  A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);

  Vector x_computed;
  x_gpu.CopyTo(&x_computed);

  EXPECT_EQ(x_computed, x_expected);
}
|
||||
|
||||
// Multiplies the transpose of a small 2x4 matrix with a vector on the GPU and
// compares the product with the hand-computed result.
TEST(CudaSparseMatrix, LeftMultiplyAndAccumulate) {
  // A:
  // [ 1 2 0 0
  //   0 3 4 0]
  // b: [1 2]'
  // A'* b = [1 8 8 0]'
  TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  Vector b(2);
  b << 1, 2;
  Vector x_expected(4);
  x_expected << 1, 8, 8, 0;

  ContextImpl context;
  std::string error;
  CHECK(context.InitCuda(&error)) << "InitCuda() failed because: " << error;

  // For the transposed product the input has one entry per row of A and the
  // output one entry per column of A.
  auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
  CudaSparseMatrix A_gpu(&context, *A_crs);
  CudaVector b_gpu(&context, A.num_rows());
  CudaVector x_gpu(&context, A.num_cols());
  b_gpu.CopyFromCpu(b);
  x_gpu.SetZero();

  A_gpu.LeftMultiplyAndAccumulate(b_gpu, &x_gpu);

  Vector x_computed;
  x_gpu.CopyTo(&x_computed);

  EXPECT_EQ(x_computed, x_expected);
}
|
||||
|
||||
// If there are numerical errors due to synchronization issues, they will show
|
||||
// up when testing with large matrices, since each operation will take
|
||||
// significant time, thus hopefully revealing any potential synchronization
|
||||
// issues.
|
||||
TEST(CudaSparseMatrix, LargeMultiplyAndAccumulate) {
|
||||
// Create a large NxN matrix A that has the following structure:
|
||||
// In row i, only columns i and i+1 are non-zero.
|
||||
// A_{i, i} = A_{i, i+1} = 1.
|
||||
// There will be 2 * N - 1 non-zero elements in A.
|
||||
// X = [1:N]
|
||||
// Right multiply test:
|
||||
// b = A * X
|
||||
// Left multiply test:
|
||||
// b = A' * X
|
||||
|
||||
const int N = 10 * 1000 * 1000;
|
||||
const int num_non_zeros = 2 * N - 1;
|
||||
std::vector<int> row_indices(num_non_zeros);
|
||||
std::vector<int> col_indices(num_non_zeros);
|
||||
std::vector<double> values(num_non_zeros);
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
row_indices[2 * i] = i;
|
||||
col_indices[2 * i] = i;
|
||||
values[2 * i] = 1.0;
|
||||
if (i + 1 < N) {
|
||||
col_indices[2 * i + 1] = i + 1;
|
||||
row_indices[2 * i + 1] = i;
|
||||
values[2 * i + 1] = 1;
|
||||
}
|
||||
}
|
||||
TripletSparseMatrix A(N, N, row_indices, col_indices, values);
|
||||
Vector x(N);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
x[i] = i + 1;
|
||||
}
|
||||
|
||||
ContextImpl context;
|
||||
std::string message;
|
||||
CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
|
||||
auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
|
||||
CudaSparseMatrix A_gpu(&context, *A_crs);
|
||||
CudaVector b_gpu(&context, N);
|
||||
CudaVector x_gpu(&context, N);
|
||||
x_gpu.CopyFromCpu(x);
|
||||
|
||||
// First check RightMultiply.
|
||||
{
|
||||
b_gpu.SetZero();
|
||||
A_gpu.RightMultiplyAndAccumulate(x_gpu, &b_gpu);
|
||||
Vector b_computed;
|
||||
b_gpu.CopyTo(&b_computed);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (i + 1 < N) {
|
||||
EXPECT_EQ(b_computed[i], 2 * (i + 1) + 1);
|
||||
} else {
|
||||
EXPECT_EQ(b_computed[i], i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next check LeftMultiply.
|
||||
{
|
||||
b_gpu.SetZero();
|
||||
A_gpu.LeftMultiplyAndAccumulate(x_gpu, &b_gpu);
|
||||
Vector b_computed;
|
||||
b_gpu.CopyTo(&b_computed);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (i > 0) {
|
||||
EXPECT_EQ(b_computed[i], 2 * (i + 1) - 1);
|
||||
} else {
|
||||
EXPECT_EQ(b_computed[i], i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
335
extern/ceres/internal/ceres/cuda_streamed_buffer.h
vendored
Normal file
335
extern/ceres/internal/ceres/cuda_streamed_buffer.h
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_
|
||||
#define CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "ceres/cuda_buffer.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Most contemporary CUDA devices are capable of simultaneous code execution and
|
||||
// host-to-device transfer. This class copies batches of data to GPU memory and
|
||||
// executes processing of copied data in parallel (asynchronously).
|
||||
// Data is copied to a fixed-size buffer on GPU (containing at most
|
||||
// max_buffer_size values), and this memory is re-used when the previous
|
||||
// batch of values is processed by user-provided callback
|
||||
// Host-to-device copy uses a temporary buffer if required. Each batch of values
|
||||
// has size of kValuesPerBatch, except the last one.
|
||||
template <typename T>
|
||||
class CERES_NO_EXPORT CudaStreamedBuffer {
|
||||
public:
|
||||
// If hardware supports only one host-to-device copy or one host-to-device
|
||||
// copy is able to reach peak bandwidth, two streams are sufficient to reach
|
||||
// maximum efficiency:
|
||||
// - If transferring batch of values takes more time, than processing it on
|
||||
// gpu, then at every moment of time one of the streams will be transferring
|
||||
// data and other stream will be either processing data or idle; the whole
|
||||
// process will be bounded by host-to-device copy.
|
||||
// - If transferring batch of values takes less time, than processing it on
|
||||
// gpu, then at every moment of time one of the streams will be processing
|
||||
// data and other stream will be either performing computations or
|
||||
// transferring data, and the whole process will be bounded by computations.
|
||||
static constexpr int kNumBatches = 2;
|
||||
// max_buffer_size is the maximal size (in elements of type T) of array
|
||||
// to be pre-allocated in gpu memory. The size of array determines size of
|
||||
// batch of values for simultaneous copying and processing. It should be large
|
||||
// enough to allow highly-parallel execution of user kernels; making it too
|
||||
// large increases latency.
|
||||
CudaStreamedBuffer(ContextImpl* context, const int max_buffer_size)
|
||||
: kValuesPerBatch(max_buffer_size / kNumBatches),
|
||||
context_(context),
|
||||
values_gpu_(context, kValuesPerBatch * kNumBatches) {
|
||||
static_assert(ContextImpl::kNumCudaStreams >= kNumBatches);
|
||||
CHECK_GE(max_buffer_size, kNumBatches);
|
||||
// Pre-allocate a buffer of page-locked memory for transfers from a regular
|
||||
// cpu memory. Because we will be only writing into that buffer from cpu,
|
||||
// memory is allocated with cudaHostAllocWriteCombined flag.
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaHostAlloc(&values_cpu_pinned_,
|
||||
sizeof(T) * kValuesPerBatch * kNumBatches,
|
||||
cudaHostAllocWriteCombined));
|
||||
for (auto& e : copy_finished_) {
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaEventCreateWithFlags(&e, cudaEventDisableTiming));
|
||||
}
|
||||
}
|
||||
|
||||
CudaStreamedBuffer(const CudaStreamedBuffer&) = delete;
|
||||
|
||||
~CudaStreamedBuffer() {
|
||||
CHECK_EQ(cudaSuccess, cudaFreeHost(values_cpu_pinned_));
|
||||
for (auto& e : copy_finished_) {
|
||||
CHECK_EQ(cudaSuccess, cudaEventDestroy(e));
|
||||
}
|
||||
}
|
||||
|
||||
// Transfer num_values at host-memory pointer from, calling
|
||||
// callback(device_pointer, size_of_batch, offset_of_batch, stream_to_use)
|
||||
// after scheduling transfer of each batch of data. User-provided callback
|
||||
// should perform processing of data at device_pointer only in
|
||||
// stream_to_use stream (device_pointer will be re-used in the next
|
||||
// callback invocation with the same stream).
|
||||
//
|
||||
// Two diagrams below describe operation in two possible scenarios, depending
|
||||
// on input data being stored in page-locked memory. In this example we will
|
||||
// have max_buffer_size = 2 * K, num_values = N * K and callback
|
||||
// scheduling a single asynchronous launch of
|
||||
// Kernel<<..., stream_to_use>>(device_pointer,
|
||||
// size_of_batch,
|
||||
// offset_of_batch)
|
||||
//
|
||||
// a. Copying from page-locked memory
|
||||
// In this case no copy on the host-side is necessary, and this method just
|
||||
// schedules a bunch of interleaved memory copies and callback invocations:
|
||||
//
|
||||
// cudaStreamSynchronize(context->DefaultStream());
|
||||
// - Iteration #0:
|
||||
// - cudaMemcpyAsync(values_gpu_, from, K * sizeof(T), H->D, stream_0)
|
||||
// - callback(values_gpu_, K, 0, stream_0)
|
||||
// - Iteration #1:
|
||||
// - cudaMemcpyAsync(values_gpu_ + K, from + K, K * sizeof(T), H->D,
|
||||
// stream_1)
|
||||
// - callback(values_gpu_ + K, K, K, stream_1)
|
||||
// - Iteration #2:
|
||||
// - cudaMemcpyAsync(values_gpu_, from + 2 * K, K * sizeof(T), H->D,
|
||||
// stream_0)
|
||||
// - callback(values_gpu_, K, 2 * K, stream_0)
|
||||
// - Iteration #3:
|
||||
// - cudaMemcpyAsync(values_gpu_ + K, from + 3 * K, K * sizeof(T), H->D,
|
||||
// stream_1)
|
||||
// - callback(values_gpu_ + K, K, 3 * K, stream_1)
|
||||
// ...
|
||||
// - Iteration #i:
|
||||
// - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, from + i * K, K *
|
||||
// sizeof(T), H->D, stream_(i % 2))
|
||||
// - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2)
|
||||
// ...
|
||||
// cudaStreamSynchronize(stream_0)
|
||||
// cudaStreamSynchronize(stream_1)
|
||||
//
|
||||
// This sequence of calls results in following activity on gpu (assuming that
|
||||
// kernel invoked by callback takes less time than host-to-device copy):
|
||||
// +-------------------+-------------------+
|
||||
// | Stream #0 | Stream #1 |
|
||||
// +-------------------+-------------------+
|
||||
// | Copy host->device | |
|
||||
// | | |
|
||||
// | | |
|
||||
// +-------------------+-------------------+
|
||||
// | Kernel | Copy host->device |
|
||||
// +-------------------+ |
|
||||
// | | |
|
||||
// +-------------------+-------------------+
|
||||
// | Copy host->device | Kernel |
|
||||
// | +-------------------+
|
||||
// | | |
|
||||
// +-------------------+-------------------+
|
||||
// | Kernel | Copy host->device |
|
||||
// | ... |
|
||||
// +---------------------------------------+
|
||||
//
|
||||
// b. Copying from regular memory
|
||||
// In this case a copy from regular memory to page-locked memory is required
|
||||
// in order to get asynchrnonous operation. Because pinned memory on host-side
|
||||
// is reused, additional synchronization is required. On each iteration method
|
||||
// the following actions are performed:
|
||||
// - Wait till previous copy operation in stream is completed
|
||||
// - Copy batch of values from input array into pinned memory
|
||||
// - Asynchronously launch host-to-device copy
|
||||
// - Setup event for synchronization on copy completion
|
||||
// - Invoke callback (that launches kernel asynchronously)
|
||||
//
|
||||
// Invocations are performed with the following arguments
|
||||
// cudaStreamSynchronize(context->DefaultStream());
|
||||
// - Iteration #0:
|
||||
// - cudaEventSynchronize(copy_finished_0)
|
||||
// - std::copy_n(from, K, values_cpu_pinned_)
|
||||
// - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D,
|
||||
// stream_0)
|
||||
// - cudaEventRecord(copy_finished_0, stream_0)
|
||||
// - callback(values_gpu_, K, 0, stream_0)
|
||||
// - Iteration #1:
|
||||
// - cudaEventSynchronize(copy_finished_1)
|
||||
// - std::copy_n(from + K, K, values_cpu_pinned_ + K)
|
||||
// - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K *
|
||||
// sizeof(T), H->D, stream_1)
|
||||
// - cudaEventRecord(copy_finished_1, stream_1)
|
||||
// - callback(values_gpu_ + K, K, K, stream_1)
|
||||
// - Iteration #2:
|
||||
// - cudaEventSynchronize(copy_finished_0)
|
||||
// - std::copy_n(from + 2 * K, K, values_cpu_pinned_)
|
||||
// - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D,
|
||||
// stream_0)
|
||||
// - cudaEventRecord(copy_finished_0, stream_0)
|
||||
// - callback(values_gpu_, K, 2 * K, stream_0)
|
||||
// - Iteration #3:
|
||||
// - cudaEventSynchronize(copy_finished_1)
|
||||
// - std::copy_n(from + 3 * K, K, values_cpu_pinned_ + K)
|
||||
// - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K *
|
||||
// sizeof(T), H->D, stream_1)
|
||||
// - cudaEventRecord(copy_finished_1, stream_1)
|
||||
// - callback(values_gpu_ + K, K, 3 * K, stream_1)
|
||||
// ...
|
||||
// - Iteration #i:
|
||||
// - cudaEventSynchronize(copy_finished_(i % 2))
|
||||
// - std::copy_n(from + i * K, K, values_cpu_pinned_ + (i % 2) * K)
|
||||
// - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, values_cpu_pinned_ + (i %
|
||||
// 2) * K, K * sizeof(T), H->D, stream_(i % 2))
|
||||
// - cudaEventRecord(copy_finished_(i % 2), stream_(i % 2))
|
||||
// - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2))
|
||||
// ...
|
||||
// cudaStreamSynchronize(stream_0)
|
||||
// cudaStreamSynchronize(stream_1)
|
||||
//
|
||||
// This sequence of calls results in following activity on cpu and gpu
|
||||
// (assuming that kernel invoked by callback takes less time than
|
||||
// host-to-device copy and copy in cpu memory, and copy in cpu memory is
|
||||
// faster than host-to-device copy):
|
||||
// +----------------------------+-------------------+-------------------+
|
||||
// | Stream #0 | Stream #0 | Stream #1 |
|
||||
// +----------------------------+-------------------+-------------------+
|
||||
// | Copy to pinned memory | | |
|
||||
// | | | |
|
||||
// +----------------------------+-------------------| |
|
||||
// | Copy to pinned memory | Copy host->device | |
|
||||
// | | | |
|
||||
// +----------------------------+ | |
|
||||
// | Waiting previous h->d copy | | |
|
||||
// +----------------------------+-------------------+-------------------+
|
||||
// | Copy to pinned memory | Kernel | Copy host->device |
|
||||
// | +-------------------+ |
|
||||
// +----------------------------+ | |
|
||||
// | Waiting previous h->d copy | | |
|
||||
// +----------------------------+-------------------+-------------------+
|
||||
// | Copy to pinned memory | Copy host->device | Kernel |
|
||||
// | | +-------------------+
|
||||
// | ... ... |
|
||||
// +----------------------------+---------------------------------------+
|
||||
//
|
||||
template <typename Fun>
|
||||
void CopyToGpu(const T* from, const int num_values, Fun&& callback) {
|
||||
// This synchronization is not required in some cases, but we perform it in
|
||||
// order to avoid situation when user callback depends on data that is
|
||||
// still to be computed in default stream
|
||||
CHECK_EQ(cudaSuccess, cudaStreamSynchronize(context_->DefaultStream()));
|
||||
|
||||
// If pointer to input data does not correspond to page-locked memory,
|
||||
// host-to-device memory copy might be executed synchrnonously (with a copy
|
||||
// to pinned memory happening inside the driver). In that case we perform
|
||||
// copy to a pre-allocated array of page-locked memory.
|
||||
const bool copy_to_pinned_memory = MemoryTypeResultsInSynchronousCopy(from);
|
||||
T* batch_values_gpu[kNumBatches];
|
||||
T* batch_values_cpu[kNumBatches];
|
||||
auto streams = context_->streams_;
|
||||
for (int i = 0; i < kNumBatches; ++i) {
|
||||
batch_values_gpu[i] = values_gpu_.data() + kValuesPerBatch * i;
|
||||
batch_values_cpu[i] = values_cpu_pinned_ + kValuesPerBatch * i;
|
||||
}
|
||||
int batch_id = 0;
|
||||
for (int offset = 0; offset < num_values; offset += kValuesPerBatch) {
|
||||
const int num_values_batch =
|
||||
std::min(num_values - offset, kValuesPerBatch);
|
||||
const T* batch_from = from + offset;
|
||||
T* batch_to = batch_values_gpu[batch_id];
|
||||
auto stream = streams[batch_id];
|
||||
auto copy_finished = copy_finished_[batch_id];
|
||||
|
||||
if (copy_to_pinned_memory) {
|
||||
// Copying values to a temporary buffer should be started only after the
|
||||
// previous copy from temporary buffer to device is completed.
|
||||
CHECK_EQ(cudaSuccess, cudaEventSynchronize(copy_finished));
|
||||
std::copy_n(batch_from, num_values_batch, batch_values_cpu[batch_id]);
|
||||
batch_from = batch_values_cpu[batch_id];
|
||||
}
|
||||
CHECK_EQ(cudaSuccess,
|
||||
cudaMemcpyAsync(batch_to,
|
||||
batch_from,
|
||||
sizeof(T) * num_values_batch,
|
||||
cudaMemcpyHostToDevice,
|
||||
stream));
|
||||
if (copy_to_pinned_memory) {
|
||||
// Next copy to a temporary buffer can start straight after asynchronous
|
||||
// copy is completed (and might be started before kernels asynchronously
|
||||
// executed in stream by user-supplied callback are completed).
|
||||
// No explicit synchronization is required when copying data from
|
||||
// page-locked memory, because memory copy and user kernel execution
|
||||
// with corresponding part of values_gpu_ array is serialized using
|
||||
// stream
|
||||
CHECK_EQ(cudaSuccess, cudaEventRecord(copy_finished, stream));
|
||||
}
|
||||
callback(batch_to, num_values_batch, offset, stream);
|
||||
batch_id = (batch_id + 1) % kNumBatches;
|
||||
}
|
||||
// Explicitly synchronize on all CUDA streams that were utilized.
|
||||
for (int i = 0; i < kNumBatches; ++i) {
|
||||
CHECK_EQ(cudaSuccess, cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// It is necessary to have all host-to-device copies to be completely
|
||||
// asynchronous. This requires source memory to be allocated in page-locked
|
||||
// memory.
|
||||
static bool MemoryTypeResultsInSynchronousCopy(const void* ptr) {
|
||||
cudaPointerAttributes attributes;
|
||||
auto status = cudaPointerGetAttributes(&attributes, ptr);
|
||||
#if CUDART_VERSION < 11000
|
||||
// In CUDA versions prior 11 call to cudaPointerGetAttributes with host
|
||||
// pointer will return cudaErrorInvalidValue
|
||||
if (status == cudaErrorInvalidValue) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
CHECK_EQ(status, cudaSuccess);
|
||||
// This class only supports cpu memory as a source
|
||||
CHECK_NE(attributes.type, cudaMemoryTypeDevice);
|
||||
// If host memory was allocated (or registered) with CUDA API, or is a
|
||||
// managed memory, then call to cudaMemcpyAsync will be asynchrnous. In case
|
||||
// of managed memory it might be slightly better to perform a single call of
|
||||
// user-provided call-back (and hope that page migration will provide a
|
||||
// similar throughput with zero efforts from our side).
|
||||
return attributes.type == cudaMemoryTypeUnregistered;
|
||||
}
|
||||
|
||||
const int kValuesPerBatch;
|
||||
ContextImpl* context_ = nullptr;
|
||||
CudaBuffer<T> values_gpu_;
|
||||
T* values_cpu_pinned_ = nullptr;
|
||||
cudaEvent_t copy_finished_[kNumBatches] = {nullptr};
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_
|
||||
169
extern/ceres/internal/ceres/cuda_streamed_buffer_test.cc
vendored
Normal file
169
extern/ceres/internal/ceres/cuda_streamed_buffer_test.cc
vendored
Normal file
@@ -0,0 +1,169 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "ceres/cuda_streamed_buffer.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Streams integers stored in a std::vector through CudaStreamedBuffer and
// copies every batch back to the host from the callback. Afterwards it checks
// that all values arrived and that the batches tile the input without gaps or
// overlaps.
TEST(CudaStreamedBufferTest, IntegerCopy) {
  const int kMaxTemporaryArraySize = 16;
  // The input size is not a multiple of the batch size, so the last batch is
  // shorter than the others.
  const int kInputSize = kMaxTemporaryArraySize * 7 + 3;

  ContextImpl context;
  std::string error;
  CHECK(context.InitCuda(&error)) << "InitCuda() failed because: " << error;

  std::vector<int> inputs(kInputSize);
  std::iota(inputs.begin(), inputs.end(), 0);
  std::vector<int> outputs(kInputSize, -1);

  // Offsets and sizes of batches supplied to callback
  std::vector<std::pair<int, int>> batches;

  // Records the batch and schedules its copy back to the host in the stream
  // assigned to the batch.
  auto copy_back = [&outputs, &batches](const int* device_pointer,
                                        int size,
                                        int offset,
                                        cudaStream_t stream) {
    batches.emplace_back(offset, size);
    CHECK_EQ(cudaSuccess,
             cudaMemcpyAsync(outputs.data() + offset,
                             device_pointer,
                             sizeof(int) * size,
                             cudaMemcpyDeviceToHost,
                             stream));
  };

  CudaStreamedBuffer<int> streamed_buffer(&context, kMaxTemporaryArraySize);
  streamed_buffer.CopyToGpu(inputs.data(), kInputSize, copy_back);

  // All operations in all streams should be completed when CopyToGpu returns
  // control to the caller
  for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) {
    CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i]));
  }

  // Check if every element was visited
  for (int i = 0; i < kInputSize; ++i) {
    CHECK_EQ(outputs[i], i);
  }

  // Check if there is no overlap between batches
  std::sort(batches.begin(), batches.end());
  const int num_batches = batches.size();
  for (int i = 0; i < num_batches; ++i) {
    const auto [begin, size] = batches[i];
    const int end = begin + size;
    CHECK_GE(begin, 0);
    CHECK_LT(begin, kInputSize);

    CHECK_GT(size, 0);
    CHECK_LE(end, kInputSize);

    // Each batch except the last one has to end where the next one begins.
    if (i + 1 < num_batches) {
      CHECK_EQ(end, batches[i + 1].first);
    }
  }
}
|
||||
|
||||
// Same scenario as the IntegerCopy test, but both input and output arrays are
// allocated with cudaHostAlloc instead of being stored in std::vector.
TEST(CudaStreamedBufferTest, IntegerNoCopy) {
  const int kMaxTemporaryArraySize = 16;
  // The input size is not a multiple of the batch size, so the last batch is
  // shorter than the others.
  const int kInputSize = kMaxTemporaryArraySize * 7 + 3;

  ContextImpl context;
  std::string error;
  CHECK(context.InitCuda(&error)) << "InitCuda() failed because: " << error;

  int* inputs;
  int* outputs;
  CHECK_EQ(cudaSuccess,
           cudaHostAlloc(
               &inputs, sizeof(int) * kInputSize, cudaHostAllocWriteCombined));
  CHECK_EQ(
      cudaSuccess,
      cudaHostAlloc(&outputs, sizeof(int) * kInputSize, cudaHostAllocDefault));

  std::iota(inputs, inputs + kInputSize, 0);
  std::fill(outputs, outputs + kInputSize, -1);

  // Offsets and sizes of batches supplied to callback
  std::vector<std::pair<int, int>> batches;

  // Records the batch and schedules its copy back to the host in the stream
  // assigned to the batch.
  auto copy_back = [outputs, &batches](const int* device_pointer,
                                       int size,
                                       int offset,
                                       cudaStream_t stream) {
    batches.emplace_back(offset, size);
    CHECK_EQ(cudaSuccess,
             cudaMemcpyAsync(outputs + offset,
                             device_pointer,
                             sizeof(int) * size,
                             cudaMemcpyDeviceToHost,
                             stream));
  };

  CudaStreamedBuffer<int> streamed_buffer(&context, kMaxTemporaryArraySize);
  streamed_buffer.CopyToGpu(inputs, kInputSize, copy_back);

  // All operations in all streams should be completed when CopyToGpu returns
  // control to the caller
  for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) {
    CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i]));
  }

  // Check if every element was visited
  for (int i = 0; i < kInputSize; ++i) {
    CHECK_EQ(outputs[i], i);
  }

  // Check if there is no overlap between batches
  std::sort(batches.begin(), batches.end());
  const int num_batches = batches.size();
  for (int i = 0; i < num_batches; ++i) {
    const auto [begin, size] = batches[i];
    const int end = begin + size;
    CHECK_GE(begin, 0);
    CHECK_LT(begin, kInputSize);

    CHECK_GT(size, 0);
    CHECK_LE(end, kInputSize);

    // Each batch except the last one has to end where the next one begins.
    if (i + 1 < num_batches) {
      CHECK_EQ(end, batches[i + 1].first);
    }
  }

  // Release the host arrays allocated at the beginning of the test.
  CHECK_EQ(cudaSuccess, cudaFreeHost(inputs));
  CHECK_EQ(cudaSuccess, cudaFreeHost(outputs));
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
185
extern/ceres/internal/ceres/cuda_vector.cc
vendored
Normal file
185
extern/ceres/internal/ceres/cuda_vector.cc
vendored
Normal file
@@ -0,0 +1,185 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
//
|
||||
// A simple CUDA vector class.
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
// clang-format off
|
||||
#include "ceres/internal/config.h"
|
||||
// clang-format on
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
#include "ceres/cuda_vector.h"
|
||||
#include "cublas_v2.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Constructs a vector with `size` rows. The buffer `data_` is constructed
// with the same context and size; Resize() then records the row count and
// creates the cuSPARSE dense-vector descriptor. In debug builds the context
// is verified to be non-null and to have CUDA initialized.
CudaVector::CudaVector(ContextImpl* context, int size)
    : context_(context),
      data_(context, size) {
  DCHECK_NE(context, nullptr);
  DCHECK(context->IsCudaInitialized());

  Resize(size);
}
|
||||
|
||||
// Move constructor: takes over the row count, context, buffer and cuSPARSE
// descriptor of `other`. Afterwards `other` reports zero rows and holds a
// null descriptor, so its destructor will not destroy the descriptor that is
// now held by this object.
CudaVector::CudaVector(CudaVector&& other)
    : num_rows_(other.num_rows_),
      context_(other.context_),
      data_(std::move(other.data_)),
      descr_(other.descr_) {
  other.descr_ = nullptr;
  other.num_rows_ = 0;
}
|
||||
|
||||
// Deep copy: resizes this vector to match `other` and copies its contents
// from `other`'s buffer. Self-assignment is a no-op.
CudaVector& CudaVector::operator=(const CudaVector& other) {
  if (this == &other) {
    return *this;
  }

  Resize(other.num_rows());
  data_.CopyFromGPUArray(other.data_.data(), num_rows_);
  return *this;
}
|
||||
|
||||
void CudaVector::DestroyDescriptor() {
|
||||
if (descr_ != nullptr) {
|
||||
CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS);
|
||||
descr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the cuSPARSE descriptor held by this vector.
CudaVector::~CudaVector() {
  DestroyDescriptor();
}
|
||||
|
||||
// Changes the number of rows to `size`. The buffer is asked to reserve room
// for `size` elements, and the cuSPARSE descriptor is always recreated so
// that it refers to the current data pointer and row count.
void CudaVector::Resize(int size) {
  // Drop the old descriptor before building one for the new size.
  DestroyDescriptor();

  data_.Reserve(size);
  num_rows_ = size;

  CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F),
           CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
double CudaVector::Dot(const CudaVector& x) const {
|
||||
double result = 0;
|
||||
CHECK_EQ(cublasDdot(context_->cublas_handle_,
|
||||
num_rows_,
|
||||
data_.data(),
|
||||
1,
|
||||
x.data(),
|
||||
1,
|
||||
&result),
|
||||
CUBLAS_STATUS_SUCCESS)
|
||||
<< "CuBLAS cublasDdot failed.";
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the value computed by cublasDnrm2 over the `num_rows_` elements of
// this vector (unit stride). Aborts if the cuBLAS call reports failure.
double CudaVector::Norm() const {
  double norm = 0;
  CHECK_EQ(
      cublasDnrm2(context_->cublas_handle_, num_rows_, data_.data(), 1, &norm),
      CUBLAS_STATUS_SUCCESS)
      << "CuBLAS cublasDnrm2 failed.";
  return norm;
}
|
||||
|
||||
// Copies `num_rows_` values from the array `x` into this vector's buffer.
// The current size is used as-is; the vector is not resized.
void CudaVector::CopyFromCpu(const double* x) {
  const int count = num_rows_;
  data_.CopyFromCpu(x, count);
}
|
||||
|
||||
void CudaVector::CopyFromCpu(const Vector& x) {
|
||||
if (x.rows() != num_rows_) {
|
||||
Resize(x.rows());
|
||||
}
|
||||
CopyFromCpu(x.data());
|
||||
}
|
||||
|
||||
// Copies this vector into `*x`, which is resized to `num_rows_` first.
// `x` must not be null.
void CudaVector::CopyTo(Vector* x) const {
  CHECK(x != nullptr);

  x->resize(num_rows_);
  double* destination = x->data();
  data_.CopyToCpu(destination, num_rows_);
}
|
||||
|
||||
// Copies `num_rows_` values from this vector into the array `x`. `x` must not
// be null; no bound on the destination is checked here.
void CudaVector::CopyTo(double* x) const {
  CHECK(x != nullptr);

  const int count = num_rows_;
  data_.CopyToCpu(x, count);
}
|
||||
|
||||
// Sets every element to zero by calling CudaSetZeroFP64 with the context's
// default stream. An empty vector is accepted and left untouched.
void CudaVector::SetZero() {
  const bool is_empty = (num_rows_ == 0);
  if (is_empty) {
    return;
  }

  CHECK(data_.data() != nullptr);
  CudaSetZeroFP64(data_.data(), num_rows_, context_->DefaultStream());
}
|
||||
|
||||
void CudaVector::Axpby(double a, const CudaVector& x, double b) {
|
||||
if (&x == this) {
|
||||
Scale(a + b);
|
||||
return;
|
||||
}
|
||||
CHECK_EQ(num_rows_, x.num_rows_);
|
||||
if (b != 1.0) {
|
||||
// First scale y by b.
|
||||
CHECK_EQ(
|
||||
cublasDscal(context_->cublas_handle_, num_rows_, &b, data_.data(), 1),
|
||||
CUBLAS_STATUS_SUCCESS)
|
||||
<< "CuBLAS cublasDscal failed.";
|
||||
}
|
||||
// Then add a * x to y.
|
||||
CHECK_EQ(cublasDaxpy(context_->cublas_handle_,
|
||||
num_rows_,
|
||||
&a,
|
||||
x.data(),
|
||||
1,
|
||||
data_.data(),
|
||||
1),
|
||||
CUBLAS_STATUS_SUCCESS)
|
||||
<< "CuBLAS cublasDaxpy failed.";
|
||||
}
|
||||
|
||||
void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) {
|
||||
CudaDtDxpy(
|
||||
data_.data(), D.data(), x.data(), num_rows_, context_->DefaultStream());
|
||||
}
|
||||
|
||||
// In-place scaling y = s * y via cublasDscal on the context's cuBLAS handle.
// Aborts if the cuBLAS call reports failure.
void CudaVector::Scale(double s) {
  double* values = data_.data();
  CHECK_EQ(cublasDscal(context_->cublas_handle_, num_rows_, &s, values, 1),
           CUBLAS_STATUS_SUCCESS)
      << "CuBLAS cublasDscal failed.";
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
193
extern/ceres/internal/ceres/cuda_vector.h
vendored
Normal file
193
extern/ceres/internal/ceres/cuda_vector.h
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
//
|
||||
// A simple CUDA vector class.
|
||||
|
||||
#ifndef CERES_INTERNAL_CUDA_VECTOR_H_
|
||||
#define CERES_INTERNAL_CUDA_VECTOR_H_
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
// clang-format off
|
||||
#include "ceres/internal/config.h"
|
||||
// clang-format on
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "cublas_v2.h"
|
||||
#include "cusparse.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// An Nx1 vector, denoted y, hosted on the GPU, with CUDA-accelerated operations.
|
||||
class CERES_NO_EXPORT CudaVector {
|
||||
public:
|
||||
// Create a pre-allocated vector of size N and return a pointer to it. The
|
||||
// caller must ensure that InitCuda() has already been successfully called on
|
||||
// context before calling this method.
|
||||
CudaVector(ContextImpl* context, int size);
|
||||
|
||||
CudaVector(CudaVector&& other);
|
||||
|
||||
~CudaVector();
|
||||
|
||||
void Resize(int size);
|
||||
|
||||
// Perform a deep copy of the vector.
|
||||
CudaVector& operator=(const CudaVector&);
|
||||
|
||||
// Return the inner product x' * y.
|
||||
double Dot(const CudaVector& x) const;
|
||||
|
||||
// Return the L2 norm of the vector (||y||_2).
|
||||
double Norm() const;
|
||||
|
||||
// Set all elements to zero.
|
||||
void SetZero();
|
||||
|
||||
// Copy from Eigen vector.
|
||||
void CopyFromCpu(const Vector& x);
|
||||
|
||||
// Copy from CPU memory array.
|
||||
void CopyFromCpu(const double* x);
|
||||
|
||||
// Copy to Eigen vector.
|
||||
void CopyTo(Vector* x) const;
|
||||
|
||||
// Copy to CPU memory array. It is the caller's responsibility to ensure
|
||||
// that the array is large enough.
|
||||
void CopyTo(double* x) const;
|
||||
|
||||
// y = a * x + b * y.
|
||||
void Axpby(double a, const CudaVector& x, double b);
|
||||
|
||||
// y = diag(d)' * diag(d) * x + y.
|
||||
void DtDxpy(const CudaVector& D, const CudaVector& x);
|
||||
|
||||
// y = s * y.
|
||||
void Scale(double s);
|
||||
|
||||
int num_rows() const { return num_rows_; }
|
||||
int num_cols() const { return 1; }
|
||||
|
||||
const double* data() const { return data_.data(); }
|
||||
double* mutable_data() { return data_.data(); }
|
||||
|
||||
const cusparseDnVecDescr_t& descr() const { return descr_; }
|
||||
|
||||
private:
|
||||
CudaVector(const CudaVector&) = delete;
|
||||
void DestroyDescriptor();
|
||||
|
||||
int num_rows_ = 0;
|
||||
ContextImpl* context_ = nullptr;
|
||||
CudaBuffer<double> data_;
|
||||
// CuSparse object that describes this dense vector.
|
||||
cusparseDnVecDescr_t descr_ = nullptr;
|
||||
};
|
||||
|
||||
// Blas1 operations on Cuda vectors. These functions are needed as an
|
||||
// abstraction layer so that we can use different versions of a vector style
|
||||
// object in the conjugate gradients linear solver.
|
||||
// Context and num_threads arguments are not used by CUDA implementation,
|
||||
// context embedded into CudaVector is used instead.
|
||||
inline double Norm(const CudaVector& x,
|
||||
ContextImpl* context = nullptr,
|
||||
int num_threads = 1) {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
return x.Norm();
|
||||
}
|
||||
// Free-function form of CudaVector::SetZero(). `context` and `num_threads`
// are accepted only for signature compatibility and are ignored.
inline void SetZero(CudaVector& x,
                    ContextImpl* context = nullptr,
                    int num_threads = 1) {
  static_cast<void>(context);
  static_cast<void>(num_threads);
  x.SetZero();
}
|
||||
inline void Axpby(double a,
|
||||
const CudaVector& x,
|
||||
double b,
|
||||
const CudaVector& y,
|
||||
CudaVector& z,
|
||||
ContextImpl* context = nullptr,
|
||||
int num_threads = 1) {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
if (&x == &y && &y == &z) {
|
||||
// z = (a + b) * z;
|
||||
z.Scale(a + b);
|
||||
} else if (&x == &z) {
|
||||
// x is aliased to z.
|
||||
// z = x
|
||||
// = b * y + a * x;
|
||||
z.Axpby(b, y, a);
|
||||
} else if (&y == &z) {
|
||||
// y is aliased to z.
|
||||
// z = y = a * x + b * y;
|
||||
z.Axpby(a, x, b);
|
||||
} else {
|
||||
// General case: all inputs and outputs are distinct.
|
||||
z = y;
|
||||
z.Axpby(a, x, b);
|
||||
}
|
||||
}
|
||||
inline double Dot(const CudaVector& x,
|
||||
const CudaVector& y,
|
||||
ContextImpl* context = nullptr,
|
||||
int num_threads = 1) {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
return x.Dot(y);
|
||||
}
|
||||
inline void Copy(const CudaVector& from,
|
||||
CudaVector& to,
|
||||
ContextImpl* context = nullptr,
|
||||
int num_threads = 1) {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
to = from;
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
#endif // CERES_INTERNAL_CUDA_VECTOR_H_
|
||||
423
extern/ceres/internal/ceres/cuda_vector_test.cc
vendored
Normal file
423
extern/ceres/internal/ceres/cuda_vector_test.cc
vendored
Normal file
@@ -0,0 +1,423 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include "ceres/cuda_vector.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
// A freshly constructed vector reports the requested number of rows and has
// a non-null data pointer.
TEST(CudaVector, Creation) {
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  constexpr int kNumRows = 1000;
  CudaVector vec(&context, kNumRows);
  EXPECT_EQ(vec.num_rows(), kNumRows);
  EXPECT_NE(vec.data(), nullptr);
}
|
||||
|
||||
// Round trip CPU -> GPU -> CPU. The GPU vector starts with 10 rows and must
// shrink to 3 when a 3-element Eigen vector is copied into it.
TEST(CudaVector, CopyVector) {
  const Vector source = (Vector(3) << 1, 2, 3).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector gpu(&context, 10);
  gpu.CopyFromCpu(source);
  EXPECT_EQ(gpu.num_rows(), 3);

  Vector round_trip = Vector::Zero(3);
  gpu.CopyTo(&round_trip);
  EXPECT_EQ(source, round_trip);
}
|
||||
|
||||
// Move construction hands the data pointer and descriptor over to the new
// vector and leaves the source with null data, a null descriptor and zero
// rows.
TEST(CudaVector, Move) {
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector source(&context, 10);
  const auto original_data = source.data();
  const auto original_descr = source.descr();
  EXPECT_EQ(source.num_rows(), 10);

  CudaVector target(std::move(source));

  // The moved-from vector is inspected on purpose.
  EXPECT_EQ(source.data(), nullptr);
  EXPECT_EQ(source.descr(), nullptr);
  EXPECT_EQ(source.num_rows(), 0);

  EXPECT_EQ(target.data(), original_data);
  EXPECT_EQ(target.descr(), original_descr);
}
|
||||
|
||||
// Copy assignment between GPU vectors copies the contents: a zeroed
// destination must hold the source values afterwards.
TEST(CudaVector, DeepCopy) {
  const Vector source = (Vector(3) << 1, 2, 3).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector source_gpu(&context, 3);
  source_gpu.CopyFromCpu(source);

  CudaVector copy_gpu(&context, 3);
  copy_gpu.SetZero();
  EXPECT_EQ(copy_gpu.Norm(), 0.0);

  copy_gpu = source_gpu;

  Vector copy = Vector::Zero(3);
  copy_gpu.CopyTo(&copy);
  EXPECT_EQ(source, copy);
}
|
||||
|
||||
// [1 2 3] . [100 10 1] = 123, through both the member and the free function.
TEST(CudaVector, Dot) {
  const Vector lhs = (Vector(3) << 1, 2, 3).finished();
  const Vector rhs = (Vector(3) << 100, 10, 1).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector lhs_gpu(&context, 10);
  CudaVector rhs_gpu(&context, 10);
  lhs_gpu.CopyFromCpu(lhs);
  rhs_gpu.CopyFromCpu(rhs);

  EXPECT_EQ(lhs_gpu.Dot(rhs_gpu), 123.0);
  EXPECT_EQ(Dot(lhs_gpu, rhs_gpu), 123.0);
}
|
||||
|
||||
// The norm of [1 2 3] must equal sqrt(14) to within one machine epsilon,
// through both the member and the free function.
TEST(CudaVector, Norm) {
  const Vector values = (Vector(3) << 1, 2, 3).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector values_gpu(&context, 10);
  values_gpu.CopyFromCpu(values);

  const double expected_norm = sqrt(1.0 + 4.0 + 9.0);
  const double tolerance = std::numeric_limits<double>::epsilon();

  EXPECT_NEAR(values_gpu.Norm(), expected_norm, tolerance);
  EXPECT_NEAR(Norm(values_gpu), expected_norm, tolerance);
}
|
||||
|
||||
// A vector of four ones has norm 2; after zeroing, the norm is 0. Checked for
// both the member function and the free function.
TEST(CudaVector, SetZero) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector gpu(&context, 10);
  const double tolerance = std::numeric_limits<double>::epsilon();

  // Member function.
  gpu.CopyFromCpu(ones);
  EXPECT_NEAR(gpu.Norm(), 2.0, tolerance);
  gpu.SetZero();
  EXPECT_NEAR(gpu.Norm(), 0.0, tolerance);

  // Free function.
  gpu.CopyFromCpu(ones);
  EXPECT_NEAR(gpu.Norm(), 2.0, tolerance);
  SetZero(gpu);
  EXPECT_NEAR(gpu.Norm(), 0.0, tolerance);
}
|
||||
|
||||
// Resize() changes the reported number of rows (here from 10 down to 4).
TEST(CudaVector, Resize) {
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector gpu(&context, 10);
  EXPECT_EQ(gpu.num_rows(), 10);

  gpu.Resize(4);
  EXPECT_EQ(gpu.num_rows(), 4);
}
|
||||
|
||||
// Member Axpby with b == 1: x = 2 * y + x.
TEST(CudaVector, Axpy) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  ones_gpu.Axpby(2.0, powers_gpu, 1.0);

  const Vector expected = (Vector(4) << 201, 21, 3, 1).finished();
  Vector actual;
  ones_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Member Axpby with b == 1, the case in which the scaling step is skipped:
// x = 2 * y + 1 * x.
TEST(CudaVector, AxpbyBEquals1) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  ones_gpu.Axpby(2.0, powers_gpu, 1.0);

  const Vector expected = (Vector(4) << 201, 21, 3, 1).finished();
  Vector actual;
  ones_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Member Axpby with b != 1, which exercises the scaling step:
// x = 2 * y + 3 * x.
TEST(CudaVector, AxpbyMemberFunctionBNotEqual1) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  ones_gpu.Axpby(2.0, powers_gpu, 3.0);

  const Vector expected = (Vector(4) << 203, 23, 5, 3).finished();
  Vector actual;
  ones_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Member Axpby with b == 1: x = 2 * y + 1 * x.
TEST(CudaVector, AxpbyMemberFunctionBEqual1) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  ones_gpu.Axpby(2.0, powers_gpu, 1.0);

  const Vector expected = (Vector(4) << 201, 21, 3, 1).finished();
  Vector actual;
  ones_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Member Axpby where the operand is the vector itself:
// x = 2 * x + 1 * x = 3 * x.
//
// Only one GPU vector takes part in this test, so no second vector is
// allocated.
TEST(CudaVector, AxpbyMemberXAliasesY) {
  Vector x(4);
  x << 100, 10, 1, 0;
  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
  CudaVector x_gpu(&context, 4);
  x_gpu.CopyFromCpu(x);

  x_gpu.Axpby(2.0, x_gpu, 1.0);
  Vector result;
  Vector expected(4);
  expected << 300, 30, 3, 0;
  x_gpu.CopyTo(&result);
  EXPECT_EQ(result, expected);
}
|
||||
|
||||
// Free-function Axpby with three distinct vectors: z = 2 * x + 3 * y.
TEST(CudaVector, AxpbyNonMemberMethodNoAliases) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  CudaVector out_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);
  out_gpu.Resize(4);
  out_gpu.SetZero();

  Axpby(2.0, ones_gpu, 3.0, powers_gpu, out_gpu);

  const Vector expected = (Vector(4) << 302, 32, 5, 2).finished();
  Vector actual;
  out_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Free-function Axpby where both inputs are the same vector and the output is
// distinct: z = 2 * x + 3 * x = 5 * x.
TEST(CudaVector, AxpbyNonMemberMethodXAliasesY) {
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector in_gpu(&context, 4);
  CudaVector out_gpu(&context, 4);
  in_gpu.CopyFromCpu(powers);
  out_gpu.SetZero();

  Axpby(2.0, in_gpu, 3.0, in_gpu, out_gpu);

  const Vector expected = (Vector(4) << 500, 50, 5, 0).finished();
  Vector actual;
  out_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Free-function Axpby where the output is the first input:
// x = 2 * x + 3 * y.
TEST(CudaVector, AxpbyNonMemberMethodXAliasesZ) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 10);
  CudaVector powers_gpu(&context, 10);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  Axpby(2.0, ones_gpu, 3.0, powers_gpu, ones_gpu);

  const Vector expected = (Vector(4) << 302, 32, 5, 2).finished();
  Vector actual;
  ones_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Free-function Axpby where the output is the second input:
// y = 2 * x + 3 * y.
TEST(CudaVector, AxpbyNonMemberMethodYAliasesZ) {
  const Vector ones = (Vector(4) << 1, 1, 1, 1).finished();
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector ones_gpu(&context, 4);
  CudaVector powers_gpu(&context, 4);
  ones_gpu.CopyFromCpu(ones);
  powers_gpu.CopyFromCpu(powers);

  Axpby(2.0, ones_gpu, 3.0, powers_gpu, powers_gpu);

  const Vector expected = (Vector(4) << 302, 32, 5, 2).finished();
  Vector actual;
  powers_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Free-function Axpby where all three arguments are the same vector:
// x = 2 * x + 3 * x = 5 * x.
TEST(CudaVector, AxpbyNonMemberMethodXAliasesYAliasesZ) {
  const Vector powers = (Vector(4) << 100, 10, 1, 0).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector gpu(&context, 10);
  gpu.CopyFromCpu(powers);

  Axpby(2.0, gpu, 3.0, gpu, gpu);

  const Vector expected = (Vector(4) << 500, 50, 5, 0).finished();
  Vector actual;
  gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// y = D' * D * x + y with D = diag(4, 3, 2, 1), x = [1 2 3 4] and
// y = [100 10 1 0]; elementwise y_i + d_i^2 * x_i = [116 28 13 4].
TEST(CudaVector, DtDxpy) {
  const Vector x_cpu = (Vector(4) << 1, 2, 3, 4).finished();
  const Vector y_cpu = (Vector(4) << 100, 10, 1, 0).finished();
  const Vector d_cpu = (Vector(4) << 4, 3, 2, 1).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector x_gpu(&context, 4);
  CudaVector y_gpu(&context, 4);
  CudaVector d_gpu(&context, 4);
  x_gpu.CopyFromCpu(x_cpu);
  y_gpu.CopyFromCpu(y_cpu);
  d_gpu.CopyFromCpu(d_cpu);

  y_gpu.DtDxpy(d_gpu, x_gpu);

  const Vector expected = (Vector(4) << 116, 28, 13, 4).finished();
  Vector actual;
  y_gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
// Scaling [1 2 3 4] by -3 yields [-3 -6 -9 -12].
TEST(CudaVector, Scale) {
  const Vector values = (Vector(4) << 1, 2, 3, 4).finished();

  ContextImpl context;
  std::string message;
  CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;

  CudaVector gpu(&context, 4);
  gpu.CopyFromCpu(values);

  gpu.Scale(-3.0);

  const Vector expected = (Vector(4) << -3.0, -6.0, -9.0, -12.0).finished();
  Vector actual;
  gpu.CopyTo(&actual);
  EXPECT_EQ(actual, expected);
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
284
extern/ceres/internal/ceres/cxsparse.cc
vendored
284
extern/ceres/internal/ceres/cxsparse.cc
vendored
@@ -1,284 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: strandmark@google.com (Petter Strandmark)
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CXSPARSE
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/compressed_col_sparse_matrix_utils.h"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/cxsparse.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
|
||||
// Starts without any scratch space; Solve() allocates it on first use.
CXSparse::CXSparse()
    : scratch_(nullptr),
      scratch_size_(0) {
}
|
||||
|
||||
// Frees the scratch vector if one was ever allocated.
CXSparse::~CXSparse() {
  const bool has_scratch = scratch_size_ > 0;
  if (!has_scratch) {
    return;
  }
  cs_di_free(scratch_);
}
|
||||
|
||||
// Forwards to cs_di_chol with the matrix `A` and the given symbolic
// factorization, and returns its result unchanged.
csn* CXSparse::Cholesky(cs_di* A, cs_dis* symbolic_factor) {
  csn* numeric_factor = cs_di_chol(A, symbolic_factor);
  return numeric_factor;
}
|
||||
|
||||
// Solves the factored system in place: on entry `b` holds the right-hand
// side, on exit it holds the solution. Uses (and grows, when needed) the
// member scratch vector as temporary storage.
void CXSparse::Solve(cs_dis* symbolic_factor, csn* numeric_factor, double* b) {
  const int num_cols = numeric_factor->L->n;

  // Make sure the scratch space can hold one entry per column.
  const bool scratch_too_small = scratch_size_ < num_cols;
  if (scratch_too_small) {
    if (scratch_size_ > 0) {
      cs_di_free(scratch_);
    }
    scratch_ =
        static_cast<CS_ENTRY*>(cs_di_malloc(num_cols, sizeof(CS_ENTRY)));
    scratch_size_ = num_cols;
  }

  // When the Cholesky factorization succeeded, these calls are guaranteed to
  // succeed as well. In the comments below, "x" refers to the scratch space.

  // x = P * b.
  CHECK(cs_di_ipvec(symbolic_factor->pinv, b, scratch_, num_cols));
  // x = L \ x.
  CHECK(cs_di_lsolve(numeric_factor->L, scratch_));
  // x = L' \ x.
  CHECK(cs_di_ltsolve(numeric_factor->L, scratch_));
  // b = P' * x.
  CHECK(cs_di_pvec(symbolic_factor->pinv, scratch_, b, num_cols));
}
|
||||
|
||||
// Calls cs_cholsol with order 1 on `lhs` and `rhs_and_solution` and returns
// its result converted to bool.
bool CXSparse::SolveCholesky(cs_di* lhs, double* rhs_and_solution) {
  const bool ok = cs_cholsol(1, lhs, rhs_and_solution);
  return ok;
}
|
||||
|
||||
// Symbolic analysis of `A` via cs_schol with order = 1, the ordering used for
// a Cholesky factorization.
cs_dis* CXSparse::AnalyzeCholesky(cs_di* A) {
  cs_dis* symbolic_factor = cs_schol(1, A);
  return symbolic_factor;
}
|
||||
|
||||
// Symbolic analysis of `A` via cs_schol with order = 0, i.e. the natural
// ordering.
cs_dis* CXSparse::AnalyzeCholeskyWithNaturalOrdering(cs_di* A) {
  cs_dis* symbolic_factor = cs_schol(0, A);
  return symbolic_factor;
}
|
||||
|
||||
// Symbolic Cholesky analysis of `A` that computes the fill-reducing ordering
// on the block structure given by `row_blocks` / `col_blocks` rather than on
// the scalar matrix, then expands that ordering to scalars.
//
// Returns a newly allocated symbolic factorization, or nullptr when the
// analysis fails (AMD ordering failure, allocation failure, or a negative
// non-zero count).
//
// Compared to the previous version:
//  * vector storage is accessed through data() instead of &v[0], which is
//    undefined for an empty vector;
//  * the results of cs_amd and cs_calloc are checked before being
//    dereferenced;
//  * the void* results of cs_calloc / cs_malloc are converted with
//    static_cast.
cs_dis* CXSparse::BlockAnalyzeCholesky(cs_di* A,
                                       const std::vector<int>& row_blocks,
                                       const std::vector<int>& col_blocks) {
  const int num_row_blocks = row_blocks.size();
  const int num_col_blocks = col_blocks.size();

  // Build a structure-only compressed-column matrix with one entry per block.
  std::vector<int> block_rows;
  std::vector<int> block_cols;
  CompressedColumnScalarMatrixToBlockMatrix(
      A->i, A->p, row_blocks, col_blocks, &block_rows, &block_cols);
  cs_di block_matrix;
  block_matrix.m = num_row_blocks;
  block_matrix.n = num_col_blocks;
  block_matrix.nz = -1;
  block_matrix.nzmax = block_rows.size();
  block_matrix.p = block_cols.data();
  block_matrix.i = block_rows.data();
  block_matrix.x = nullptr;

  // AMD ordering of the block matrix; cs_amd returns null on failure.
  int* ordering = cs_amd(1, &block_matrix);
  if (ordering == nullptr) {
    return nullptr;
  }
  const std::vector<int> block_ordering(ordering, ordering + num_row_blocks);
  cs_free(ordering);

  std::vector<int> scalar_ordering;
  BlockOrderingToScalarOrdering(row_blocks, block_ordering, &scalar_ordering);

  auto* symbolic_factor =
      static_cast<cs_dis*>(cs_calloc(1, sizeof(cs_dis)));
  if (symbolic_factor == nullptr) {
    return nullptr;
  }
  symbolic_factor->pinv = cs_pinv(scalar_ordering.data(), A->n);
  cs* permuted_A = cs_symperm(A, symbolic_factor->pinv, 0);

  symbolic_factor->parent = cs_etree(permuted_A, 0);
  int* postordering = cs_post(symbolic_factor->parent, A->n);
  int* column_counts =
      cs_counts(permuted_A, symbolic_factor->parent, postordering, 0);
  cs_free(postordering);
  cs_spfree(permuted_A);

  symbolic_factor->cp = static_cast<int*>(cs_malloc(A->n + 1, sizeof(int)));
  symbolic_factor->lnz = cs_cumsum(symbolic_factor->cp, column_counts, A->n);
  symbolic_factor->unz = symbolic_factor->lnz;

  cs_free(column_counts);

  // A negative non-zero count signals failure.
  if (symbolic_factor->lnz < 0) {
    cs_sfree(symbolic_factor);
    symbolic_factor = nullptr;
  }

  return symbolic_factor;
}
|
||||
|
||||
// Builds a cs_di that aliases the storage of the compressed row matrix A with
// the row and column roles exchanged, i.e. a view of the transpose of A.
// Nothing is allocated or copied; the returned struct is only valid while A
// is.
cs_di CXSparse::CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A) {
  cs_di transpose_view;

  // Index and value arrays are borrowed directly from A.
  transpose_view.p = A->mutable_rows();
  transpose_view.i = A->mutable_cols();
  transpose_view.x = A->mutable_values();

  // Dimensions are swapped relative to A.
  transpose_view.m = A->num_cols();
  transpose_view.n = A->num_rows();

  transpose_view.nzmax = A->num_nonzeros();
  transpose_view.nz = -1;
  return transpose_view;
}
|
||||
|
||||
// Converts a triplet matrix into a newly allocated cs_di by wrapping the
// triplet arrays in a temporary cs_di_sparse and handing it to cs_compress.
// The result of cs_compress is returned as is.
cs_di* CXSparse::CreateSparseMatrix(TripletSparseMatrix* tsm) {
  cs_di_sparse triplet_view;

  // Borrow the triplet arrays; nothing is copied at this point.
  triplet_view.i = tsm->mutable_rows();
  triplet_view.p = tsm->mutable_cols();
  triplet_view.x = tsm->mutable_values();

  triplet_view.m = tsm->num_rows();
  triplet_view.n = tsm->num_cols();

  // Both the capacity and the entry count equal the number of non-zeros.
  triplet_view.nzmax = tsm->num_nonzeros();
  triplet_view.nz = tsm->num_nonzeros();

  cs_di* compressed = cs_compress(&triplet_view);
  return compressed;
}
|
||||
|
||||
// Runs cs_amd on A and copies the first A->m entries of the resulting
// ordering into the caller supplied array, which must have room for them.
void CXSparse::ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering) {
  int* amd_ordering = cs_amd(1, A);
  int* amd_ordering_end = amd_ordering + A->m;
  std::copy(amd_ordering, amd_ordering_end, ordering);
  // The array returned by cs_amd is owned by this function.
  cs_free(amd_ordering);
}
|
||||
|
||||
// Returns cs_di_transpose(A, 1); the second argument of 1 is passed through
// unchanged from the original call.
cs_di* CXSparse::TransposeMatrix(cs_di* A) {
  cs_di* transposed = cs_di_transpose(A, 1);
  return transposed;
}
|
||||
|
||||
// Returns the product of A and B as computed by cs_di_multiply.
cs_di* CXSparse::MatrixMatrixMultiply(cs_di* A, cs_di* B) {
  cs_di* product = cs_di_multiply(A, B);
  return product;
}
|
||||
|
||||
// Releases a sparse matrix through cs_di_spfree.
void CXSparse::Free(cs_di* sparse_matrix) {
  cs_di_spfree(sparse_matrix);
}
|
||||
|
||||
// Releases a symbolic factorization through cs_di_sfree.
void CXSparse::Free(cs_dis* symbolic_factor) {
  cs_di_sfree(symbolic_factor);
}
|
||||
|
||||
// Releases a numeric factorization through cs_di_nfree.
void CXSparse::Free(csn* numeric_factor) {
  cs_di_nfree(numeric_factor);
}
|
||||
|
||||
std::unique_ptr<SparseCholesky> CXSparseCholesky::Create(
|
||||
const OrderingType ordering_type) {
|
||||
return std::unique_ptr<SparseCholesky>(new CXSparseCholesky(ordering_type));
|
||||
}
|
||||
|
||||
// Storage type that Factorize() requires of its input matrix.
CompressedRowSparseMatrix::StorageType CXSparseCholesky::StorageType() const {
  const CompressedRowSparseMatrix::StorageType required_storage =
      CompressedRowSparseMatrix::LOWER_TRIANGULAR;
  return required_storage;
}
|
||||
|
||||
CXSparseCholesky::CXSparseCholesky(const OrderingType ordering_type)
|
||||
: ordering_type_(ordering_type),
|
||||
symbolic_factor_(nullptr),
|
||||
numeric_factor_(nullptr) {}
|
||||
|
||||
// Releases whichever factorizations are currently held. The two are released
// through separate helpers that do not touch each other's state.
CXSparseCholesky::~CXSparseCholesky() {
  FreeNumericFactorization();
  FreeSymbolicFactorization();
}
|
||||
|
||||
// Computes the Cholesky factorization of lhs.
//
// The symbolic factorization is computed on the first call and reused by
// later calls; the numeric factorization is recomputed on every call.
//
// Returns LINEAR_SOLVER_FATAL_ERROR if lhs is nullptr or the symbolic
// factorization fails, LINEAR_SOLVER_FAILURE if the numeric factorization
// fails, and LINEAR_SOLVER_SUCCESS otherwise. *message is set on every
// failure path.
LinearSolverTerminationType CXSparseCholesky::Factorize(
    CompressedRowSparseMatrix* lhs, std::string* message) {
  // The null test has to come before the storage type check, which
  // dereferences lhs.
  if (lhs == nullptr) {
    *message = "Failure: Input lhs is nullptr.";
    return LINEAR_SOLVER_FATAL_ERROR;
  }
  CHECK_EQ(lhs->storage_type(), StorageType());

  cs_di cs_lhs = cs_.CreateSparseMatrixTransposeView(lhs);

  if (symbolic_factor_ == nullptr) {
    if (ordering_type_ == NATURAL) {
      symbolic_factor_ = cs_.AnalyzeCholeskyWithNaturalOrdering(&cs_lhs);
    } else {
      if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) {
        // The column blocks of lhs are passed as row blocks and vice versa;
        // presumably because cs_lhs is a transpose view of lhs.
        symbolic_factor_ = cs_.BlockAnalyzeCholesky(
            &cs_lhs, lhs->col_blocks(), lhs->row_blocks());
      } else {
        symbolic_factor_ = cs_.AnalyzeCholesky(&cs_lhs);
      }
    }

    if (symbolic_factor_ == nullptr) {
      *message = "CXSparse Failure : Symbolic factorization failed.";
      return LINEAR_SOLVER_FATAL_ERROR;
    }
  }

  // Drop the previous numeric factorization before computing a new one.
  FreeNumericFactorization();
  numeric_factor_ = cs_.Cholesky(&cs_lhs, symbolic_factor_);
  if (numeric_factor_ == nullptr) {
    *message = "CXSparse Failure : Numeric factorization failed.";
    return LINEAR_SOLVER_FAILURE;
  }

  return LINEAR_SOLVER_SUCCESS;
}
|
||||
|
||||
// Solves the factorized system for rhs and writes the result to solution.
// Factorize() must have produced a numeric factorization beforehand. Always
// returns LINEAR_SOLVER_SUCCESS; message is not written.
LinearSolverTerminationType CXSparseCholesky::Solve(const double* rhs,
                                                    double* solution,
                                                    std::string* message) {
  CHECK(numeric_factor_ != nullptr)
      << "Solve called without a call to Factorize first.";

  // cs_.Solve works in place, so seed the output buffer with rhs first.
  const int num_cols = numeric_factor_->L->n;
  const auto num_bytes = num_cols * sizeof(*solution);
  memcpy(solution, rhs, num_bytes);

  cs_.Solve(symbolic_factor_, numeric_factor_, solution);
  return LINEAR_SOLVER_SUCCESS;
}
|
||||
|
||||
void CXSparseCholesky::FreeSymbolicFactorization() {
|
||||
if (symbolic_factor_ != nullptr) {
|
||||
cs_.Free(symbolic_factor_);
|
||||
symbolic_factor_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void CXSparseCholesky::FreeNumericFactorization() {
|
||||
if (numeric_factor_ != nullptr) {
|
||||
cs_.Free(numeric_factor_);
|
||||
numeric_factor_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_NO_CXSPARSE
|
||||
182
extern/ceres/internal/ceres/cxsparse.h
vendored
182
extern/ceres/internal/ceres/cxsparse.h
vendored
@@ -1,182 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: strandmark@google.com (Petter Strandmark)
|
||||
|
||||
#ifndef CERES_INTERNAL_CXSPARSE_H_
|
||||
#define CERES_INTERNAL_CXSPARSE_H_
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_CXSPARSE
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
#include "cs.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
class TripletSparseMatrix;
|
||||
|
||||
// This object provides access to solving linear systems using Cholesky
|
||||
// factorization with a known symbolic factorization. This features does not
|
||||
// explicitly exist in CXSparse. The methods in the class are nonstatic because
|
||||
// the class manages internal scratch space.
|
||||
class CERES_NO_EXPORT CXSparse {
|
||||
public:
|
||||
CXSparse();
|
||||
~CXSparse();
|
||||
|
||||
// Solve the system lhs * solution = rhs in place by using an
|
||||
// approximate minimum degree fill reducing ordering.
|
||||
bool SolveCholesky(cs_di* lhs, double* rhs_and_solution);
|
||||
|
||||
// Solves a linear system given its symbolic and numeric factorization.
|
||||
void Solve(cs_dis* symbolic_factor,
|
||||
csn* numeric_factor,
|
||||
double* rhs_and_solution);
|
||||
|
||||
// Compute the numeric Cholesky factorization of A, given its
|
||||
// symbolic factorization.
|
||||
//
|
||||
// Caller owns the result.
|
||||
csn* Cholesky(cs_di* A, cs_dis* symbolic_factor);
|
||||
|
||||
// Creates a sparse matrix from a compressed-column form. No memory is
|
||||
// allocated or copied; the structure A is filled out with info from the
|
||||
// argument.
|
||||
cs_di CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A);
|
||||
|
||||
// Creates a new matrix from a triplet form. Deallocate the returned matrix
|
||||
// with Free. May return nullptr if the compression or allocation fails.
|
||||
cs_di* CreateSparseMatrix(TripletSparseMatrix* A);
|
||||
|
||||
// B = A'
|
||||
//
|
||||
// The returned matrix should be deallocated with Free when not used
|
||||
// anymore.
|
||||
cs_di* TransposeMatrix(cs_di* A);
|
||||
|
||||
// C = A * B
|
||||
//
|
||||
// The returned matrix should be deallocated with Free when not used
|
||||
// anymore.
|
||||
cs_di* MatrixMatrixMultiply(cs_di* A, cs_di* B);
|
||||
|
||||
// Computes a symbolic factorization of A that can be used in SolveCholesky.
|
||||
//
|
||||
// The returned matrix should be deallocated with Free when not used anymore.
|
||||
cs_dis* AnalyzeCholesky(cs_di* A);
|
||||
|
||||
// Computes a symbolic factorization of A that can be used in
|
||||
// SolveCholesky, but does not compute a fill-reducing ordering.
|
||||
//
|
||||
// The returned matrix should be deallocated with Free when not used anymore.
|
||||
cs_dis* AnalyzeCholeskyWithNaturalOrdering(cs_di* A);
|
||||
|
||||
// Computes a symbolic factorization of A that can be used in
|
||||
// SolveCholesky. The difference from AnalyzeCholesky is that this
|
||||
// function first detects the block sparsity of the matrix using
|
||||
// information about the row and column blocks and uses this block
|
||||
// sparse matrix to find a fill-reducing ordering. This ordering is
|
||||
// then used to find a symbolic factorization. This can result in a
|
||||
// significant performance improvement AnalyzeCholesky on block
|
||||
// sparse matrices.
|
||||
//
|
||||
// The returned matrix should be deallocated with Free when not used
|
||||
// anymore.
|
||||
cs_dis* BlockAnalyzeCholesky(cs_di* A,
|
||||
const std::vector<int>& row_blocks,
|
||||
const std::vector<int>& col_blocks);
|
||||
|
||||
// Compute an fill-reducing approximate minimum degree ordering of
|
||||
// the matrix A. ordering should be non-nullptr and should point to
|
||||
// enough memory to hold the ordering for the rows of A.
|
||||
void ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering);
|
||||
|
||||
void Free(cs_di* sparse_matrix);
|
||||
void Free(cs_dis* symbolic_factorization);
|
||||
void Free(csn* numeric_factorization);
|
||||
|
||||
private:
|
||||
// Cached scratch space
|
||||
CS_ENTRY* scratch_;
|
||||
int scratch_size_;
|
||||
};
|
||||
|
||||
// An implementation of SparseCholesky interface using the CXSparse
|
||||
// library.
|
||||
class CERES_NO_EXPORT CXSparseCholesky final : public SparseCholesky {
|
||||
public:
|
||||
// Factory
|
||||
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
|
||||
|
||||
// SparseCholesky interface.
|
||||
~CXSparseCholesky() override;
|
||||
CompressedRowSparseMatrix::StorageType StorageType() const final;
|
||||
LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs,
|
||||
std::string* message) final;
|
||||
LinearSolverTerminationType Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) final;
|
||||
|
||||
private:
|
||||
explicit CXSparseCholesky(const OrderingType ordering_type);
|
||||
void FreeSymbolicFactorization();
|
||||
void FreeNumericFactorization();
|
||||
|
||||
const OrderingType ordering_type_;
|
||||
CXSparse cs_;
|
||||
cs_dis* symbolic_factor_;
|
||||
csn* numeric_factor_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
#else
|
||||
|
||||
// Placeholder so that code naming cs_dis still compiles when Ceres is built
// without CXSparse (CERES_NO_CXSPARSE defined).
using cs_dis = void;

// Stand-in for the CXSparse wrapper in builds without CXSparse.
class CXSparse {
 public:
  // Intentionally does nothing.
  void Free(void* arg) {}
};
|
||||
#endif // CERES_NO_CXSPARSE
|
||||
|
||||
#endif // CERES_INTERNAL_CXSPARSE_H_
|
||||
458
extern/ceres/internal/ceres/dense_cholesky.cc
vendored
458
extern/ceres/internal/ceres/dense_cholesky.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,12 +33,15 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/iterative_refiner.h"
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/cuda_kernels_vector_ops.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "cusolverDn.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
@@ -57,10 +60,21 @@ extern "C" void dpotrs_(const char* uplo,
|
||||
double* b,
|
||||
const int* ldb,
|
||||
int* info);
|
||||
|
||||
extern "C" void spotrf_(
|
||||
const char* uplo, const int* n, float* a, const int* lda, int* info);
|
||||
|
||||
extern "C" void spotrs_(const char* uplo,
|
||||
const int* n,
|
||||
const int* nrhs,
|
||||
const float* a,
|
||||
const int* lda,
|
||||
float* b,
|
||||
const int* ldb,
|
||||
int* info);
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DenseCholesky::~DenseCholesky() = default;
|
||||
|
||||
@@ -70,12 +84,22 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
|
||||
|
||||
switch (options.dense_linear_algebra_library_type) {
|
||||
case EIGEN:
|
||||
dense_cholesky = std::make_unique<EigenDenseCholesky>();
|
||||
// Eigen mixed precision solver not yet implemented.
|
||||
if (options.use_mixed_precision_solves) {
|
||||
dense_cholesky = std::make_unique<FloatEigenDenseCholesky>();
|
||||
} else {
|
||||
dense_cholesky = std::make_unique<EigenDenseCholesky>();
|
||||
}
|
||||
break;
|
||||
|
||||
case LAPACK:
|
||||
#ifndef CERES_NO_LAPACK
|
||||
dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
|
||||
// LAPACK mixed precision solver not yet implemented.
|
||||
if (options.use_mixed_precision_solves) {
|
||||
dense_cholesky = std::make_unique<FloatLAPACKDenseCholesky>();
|
||||
} else {
|
||||
dense_cholesky = std::make_unique<LAPACKDenseCholesky>();
|
||||
}
|
||||
break;
|
||||
#else
|
||||
LOG(FATAL) << "Ceres was compiled without support for LAPACK.";
|
||||
@@ -83,7 +107,11 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
|
||||
|
||||
case CUDA:
|
||||
#ifndef CERES_NO_CUDA
|
||||
dense_cholesky = CUDADenseCholesky::Create(options);
|
||||
if (options.use_mixed_precision_solves) {
|
||||
dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options);
|
||||
} else {
|
||||
dense_cholesky = CUDADenseCholesky::Create(options);
|
||||
}
|
||||
break;
|
||||
#else
|
||||
LOG(FATAL) << "Ceres was compiled without support for CUDA.";
|
||||
@@ -94,6 +122,14 @@ std::unique_ptr<DenseCholesky> DenseCholesky::Create(
|
||||
<< DenseLinearAlgebraLibraryTypeToString(
|
||||
options.dense_linear_algebra_library_type);
|
||||
}
|
||||
|
||||
if (options.max_num_refinement_iterations > 0) {
|
||||
auto refiner = std::make_unique<DenseIterativeRefiner>(
|
||||
options.max_num_refinement_iterations);
|
||||
dense_cholesky = std::make_unique<RefinedDenseCholesky>(
|
||||
std::move(dense_cholesky), std::move(refiner));
|
||||
}
|
||||
|
||||
return dense_cholesky;
|
||||
}
|
||||
|
||||
@@ -105,7 +141,7 @@ LinearSolverTerminationType DenseCholesky::FactorAndSolve(
|
||||
std::string* message) {
|
||||
LinearSolverTerminationType termination_type =
|
||||
Factorize(num_cols, lhs, message);
|
||||
if (termination_type == LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type == LinearSolverTerminationType::SUCCESS) {
|
||||
termination_type = Solve(rhs, solution, message);
|
||||
}
|
||||
return termination_type;
|
||||
@@ -117,11 +153,11 @@ LinearSolverTerminationType EigenDenseCholesky::Factorize(
|
||||
llt_ = std::make_unique<LLTType>(m);
|
||||
if (llt_->info() != Eigen::Success) {
|
||||
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
|
||||
*message = "Success.";
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
|
||||
@@ -129,13 +165,41 @@ LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs,
|
||||
std::string* message) {
|
||||
if (llt_->info() != Eigen::Success) {
|
||||
*message = "Eigen failure. Unable to perform dense Cholesky factorization.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
|
||||
VectorRef(solution, llt_->cols()) =
|
||||
llt_->solve(ConstVectorRef(rhs, llt_->cols()));
|
||||
*message = "Success.";
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
// Casts the num_cols x num_cols double matrix lhs to single precision and
// computes its Cholesky factorization with Eigen. Returns FAILURE when Eigen
// does not report success, SUCCESS otherwise; *message is set in both cases.
LinearSolverTerminationType FloatEigenDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  // TODO(sameeragarwal): Check if this causes a double allocation.
  Eigen::Map<Eigen::MatrixXd> lhs_double(lhs, num_cols, num_cols);
  lhs_ = lhs_double.cast<float>();
  llt_ = std::make_unique<LLTType>(lhs_);

  const bool factorized = (llt_->info() == Eigen::Success);
  if (!factorized) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}
|
||||
|
||||
// Solves the factorized system in single precision: rhs is cast to float,
// solved against the stored factorization, and the result is cast back to
// double into solution. Returns FAILURE if the stored factorization is not
// in the success state.
LinearSolverTerminationType FloatEigenDenseCholesky::Solve(
    const double* rhs, double* solution, std::string* message) {
  const bool factorized = (llt_->info() == Eigen::Success);
  if (!factorized) {
    *message = "Eigen failure. Unable to perform dense Cholesky factorization.";
    return LinearSolverTerminationType::FAILURE;
  }

  const auto num_cols = llt_->cols();
  rhs_ = ConstVectorRef(rhs, num_cols).cast<float>();
  solution_ = llt_->solve(rhs_);
  VectorRef(solution, num_cols) = solution_.cast<double>();

  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}
|
||||
|
||||
#ifndef CERES_NO_LAPACK
|
||||
@@ -149,19 +213,19 @@ LinearSolverTerminationType LAPACKDenseCholesky::Factorize(
|
||||
dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info);
|
||||
|
||||
if (info < 0) {
|
||||
termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
|
||||
<< "Please report it. "
|
||||
<< "LAPACK::dpotrf fatal error. "
|
||||
<< "Argument: " << -info << " is invalid.";
|
||||
} else if (info > 0) {
|
||||
termination_type_ = LINEAR_SOLVER_FAILURE;
|
||||
termination_type_ = LinearSolverTerminationType::FAILURE;
|
||||
*message = StringPrintf(
|
||||
"LAPACK::dpotrf numerical failure. "
|
||||
"The leading minor of order %d is not positive definite.",
|
||||
info);
|
||||
} else {
|
||||
termination_type_ = LINEAR_SOLVER_SUCCESS;
|
||||
termination_type_ = LinearSolverTerminationType::SUCCESS;
|
||||
*message = "Success.";
|
||||
}
|
||||
return termination_type_;
|
||||
@@ -174,12 +238,12 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
|
||||
const int nrhs = 1;
|
||||
int info = 0;
|
||||
|
||||
std::copy_n(rhs, num_cols_, solution);
|
||||
VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_);
|
||||
dpotrs_(
|
||||
&uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info);
|
||||
|
||||
if (info < 0) {
|
||||
termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
|
||||
<< "Please report it. "
|
||||
<< "LAPACK::dpotrs fatal error. "
|
||||
@@ -187,35 +251,118 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs,
|
||||
}
|
||||
|
||||
*message = "Success";
|
||||
termination_type_ = LINEAR_SOLVER_SUCCESS;
|
||||
termination_type_ = LinearSolverTerminationType::SUCCESS;
|
||||
|
||||
return termination_type_;
|
||||
}
|
||||
|
||||
// Casts the num_cols x num_cols double matrix lhs to single precision and
// factorizes it in place with LAPACK's spotrf. The outcome is stored in
// termination_type_ and returned.
LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize(
    int num_cols, double* lhs, std::string* message) {
  num_cols_ = num_cols;
  lhs_ = Eigen::Map<Eigen::MatrixXd>(lhs, num_cols, num_cols).cast<float>();

  const char uplo = 'L';
  int info = 0;
  spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info);

  if (info == 0) {
    termination_type_ = LinearSolverTerminationType::SUCCESS;
    *message = "Success.";
    return termination_type_;
  }

  if (info > 0) {
    // A positive info is reported as a numerical failure of the given order.
    termination_type_ = LinearSolverTerminationType::FAILURE;
    *message = StringPrintf(
        "LAPACK::spotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        info);
    return termination_type_;
  }

  // info < 0: argument number -info was rejected by spotrf.
  termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
  LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
             << "Please report it. "
             << "LAPACK::spotrf fatal error. "
             << "Argument: " << -info << " is invalid.";
  return termination_type_;
}
|
||||
|
||||
// Solves the factorized system in single precision with LAPACK's spotrs.
//
// rhs is cast to float into rhs_and_solution_, solved in place against the
// factor stored in lhs_, and the result is cast back to double into
// solution. A negative info from spotrs is logged as a fatal error;
// otherwise *message is set to "Success" and SUCCESS is stored in
// termination_type_ and returned.
LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve(
    const double* rhs, double* solution, std::string* message) {
  const char uplo = 'L';
  const int nrhs = 1;
  int info = 0;
  rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast<float>();
  spotrs_(&uplo,
          &num_cols_,
          &nrhs,
          lhs_.data(),
          &num_cols_,
          rhs_and_solution_.data(),
          &num_cols_,
          &info);

  if (info < 0) {
    termination_type_ = LinearSolverTerminationType::FATAL_ERROR;
    // Name the routine that was actually called (spotrs, not dpotrs).
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "LAPACK::spotrs fatal error. "
               << "Argument: " << -info << " is invalid.";
  }

  *message = "Success";
  termination_type_ = LinearSolverTerminationType::SUCCESS;
  VectorRef(solution, num_cols_) =
      rhs_and_solution_.head(num_cols_).cast<double>();
  return termination_type_;
}
|
||||
|
||||
#endif // CERES_NO_LAPACK
|
||||
|
||||
RefinedDenseCholesky::RefinedDenseCholesky(
|
||||
std::unique_ptr<DenseCholesky> dense_cholesky,
|
||||
std::unique_ptr<DenseIterativeRefiner> iterative_refiner)
|
||||
: dense_cholesky_(std::move(dense_cholesky)),
|
||||
iterative_refiner_(std::move(iterative_refiner)) {}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
RefinedDenseCholesky::~RefinedDenseCholesky() = default;
|
||||
|
||||
// Remembers lhs and its size for the refinement step in Solve(), then
// delegates the factorization to the wrapped solver.
LinearSolverTerminationType RefinedDenseCholesky::Factorize(
    const int num_cols, double* lhs, std::string* message) {
  num_cols_ = num_cols;
  lhs_ = lhs;
  const LinearSolverTerminationType result =
      dense_cholesky_->Factorize(num_cols, lhs, message);
  return result;
}
|
||||
|
||||
// Solves with the wrapped solver and, if that succeeds, passes the solution
// through the iterative refiner. A non-success result from the wrapped
// solver is returned unchanged; otherwise SUCCESS is returned.
LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs,
                                                        double* solution,
                                                        std::string* message) {
  CHECK(lhs_ != nullptr);

  const LinearSolverTerminationType solve_result =
      dense_cholesky_->Solve(rhs, solution, message);
  const bool solved = (solve_result == LinearSolverTerminationType::SUCCESS);
  if (solved) {
    iterative_refiner_->Refine(
        num_cols_, lhs_, rhs, dense_cholesky_.get(), solution);
    return LinearSolverTerminationType::SUCCESS;
  }
  return solve_result;
}
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
bool CUDADenseCholesky::Init(ContextImpl* context, std::string* message) {
|
||||
if (!context->InitCUDA(message)) {
|
||||
return false;
|
||||
}
|
||||
cusolver_handle_ = context->cusolver_handle_;
|
||||
stream_ = context->stream_;
|
||||
error_.Reserve(1);
|
||||
*message = "CUDADenseCholesky::Init Success.";
|
||||
return true;
|
||||
}
|
||||
CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context)
|
||||
: context_(context),
|
||||
lhs_{context},
|
||||
rhs_{context},
|
||||
device_workspace_{context},
|
||||
error_(context, 1) {}
|
||||
|
||||
LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
|
||||
double* lhs,
|
||||
std::string* message) {
|
||||
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
|
||||
lhs_.Reserve(num_cols * num_cols);
|
||||
num_cols_ = num_cols;
|
||||
lhs_.CopyToGpuAsync(lhs, num_cols * num_cols, stream_);
|
||||
lhs_.CopyFromCpu(lhs, num_cols * num_cols);
|
||||
int device_workspace_size = 0;
|
||||
if (cusolverDnDpotrf_bufferSize(cusolver_handle_,
|
||||
if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_,
|
||||
CUBLAS_FILL_MODE_LOWER,
|
||||
num_cols,
|
||||
lhs_.data(),
|
||||
@@ -223,10 +370,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
|
||||
&device_workspace_size) !=
|
||||
CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
device_workspace_.Reserve(device_workspace_size);
|
||||
if (cusolverDnDpotrf(cusolver_handle_,
|
||||
if (cusolverDnDpotrf(context_->cusolver_handle_,
|
||||
CUBLAS_FILL_MODE_LOWER,
|
||||
num_cols,
|
||||
lhs_.data(),
|
||||
@@ -235,15 +382,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
|
||||
device_workspace_.size(),
|
||||
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDpotrf failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
}
|
||||
if (cudaDeviceSynchronize() != cudaSuccess ||
|
||||
cudaStreamSynchronize(stream_) != cudaSuccess) {
|
||||
*message = "Cuda device synchronization failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
int error = 0;
|
||||
error_.CopyToHost(&error, 1);
|
||||
error_.CopyToCpu(&error, 1);
|
||||
if (error < 0) {
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
|
||||
<< "please report it. "
|
||||
@@ -251,29 +393,29 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols,
|
||||
<< "Argument: " << -error << " is invalid.";
|
||||
// The following line is unreachable, but return failure just to be
|
||||
// pedantic, since the compiler does not know that.
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
} else if (error > 0) {
|
||||
*message = StringPrintf(
|
||||
"cuSolverDN::cusolverDnDpotrf numerical failure. "
|
||||
"The leading minor of order %d is not positive definite.",
|
||||
error);
|
||||
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FAILURE;
|
||||
factorize_result_ = LinearSolverTerminationType::FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
*message = "Success";
|
||||
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
factorize_result_ = LinearSolverTerminationType::SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) {
|
||||
if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) {
|
||||
*message = "Factorize did not complete succesfully previously.";
|
||||
if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
|
||||
*message = "Factorize did not complete successfully previously.";
|
||||
return factorize_result_;
|
||||
}
|
||||
rhs_.CopyToGpuAsync(rhs, num_cols_, stream_);
|
||||
if (cusolverDnDpotrs(cusolver_handle_,
|
||||
rhs_.CopyFromCpu(rhs, num_cols_);
|
||||
if (cusolverDnDpotrs(context_->cusolver_handle_,
|
||||
CUBLAS_FILL_MODE_LOWER,
|
||||
num_cols_,
|
||||
1,
|
||||
@@ -283,45 +425,221 @@ LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs,
|
||||
num_cols_,
|
||||
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDpotrs failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
}
|
||||
if (cudaDeviceSynchronize() != cudaSuccess ||
|
||||
cudaStreamSynchronize(stream_) != cudaSuccess) {
|
||||
*message = "Cuda device synchronization failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
int error = 0;
|
||||
error_.CopyToHost(&error, 1);
|
||||
error_.CopyToCpu(&error, 1);
|
||||
if (error != 0) {
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
|
||||
<< "Please report it."
|
||||
<< "cuSolverDN::cusolverDnDpotrs fatal error. "
|
||||
<< "Argument: " << -error << " is invalid.";
|
||||
}
|
||||
rhs_.CopyToHost(solution, num_cols_);
|
||||
rhs_.CopyToCpu(solution, num_cols_);
|
||||
*message = "Success";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
std::unique_ptr<CUDADenseCholesky> CUDADenseCholesky::Create(
|
||||
const LinearSolver::Options& options) {
|
||||
if (options.dense_linear_algebra_library_type != CUDA) {
|
||||
// The user called the wrong factory method.
|
||||
if (options.dense_linear_algebra_library_type != CUDA ||
|
||||
options.context == nullptr || !options.context->IsCudaInitialized()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cuda_dense_cholesky =
|
||||
std::unique_ptr<CUDADenseCholesky>(new CUDADenseCholesky());
|
||||
std::string cuda_error;
|
||||
if (cuda_dense_cholesky->Init(options.context, &cuda_error)) {
|
||||
return cuda_dense_cholesky;
|
||||
return std::unique_ptr<CUDADenseCholesky>(
|
||||
new CUDADenseCholesky(options.context));
|
||||
}
|
||||
|
||||
std::unique_ptr<CUDADenseCholeskyMixedPrecision>
|
||||
CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) {
|
||||
if (options.dense_linear_algebra_library_type != CUDA ||
|
||||
!options.use_mixed_precision_solves || options.context == nullptr ||
|
||||
!options.context->IsCudaInitialized()) {
|
||||
return nullptr;
|
||||
}
|
||||
// Initialization failed, destroy the object (done automatically) and return a
|
||||
// nullptr.
|
||||
LOG(ERROR) << "CUDADenseCholesky::Init failed: " << cuda_error;
|
||||
return nullptr;
|
||||
return std::unique_ptr<CUDADenseCholeskyMixedPrecision>(
|
||||
new CUDADenseCholeskyMixedPrecision(
|
||||
options.context, options.max_num_refinement_iterations));
|
||||
}
|
||||
|
||||
// Factorizes the single-precision matrix held in lhs_fp32_ with cuSOLVER's
// Spotrf (lower triangular fill mode).
//
// Returns:
//   SUCCESS      - the factorization completed; *message is "Success".
//   FAILURE      - cuSOLVER reported a positive info value, i.e. a leading
//                  minor is not positive definite. factorize_result_ is
//                  updated to FAILURE as well.
//   FATAL_ERROR  - a cuSOLVER call itself failed, or cuSOLVER reported an
//                  invalid argument (negative info value, which also
//                  triggers LOG(FATAL)).
LinearSolverTerminationType
CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) {
  using Termination = LinearSolverTerminationType;

  // Query the amount of scratch space required by the factorization.
  int workspace_entries = 0;
  const auto query_status =
      cusolverDnSpotrf_bufferSize(context_->cusolver_handle_,
                                  CUBLAS_FILL_MODE_LOWER,
                                  num_cols_,
                                  lhs_fp32_.data(),
                                  num_cols_,
                                  &workspace_entries);
  if (query_status != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed.";
    return Termination::FATAL_ERROR;
  }

  // Grow the scratch buffer and run the factorization itself.
  device_workspace_.Reserve(workspace_entries);
  const auto factor_status = cusolverDnSpotrf(context_->cusolver_handle_,
                                              CUBLAS_FILL_MODE_LOWER,
                                              num_cols_,
                                              lhs_fp32_.data(),
                                              num_cols_,
                                              device_workspace_.data(),
                                              device_workspace_.size(),
                                              error_.data());
  if (factor_status != CUSOLVER_STATUS_SUCCESS) {
    *message = "cuSolverDN::cusolverDnSpotrf failed.";
    return Termination::FATAL_ERROR;
  }

  // Fetch the info value cuSOLVER wrote on the device.
  int info = 0;
  error_.CopyToCpu(&info, 1);

  if (info > 0) {
    // Numerical failure: the matrix is not positive definite.
    *message = StringPrintf(
        "cuSolverDN::cusolverDnSpotrf numerical failure. "
        "The leading minor of order %d is not positive definite.",
        info);
    factorize_result_ = Termination::FAILURE;
    return Termination::FAILURE;
  }

  if (info < 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
               << "please report it. "
               << "cuSolverDN::cusolverDnSpotrf fatal error. "
               << "Argument: " << -info << " is invalid.";
    // The following line is unreachable, but return failure just to be
    // pedantic, since the compiler does not know that.
    return Termination::FATAL_ERROR;
  }

  *message = "Success";
  return Termination::SUCCESS;
}
|
||||
|
||||
// Solves lhs * correction = residual in single precision using the Cholesky
// factor stored in lhs_fp32_.
//
// residual_fp32_ is first copied (device to device, on the context's default
// stream) into correction_fp32_, which cusolverDnSpotrs then overwrites with
// the solution.
//
// Returns SUCCESS with *message set to "Success", or FATAL_ERROR if the
// cuSOLVER call fails. A non-zero info value reported by cuSOLVER indicates
// an invalid argument and triggers LOG(FATAL).
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve(
    std::string* message) {
  CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(),
                           residual_fp32_.data(),
                           num_cols_ * sizeof(float),
                           cudaMemcpyDeviceToDevice,
                           context_->DefaultStream()),
           cudaSuccess);
  if (cusolverDnSpotrs(context_->cusolver_handle_,
                       CUBLAS_FILL_MODE_LOWER,
                       num_cols_,
                       1,
                       lhs_fp32_.data(),
                       num_cols_,
                       correction_fp32_.data(),
                       num_cols_,
                       error_.data()) != CUSOLVER_STATUS_SUCCESS) {
    // Name the routine that was actually called (Spotrs, not Dpotrs).
    *message = "cuSolverDN::cusolverDnSpotrs failed.";
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  int error = 0;
  error_.CopyToCpu(&error, 1);
  if (error != 0) {
    LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
               << "Please report it. "
               << "cuSolverDN::cusolverDnSpotrs fatal error. "
               << "Argument: " << -error << " is invalid.";
    // Unreachable, but return failure to be explicit, matching
    // CudaCholeskyFactorize.
    return LinearSolverTerminationType::FATAL_ERROR;
  }
  *message = "Success";
  return LinearSolverTerminationType::SUCCESS;
}
|
||||
|
||||
// Constructs the solver around an existing context. The context pointer is
// stored and also forwarded to every CudaBuffer member; the refinement
// iteration limit is stored as given.
// NOTE(review): error_ is built with (context, 1), presumably a buffer sized
// for the single int info value that cuSOLVER writes - confirm against
// CudaBuffer.
CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision(
|
||||
ContextImpl* context, int max_num_refinement_iterations)
|
||||
: context_(context),
|
||||
lhs_fp64_{context},
|
||||
rhs_fp64_{context},
|
||||
lhs_fp32_{context},
|
||||
device_workspace_{context},
|
||||
error_(context, 1),
|
||||
x_fp64_{context},
|
||||
correction_fp32_{context},
|
||||
residual_fp32_{context},
|
||||
residual_fp64_{context},
|
||||
max_num_refinement_iterations_(max_num_refinement_iterations) {}
|
||||
|
||||
// Uploads the num_cols x num_cols matrix lhs to the GPU in double precision,
// derives a single-precision copy of it, and Cholesky-factorizes that copy.
//
// The outcome is cached in factorize_result_ (checked later by Solve) and
// also returned; *message is filled in by CudaCholeskyFactorize.
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize(
    int num_cols, double* lhs, std::string* message) {
  num_cols_ = num_cols;
  const int num_entries = num_cols * num_cols;

  // Double-precision copy of lhs on the device.
  lhs_fp64_.Reserve(num_entries);
  lhs_fp64_.CopyFromCpu(lhs, num_entries);

  // Single-precision copy, produced on the device from the fp64 one.
  lhs_fp32_.Reserve(num_entries);
  CudaFP64ToFP32(lhs_fp64_.data(),
                 lhs_fp32_.data(),
                 num_entries,
                 context_->DefaultStream());

  // Factorize the fp32 matrix and remember how it went.
  return factorize_result_ = CudaCholeskyFactorize(message);
}
|
||||
|
||||
// Solves lhs * x = rhs with mixed-precision iterative refinement, using the
// factorization computed by the preceding call to Factorize.
//
// Each pass solves for a correction in single precision and accumulates it
// into the double-precision solution; between passes the residual
// rhs - lhs * x is recomputed in double precision. In total
// max_num_refinement_iterations_ + 1 single-precision solves are performed.
//
// Args:
//   rhs:      host pointer to num_cols_ doubles.
//   solution: host pointer receiving num_cols_ doubles.
//   message:  receives a human-readable status string.
//
// Returns the cached factorization result if Factorize did not succeed, the
// failing status if a solve step or the cuBLAS residual update fails, and
// SUCCESS otherwise.
LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve(
    const double* rhs, double* solution, std::string* message) {
  // If factorization failed, return failure.
  if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
    *message = "Factorize did not complete successfully previously.";
    return factorize_result_;
  }

  // Reserve memory for all arrays.
  rhs_fp64_.Reserve(num_cols_);
  x_fp64_.Reserve(num_cols_);
  correction_fp32_.Reserve(num_cols_);
  residual_fp32_.Reserve(num_cols_);
  residual_fp64_.Reserve(num_cols_);

  // Initialize x = 0.
  CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream());

  // Initialize residual = rhs.
  rhs_fp64_.CopyFromCpu(rhs, num_cols_);
  residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);

  for (int i = 0; i <= max_num_refinement_iterations_; ++i) {
    // Cast residual from fp64 to fp32.
    CudaFP64ToFP32(residual_fp64_.data(),
                   residual_fp32_.data(),
                   num_cols_,
                   context_->DefaultStream());
    // [fp32] c = lhs^-1 * residual.
    auto result = CudaCholeskySolve(message);
    if (result != LinearSolverTerminationType::SUCCESS) {
      return result;
    }
    // [fp64] x += c.
    CudaDsxpy(x_fp64_.data(),
              correction_fp32_.data(),
              num_cols_,
              context_->DefaultStream());
    // The residual is only needed if another refinement pass follows.
    if (i < max_num_refinement_iterations_) {
      // [fp64] residual = rhs - lhs * x
      // This is done in two steps:
      // 1. [fp64] residual = rhs
      residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_);
      // 2. [fp64] residual = residual - lhs * x
      const double alpha = -1.0;
      const double beta = 1.0;
      // Do not ignore the cuBLAS status: a failed update would otherwise
      // leave a stale residual and still report success.
      if (cublasDsymv(context_->cublas_handle_,
                      CUBLAS_FILL_MODE_LOWER,
                      num_cols_,
                      &alpha,
                      lhs_fp64_.data(),
                      num_cols_,
                      x_fp64_.data(),
                      1,
                      &beta,
                      residual_fp64_.data(),
                      1) != CUBLAS_STATUS_SUCCESS) {
        *message = "cuBLAS::cublasDsymv failed.";
        return LinearSolverTerminationType::FATAL_ERROR;
      }
    }
  }
  x_fp64_.CopyToCpu(solution, num_cols_);
  *message = "Success.";
  return LinearSolverTerminationType::SUCCESS;
}
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
161
extern/ceres/internal/ceres/dense_cholesky.h
vendored
161
extern/ceres/internal/ceres/dense_cholesky.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/cuda_buffer.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "glog/logging.h"
|
||||
@@ -49,8 +50,7 @@
|
||||
#include "cusolverDn.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// An interface that abstracts away the internal details of various dense linear
|
||||
// algebra libraries and offers a simple API for solving dense symmetric
|
||||
@@ -88,7 +88,7 @@ class CERES_NO_EXPORT DenseCholesky {
|
||||
std::string* message) = 0;
|
||||
|
||||
// Convenience method which combines a call to Factorize and Solve. Solve is
|
||||
// only called if Factorize returns LINEAR_SOLVER_SUCCESS.
|
||||
// only called if Factorize returns LinearSolverTerminationType::SUCCESS.
|
||||
//
|
||||
// The input matrix lhs may be modified by the implementation to store the
|
||||
// factorization, irrespective of whether the method succeeds or not. It is
|
||||
@@ -115,6 +115,23 @@ class CERES_NO_EXPORT EigenDenseCholesky final : public DenseCholesky {
|
||||
std::unique_ptr<LLTType> llt_;
|
||||
};
|
||||
|
||||
// DenseCholesky implementation whose internal storage is single precision:
// the matrix, right-hand side, solution and LLT factorization members below
// all use Eigen float types, while the public interface takes and returns
// double arrays.
// NOTE(review): the double <-> float conversion presumably happens inside
// Factorize/Solve; their definitions are not visible here - confirm.
class CERES_NO_EXPORT FloatEigenDenseCholesky final : public DenseCholesky {
|
||||
public:
|
||||
// DenseCholesky interface: lhs holds the matrix with num_cols columns, and
// message receives a human-readable status.
LinearSolverTerminationType Factorize(int num_cols,
|
||||
double* lhs,
|
||||
std::string* message) override;
|
||||
// DenseCholesky interface: rhs is the input, solution the output array.
LinearSolverTerminationType Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) override;
|
||||
|
||||
private:
|
||||
// Single-precision matrix storage.
Eigen::MatrixXf lhs_;
|
||||
// Single-precision right-hand side storage.
Eigen::VectorXf rhs_;
|
||||
// Single-precision solution storage.
Eigen::VectorXf solution_;
|
||||
// Lower-triangular LLT (Cholesky) decomposition over float matrices.
using LLTType = Eigen::LLT<Eigen::MatrixXf, Eigen::Lower>;
|
||||
std::unique_ptr<LLTType> llt_;
|
||||
};
|
||||
|
||||
#ifndef CERES_NO_LAPACK
|
||||
class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky {
|
||||
public:
|
||||
@@ -128,10 +145,53 @@ class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky {
|
||||
private:
|
||||
double* lhs_ = nullptr;
|
||||
int num_cols_ = -1;
|
||||
LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
LinearSolverTerminationType termination_type_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
};
|
||||
|
||||
// DenseCholesky implementation, compiled only when CERES_NO_LAPACK is not
// defined, whose internal storage is single precision (Eigen float matrix
// and vector) while the public interface takes and returns double arrays.
// NOTE(review): presumably backed by the single-precision LAPACK Cholesky
// routines; the definitions are not visible here - confirm.
class CERES_NO_EXPORT FloatLAPACKDenseCholesky final : public DenseCholesky {
|
||||
public:
|
||||
// DenseCholesky interface: lhs holds the matrix with num_cols columns, and
// message receives a human-readable status.
LinearSolverTerminationType Factorize(int num_cols,
|
||||
double* lhs,
|
||||
std::string* message) override;
|
||||
// DenseCholesky interface: rhs is the input, solution the output array.
LinearSolverTerminationType Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) override;
|
||||
|
||||
private:
|
||||
// Single-precision matrix storage.
Eigen::MatrixXf lhs_;
|
||||
// One single-precision vector shared by the right-hand side and the
// solution, as the name indicates.
Eigen::VectorXf rhs_and_solution_;
|
||||
// -1 until set.
int num_cols_ = -1;
|
||||
// Starts out as FATAL_ERROR.
LinearSolverTerminationType termination_type_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
};
|
||||
#endif // CERES_NO_LAPACK
|
||||
|
||||
class DenseIterativeRefiner;
|
||||
|
||||
// Computes an initial solution using the given instance of
|
||||
// DenseCholesky, and then refines it using the DenseIterativeRefiner.
|
||||
class CERES_NO_EXPORT RefinedDenseCholesky final : public DenseCholesky {
 public:
  // Takes ownership of the wrapped DenseCholesky and of the refiner that is
  // applied to its solutions.
  RefinedDenseCholesky(
      std::unique_ptr<DenseCholesky> dense_cholesky,
      std::unique_ptr<DenseIterativeRefiner> iterative_refiner);
  ~RefinedDenseCholesky() override;

  // DenseCholesky interface: lhs holds the matrix with num_cols columns, and
  // message receives a human-readable status.
  LinearSolverTerminationType Factorize(int num_cols,
                                        double* lhs,
                                        std::string* message) override;
  // DenseCholesky interface: rhs is the input, solution the output array.
  LinearSolverTerminationType Solve(const double* rhs,
                                    double* solution,
                                    std::string* message) override;

 private:
  std::unique_ptr<DenseCholesky> dense_cholesky_;
  std::unique_ptr<DenseIterativeRefiner> iterative_refiner_;
  // NOTE(review): presumably the matrix pointer and size handed to
  // Factorize, kept for the refinement step in Solve - confirm against the
  // implementation.
  double* lhs_ = nullptr;
  // Initialized to -1, like the other DenseCholesky implementations in this
  // header, so the member is never read uninitialized.
  int num_cols_ = -1;
};
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
// CUDA implementation of DenseCholesky using the cuSolverDN library using the
|
||||
// 32-bit legacy interface for maximum compatibility.
|
||||
@@ -149,16 +209,9 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky {
|
||||
std::string* message) override;
|
||||
|
||||
private:
|
||||
CUDADenseCholesky() = default;
|
||||
// Picks up the cuSolverDN and cuStream handles from the context. If
|
||||
// the context is unable to initialize CUDA, returns false with a
|
||||
// human-readable message indicating the reason.
|
||||
bool Init(ContextImpl* context, std::string* message);
|
||||
explicit CUDADenseCholesky(ContextImpl* context);
|
||||
|
||||
// Handle to the cuSOLVER context.
|
||||
cusolverDnHandle_t cusolver_handle_ = nullptr;
|
||||
// CUDA device stream.
|
||||
cudaStream_t stream_ = nullptr;
|
||||
ContextImpl* context_ = nullptr;
|
||||
// Number of columns in the A matrix, to be cached between calls to *Factorize
|
||||
// and *Solve.
|
||||
size_t num_cols_ = 0;
|
||||
@@ -171,13 +224,85 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky {
|
||||
// Required for error handling with cuSOLVER.
|
||||
CudaBuffer<int> error_;
|
||||
// Cache the result of Factorize to ensure that when Solve is called, the
|
||||
// factiorization of lhs is valid.
|
||||
LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
// factorization of lhs is valid.
|
||||
LinearSolverTerminationType factorize_result_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
};
|
||||
|
||||
// A mixed-precision iterative refinement dense Cholesky solver using FP32 CUDA
|
||||
// Dense Cholesky for inner iterations, and FP64 outer refinements.
|
||||
// This class implements a modified version of the "Classical iterative
|
||||
// refinement" (Algorithm 4.1) from the following paper:
|
||||
// Haidar, Azzam, Harun Bayraktar, Stanimire Tomov, Jack Dongarra, and Nicholas
|
||||
// J. Higham. "Mixed-precision iterative refinement using tensor cores on GPUs
|
||||
// to accelerate solution of linear systems." Proceedings of the Royal Society A
|
||||
// 476, no. 2243 (2020): 20200110.
|
||||
//
|
||||
// The two key modifications from Algorithm 4.1 in the paper are:
|
||||
// 1. We use Cholesky factorization instead of LU factorization since our A is
|
||||
// symmetric positive definite.
|
||||
// 2. During the solution update, the up-cast and accumulation is performed in
|
||||
// one step with a custom kernel.
|
||||
class CERES_NO_EXPORT CUDADenseCholeskyMixedPrecision final
|
||||
: public DenseCholesky {
|
||||
public:
|
||||
// Factory. Returns nullptr unless the options select the CUDA dense linear
// algebra library, enable mixed precision solves, and carry a non-null
// context on which CUDA has been initialized.
static std::unique_ptr<CUDADenseCholeskyMixedPrecision> Create(
|
||||
const LinearSolver::Options& options);
|
||||
// Not copyable.
CUDADenseCholeskyMixedPrecision(const CUDADenseCholeskyMixedPrecision&) =
|
||||
delete;
|
||||
CUDADenseCholeskyMixedPrecision& operator=(
|
||||
const CUDADenseCholeskyMixedPrecision&) = delete;
|
||||
// Copies lhs to the GPU in double precision, converts it to single
// precision and Cholesky-factorizes the single-precision copy. The result
// is cached for Solve.
LinearSolverTerminationType Factorize(int num_cols,
|
||||
double* lhs,
|
||||
std::string* message) override;
|
||||
// Solves with iterative refinement: single-precision solves for the
// correction, double-precision accumulation and residual updates. Returns
// the cached Factorize result if factorization did not succeed.
LinearSolverTerminationType Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) override;
|
||||
|
||||
private:
|
||||
// Private: instances are created through Create().
CUDADenseCholeskyMixedPrecision(ContextImpl* context,
|
||||
int max_num_refinement_iterations);
|
||||
|
||||
// Helper function to wrap Cuda boilerplate needed to call Spotrf.
|
||||
LinearSolverTerminationType CudaCholeskyFactorize(std::string* message);
|
||||
// Helper function to wrap Cuda boilerplate needed to call Spotrs.
|
||||
LinearSolverTerminationType CudaCholeskySolve(std::string* message);
|
||||
// Picks up the cuSolverDN and cuStream handles from the context in the
|
||||
// options, and the number of refinement iterations from the options. If
|
||||
// the context is unable to initialize CUDA, returns false with a
|
||||
// human-readable message indicating the reason.
|
||||
// NOTE(review): no definition of Init is visible next to the other member
// definitions of this class, and Create() builds instances through the
// constructor instead - this declaration may be stale; confirm.
bool Init(const LinearSolver::Options& options, std::string* message);
|
||||
|
||||
// Context supplying the cuSOLVER/cuBLAS handles and the default stream.
ContextImpl* context_ = nullptr;
|
||||
// Number of columns in the A matrix, to be cached between calls to *Factorize
|
||||
// and *Solve.
|
||||
size_t num_cols_ = 0;
|
||||
// Double-precision copy of lhs on the GPU, used for the residual update.
CudaBuffer<double> lhs_fp64_;
|
||||
// Double-precision copy of rhs on the GPU.
CudaBuffer<double> rhs_fp64_;
|
||||
// Single-precision copy of lhs; this is the matrix that gets factorized.
CudaBuffer<float> lhs_fp32_;
|
||||
// Scratch space for cuSOLVER on the GPU.
|
||||
CudaBuffer<float> device_workspace_;
|
||||
// Required for error handling with cuSOLVER.
|
||||
CudaBuffer<int> error_;
|
||||
|
||||
// Solution to lhs * x = rhs.
|
||||
CudaBuffer<double> x_fp64_;
|
||||
// Incremental correction to x.
|
||||
CudaBuffer<float> correction_fp32_;
|
||||
// Residual to iterative refinement.
|
||||
CudaBuffer<float> residual_fp32_;
|
||||
CudaBuffer<double> residual_fp64_;
|
||||
|
||||
// Number of inner refinement iterations to perform.
|
||||
int max_num_refinement_iterations_ = 0;
|
||||
// Cache the result of Factorize to ensure that when Solve is called, the
|
||||
// factorization of lhs is valid.
|
||||
LinearSolverTerminationType factorize_result_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
};
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_DENSE_CHOLESKY_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -75,8 +75,8 @@ class CERES_NO_EXPORT DenseJacobianWriter {
|
||||
DenseSparseMatrix* dense_jacobian = down_cast<DenseSparseMatrix*>(jacobian);
|
||||
const ResidualBlock* residual_block =
|
||||
program_->residual_blocks()[residual_id];
|
||||
int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
int num_residuals = residual_block->NumResiduals();
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
|
||||
// Now copy the jacobians for each parameter into the dense jacobian matrix.
|
||||
for (int j = 0; j < num_parameter_blocks; ++j) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DenseNormalCholeskySolver::DenseNormalCholeskySolver(
|
||||
LinearSolver::Options options)
|
||||
@@ -87,5 +86,4 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl(
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,8 +41,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class DenseSparseMatrix;
|
||||
|
||||
@@ -94,8 +93,7 @@ class CERES_NO_EXPORT DenseNormalCholeskySolver
|
||||
std::unique_ptr<DenseCholesky> cholesky_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
113
extern/ceres/internal/ceres/dense_qr.cc
vendored
113
extern/ceres/internal/ceres/dense_qr.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
#include "ceres/context_impl.h"
|
||||
#include "cublas_v2.h"
|
||||
@@ -98,7 +99,7 @@ extern "C" void dormqr_(const char* side, const char* trans, const int* m,
|
||||
// a is a column major lda x n.
|
||||
// b is a column major matrix of ldb x nrhs
|
||||
//
|
||||
// info = 0 succesful.
|
||||
// info = 0 successful.
|
||||
// = -i < 0 i^th argument is an illegal value.
|
||||
// = i > 0, i^th diagonal element of A is zero.
|
||||
extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag,
|
||||
@@ -108,8 +109,7 @@ extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag,
|
||||
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DenseQR::~DenseQR() = default;
|
||||
|
||||
@@ -153,7 +153,7 @@ LinearSolverTerminationType DenseQR::FactorAndSolve(int num_rows,
|
||||
std::string* message) {
|
||||
LinearSolverTerminationType termination_type =
|
||||
Factorize(num_rows, num_cols, lhs, message);
|
||||
if (termination_type == LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type == LinearSolverTerminationType::SUCCESS) {
|
||||
termination_type = Solve(rhs, solution, message);
|
||||
}
|
||||
return termination_type;
|
||||
@@ -166,7 +166,7 @@ LinearSolverTerminationType EigenDenseQR::Factorize(int num_rows,
|
||||
Eigen::Map<ColMajorMatrix> m(lhs, num_rows, num_cols);
|
||||
qr_ = std::make_unique<QRType>(m);
|
||||
*message = "Success.";
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs,
|
||||
@@ -175,7 +175,7 @@ LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs,
|
||||
VectorRef(solution, qr_->cols()) =
|
||||
qr_->solve(ConstVectorRef(rhs, qr_->rows()));
|
||||
*message = "Success.";
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
#ifndef CERES_NO_LAPACK
|
||||
@@ -237,7 +237,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows,
|
||||
<< "Argument: " << -info << " is invalid.";
|
||||
}
|
||||
|
||||
termination_type_ = LINEAR_SOLVER_SUCCESS;
|
||||
termination_type_ = LinearSolverTerminationType::SUCCESS;
|
||||
*message = "Success.";
|
||||
return termination_type_;
|
||||
}
|
||||
@@ -245,7 +245,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows,
|
||||
LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) {
|
||||
if (termination_type_ != LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type_ != LinearSolverTerminationType::SUCCESS) {
|
||||
*message = "QR factorization failed and solve called.";
|
||||
return termination_type_;
|
||||
}
|
||||
@@ -298,10 +298,10 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
|
||||
*message =
|
||||
"QR factorization failure. The factorization is not full rank. R has "
|
||||
"zeros on the diagonal.";
|
||||
termination_type_ = LINEAR_SOLVER_FAILURE;
|
||||
termination_type_ = LinearSolverTerminationType::FAILURE;
|
||||
} else {
|
||||
std::copy_n(q_transpose_rhs_.data(), num_cols_, solution);
|
||||
termination_type_ = LINEAR_SOLVER_SUCCESS;
|
||||
termination_type_ = LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
return termination_type_;
|
||||
@@ -311,30 +311,26 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs,
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
|
||||
bool CUDADenseQR::Init(ContextImpl* context, std::string* message) {
|
||||
if (!context->InitCUDA(message)) {
|
||||
return false;
|
||||
}
|
||||
cublas_handle_ = context->cublas_handle_;
|
||||
cusolver_handle_ = context->cusolver_handle_;
|
||||
stream_ = context->stream_;
|
||||
error_.Reserve(1);
|
||||
*message = "CUDADenseQR::Init Success.";
|
||||
return true;
|
||||
}
|
||||
// Constructs the solver around an existing context. The context pointer is
// stored and also forwarded to every buffer member.
// NOTE(review): error_ is built with (context, 1), presumably a buffer sized
// for the single int info value that cuSOLVER writes - confirm against
// CudaBuffer.
CUDADenseQR::CUDADenseQR(ContextImpl* context)
|
||||
: context_(context),
|
||||
lhs_{context},
|
||||
rhs_{context},
|
||||
tau_{context},
|
||||
device_workspace_{context},
|
||||
error_(context, 1) {}
|
||||
|
||||
LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
|
||||
int num_cols,
|
||||
double* lhs,
|
||||
std::string* message) {
|
||||
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
factorize_result_ = LinearSolverTerminationType::FATAL_ERROR;
|
||||
lhs_.Reserve(num_rows * num_cols);
|
||||
tau_.Reserve(std::min(num_rows, num_cols));
|
||||
num_rows_ = num_rows;
|
||||
num_cols_ = num_cols;
|
||||
lhs_.CopyToGpuAsync(lhs, num_rows * num_cols, stream_);
|
||||
lhs_.CopyFromCpu(lhs, num_rows * num_cols);
|
||||
int device_workspace_size = 0;
|
||||
if (cusolverDnDgeqrf_bufferSize(cusolver_handle_,
|
||||
if (cusolverDnDgeqrf_bufferSize(context_->cusolver_handle_,
|
||||
num_rows,
|
||||
num_cols,
|
||||
lhs_.data(),
|
||||
@@ -342,10 +338,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
|
||||
&device_workspace_size) !=
|
||||
CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDgeqrf_bufferSize failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
device_workspace_.Reserve(device_workspace_size);
|
||||
if (cusolverDnDgeqrf(cusolver_handle_,
|
||||
if (cusolverDnDgeqrf(context_->cusolver_handle_,
|
||||
num_rows,
|
||||
num_cols,
|
||||
lhs_.data(),
|
||||
@@ -355,15 +351,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
|
||||
device_workspace_.size(),
|
||||
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDgeqrf failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
}
|
||||
if (cudaDeviceSynchronize() != cudaSuccess ||
|
||||
cudaStreamSynchronize(stream_) != cudaSuccess) {
|
||||
*message = "Cuda device synchronization failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
int error = 0;
|
||||
error_.CopyToHost(&error, 1);
|
||||
error_.CopyToCpu(&error, 1);
|
||||
if (error < 0) {
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres - "
|
||||
<< "please report it. "
|
||||
@@ -371,24 +362,24 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows,
|
||||
<< "Argument: " << -error << " is invalid.";
|
||||
// The following line is unreachable, but return failure just to be
|
||||
// pedantic, since the compiler does not know that.
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
*message = "Success";
|
||||
factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
factorize_result_ = LinearSolverTerminationType::SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
|
||||
double* solution,
|
||||
std::string* message) {
|
||||
if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) {
|
||||
*message = "Factorize did not complete succesfully previously.";
|
||||
if (factorize_result_ != LinearSolverTerminationType::SUCCESS) {
|
||||
*message = "Factorize did not complete successfully previously.";
|
||||
return factorize_result_;
|
||||
}
|
||||
rhs_.CopyToGpuAsync(rhs, num_rows_, stream_);
|
||||
rhs_.CopyFromCpu(rhs, num_rows_);
|
||||
int device_workspace_size = 0;
|
||||
if (cusolverDnDormqr_bufferSize(cusolver_handle_,
|
||||
if (cusolverDnDormqr_bufferSize(context_->cusolver_handle_,
|
||||
CUBLAS_SIDE_LEFT,
|
||||
CUBLAS_OP_T,
|
||||
num_rows_,
|
||||
@@ -402,12 +393,12 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
|
||||
&device_workspace_size) !=
|
||||
CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDormqr_bufferSize failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
device_workspace_.Reserve(device_workspace_size);
|
||||
// Compute rhs = Q^T * rhs, assuming that lhs has already been factorized.
|
||||
// The result of factorization would have stored Q in a packed form in lhs_.
|
||||
if (cusolverDnDormqr(cusolver_handle_,
|
||||
if (cusolverDnDormqr(context_->cusolver_handle_,
|
||||
CUBLAS_SIDE_LEFT,
|
||||
CUBLAS_OP_T,
|
||||
num_rows_,
|
||||
@@ -422,10 +413,10 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
|
||||
device_workspace_.size(),
|
||||
error_.data()) != CUSOLVER_STATUS_SUCCESS) {
|
||||
*message = "cuSolverDN::cusolverDnDormqr failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
int error = 0;
|
||||
error_.CopyToHost(&error, 1);
|
||||
error_.CopyToCpu(&error, 1);
|
||||
if (error < 0) {
|
||||
LOG(FATAL) << "Congratulations, you found a bug in Ceres. "
|
||||
<< "Please report it."
|
||||
@@ -434,7 +425,7 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
|
||||
}
|
||||
// Compute the solution vector as x = R \ (Q^T * rhs). Since the previous step
|
||||
// replaced rhs by (Q^T * rhs), this is just x = R \ rhs.
|
||||
if (cublasDtrsv(cublas_handle_,
|
||||
if (cublasDtrsv(context_->cublas_handle_,
|
||||
CUBLAS_FILL_MODE_UPPER,
|
||||
CUBLAS_OP_N,
|
||||
CUBLAS_DIAG_NON_UNIT,
|
||||
@@ -444,38 +435,22 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs,
|
||||
rhs_.data(),
|
||||
1) != CUBLAS_STATUS_SUCCESS) {
|
||||
*message = "cuBLAS::cublasDtrsv failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
if (cudaDeviceSynchronize() != cudaSuccess ||
|
||||
cudaStreamSynchronize(stream_) != cudaSuccess) {
|
||||
*message = "Cuda device synchronization failed.";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR;
|
||||
}
|
||||
rhs_.CopyToHost(solution, num_cols_);
|
||||
rhs_.CopyToCpu(solution, num_cols_);
|
||||
*message = "Success";
|
||||
return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
std::unique_ptr<CUDADenseQR> CUDADenseQR::Create(
|
||||
const LinearSolver::Options& options) {
|
||||
if (options.dense_linear_algebra_library_type != CUDA) {
|
||||
// The user called the wrong factory method.
|
||||
if (options.dense_linear_algebra_library_type != CUDA ||
|
||||
options.context == nullptr || !options.context->IsCudaInitialized()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cuda_dense_qr = std::unique_ptr<CUDADenseQR>(new CUDADenseQR());
|
||||
std::string cuda_error;
|
||||
if (cuda_dense_qr->Init(options.context, &cuda_error)) {
|
||||
return cuda_dense_qr;
|
||||
}
|
||||
// Initialization failed, destroy the object (done automatically) and return a
|
||||
// nullptr.
|
||||
LOG(ERROR) << "CUDADenseQR::Init failed: " << cuda_error;
|
||||
return nullptr;
|
||||
return std::unique_ptr<CUDADenseQR>(new CUDADenseQR(options.context));
|
||||
}
|
||||
|
||||
CUDADenseQR::CUDADenseQR() = default;
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
30
extern/ceres/internal/ceres/dense_qr.h
vendored
30
extern/ceres/internal/ceres/dense_qr.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
@@ -54,8 +55,7 @@
|
||||
#include "cusolverDn.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// An interface that abstracts away the internal details of various dense linear
|
||||
// algebra libraries and offers a simple API for solving dense linear systems
|
||||
@@ -92,7 +92,7 @@ class CERES_NO_EXPORT DenseQR {
|
||||
std::string* message) = 0;
|
||||
|
||||
// Convenience method which combines a call to Factorize and Solve. Solve is
|
||||
// only called if Factorize returns LINEAR_SOLVER_SUCCESS.
|
||||
// only called if Factorize returns LinearSolverTerminationType::SUCCESS.
|
||||
//
|
||||
// The input matrix lhs may be modified by the implementation to store the
|
||||
// factorization, irrespective of whether the method succeeds or not. It is
|
||||
@@ -136,7 +136,8 @@ class CERES_NO_EXPORT LAPACKDenseQR final : public DenseQR {
|
||||
double* lhs_ = nullptr;
|
||||
int num_rows_;
|
||||
int num_cols_;
|
||||
LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
LinearSolverTerminationType termination_type_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
Vector work_;
|
||||
Vector tau_;
|
||||
Vector q_transpose_rhs_;
|
||||
@@ -164,18 +165,9 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR {
|
||||
std::string* message) override;
|
||||
|
||||
private:
|
||||
CUDADenseQR();
|
||||
// Picks up the cuSolverDN, cuBLAS, and cuStream handles from the context. If
|
||||
// the context is unable to initialize CUDA, returns false with a
|
||||
// human-readable message indicating the reason.
|
||||
bool Init(ContextImpl* context, std::string* message);
|
||||
explicit CUDADenseQR(ContextImpl* context);
|
||||
|
||||
// Handle to the cuSOLVER context.
|
||||
cusolverDnHandle_t cusolver_handle_ = nullptr;
|
||||
// Handle to cuBLAS context.
|
||||
cublasHandle_t cublas_handle_ = nullptr;
|
||||
// CUDA device stream.
|
||||
cudaStream_t stream_ = nullptr;
|
||||
ContextImpl* context_ = nullptr;
|
||||
// Number of rows in the A matrix, to be cached between calls to *Factorize
|
||||
// and *Solve.
|
||||
size_t num_rows_ = 0;
|
||||
@@ -194,13 +186,13 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR {
|
||||
CudaBuffer<int> error_;
|
||||
// Cache the result of Factorize to ensure that when Solve is called, the
|
||||
// factorization of lhs is valid.
|
||||
LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR;
|
||||
LinearSolverTerminationType factorize_result_ =
|
||||
LinearSolverTerminationType::FATAL_ERROR;
|
||||
};
|
||||
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options)
|
||||
: options_(options), dense_qr_(DenseQR::Create(options)) {}
|
||||
@@ -81,5 +80,4 @@ LinearSolver::Summary DenseQRSolver::SolveImpl(
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class DenseSparseMatrix;
|
||||
|
||||
@@ -112,8 +111,7 @@ class CERES_NO_EXPORT DenseQRSolver final : public DenseSparseMatrixSolver {
|
||||
std::unique_ptr<DenseQR> dense_qr_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols)
|
||||
: m_(Matrix(num_rows, num_cols)) {}
|
||||
@@ -60,17 +59,31 @@ DenseSparseMatrix::DenseSparseMatrix(Matrix m) : m_(std::move(m)) {}
|
||||
|
||||
void DenseSparseMatrix::SetZero() { m_.setZero(); }
|
||||
|
||||
void DenseSparseMatrix::RightMultiply(const double* x, double* y) const {
|
||||
VectorRef(y, num_rows()) += matrix() * ConstVectorRef(x, num_cols());
|
||||
void DenseSparseMatrix::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
VectorRef(y, num_rows()).noalias() += m_ * ConstVectorRef(x, num_cols());
|
||||
}
|
||||
|
||||
void DenseSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
||||
VectorRef(y, num_cols()) +=
|
||||
matrix().transpose() * ConstVectorRef(x, num_rows());
|
||||
void DenseSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
VectorRef(y, num_cols()).noalias() +=
|
||||
m_.transpose() * ConstVectorRef(x, num_rows());
|
||||
}
|
||||
|
||||
void DenseSparseMatrix::SquaredColumnNorm(double* x) const {
|
||||
VectorRef(x, num_cols()) = m_.colwise().squaredNorm();
|
||||
// This implementation is 3x faster than the naive version
|
||||
// x = m_.colwise().square().sum(), likely because m_
|
||||
// is a row major matrix.
|
||||
|
||||
const int num_rows = m_.rows();
|
||||
const int num_cols = m_.cols();
|
||||
std::fill_n(x, num_cols, 0.0);
|
||||
const double* m = m_.data();
|
||||
for (int i = 0; i < num_rows; ++i) {
|
||||
for (int j = 0; j < num_cols; ++j, ++m) {
|
||||
x[j] += (*m) * (*m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DenseSparseMatrix::ScaleColumns(const double* scale) {
|
||||
@@ -100,5 +113,4 @@ void DenseSparseMatrix::ToTextFile(FILE* file) const {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class TripletSparseMatrix;
|
||||
|
||||
@@ -54,8 +53,8 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix {
|
||||
|
||||
// SparseMatrix interface.
|
||||
void SetZero() final;
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void LeftMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void SquaredColumnNorm(double* x) const final;
|
||||
void ScaleColumns(const double* scale) final;
|
||||
void ToDenseMatrix(Matrix* dense_matrix) const final;
|
||||
@@ -73,8 +72,7 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix {
|
||||
Matrix m_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,8 +33,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
void DetectStructure(const CompressedRowBlockStructure& bs,
|
||||
const int num_eliminate_blocks,
|
||||
@@ -119,5 +118,4 @@ void DetectStructure(const CompressedRowBlockStructure& bs,
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Detect static blocks in the problem sparsity. For rows containing
|
||||
// e_blocks, we are interested in detecting if the size of the row
|
||||
@@ -63,8 +62,7 @@ void CERES_NO_EXPORT DetectStructure(const CompressedRowBlockStructure& bs,
|
||||
int* e_block_size,
|
||||
int* f_block_size);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
44
extern/ceres/internal/ceres/dogleg_strategy.cc
vendored
44
extern/ceres/internal/ceres/dogleg_strategy.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,8 +44,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
const double kMaxMu = 1.0;
|
||||
const double kMinMu = 1e-8;
|
||||
@@ -101,7 +100,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
|
||||
}
|
||||
TrustRegionStrategy::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
return summary;
|
||||
}
|
||||
|
||||
@@ -138,11 +137,13 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
|
||||
summary.num_iterations = linear_solver_summary.num_iterations;
|
||||
summary.termination_type = linear_solver_summary.termination_type;
|
||||
|
||||
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
|
||||
if (linear_solver_summary.termination_type ==
|
||||
LinearSolverTerminationType::FATAL_ERROR) {
|
||||
return summary;
|
||||
}
|
||||
|
||||
if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) {
|
||||
if (linear_solver_summary.termination_type !=
|
||||
LinearSolverTerminationType::FAILURE) {
|
||||
switch (dogleg_type_) {
|
||||
// Interpolate the Cauchy point and the Gauss-Newton step.
|
||||
case TRADITIONAL_DOGLEG:
|
||||
@@ -153,7 +154,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
|
||||
// Cauchy point and the (Gauss-)Newton step.
|
||||
case SUBSPACE_DOGLEG:
|
||||
if (!ComputeSubspaceModel(jacobian)) {
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
break;
|
||||
}
|
||||
ComputeSubspaceDoglegStep(step);
|
||||
@@ -174,7 +175,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep(
|
||||
void DoglegStrategy::ComputeGradient(SparseMatrix* jacobian,
|
||||
const double* residuals) {
|
||||
gradient_.setZero();
|
||||
jacobian->LeftMultiply(residuals, gradient_.data());
|
||||
jacobian->LeftMultiplyAndAccumulate(residuals, gradient_.data());
|
||||
gradient_.array() /= diagonal_.array();
|
||||
}
|
||||
|
||||
@@ -187,7 +188,7 @@ void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) {
|
||||
// The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g))
|
||||
// instead of (J * D^-1) * (D^-1 * g).
|
||||
Vector scaled_gradient = (gradient_.array() / diagonal_.array()).matrix();
|
||||
jacobian->RightMultiply(scaled_gradient.data(), Jg.data());
|
||||
jacobian->RightMultiplyAndAccumulate(scaled_gradient.data(), Jg.data());
|
||||
alpha_ = gradient_.squaredNorm() / Jg.squaredNorm();
|
||||
}
|
||||
|
||||
@@ -518,7 +519,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
|
||||
const double* residuals) {
|
||||
const int n = jacobian->num_cols();
|
||||
LinearSolver::Summary linear_solver_summary;
|
||||
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
linear_solver_summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
|
||||
// The Jacobian matrix is often quite poorly conditioned. Thus it is
|
||||
// necessary to add a diagonal matrix at the bottom to prevent the
|
||||
@@ -531,7 +532,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
|
||||
// If the solve fails, the multiplier to the diagonal is increased
|
||||
// up to max_mu_ by a factor of mu_increase_factor_ every time. If
|
||||
// the linear solver is still not successful, the strategy returns
|
||||
// with LINEAR_SOLVER_FAILURE.
|
||||
// with LinearSolverTerminationType::FAILURE.
|
||||
//
|
||||
// Next time when a new Gauss-Newton step is requested, the
|
||||
// multiplier starts out from the last successful solve.
|
||||
@@ -582,21 +583,25 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep(
|
||||
}
|
||||
}
|
||||
|
||||
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
|
||||
if (linear_solver_summary.termination_type ==
|
||||
LinearSolverTerminationType::FATAL_ERROR) {
|
||||
return linear_solver_summary;
|
||||
}
|
||||
|
||||
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE ||
|
||||
if (linear_solver_summary.termination_type ==
|
||||
LinearSolverTerminationType::FAILURE ||
|
||||
!IsArrayValid(n, gauss_newton_step_.data())) {
|
||||
mu_ *= mu_increase_factor_;
|
||||
VLOG(2) << "Increasing mu " << mu_;
|
||||
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
linear_solver_summary.termination_type =
|
||||
LinearSolverTerminationType::FAILURE;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) {
|
||||
if (linear_solver_summary.termination_type !=
|
||||
LinearSolverTerminationType::FAILURE) {
|
||||
// The scaled Gauss-Newton step is D * GN:
|
||||
//
|
||||
// - (D^-1 J^T J D^-1)^-1 (D^-1 g)
|
||||
@@ -627,7 +632,7 @@ void DoglegStrategy::StepAccepted(double step_quality) {
|
||||
reuse_ = false;
|
||||
}
|
||||
|
||||
void DoglegStrategy::StepRejected(double step_quality) {
|
||||
void DoglegStrategy::StepRejected(double /*step_quality*/) {
|
||||
radius_ *= 0.5;
|
||||
reuse_ = true;
|
||||
}
|
||||
@@ -701,14 +706,13 @@ bool DoglegStrategy::ComputeSubspaceModel(SparseMatrix* jacobian) {
|
||||
|
||||
Vector tmp;
|
||||
tmp = (subspace_basis_.col(0).array() / diagonal_.array()).matrix();
|
||||
jacobian->RightMultiply(tmp.data(), Jb.row(0).data());
|
||||
jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(0).data());
|
||||
tmp = (subspace_basis_.col(1).array() / diagonal_.array()).matrix();
|
||||
jacobian->RightMultiply(tmp.data(), Jb.row(1).data());
|
||||
jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(1).data());
|
||||
|
||||
subspace_B_ = Jb * Jb.transpose();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,7 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Dogleg step computation and trust region sizing strategy based on
|
||||
// "Methods for Nonlinear Least Squares" by K. Madsen, H.B. Nielsen
|
||||
@@ -159,8 +158,7 @@ class CERES_NO_EXPORT DoglegStrategy final : public TrustRegionStrategy {
|
||||
Matrix2d subspace_B_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -28,15 +28,14 @@
|
||||
//
|
||||
// Author: richie.stebbing@gmail.com (Richard Stebbing)
|
||||
|
||||
#ifndef CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_
|
||||
#define CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_
|
||||
#ifndef CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_
|
||||
#define CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_
|
||||
|
||||
#include "ceres/casts.h"
|
||||
#include "ceres/dynamic_compressed_row_sparse_matrix.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer {
|
||||
void operator()(SparseMatrix* base_jacobian, int num_parameters) {
|
||||
@@ -46,7 +45,6 @@ struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALISER_H_
|
||||
#endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "ceres/dynamic_compressed_row_jacobian_writer.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/casts.h"
|
||||
#include "ceres/compressed_row_jacobian_writer.h"
|
||||
@@ -39,11 +41,7 @@
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/residual_block.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<ScratchEvaluatePreparer[]>
|
||||
DynamicCompressedRowJacobianWriter::CreateEvaluatePreparers(int num_threads) {
|
||||
@@ -69,7 +67,7 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id,
|
||||
program_->residual_blocks()[residual_id];
|
||||
const int num_residuals = residual_block->NumResiduals();
|
||||
|
||||
vector<pair<int, int>> evaluated_jacobian_blocks;
|
||||
std::vector<std::pair<int, int>> evaluated_jacobian_blocks;
|
||||
CompressedRowJacobianWriter::GetOrderedParameterBlocks(
|
||||
program_, residual_id, &evaluated_jacobian_blocks);
|
||||
|
||||
@@ -100,5 +98,4 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/scratch_evaluate_preparer.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
class SparseMatrix;
|
||||
@@ -68,7 +67,7 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter {
|
||||
|
||||
// Write only the non-zero jacobian entries for a residual block
|
||||
// (specified by `residual_id`) into `base_jacobian`, starting at the row
|
||||
// specifed by `residual_offset`.
|
||||
// specified by `residual_offset`.
|
||||
//
|
||||
// This method is thread-safe over residual blocks (each `residual_id`).
|
||||
void Write(int residual_id,
|
||||
@@ -80,7 +79,6 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter {
|
||||
Program* program_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_JACOBIAN_WRITER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
#include <cstring>
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DynamicCompressedRowSparseMatrix::DynamicCompressedRowSparseMatrix(
|
||||
int num_rows, int num_cols, int initial_max_num_nonzeros)
|
||||
@@ -99,5 +98,4 @@ void DynamicCompressedRowSparseMatrix::Finalize(int num_additional_elements) {
|
||||
<< "the number of jacobian nonzeros. Please contact the developers!";
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,13 +47,12 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
|
||||
: public CompressedRowSparseMatrix {
|
||||
public:
|
||||
// Set the number of rows and columns for the underlyig
|
||||
// Set the number of rows and columns for the underlying
|
||||
// `CompressedRowSparseMatrix` and set the initial number of maximum non-zero
|
||||
// entries. Note that following the insertion of entries, when `Finalize`
|
||||
// is called the number of non-zeros is determined and all internal
|
||||
@@ -74,7 +73,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
|
||||
|
||||
// Insert an entry at a given row and column position. This method is
|
||||
// thread-safe across rows i.e. different threads can insert values
|
||||
// simultaneously into different rows. It should be emphasised that this
|
||||
// simultaneously into different rows. It should be emphasized that this
|
||||
// method always inserts a new entry and does not check for existing
|
||||
// entries at the specified row and column position. Duplicate entries
|
||||
// for a given row and column position will result in undefined
|
||||
@@ -98,8 +97,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final
|
||||
std::vector<std::vector<double>> dynamic_values_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,7 +39,6 @@
|
||||
|
||||
#include "Eigen/SparseCore"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/cxsparse.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
@@ -52,8 +51,7 @@
|
||||
#include "Eigen/SparseCholesky"
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
DynamicSparseNormalCholeskySolver::DynamicSparseNormalCholeskySolver(
|
||||
LinearSolver::Options options)
|
||||
@@ -66,7 +64,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl(
|
||||
double* x) {
|
||||
const int num_cols = A->num_cols();
|
||||
VectorRef(x, num_cols).setZero();
|
||||
A->LeftMultiply(b, x);
|
||||
A->LeftMultiplyAndAccumulate(b, x);
|
||||
|
||||
if (per_solve_options.D != nullptr) {
|
||||
// Temporarily append a diagonal block to the A matrix, but undo
|
||||
@@ -87,9 +85,6 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl(
|
||||
case SUITE_SPARSE:
|
||||
summary = SolveImplUsingSuiteSparse(A, x);
|
||||
break;
|
||||
case CX_SPARSE:
|
||||
summary = SolveImplUsingCXSparse(A, x);
|
||||
break;
|
||||
case EIGEN_SPARSE:
|
||||
summary = SolveImplUsingEigen(A, x);
|
||||
break;
|
||||
@@ -113,7 +108,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
|
||||
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
|
||||
summary.message =
|
||||
"SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE "
|
||||
"because Ceres was not built with support for "
|
||||
@@ -138,7 +133,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 1;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Success.";
|
||||
|
||||
solver.analyzePattern(lhs);
|
||||
@@ -150,7 +145,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
|
||||
event_logger.AddEvent("Analyze");
|
||||
if (solver.info() != Eigen::Success) {
|
||||
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
|
||||
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
|
||||
summary.message = "Eigen failure. Unable to find symbolic factorization.";
|
||||
return summary;
|
||||
}
|
||||
@@ -158,7 +153,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
solver.factorize(lhs);
|
||||
event_logger.AddEvent("Factorize");
|
||||
if (solver.info() != Eigen::Success) {
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = "Eigen failure. Unable to find numeric factorization.";
|
||||
return summary;
|
||||
}
|
||||
@@ -167,7 +162,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
VectorRef(rhs_and_solution, lhs.cols()) = solver.solve(rhs);
|
||||
event_logger.AddEvent("Solve");
|
||||
if (solver.info() != Eigen::Success) {
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = "Eigen failure. Unable to do triangular solve.";
|
||||
return summary;
|
||||
}
|
||||
@@ -176,66 +171,16 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen(
|
||||
#endif // CERES_USE_EIGEN_SPARSE
|
||||
}
|
||||
|
||||
LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingCXSparse(
|
||||
CompressedRowSparseMatrix* A, double* rhs_and_solution) {
|
||||
#ifdef CERES_NO_CXSPARSE
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
|
||||
summary.message =
|
||||
"SPARSE_NORMAL_CHOLESKY cannot be used with CX_SPARSE "
|
||||
"because Ceres was not built with support for CXSparse. "
|
||||
"This requires enabling building with -DCXSPARSE=ON.";
|
||||
|
||||
return summary;
|
||||
|
||||
#else
|
||||
EventLogger event_logger(
|
||||
"DynamicSparseNormalCholeskySolver::CXSparse::Solve");
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 1;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.message = "Success.";
|
||||
|
||||
CXSparse cxsparse;
|
||||
|
||||
// Wrap the augmented Jacobian in a compressed sparse column matrix.
|
||||
cs_di a_transpose = cxsparse.CreateSparseMatrixTransposeView(A);
|
||||
|
||||
// Compute the normal equations. J'J delta = J'f and solve them
|
||||
// using a sparse Cholesky factorization. Notice that when compared
|
||||
// to SuiteSparse we have to explicitly compute the transpose of Jt,
|
||||
// and then the normal equations before they can be
|
||||
// factorized. CHOLMOD/SuiteSparse on the other hand can just work
|
||||
// off of Jt to compute the Cholesky factorization of the normal
|
||||
// equations.
|
||||
cs_di* a = cxsparse.TransposeMatrix(&a_transpose);
|
||||
cs_di* lhs = cxsparse.MatrixMatrixMultiply(&a_transpose, a);
|
||||
cxsparse.Free(a);
|
||||
event_logger.AddEvent("NormalEquations");
|
||||
|
||||
if (!cxsparse.SolveCholesky(lhs, rhs_and_solution)) {
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.message = "CXSparse::SolveCholesky failed";
|
||||
}
|
||||
event_logger.AddEvent("Solve");
|
||||
|
||||
cxsparse.Free(lhs);
|
||||
event_logger.AddEvent("TearDown");
|
||||
return summary;
|
||||
#endif
|
||||
}
|
||||
|
||||
LinearSolver::Summary
|
||||
DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
CompressedRowSparseMatrix* A, double* rhs_and_solution) {
|
||||
#ifdef CERES_NO_SUITESPARSE
|
||||
(void)A;
|
||||
(void)rhs_and_solution;
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
|
||||
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
|
||||
summary.message =
|
||||
"SPARSE_NORMAL_CHOLESKY cannot be used with SUITE_SPARSE "
|
||||
"because Ceres was not built with support for SuiteSparse. "
|
||||
@@ -247,7 +192,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
EventLogger event_logger(
|
||||
"DynamicSparseNormalCholeskySolver::SuiteSparse::Solve");
|
||||
LinearSolver::Summary summary;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.num_iterations = 1;
|
||||
summary.message = "Success.";
|
||||
|
||||
@@ -255,16 +200,17 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
const int num_cols = A->num_cols();
|
||||
cholmod_sparse lhs = ss.CreateSparseMatrixTransposeView(A);
|
||||
event_logger.AddEvent("Setup");
|
||||
cholmod_factor* factor = ss.AnalyzeCholesky(&lhs, &summary.message);
|
||||
cholmod_factor* factor =
|
||||
ss.AnalyzeCholesky(&lhs, options_.ordering_type, &summary.message);
|
||||
event_logger.AddEvent("Analysis");
|
||||
|
||||
if (factor == nullptr) {
|
||||
summary.termination_type = LINEAR_SOLVER_FATAL_ERROR;
|
||||
summary.termination_type = LinearSolverTerminationType::FATAL_ERROR;
|
||||
return summary;
|
||||
}
|
||||
|
||||
summary.termination_type = ss.Cholesky(&lhs, factor, &summary.message);
|
||||
if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
|
||||
if (summary.termination_type == LinearSolverTerminationType::SUCCESS) {
|
||||
cholmod_dense cholmod_rhs =
|
||||
ss.CreateDenseVectorView(rhs_and_solution, num_cols);
|
||||
cholmod_dense* solution = ss.Solve(factor, &cholmod_rhs, &summary.message);
|
||||
@@ -274,7 +220,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
rhs_and_solution, solution->x, num_cols * sizeof(*rhs_and_solution));
|
||||
ss.Free(solution);
|
||||
} else {
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,5 +231,4 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,8 +42,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
|
||||
@@ -77,7 +76,6 @@ class CERES_NO_EXPORT DynamicSparseNormalCholeskySolver
|
||||
const LinearSolver::Options options_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_DYNAMIC_SPARSE_NORMAL_CHOLESKY_SOLVER_H_
|
||||
|
||||
105
extern/ceres/internal/ceres/eigen_vector_ops.h
vendored
Normal file
105
extern/ceres/internal/ceres/eigen_vector_ops.h
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
|
||||
#define CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/fixed_array.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Blas1 operations on Eigen vectors. These functions are needed as an
|
||||
// abstraction layer so that we can use different versions of a vector style
|
||||
// object in the conjugate gradients linear solver.
|
||||
template <typename Derived>
|
||||
inline double Norm(const Eigen::DenseBase<Derived>& x,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
FixedArray<double> norms(num_threads, 0.);
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
x.rows(),
|
||||
num_threads,
|
||||
[&x, &norms](int thread_id, std::tuple<int, int> range) {
|
||||
auto [start, end] = range;
|
||||
norms[thread_id] += x.segment(start, end - start).squaredNorm();
|
||||
},
|
||||
kMinBlockSizeParallelVectorOps);
|
||||
return std::sqrt(std::accumulate(norms.begin(), norms.end(), 0.));
|
||||
}
|
||||
// Zeroes x by forwarding it, along with the execution context and thread
// count, to ParallelSetZero.
inline void SetZero(Vector& x, ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, x);
}
|
||||
inline void Axpby(double a,
|
||||
const Vector& x,
|
||||
double b,
|
||||
const Vector& y,
|
||||
Vector& z,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
ParallelAssign(context, num_threads, z, a * x + b * y);
|
||||
}
|
||||
template <typename VectorLikeX, typename VectorLikeY>
|
||||
inline double Dot(const VectorLikeX& x,
|
||||
const VectorLikeY& y,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
FixedArray<double> dots(num_threads, 0.);
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
x.rows(),
|
||||
num_threads,
|
||||
[&x, &y, &dots](int thread_id, std::tuple<int, int> range) {
|
||||
auto [start, end] = range;
|
||||
const int block_size = end - start;
|
||||
const auto& x_block = x.segment(start, block_size);
|
||||
const auto& y_block = y.segment(start, block_size);
|
||||
dots[thread_id] += x_block.dot(y_block);
|
||||
},
|
||||
kMinBlockSizeParallelVectorOps);
|
||||
return std::accumulate(dots.begin(), dots.end(), 0.);
|
||||
}
|
||||
inline void Copy(const Vector& from,
|
||||
Vector& to,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
ParallelAssign(context, num_threads, to, from);
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_EIGEN_VECTOR_OPS_H_
|
||||
100
extern/ceres/internal/ceres/eigensparse.cc
vendored
100
extern/ceres/internal/ceres/eigensparse.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,22 +36,25 @@
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
#include <iostream> // This is needed because MetisSupport depends on iostream.
|
||||
|
||||
#include "Eigen/MetisSupport"
|
||||
#endif
|
||||
|
||||
#include "Eigen/SparseCholesky"
|
||||
#include "Eigen/SparseCore"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// TODO(sameeragarwal): Use enable_if to clean up the implementations
|
||||
// for when Scalar == double.
|
||||
template <typename Solver>
|
||||
class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
public:
|
||||
EigenSparseCholeskyTemplate() = default;
|
||||
CompressedRowSparseMatrix::StorageType StorageType() const final {
|
||||
return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
|
||||
return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType Factorize(
|
||||
@@ -68,7 +71,7 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
|
||||
if (solver_.info() != Eigen::Success) {
|
||||
*message = "Eigen failure. Unable to find symbolic factorization.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
analyzed_ = true;
|
||||
@@ -77,9 +80,9 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
solver_.factorize(lhs);
|
||||
if (solver_.info() != Eigen::Success) {
|
||||
*message = "Eigen failure. Unable to find numeric factorization.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType Solve(const double* rhs_ptr,
|
||||
@@ -87,23 +90,23 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
std::string* message) override {
|
||||
CHECK(analyzed_) << "Solve called without a call to Factorize first.";
|
||||
|
||||
scalar_rhs_ = ConstVectorRef(rhs_ptr, solver_.cols())
|
||||
.template cast<typename Solver::Scalar>();
|
||||
|
||||
// The two casts are needed if the Scalar in this class is not
|
||||
// double. For code simplicity we are going to assume that Eigen
|
||||
// is smart enough to figure out that casting a double Vector to a
|
||||
// double Vector is a straight copy. If this turns into a
|
||||
// performance bottleneck (unlikely), we can revisit this.
|
||||
scalar_solution_ = solver_.solve(scalar_rhs_);
|
||||
VectorRef(solution_ptr, solver_.cols()) =
|
||||
scalar_solution_.template cast<double>();
|
||||
// Avoid copying when the scalar type is double
|
||||
if constexpr (std::is_same_v<typename Solver::Scalar, double>) {
|
||||
ConstVectorRef scalar_rhs(rhs_ptr, solver_.cols());
|
||||
VectorRef(solution_ptr, solver_.cols()) = solver_.solve(scalar_rhs);
|
||||
} else {
|
||||
auto scalar_rhs = ConstVectorRef(rhs_ptr, solver_.cols())
|
||||
.template cast<typename Solver::Scalar>();
|
||||
auto scalar_solution = solver_.solve(scalar_rhs);
|
||||
VectorRef(solution_ptr, solver_.cols()) =
|
||||
scalar_solution.template cast<double>();
|
||||
}
|
||||
|
||||
if (solver_.info() != Eigen::Success) {
|
||||
*message = "Eigen failure. Unable to do triangular solve.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs,
|
||||
@@ -111,9 +114,8 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
CHECK_EQ(lhs->storage_type(), StorageType());
|
||||
|
||||
typename Solver::Scalar* values_ptr = nullptr;
|
||||
if (std::is_same<typename Solver::Scalar, double>::value) {
|
||||
values_ptr =
|
||||
reinterpret_cast<typename Solver::Scalar*>(lhs->mutable_values());
|
||||
if constexpr (std::is_same_v<typename Solver::Scalar, double>) {
|
||||
values_ptr = lhs->mutable_values();
|
||||
} else {
|
||||
// In the case where the scalar used in this class is not
|
||||
// double. In that case, make a copy of the values array in the
|
||||
@@ -123,19 +125,20 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky {
|
||||
values_ptr = values_.data();
|
||||
}
|
||||
|
||||
Eigen::Map<Eigen::SparseMatrix<typename Solver::Scalar, Eigen::ColMajor>>
|
||||
Eigen::Map<
|
||||
const Eigen::SparseMatrix<typename Solver::Scalar, Eigen::ColMajor>>
|
||||
eigen_lhs(lhs->num_rows(),
|
||||
lhs->num_rows(),
|
||||
lhs->num_nonzeros(),
|
||||
lhs->mutable_rows(),
|
||||
lhs->mutable_cols(),
|
||||
lhs->rows(),
|
||||
lhs->cols(),
|
||||
values_ptr);
|
||||
return Factorize(eigen_lhs, message);
|
||||
}
|
||||
|
||||
private:
|
||||
Eigen::Matrix<typename Solver::Scalar, Eigen::Dynamic, 1> values_,
|
||||
scalar_rhs_, scalar_solution_;
|
||||
Eigen::Matrix<typename Solver::Scalar, Eigen::Dynamic, 1> values_;
|
||||
|
||||
bool analyzed_{false};
|
||||
Solver solver_;
|
||||
};
|
||||
@@ -150,11 +153,22 @@ std::unique_ptr<SparseCholesky> EigenSparseCholesky::Create(
|
||||
Eigen::Upper,
|
||||
Eigen::NaturalOrdering<int>>;
|
||||
|
||||
if (ordering_type == AMD) {
|
||||
if (ordering_type == OrderingType::AMD) {
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithAMDOrdering>>();
|
||||
} else {
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
|
||||
} else if (ordering_type == OrderingType::NESDIS) {
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
using WithMetisOrdering = Eigen::SimplicialLDLT<Eigen::SparseMatrix<double>,
|
||||
Eigen::Upper,
|
||||
Eigen::MetisOrdering<int>>;
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithMetisOrdering>>();
|
||||
#else
|
||||
LOG(FATAL)
|
||||
<< "Congratulations you have found a bug in Ceres Solver. Please "
|
||||
"report it to the Ceres Solver developers.";
|
||||
return nullptr;
|
||||
#endif // CERES_NO_EIGEN_METIS
|
||||
}
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
|
||||
}
|
||||
|
||||
EigenSparseCholesky::~EigenSparseCholesky() = default;
|
||||
@@ -168,16 +182,26 @@ std::unique_ptr<SparseCholesky> FloatEigenSparseCholesky::Create(
|
||||
Eigen::SimplicialLDLT<Eigen::SparseMatrix<float>,
|
||||
Eigen::Upper,
|
||||
Eigen::NaturalOrdering<int>>;
|
||||
if (ordering_type == AMD) {
|
||||
if (ordering_type == OrderingType::AMD) {
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithAMDOrdering>>();
|
||||
} else {
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
|
||||
} else if (ordering_type == OrderingType::NESDIS) {
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
using WithMetisOrdering = Eigen::SimplicialLDLT<Eigen::SparseMatrix<float>,
|
||||
Eigen::Upper,
|
||||
Eigen::MetisOrdering<int>>;
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithMetisOrdering>>();
|
||||
#else
|
||||
LOG(FATAL)
|
||||
<< "Congratulations you have found a bug in Ceres Solver. Please "
|
||||
"report it to the Ceres Solver developers.";
|
||||
return nullptr;
|
||||
#endif // CERES_NO_EIGEN_METIS
|
||||
}
|
||||
return std::make_unique<EigenSparseCholeskyTemplate<WithNaturalOrdering>>();
|
||||
}
|
||||
|
||||
FloatEigenSparseCholesky::~FloatEigenSparseCholesky() = default;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_USE_EIGEN_SPARSE
|
||||
|
||||
30
extern/ceres/internal/ceres/eigensparse.h
vendored
30
extern/ceres/internal/ceres/eigensparse.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,8 +46,18 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Compile-time capability query for the Eigen sparse backend.
class EigenSparse {
 public:
  // Returns true unless CERES_NO_EIGEN_METIS is defined, in which case it
  // returns false. Usable in constant expressions and never throws.
  static constexpr bool IsNestedDissectionAvailable() noexcept {
#ifndef CERES_NO_EIGEN_METIS
    return true;
#else
    return false;
#endif
  }
};
|
||||
|
||||
class CERES_NO_EXPORT EigenSparseCholesky : public SparseCholesky {
|
||||
public:
|
||||
@@ -83,8 +93,18 @@ class CERES_NO_EXPORT FloatEigenSparseCholesky : public SparseCholesky {
|
||||
std::string* message) override = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#else
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Fallback definition used when CERES_USE_EIGEN_SPARSE is not defined: it
// exposes the same query as the Eigen-enabled variant, but the answer is
// always negative.
class EigenSparse {
 public:
  // Always false in this configuration. Usable in constant expressions and
  // never throws.
  static constexpr bool IsNestedDissectionAvailable() noexcept {
    return false;
  }
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_USE_EIGEN_SPARSE
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
23
extern/ceres/internal/ceres/evaluator.cc
vendored
23
extern/ceres/internal/ceres/evaluator.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,8 +46,7 @@
|
||||
#include "ceres/scratch_evaluate_preparer.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
Evaluator::~Evaluator() = default;
|
||||
|
||||
@@ -65,10 +64,17 @@ std::unique_ptr<Evaluator> Evaluator::Create(const Evaluator::Options& options,
|
||||
case DENSE_SCHUR:
|
||||
case SPARSE_SCHUR:
|
||||
case ITERATIVE_SCHUR:
|
||||
case CGNR:
|
||||
return std::make_unique<
|
||||
ProgramEvaluator<BlockEvaluatePreparer, BlockJacobianWriter>>(
|
||||
options, program);
|
||||
case CGNR: {
|
||||
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
|
||||
return std::make_unique<ProgramEvaluator<ScratchEvaluatePreparer,
|
||||
CompressedRowJacobianWriter>>(
|
||||
options, program);
|
||||
} else {
|
||||
return std::make_unique<
|
||||
ProgramEvaluator<BlockEvaluatePreparer, BlockJacobianWriter>>(
|
||||
options, program);
|
||||
}
|
||||
}
|
||||
case SPARSE_NORMAL_CHOLESKY:
|
||||
if (options.dynamic_sparsity) {
|
||||
return std::make_unique<
|
||||
@@ -88,5 +94,4 @@ std::unique_ptr<Evaluator> Evaluator::Create(const Evaluator::Options& options,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
4
extern/ceres/internal/ceres/evaluator.h
vendored
4
extern/ceres/internal/ceres/evaluator.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -65,6 +65,8 @@ class CERES_NO_EXPORT Evaluator {
|
||||
int num_threads = 1;
|
||||
int num_eliminate_blocks = -1;
|
||||
LinearSolverType linear_solver_type = DENSE_QR;
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
|
||||
NO_SPARSE;
|
||||
bool dynamic_sparsity = false;
|
||||
ContextImpl* context = nullptr;
|
||||
EvaluationCallback* evaluation_callback = nullptr;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct CallStatistics {
|
||||
CallStatistics() = default;
|
||||
@@ -85,7 +84,6 @@ class ScopedExecutionTimer {
|
||||
ExecutionSummary* summary_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_EXECUTION_SUMMARY_H_
|
||||
|
||||
120
extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.cc
vendored
Normal file
120
extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.cc
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
|
||||
|
||||
#include "ceres/fake_bundle_adjustment_jacobian.h"
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
|
||||
int num_cameras,
|
||||
int num_points,
|
||||
int camera_size,
|
||||
int point_size,
|
||||
double visibility,
|
||||
std::mt19937& prng) {
|
||||
constexpr int kResidualSize = 2;
|
||||
|
||||
CompressedRowBlockStructure* bs = new CompressedRowBlockStructure;
|
||||
int c = 0;
|
||||
// Add column blocks for each point
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
bs->cols.push_back(Block(point_size, c));
|
||||
c += point_size;
|
||||
}
|
||||
|
||||
// Add column blocks for each camera.
|
||||
for (int i = 0; i < num_cameras; ++i) {
|
||||
bs->cols.push_back(Block(camera_size, c));
|
||||
c += camera_size;
|
||||
}
|
||||
|
||||
std::bernoulli_distribution visibility_distribution(visibility);
|
||||
int row_pos = 0;
|
||||
int cell_pos = 0;
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
for (int j = 0; j < num_cameras; ++j) {
|
||||
if (!visibility_distribution(prng)) {
|
||||
continue;
|
||||
}
|
||||
bs->rows.emplace_back();
|
||||
auto& row = bs->rows.back();
|
||||
row.block.position = row_pos;
|
||||
row.block.size = kResidualSize;
|
||||
auto& cells = row.cells;
|
||||
cells.resize(2);
|
||||
|
||||
cells[0].block_id = i;
|
||||
cells[0].position = cell_pos;
|
||||
cell_pos += kResidualSize * point_size;
|
||||
|
||||
cells[1].block_id = num_points + j;
|
||||
cells[1].position = cell_pos;
|
||||
cell_pos += kResidualSize * camera_size;
|
||||
|
||||
row_pos += kResidualSize;
|
||||
}
|
||||
}
|
||||
|
||||
auto jacobian = std::make_unique<BlockSparseMatrix>(bs);
|
||||
VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom();
|
||||
return jacobian;
|
||||
}
|
||||
|
||||
std::pair<
|
||||
std::unique_ptr<PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>>,
|
||||
std::unique_ptr<BlockSparseMatrix>>
|
||||
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
|
||||
int num_points,
|
||||
int camera_size,
|
||||
int landmark_size,
|
||||
double visibility,
|
||||
std::mt19937& rng) {
|
||||
using PartitionedView =
|
||||
PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>;
|
||||
auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian(
|
||||
num_cameras, num_points, camera_size, landmark_size, visibility, rng);
|
||||
LinearSolver::Options options;
|
||||
options.elimination_groups.push_back(num_points);
|
||||
auto partitioned_view =
|
||||
std::make_unique<PartitionedView>(options, *block_sparse_matrix);
|
||||
return std::make_pair(std::move(partitioned_view),
|
||||
std::move(block_sparse_matrix));
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
78
extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.h
vendored
Normal file
78
extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.h
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
|
||||
#define CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
|
||||
|
||||
#include <memory>
#include <random>
#include <utility>

#include "ceres/block_sparse_matrix.h"
#include "ceres/linear_solver.h"
#include "ceres/partitioned_matrix_view.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
std::unique_ptr<BlockSparseMatrix> CreateFakeBundleAdjustmentJacobian(
|
||||
int num_cameras,
|
||||
int num_points,
|
||||
int camera_size,
|
||||
int point_size,
|
||||
double visibility,
|
||||
std::mt19937& prng);
|
||||
|
||||
template <int kEBlockSize = 3, int kFBlockSize = 6>
|
||||
std::pair<std::unique_ptr<PartitionedMatrixView<2, kEBlockSize, kFBlockSize>>,
|
||||
std::unique_ptr<BlockSparseMatrix>>
|
||||
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
|
||||
int num_points,
|
||||
double visibility,
|
||||
std::mt19937& rng) {
|
||||
using PartitionedView = PartitionedMatrixView<2, kEBlockSize, kFBlockSize>;
|
||||
auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian(
|
||||
num_cameras, num_points, kFBlockSize, kEBlockSize, visibility, rng);
|
||||
auto partitioned_view =
|
||||
std::make_unique<PartitionedView>(*block_sparse_matrix, num_points);
|
||||
return std::make_pair(std::move(partitioned_view),
|
||||
std::move(block_sparse_matrix));
|
||||
}
|
||||
|
||||
std::pair<
|
||||
std::unique_ptr<PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>>,
|
||||
std::unique_ptr<BlockSparseMatrix>>
|
||||
CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras,
|
||||
int num_points,
|
||||
int camera_size,
|
||||
int landmark_size,
|
||||
double visibility,
|
||||
std::mt19937& rng);
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN
|
||||
24
extern/ceres/internal/ceres/file.cc
vendored
24
extern/ceres/internal/ceres/file.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,15 +33,14 @@
|
||||
#include "ceres/file.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
using std::string;
|
||||
|
||||
void WriteStringToFileOrDie(const string& data, const string& filename) {
|
||||
void WriteStringToFileOrDie(const std::string& data,
|
||||
const std::string& filename) {
|
||||
FILE* file_descriptor = fopen(filename.c_str(), "wb");
|
||||
if (!file_descriptor) {
|
||||
LOG(FATAL) << "Couldn't write to file: " << filename;
|
||||
@@ -50,7 +49,7 @@ void WriteStringToFileOrDie(const string& data, const string& filename) {
|
||||
fclose(file_descriptor);
|
||||
}
|
||||
|
||||
void ReadFileToStringOrDie(const string& filename, string* data) {
|
||||
void ReadFileToStringOrDie(const std::string& filename, std::string* data) {
|
||||
FILE* file_descriptor = fopen(filename.c_str(), "r");
|
||||
|
||||
if (!file_descriptor) {
|
||||
@@ -59,12 +58,12 @@ void ReadFileToStringOrDie(const string& filename, string* data) {
|
||||
|
||||
// Resize the input buffer appropriately.
|
||||
fseek(file_descriptor, 0L, SEEK_END);
|
||||
int num_bytes = ftell(file_descriptor);
|
||||
int64_t num_bytes = ftell(file_descriptor);
|
||||
data->resize(num_bytes);
|
||||
|
||||
// Read the data.
|
||||
fseek(file_descriptor, 0L, SEEK_SET);
|
||||
int num_read =
|
||||
int64_t num_read =
|
||||
fread(&((*data)[0]), sizeof((*data)[0]), num_bytes, file_descriptor);
|
||||
if (num_read != num_bytes) {
|
||||
LOG(FATAL) << "Couldn't read all of " << filename
|
||||
@@ -74,7 +73,7 @@ void ReadFileToStringOrDie(const string& filename, string* data) {
|
||||
fclose(file_descriptor);
|
||||
}
|
||||
|
||||
string JoinPath(const string& dirname, const string& basename) {
|
||||
std::string JoinPath(const std::string& dirname, const std::string& basename) {
|
||||
#ifdef _WIN32
|
||||
static const char separator = '\\';
|
||||
#else
|
||||
@@ -86,9 +85,8 @@ string JoinPath(const string& dirname, const string& basename) {
|
||||
} else if (dirname[dirname.size() - 1] == separator) {
|
||||
return dirname + basename;
|
||||
} else {
|
||||
return dirname + string(&separator, 1) + basename;
|
||||
return dirname + std::string(&separator, 1) + basename;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/file.h
vendored
8
extern/ceres/internal/ceres/file.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
CERES_NO_EXPORT
|
||||
void WriteStringToFileOrDie(const std::string& data,
|
||||
@@ -52,8 +51,7 @@ void ReadFileToStringOrDie(const std::string& filename, std::string* data);
|
||||
CERES_NO_EXPORT
|
||||
std::string JoinPath(const std::string& dirname, const std::string& basename);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,8 +34,7 @@
|
||||
|
||||
#if !defined(CERES_NO_SUITESPARSE)
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<SparseCholesky> FloatSuiteSparseCholesky::Create(
|
||||
OrderingType ordering_type) {
|
||||
@@ -43,7 +42,6 @@ std::unique_ptr<SparseCholesky> FloatSuiteSparseCholesky::Create(
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // !defined(CERES_NO_SUITESPARSE)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
|
||||
#if !defined(CERES_NO_SUITESPARSE)
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Fake implementation of a single precision Sparse Cholesky using
|
||||
// SuiteSparse.
|
||||
@@ -53,8 +52,7 @@ class CERES_NO_EXPORT FloatSuiteSparseCholesky : public SparseCholesky {
|
||||
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // !defined(CERES_NO_SUITESPARSE)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
#include "ceres/stringprintf.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
FunctionSample::FunctionSample()
|
||||
: x(0.0),
|
||||
@@ -75,5 +74,4 @@ std::string FunctionSample::ToDebugString() const {
|
||||
gradient_is_valid);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
10
extern/ceres/internal/ceres/function_sample.h
vendored
10
extern/ceres/internal/ceres/function_sample.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// FunctionSample is used by the line search routines to store and
|
||||
// communicate the value and (optionally) the gradient of the function
|
||||
@@ -83,13 +82,12 @@ struct CERES_NO_EXPORT FunctionSample {
|
||||
//
|
||||
// where d is the search direction.
|
||||
double gradient;
|
||||
// True if the evaluation of the gradient was sucessful and the
|
||||
// True if the evaluation of the gradient was successful and the
|
||||
// value is a finite number.
|
||||
bool gradient_is_valid;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
305
extern/ceres/internal/ceres/generate_bundle_adjustment_tests.py
vendored
Normal file
305
extern/ceres/internal/ceres/generate_bundle_adjustment_tests.py
vendored
Normal file
@@ -0,0 +1,305 @@
|
||||
# Ceres Solver - A fast non-linear least squares minimizer
|
||||
# Copyright 2023 Google Inc. All rights reserved.
|
||||
# http://ceres-solver.org/
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
# used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: keir@google.com (Keir Mierle)
|
||||
#
|
||||
# Generate bundle adjustment tests as separate binaries. Since the bundle
|
||||
# adjustment tests are fairly processing intensive, serializing them makes the
|
||||
# tests take forever to run. Splitting them into separate binaries makes it
|
||||
# easier to parallelize in continuous integration systems, and makes local
|
||||
# processing on multi-core workstations much faster.
|
||||
|
||||
# Product of ORDERINGS, THREAD_CONFIGS, and SOLVER_CONFIGS is the full set of
|
||||
# tests to generate.
|
||||
ORDERINGS = ["kAutomaticOrdering", "kUserOrdering"]
|
||||
SINGLE_THREADED = "1"
|
||||
MULTI_THREADED = "4"
|
||||
THREAD_CONFIGS = [SINGLE_THREADED, MULTI_THREADED]
|
||||
|
||||
DENSE_SOLVER_CONFIGS = [
|
||||
# Linear solver Dense backend
|
||||
('DENSE_SCHUR', 'EIGEN'),
|
||||
('DENSE_SCHUR', 'LAPACK'),
|
||||
('DENSE_SCHUR', 'CUDA'),
|
||||
]
|
||||
|
||||
SPARSE_SOLVER_CONFIGS = [
|
||||
# Linear solver Sparse backend
|
||||
('SPARSE_NORMAL_CHOLESKY', 'SUITE_SPARSE'),
|
||||
('SPARSE_NORMAL_CHOLESKY', 'EIGEN_SPARSE'),
|
||||
('SPARSE_NORMAL_CHOLESKY', 'ACCELERATE_SPARSE'),
|
||||
('SPARSE_SCHUR', 'SUITE_SPARSE'),
|
||||
('SPARSE_SCHUR', 'EIGEN_SPARSE'),
|
||||
('SPARSE_SCHUR', 'ACCELERATE_SPARSE'),
|
||||
]
|
||||
|
||||
ITERATIVE_SOLVER_CONFIGS = [
|
||||
# Linear solver Sparse backend Preconditioner
|
||||
('ITERATIVE_SCHUR', 'NO_SPARSE', 'JACOBI'),
|
||||
('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_JACOBI'),
|
||||
('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_POWER_SERIES_EXPANSION'),
|
||||
('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_JACOBI'),
|
||||
('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_JACOBI'),
|
||||
('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_JACOBI'),
|
||||
('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_TRIDIAGONAL'),
|
||||
('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_TRIDIAGONAL'),
|
||||
('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_TRIDIAGONAL'),
|
||||
]
|
||||
|
||||
FILENAME_SHORTENING_MAP = dict(
|
||||
DENSE_SCHUR='denseschur',
|
||||
ITERATIVE_SCHUR='iterschur',
|
||||
SPARSE_NORMAL_CHOLESKY='sparsecholesky',
|
||||
SPARSE_SCHUR='sparseschur',
|
||||
EIGEN='eigen',
|
||||
LAPACK='lapack',
|
||||
CUDA='cuda',
|
||||
NO_SPARSE='', # Omit sparse reference entirely for dense tests.
|
||||
SUITE_SPARSE='suitesparse',
|
||||
EIGEN_SPARSE='eigensparse',
|
||||
ACCELERATE_SPARSE='acceleratesparse',
|
||||
IDENTITY='identity',
|
||||
JACOBI='jacobi',
|
||||
SCHUR_JACOBI='schurjacobi',
|
||||
CLUSTER_JACOBI='clustjacobi',
|
||||
CLUSTER_TRIDIAGONAL='clusttri',
|
||||
SCHUR_POWER_SERIES_EXPANSION='spse',
|
||||
kAutomaticOrdering='auto',
|
||||
kUserOrdering='user',
|
||||
)
|
||||
|
||||
COPYRIGHT_HEADER = (
|
||||
"""// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// ========================================
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// ========================================
|
||||
//
|
||||
// This file is generated using generate_bundle_adjustment_tests.py.""")
|
||||
|
||||
BUNDLE_ADJUSTMENT_TEST_TEMPLATE = (COPYRIGHT_HEADER + """
|
||||
|
||||
#include "ceres/bundle_adjustment_test_util.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "gtest/gtest.h"
|
||||
%(preprocessor_conditions_begin)s
|
||||
namespace ceres::internal {
|
||||
|
||||
TEST_F(BundleAdjustmentTest,
|
||||
%(test_class_name)s) { // NOLINT
|
||||
BundleAdjustmentProblem bundle_adjustment_problem;
|
||||
Solver::Options* options = bundle_adjustment_problem.mutable_solver_options();
|
||||
options->eta = 0.01;
|
||||
options->num_threads = %(num_threads)s;
|
||||
options->linear_solver_type = %(linear_solver)s;
|
||||
options->dense_linear_algebra_library_type = %(dense_backend)s;
|
||||
options->sparse_linear_algebra_library_type = %(sparse_backend)s;
|
||||
options->preconditioner_type = %(preconditioner)s;
|
||||
if (%(ordering)s) {
|
||||
options->linear_solver_ordering = nullptr;
|
||||
}
|
||||
Problem* problem = bundle_adjustment_problem.mutable_problem();
|
||||
RunSolverForConfigAndExpectResidualsMatch(*options, problem);
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
%(preprocessor_conditions_end)s""")
|
||||
|
||||
def camelcasify(token):
  """Convert an underscore-separated token to camel case.

  The token is split on '_'; each piece is lower-cased, then capitalized, and
  the pieces are concatenated with no separator. For example 'DENSE_SCHUR'
  becomes 'DenseSchur'. An empty token yields an empty string.
  """
  # A generator is enough here; there is no need to build an intermediate list.
  return ''.join(piece.lower().capitalize() for piece in token.split('_'))
|
||||
|
||||
|
||||
def generate_bundle_test(linear_solver,
                         dense_backend,
                         sparse_backend,
                         preconditioner,
                         ordering,
                         thread_config):
  """Generate a bundle adjustment test source file for one configuration.

  Args:
    linear_solver: Linear solver name, e.g. 'DENSE_SCHUR'.
    dense_backend: Dense linear algebra backend, e.g. 'EIGEN'.
    sparse_backend: Sparse linear algebra backend, or 'NO_SPARSE'.
    preconditioner: Preconditioner name, e.g. 'JACOBI'.
    ordering: 'kAutomaticOrdering' or 'kUserOrdering'.
    thread_config: Thread count as a string; compared against MULTI_THREADED.

  Returns:
    The path of the file that was written, of the form
    'generated_bundle_adjustment_tests/ba_<tag>_test.cc'.

  The file is written relative to the current working directory and its path
  is printed to stdout.
  """

  # Preconditioner only makes sense for iterative schur; drop it otherwise.
  preconditioner_tag = (
      preconditioner if linear_solver == 'ITERATIVE_SCHUR' else '')

  # Likewise the dense backend is only mentioned for dense schur.
  dense_backend_tag = dense_backend if linear_solver == 'DENSE_SCHUR' else ''

  # Omit references to the sparse backend when one is not in use.
  sparse_backend_tag = '' if sparse_backend == 'NO_SPARSE' else sparse_backend

  # The test name is the non-empty components joined by a single underscore.
  name_components = [
      camelcasify(linear_solver),
      camelcasify(dense_backend_tag),
      camelcasify(sparse_backend_tag),
      camelcasify(preconditioner_tag),
      ordering[1:],  # Strip 'k'
      'Threads' if thread_config == MULTI_THREADED else '',
  ]
  test_class_name = '_'.join(part for part in name_components if part)

  # Initial template parameters (augmented more below).
  template_parameters = dict(
      linear_solver=linear_solver,
      dense_backend=dense_backend,
      sparse_backend=sparse_backend,
      preconditioner=preconditioner,
      ordering=ordering,
      num_threads=thread_config,
      test_class_name=test_class_name)

  # Accumulate (opening, closing) preprocessor guards for the solver's sparse
  # and dense backends, outermost guard first.
  guards = []
  if sparse_backend == 'SUITE_SPARSE':
    guards.append(('#ifndef CERES_NO_SUITESPARSE',
                   '#endif // CERES_NO_SUITESPARSE'))
  elif sparse_backend == 'ACCELERATE_SPARSE':
    guards.append(('#ifndef CERES_NO_ACCELERATE_SPARSE',
                   '#endif // CERES_NO_ACCELERATE_SPARSE'))
  elif sparse_backend == 'EIGEN_SPARSE':
    guards.append(('#ifdef CERES_USE_EIGEN_SPARSE',
                   '#endif // CERES_USE_EIGEN_SPARSE'))

  if dense_backend == "LAPACK":
    guards.append(('#ifndef CERES_NO_LAPACK',
                   '#endif // CERES_NO_LAPACK'))
  elif dense_backend == "CUDA":
    guards.append(('#ifndef CERES_NO_CUDA',
                   '#endif // CERES_NO_CUDA'))

  preprocessor_conditions_begin = [opening for opening, _ in guards]
  # Guards close in the reverse of the order they were opened.
  preprocessor_conditions_end = [closing for _, closing in reversed(guards)]

  # If there are #ifdefs, put newlines around them.
  if preprocessor_conditions_begin:
    preprocessor_conditions_begin = [''] + preprocessor_conditions_begin + ['']
    preprocessor_conditions_end = [''] + preprocessor_conditions_end + ['']

  # Put #ifdef/#ifndef stacks into the template parameters.
  template_parameters['preprocessor_conditions_begin'] = '\n'.join(
      preprocessor_conditions_begin)
  template_parameters['preprocessor_conditions_end'] = '\n'.join(
      preprocessor_conditions_end)

  # Build the filename tag from the shortened names, skipping components that
  # are absent from the map or map to an empty string.
  short_names = (FILENAME_SHORTENING_MAP.get(key) for key in [
      linear_solver,
      dense_backend_tag,
      sparse_backend_tag,
      preconditioner_tag,
      ordering])
  filename_tag = '_'.join(short for short in short_names if short)

  if thread_config == MULTI_THREADED:
    filename_tag += '_threads'

  # Substitute variables into the test template, and write the result to a file.
  filename = ('generated_bundle_adjustment_tests/ba_%s_test.cc' %
              filename_tag.lower())
  with open(filename, 'w') as fd:
    fd.write(BUNDLE_ADJUSTMENT_TEST_TEMPLATE % template_parameters)

  # All done.
  print('Generated', filename)

  return filename
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Generate one test per (ordering, thread config, solver config) combination,
  # remembering every file that was written.
  generated_files = []

  for ordering in ORDERINGS:
    for thread_config in THREAD_CONFIGS:
      # Normalize the three config tables to
      # (linear_solver, dense_backend, sparse_backend, preconditioner) tuples,
      # keeping the dense, sparse, iterative order.
      solver_configs = (
          [(solver, dense, 'NO_SPARSE', 'IDENTITY')
           for solver, dense in DENSE_SOLVER_CONFIGS] +
          [(solver, 'EIGEN', sparse, 'IDENTITY')
           for solver, sparse in SPARSE_SOLVER_CONFIGS] +
          [(solver, 'EIGEN', sparse, precond)
           for solver, sparse, precond in ITERATIVE_SOLVER_CONFIGS])

      for solver, dense, sparse, precond in solver_configs:
        generated_files.append(
            generate_bundle_test(solver,
                                 dense,
                                 sparse,
                                 precond,
                                 ordering,
                                 thread_config))

  # Generate the CMakeLists.txt as well.
  with open('generated_bundle_adjustment_tests/CMakeLists.txt', 'w') as fd:
    # Turn the C++ comment markers into CMake ones, then repair the URL that
    # the blanket replacement damaged.
    fd.write(COPYRIGHT_HEADER.replace('//', '#').replace('http:#', 'http://'))
    fd.write('\n\n')
    for generated_file in generated_files:
      test_name = generated_file.split('/')[1].replace('_test.cc', '')
      fd.write('ceres_test(%s)\n' % test_name)
|
||||
246
extern/ceres/internal/ceres/generate_template_specializations.py
vendored
Normal file
246
extern/ceres/internal/ceres/generate_template_specializations.py
vendored
Normal file
@@ -0,0 +1,246 @@
|
||||
# Ceres Solver - A fast non-linear least squares minimizer
|
||||
# Copyright 2023 Google Inc. All rights reserved.
|
||||
# http://ceres-solver.org/
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
# used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
#
|
||||
# Script for explicitly generating template specialization of the
|
||||
# SchurEliminator class. It is a rather large class
|
||||
# and the number of explicit instantiations is also large. Explicitly
|
||||
# generating these instantiations in separate .cc files breaks the
|
||||
# compilation into separate compilation unit rather than one large cc
|
||||
# file which takes 2+GB of RAM to compile.
|
||||
#
|
||||
# This script creates three sets of files.
|
||||
#
|
||||
# 1. schur_eliminator_x_x_x.cc and partitioned_matrix_view_x_x_x.cc
|
||||
# where, the x indicates the template parameters and
|
||||
#
|
||||
# 2. schur_eliminator.cc & partitioned_matrix_view.cc
|
||||
#
|
||||
# that contains a factory function for instantiating these classes
|
||||
# based on runtime parameters.
|
||||
#
|
||||
# 3. schur_templates.cc
|
||||
#
|
||||
# that contains a function which can be queried to determine what
|
||||
# template specializations are available.
|
||||
#
|
||||
# The following list of tuples, specializations indicates the set of
|
||||
# specializations that is generated.
|
||||
SPECIALIZATIONS = [(2, 2, 2),
|
||||
(2, 2, 3),
|
||||
(2, 2, 4),
|
||||
(2, 2, "Eigen::Dynamic"),
|
||||
(2, 3, 3),
|
||||
(2, 3, 4),
|
||||
(2, 3, 6),
|
||||
(2, 3, 9),
|
||||
(2, 3, "Eigen::Dynamic"),
|
||||
(2, 4, 3),
|
||||
(2, 4, 4),
|
||||
(2, 4, 6),
|
||||
(2, 4, 8),
|
||||
(2, 4, 9),
|
||||
(2, 4, "Eigen::Dynamic"),
|
||||
(2, "Eigen::Dynamic", "Eigen::Dynamic"),
|
||||
(3, 3, 3),
|
||||
(4, 4, 2),
|
||||
(4, 4, 3),
|
||||
(4, 4, 4),
|
||||
(4, 4, "Eigen::Dynamic")]
|
||||
|
||||
import schur_eliminator_template
|
||||
import partitioned_matrix_view_template
|
||||
import os
|
||||
import glob
|
||||
|
||||
def SuffixForSize(size):
  """Return the filename suffix for a block size.

  "Eigen::Dynamic" maps to "d"; any other value maps to str(size).
  """
  return "d" if size == "Eigen::Dynamic" else str(size)
|
||||
|
||||
def SpecializationFilename(prefix, row_block_size, e_block_size, f_block_size):
  """Return prefix followed by the three size suffixes, joined with "_".

  Each size is converted with SuffixForSize, in the order row, e, f.
  """
  suffixes = [SuffixForSize(size)
              for size in (row_block_size, e_block_size, f_block_size)]
  return "_".join([prefix] + suffixes)
|
||||
|
||||
def GenerateFactoryConditional(row_block_size, e_block_size, f_block_size):
  """Return a %-format string wrapping a body in the matching C++ `if`.

  One "(options.<name> == <size>)" test is emitted for every size that is not
  "Eigen::Dynamic". The returned string contains a single "%s" placeholder for
  the body:

    * no fixed sizes: just "%s" (the body is emitted unconditionally);
    * one fixed size: an `if` on that single test;
    * several fixed sizes: an `if` on the tests joined with "&&".
  """
  named_sizes = (("row_block_size", row_block_size),
                 ("e_block_size", e_block_size),
                 ("f_block_size", f_block_size))
  conditionals = ["(options.%s == %s)" % (option, size)
                  for option, size in named_sizes
                  if size != "Eigen::Dynamic"]

  if not conditionals:
    return "%s"

  if len(conditionals) == 1:
    # A single test already carries its own parentheses.
    return " if " + conditionals[0] + " {\n %s\n }\n"

  return " if (" + " &&\n ".join(conditionals) + ") {\n %s\n }\n"
|
||||
|
||||
def Specialize(name, data):
  """
  Generate specialization code and the conditionals to instantiate it.

  `name` is the file stem of the class being specialized. `data` is a mapping
  that supplies the "HEADER", "SPECIALIZATION_FILE", "DYNAMIC_FILE",
  "FACTORY_FILE_HEADER", "FACTORY" and "FACTORY_FOOTER" template strings.
  Files are written under "generated/" and to "<name>.cc", relative to the
  current working directory.
  """

  def write_specialization(sizes, template_key):
    # One generated source: the common header followed by the chosen template
    # instantiated with the three block sizes.
    path = SpecializationFilename("generated/" + name, *sizes) + ".cc"
    with open(path, "w") as out:
      out.write(data["HEADER"])
      out.write(data[template_key] % tuple(sizes))

  # Specialization files
  for sizes in SPECIALIZATIONS:
    write_specialization(sizes, "SPECIALIZATION_FILE")

  # Generate the _d_d_d specialization.
  write_specialization(("Eigen::Dynamic",) * 3, "DYNAMIC_FILE")

  # Factory
  with open(name + ".cc", "w") as out:
    out.write(data["HEADER"])
    out.write(data["FACTORY_FILE_HEADER"])
    for sizes in SPECIALIZATIONS:
      conditional = GenerateFactoryConditional(*sizes)
      instantiation = data["FACTORY"] % tuple(sizes)
      out.write(conditional % instantiation)
    out.write(data["FACTORY_FOOTER"])
|
||||
|
||||
QUERY_HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
//
|
||||
// What template specializations are available.
|
||||
//
|
||||
// ========================================
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
//=========================================
|
||||
//
|
||||
// This file is generated using generate_template_specializations.py.
|
||||
"""
|
||||
|
||||
QUERY_FILE_HEADER = """
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/schur_templates.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
void GetBestSchurTemplateSpecialization(int* row_block_size,
|
||||
int* e_block_size,
|
||||
int* f_block_size) {
|
||||
LinearSolver::Options options;
|
||||
options.row_block_size = *row_block_size;
|
||||
options.e_block_size = *e_block_size;
|
||||
options.f_block_size = *f_block_size;
|
||||
*row_block_size = Eigen::Dynamic;
|
||||
*e_block_size = Eigen::Dynamic;
|
||||
*f_block_size = Eigen::Dynamic;
|
||||
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
"""
|
||||
|
||||
QUERY_FOOTER = """
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
"""
|
||||
|
||||
QUERY_ACTION = """ *row_block_size = %s;
|
||||
*e_block_size = %s;
|
||||
*f_block_size = %s;
|
||||
return;"""
|
||||
|
||||
def GenerateQueryFile():
  """
  Generate file that allows querying for available template specializations.

  Writes "schur_templates.cc" in the current working directory: the query
  headers, then one conditional assignment per entry of SPECIALIZATIONS, then
  the footer.
  """
  with open("schur_templates.cc", "w") as out:
    out.write(QUERY_HEADER)
    out.write(QUERY_FILE_HEADER)
    for sizes in SPECIALIZATIONS:
      row_block_size, e_block_size, f_block_size = sizes
      conditional = GenerateFactoryConditional(
          row_block_size, e_block_size, f_block_size)
      assignment = QUERY_ACTION % (row_block_size, e_block_size, f_block_size)
      out.write(conditional % assignment)
    out.write(QUERY_FOOTER)
|
||||
|
||||
|
||||
if __name__ == "__main__":
  # Remove everything previously written to generated/ before regenerating.
  for stale_path in glob.glob("generated/*"):
    os.remove(stale_path)

  # Each template module's namespace supplies the strings Specialize reads.
  for stem, template_module in (
      ("schur_eliminator", schur_eliminator_template),
      ("partitioned_matrix_view", partitioned_matrix_view_template)):
    Specialize(stem, template_module.__dict__)

  GenerateQueryFile()
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 2, 2>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 2, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 2, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 2, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 3, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 3, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 3, 6>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 3, 9>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 3, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, 6>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, 8>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, 9>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, 4, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<3, 3, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<4, 4, 2>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<4, 4, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<4, 4, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<4, 4, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,12 +41,10 @@
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<Eigen::Dynamic,
|
||||
Eigen::Dynamic,
|
||||
Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 2, 2>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 2, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 2, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 2, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 3, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 3, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 3, 6>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 3, 9>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 3, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, 6>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, 8>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, 9>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, 4, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<2, Eigen::Dynamic, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<3, 3, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<4, 4, 2>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<4, 4, 3>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<4, 4, 4>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,12 +46,10 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<4, 4, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,10 +41,8 @@
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic>;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
67
extern/ceres/internal/ceres/gradient_checker.cc
vendored
67
extern/ceres/internal/ceres/gradient_checker.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,7 +40,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/is_close.h"
|
||||
#include "ceres/manifold_adapter.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
@@ -49,8 +48,6 @@ namespace ceres {
|
||||
using internal::IsClose;
|
||||
using internal::StringAppendF;
|
||||
using internal::StringPrintf;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
namespace {
|
||||
// Evaluate the cost function and transform the returned Jacobians to
|
||||
@@ -65,12 +62,12 @@ bool EvaluateCostFunction(const CostFunction* function,
|
||||
CHECK(jacobians != nullptr);
|
||||
CHECK(local_jacobians != nullptr);
|
||||
|
||||
const vector<int32_t>& block_sizes = function->parameter_block_sizes();
|
||||
const std::vector<int32_t>& block_sizes = function->parameter_block_sizes();
|
||||
const int num_parameter_blocks = block_sizes.size();
|
||||
|
||||
// Allocate Jacobian matrices in tangent space.
|
||||
local_jacobians->resize(num_parameter_blocks);
|
||||
vector<double*> local_jacobian_data(num_parameter_blocks);
|
||||
std::vector<double*> local_jacobian_data(num_parameter_blocks);
|
||||
for (int i = 0; i < num_parameter_blocks; ++i) {
|
||||
int block_size = block_sizes.at(i);
|
||||
if (manifolds.at(i) != nullptr) {
|
||||
@@ -83,7 +80,7 @@ bool EvaluateCostFunction(const CostFunction* function,
|
||||
|
||||
// Allocate Jacobian matrices in ambient space.
|
||||
jacobians->resize(num_parameter_blocks);
|
||||
vector<double*> jacobian_data(num_parameter_blocks);
|
||||
std::vector<double*> jacobian_data(num_parameter_blocks);
|
||||
for (int i = 0; i < num_parameter_blocks; ++i) {
|
||||
jacobians->at(i).resize(function->num_residuals(), block_sizes.at(i));
|
||||
jacobians->at(i).setZero();
|
||||
@@ -116,39 +113,8 @@ bool EvaluateCostFunction(const CostFunction* function,
|
||||
}
|
||||
} // namespace
|
||||
|
||||
GradientChecker::GradientChecker(
|
||||
const CostFunction* function,
|
||||
const vector<const LocalParameterization*>* local_parameterizations,
|
||||
const NumericDiffOptions& options)
|
||||
: delete_manifolds_(true), function_(function) {
|
||||
CHECK(function != nullptr);
|
||||
manifolds_.resize(function->parameter_block_sizes().size(), nullptr);
|
||||
|
||||
// Wrap the local parameterization into manifold objects using
|
||||
// ManifoldAdapter.
|
||||
for (int i = 0; i < manifolds_.size(); ++i) {
|
||||
const LocalParameterization* local_param = local_parameterizations->at(i);
|
||||
if (local_param == nullptr) {
|
||||
continue;
|
||||
}
|
||||
manifolds_[i] = new internal::ManifoldAdapter(local_param);
|
||||
}
|
||||
|
||||
auto finite_diff_cost_function =
|
||||
std::make_unique<DynamicNumericDiffCostFunction<CostFunction, RIDDERS>>(
|
||||
function, DO_NOT_TAKE_OWNERSHIP, options);
|
||||
const vector<int32_t>& parameter_block_sizes =
|
||||
function->parameter_block_sizes();
|
||||
for (int32_t parameter_block_size : parameter_block_sizes) {
|
||||
finite_diff_cost_function->AddParameterBlock(parameter_block_size);
|
||||
}
|
||||
finite_diff_cost_function->SetNumResiduals(function->num_residuals());
|
||||
|
||||
finite_diff_cost_function_ = std::move(finite_diff_cost_function);
|
||||
}
|
||||
|
||||
GradientChecker::GradientChecker(const CostFunction* function,
|
||||
const vector<const Manifold*>* manifolds,
|
||||
const std::vector<const Manifold*>* manifolds,
|
||||
const NumericDiffOptions& options)
|
||||
: function_(function) {
|
||||
CHECK(function != nullptr);
|
||||
@@ -161,7 +127,7 @@ GradientChecker::GradientChecker(const CostFunction* function,
|
||||
auto finite_diff_cost_function =
|
||||
std::make_unique<DynamicNumericDiffCostFunction<CostFunction, RIDDERS>>(
|
||||
function, DO_NOT_TAKE_OWNERSHIP, options);
|
||||
const vector<int32_t>& parameter_block_sizes =
|
||||
const std::vector<int32_t>& parameter_block_sizes =
|
||||
function->parameter_block_sizes();
|
||||
const int num_parameter_blocks = parameter_block_sizes.size();
|
||||
for (int i = 0; i < num_parameter_blocks; ++i) {
|
||||
@@ -172,14 +138,6 @@ GradientChecker::GradientChecker(const CostFunction* function,
|
||||
finite_diff_cost_function_ = std::move(finite_diff_cost_function);
|
||||
}
|
||||
|
||||
GradientChecker::~GradientChecker() {
|
||||
if (delete_manifolds_) {
|
||||
for (const auto m : manifolds_) {
|
||||
delete m;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool GradientChecker::Probe(double const* const* parameters,
|
||||
double relative_precision,
|
||||
ProbeResults* results_param) const {
|
||||
@@ -204,8 +162,8 @@ bool GradientChecker::Probe(double const* const* parameters,
|
||||
results->return_value = true;
|
||||
|
||||
// Evaluate the derivative using the user supplied code.
|
||||
vector<Matrix>& jacobians = results->jacobians;
|
||||
vector<Matrix>& local_jacobians = results->local_jacobians;
|
||||
std::vector<Matrix>& jacobians = results->jacobians;
|
||||
std::vector<Matrix>& local_jacobians = results->local_jacobians;
|
||||
if (!EvaluateCostFunction(function_,
|
||||
parameters,
|
||||
manifolds_,
|
||||
@@ -217,8 +175,9 @@ bool GradientChecker::Probe(double const* const* parameters,
|
||||
}
|
||||
|
||||
// Evaluate the derivative using numeric derivatives.
|
||||
vector<Matrix>& numeric_jacobians = results->numeric_jacobians;
|
||||
vector<Matrix>& local_numeric_jacobians = results->local_numeric_jacobians;
|
||||
std::vector<Matrix>& numeric_jacobians = results->numeric_jacobians;
|
||||
std::vector<Matrix>& local_numeric_jacobians =
|
||||
results->local_numeric_jacobians;
|
||||
Vector finite_diff_residuals;
|
||||
if (!EvaluateCostFunction(finite_diff_cost_function_.get(),
|
||||
parameters,
|
||||
@@ -258,7 +217,7 @@ bool GradientChecker::Probe(double const* const* parameters,
|
||||
|
||||
// Accumulate the error message for all the jacobians, since it won't get
|
||||
// output if there are no bad jacobian components.
|
||||
string error_log;
|
||||
std::string error_log;
|
||||
for (int k = 0; k < function_->parameter_block_sizes().size(); k++) {
|
||||
StringAppendF(&error_log,
|
||||
"========== "
|
||||
@@ -312,7 +271,7 @@ bool GradientChecker::Probe(double const* const* parameters,
|
||||
|
||||
// Since there were some bad errors, dump comprehensive debug info.
|
||||
if (num_bad_jacobian_components) {
|
||||
string header = StringPrintf(
|
||||
std::string header = StringPrintf(
|
||||
"\nDetected %d bad Jacobian component(s). "
|
||||
"Worst relative error was %g.\n",
|
||||
num_bad_jacobian_components,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -52,13 +52,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::abs;
|
||||
using std::max;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -68,7 +62,7 @@ class GradientCheckingCostFunction final : public CostFunction {
|
||||
const std::vector<const Manifold*>* manifolds,
|
||||
const NumericDiffOptions& options,
|
||||
double relative_precision,
|
||||
string extra_info,
|
||||
std::string extra_info,
|
||||
GradientCheckingIterationCallback* callback)
|
||||
: function_(function),
|
||||
gradient_checker_(function, manifolds, options),
|
||||
@@ -76,7 +70,7 @@ class GradientCheckingCostFunction final : public CostFunction {
|
||||
extra_info_(std::move(extra_info)),
|
||||
callback_(callback) {
|
||||
CHECK(callback_ != nullptr);
|
||||
const vector<int32_t>& parameter_block_sizes =
|
||||
const std::vector<int32_t>& parameter_block_sizes =
|
||||
function->parameter_block_sizes();
|
||||
*mutable_parameter_block_sizes() = parameter_block_sizes;
|
||||
set_num_residuals(function->num_residuals());
|
||||
@@ -105,7 +99,8 @@ class GradientCheckingCostFunction final : public CostFunction {
|
||||
MatrixRef(residuals, num_residuals, 1) = results.residuals;
|
||||
|
||||
// Copy the original jacobian blocks into the jacobians array.
|
||||
const vector<int32_t>& block_sizes = function_->parameter_block_sizes();
|
||||
const std::vector<int32_t>& block_sizes =
|
||||
function_->parameter_block_sizes();
|
||||
for (int k = 0; k < block_sizes.size(); k++) {
|
||||
if (jacobians[k] != nullptr) {
|
||||
MatrixRef(jacobians[k],
|
||||
@@ -127,7 +122,7 @@ class GradientCheckingCostFunction final : public CostFunction {
|
||||
const CostFunction* function_;
|
||||
GradientChecker gradient_checker_;
|
||||
double relative_precision_;
|
||||
string extra_info_;
|
||||
std::string extra_info_;
|
||||
GradientCheckingIterationCallback* callback_;
|
||||
};
|
||||
|
||||
@@ -137,7 +132,7 @@ GradientCheckingIterationCallback::GradientCheckingIterationCallback()
|
||||
: gradient_error_detected_(false) {}
|
||||
|
||||
CallbackReturnType GradientCheckingIterationCallback::operator()(
|
||||
const IterationSummary& summary) {
|
||||
const IterationSummary& /*summary*/) {
|
||||
if (gradient_error_detected_) {
|
||||
LOG(ERROR) << "Gradient error detected. Terminating solver.";
|
||||
return SOLVER_ABORT;
|
||||
@@ -198,7 +193,8 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
|
||||
|
||||
// For every ParameterBlock in problem_impl, create a new parameter block with
|
||||
// the same manifold and constancy.
|
||||
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program->parameter_blocks();
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
gradient_checking_problem_impl->AddParameterBlock(
|
||||
parameter_block->mutable_user_state(),
|
||||
@@ -225,17 +221,18 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
|
||||
// For every ResidualBlock in problem_impl, create a new
|
||||
// ResidualBlock by wrapping its CostFunction inside a
|
||||
// GradientCheckingCostFunction.
|
||||
const vector<ResidualBlock*>& residual_blocks = program->residual_blocks();
|
||||
const std::vector<ResidualBlock*>& residual_blocks =
|
||||
program->residual_blocks();
|
||||
for (int i = 0; i < residual_blocks.size(); ++i) {
|
||||
ResidualBlock* residual_block = residual_blocks[i];
|
||||
|
||||
// Build a human readable string which identifies the
|
||||
// ResidualBlock. This is used by the GradientCheckingCostFunction
|
||||
// when logging debugging information.
|
||||
string extra_info =
|
||||
std::string extra_info =
|
||||
StringPrintf("Residual block id %d; depends on parameters [", i);
|
||||
vector<double*> parameter_blocks;
|
||||
vector<const Manifold*> manifolds;
|
||||
std::vector<double*> parameter_blocks;
|
||||
std::vector<const Manifold*> manifolds;
|
||||
parameter_blocks.reserve(residual_block->NumParameterBlocks());
|
||||
manifolds.reserve(residual_block->NumParameterBlocks());
|
||||
for (int j = 0; j < residual_block->NumParameterBlocks(); ++j) {
|
||||
@@ -280,5 +277,4 @@ std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
|
||||
return gradient_checking_problem_impl;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,8 +42,7 @@
|
||||
#include "ceres/iteration_callback.h"
|
||||
#include "ceres/manifold.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ProblemImpl;
|
||||
|
||||
@@ -109,8 +108,7 @@ CERES_NO_EXPORT std::unique_ptr<ProblemImpl> CreateGradientCheckingProblemImpl(
|
||||
double relative_precision,
|
||||
GradientCheckingIterationCallback* callback);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
20
extern/ceres/internal/ceres/gradient_problem.cc
vendored
20
extern/ceres/internal/ceres/gradient_problem.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,6 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/local_parameterization.h"
|
||||
#include "ceres/manifold_adapter.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
@@ -46,22 +44,6 @@ GradientProblem::GradientProblem(FirstOrderFunction* function)
|
||||
CHECK(function != nullptr);
|
||||
}
|
||||
|
||||
GradientProblem::GradientProblem(FirstOrderFunction* function,
|
||||
LocalParameterization* parameterization)
|
||||
: function_(function),
|
||||
parameterization_(parameterization),
|
||||
scratch_(new double[function_->NumParameters()]) {
|
||||
CHECK(function != nullptr);
|
||||
if (parameterization != nullptr) {
|
||||
manifold_ =
|
||||
std::make_unique<internal::ManifoldAdapter>(parameterization_.get());
|
||||
} else {
|
||||
manifold_ = std::make_unique<EuclideanManifold<DYNAMIC>>(
|
||||
function_->NumParameters());
|
||||
}
|
||||
CHECK_EQ(function_->NumParameters(), manifold_->AmbientSize());
|
||||
}
|
||||
|
||||
GradientProblem::GradientProblem(FirstOrderFunction* function,
|
||||
Manifold* manifold)
|
||||
: function_(function), scratch_(new double[function_->NumParameters()]) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/sparse_matrix.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
|
||||
public:
|
||||
@@ -53,10 +52,10 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
|
||||
|
||||
std::unique_ptr<SparseMatrix> CreateJacobian() const final { return nullptr; }
|
||||
|
||||
bool Evaluate(const EvaluateOptions& evaluate_options,
|
||||
bool Evaluate(const EvaluateOptions& /*evaluate_options*/,
|
||||
const double* state,
|
||||
double* cost,
|
||||
double* residuals,
|
||||
double* /*residuals*/,
|
||||
double* gradient,
|
||||
SparseMatrix* jacobian) final {
|
||||
CHECK(jacobian == nullptr);
|
||||
@@ -83,7 +82,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
|
||||
int NumParameters() const final { return problem_.NumParameters(); }
|
||||
|
||||
int NumEffectiveParameters() const final {
|
||||
return problem_.NumLocalParameters();
|
||||
return problem_.NumTangentParameters();
|
||||
}
|
||||
|
||||
int NumResiduals() const final { return 1; }
|
||||
@@ -97,8 +96,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator {
|
||||
::ceres::internal::ExecutionSummary execution_summary_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,7 +30,9 @@
|
||||
|
||||
#include "ceres/gradient_problem_solver.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ceres/callbacks.h"
|
||||
#include "ceres/gradient_problem.h"
|
||||
@@ -48,7 +50,6 @@
|
||||
namespace ceres {
|
||||
using internal::StringAppendF;
|
||||
using internal::StringPrintf;
|
||||
using std::string;
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -112,7 +113,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options,
|
||||
*summary = Summary();
|
||||
// clang-format off
|
||||
summary->num_parameters = problem.NumParameters();
|
||||
summary->num_local_parameters = problem.NumLocalParameters();
|
||||
summary->num_tangent_parameters = problem.NumTangentParameters();
|
||||
summary->line_search_direction_type = options.line_search_direction_type; // NOLINT
|
||||
summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT
|
||||
summary->line_search_type = options.line_search_type;
|
||||
@@ -180,7 +181,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options,
|
||||
SetSummaryFinalCost(summary);
|
||||
}
|
||||
|
||||
const std::map<string, CallStatistics>& evaluator_statistics =
|
||||
const std::map<std::string, CallStatistics>& evaluator_statistics =
|
||||
minimizer_options.evaluator->Statistics();
|
||||
{
|
||||
const CallStatistics& call_stats = FindWithDefault(
|
||||
@@ -203,7 +204,7 @@ bool GradientProblemSolver::Summary::IsSolutionUsable() const {
|
||||
return internal::IsSolutionUsable(*this);
|
||||
}
|
||||
|
||||
string GradientProblemSolver::Summary::BriefReport() const {
|
||||
std::string GradientProblemSolver::Summary::BriefReport() const {
|
||||
return StringPrintf(
|
||||
"Ceres GradientProblemSolver Report: "
|
||||
"Iterations: %d, "
|
||||
@@ -216,17 +217,20 @@ string GradientProblemSolver::Summary::BriefReport() const {
|
||||
TerminationTypeToString(termination_type));
|
||||
}
|
||||
|
||||
string GradientProblemSolver::Summary::FullReport() const {
|
||||
std::string GradientProblemSolver::Summary::FullReport() const {
|
||||
using internal::VersionString;
|
||||
|
||||
string report = string("\nSolver Summary (v " + VersionString() + ")\n\n");
|
||||
// NOTE operator+ is not usable for concatenating a string and a string_view.
|
||||
std::string report =
|
||||
std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n";
|
||||
|
||||
StringAppendF(&report, "Parameters % 25d\n", num_parameters);
|
||||
if (num_local_parameters != num_parameters) {
|
||||
StringAppendF(&report, "Local parameters % 25d\n", num_local_parameters);
|
||||
if (num_tangent_parameters != num_parameters) {
|
||||
StringAppendF(
|
||||
&report, "Tangent parameters % 25d\n", num_tangent_parameters);
|
||||
}
|
||||
|
||||
string line_search_direction_string;
|
||||
std::string line_search_direction_string;
|
||||
if (line_search_direction_type == LBFGS) {
|
||||
line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank);
|
||||
} else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) {
|
||||
@@ -241,7 +245,7 @@ string GradientProblemSolver::Summary::FullReport() const {
|
||||
"Line search direction %19s\n",
|
||||
line_search_direction_string.c_str());
|
||||
|
||||
const string line_search_type_string = StringPrintf(
|
||||
const std::string line_search_type_string = StringPrintf(
|
||||
"%s %s",
|
||||
LineSearchInterpolationTypeToString(line_search_interpolation_type),
|
||||
LineSearchTypeToString(line_search_type));
|
||||
|
||||
8
extern/ceres/internal/ceres/graph.h
vendored
8
extern/ceres/internal/ceres/graph.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,8 +42,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// An unweighted undirected graph templated over the vertex ids. Vertex
|
||||
// should be hashable.
|
||||
@@ -206,7 +205,6 @@ class WeightedGraph {
|
||||
edge_weights_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_GRAPH_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -45,8 +45,7 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Compare two vertices of a graph by their degrees, if the degrees
|
||||
// are equal then order them by their ids.
|
||||
@@ -340,7 +339,6 @@ std::unique_ptr<WeightedGraph<Vertex>> Degree2MaximumSpanningForest(
|
||||
return forest;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_GRAPH_ALGORITHMS_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,15 +35,16 @@
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
ImplicitSchurComplement::ImplicitSchurComplement(
|
||||
const LinearSolver::Options& options)
|
||||
: options_(options), D_(nullptr), b_(nullptr) {}
|
||||
: options_(options) {}
|
||||
|
||||
void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
|
||||
const double* D,
|
||||
@@ -57,11 +58,16 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
|
||||
D_ = D;
|
||||
b_ = b;
|
||||
|
||||
compute_ftf_inverse_ =
|
||||
options_.use_spse_initialization ||
|
||||
options_.preconditioner_type == JACOBI ||
|
||||
options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION;
|
||||
|
||||
// Initialize temporary storage and compute the block diagonals of
|
||||
// E'E and F'F.
|
||||
if (block_diagonal_EtE_inverse_ == nullptr) {
|
||||
block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE();
|
||||
if (options_.preconditioner_type == JACOBI) {
|
||||
if (compute_ftf_inverse_) {
|
||||
block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF();
|
||||
}
|
||||
rhs_.resize(A_->num_cols_f());
|
||||
@@ -72,7 +78,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
|
||||
tmp_f_cols_.resize(A_->num_cols_f());
|
||||
} else {
|
||||
A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get());
|
||||
if (options_.preconditioner_type == JACOBI) {
|
||||
if (compute_ftf_inverse_) {
|
||||
A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get());
|
||||
}
|
||||
}
|
||||
@@ -81,7 +87,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
|
||||
// contributions from the diagonal D if it is non-null. Add that to
|
||||
// the block diagonals and invert them.
|
||||
AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get());
|
||||
if (options_.preconditioner_type == JACOBI) {
|
||||
if (compute_ftf_inverse_) {
|
||||
AddDiagonalAndInvert((D_ == nullptr) ? nullptr : D_ + A_->num_cols_e(),
|
||||
block_diagonal_FtF_inverse_.get());
|
||||
}
|
||||
@@ -97,36 +103,74 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A,
|
||||
// By breaking it down into individual matrix vector products
|
||||
// involving the matrices E and F. This is implemented using a
|
||||
// PartitionedMatrixView of the input matrix A.
|
||||
void ImplicitSchurComplement::RightMultiply(const double* x, double* y) const {
|
||||
void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
// y1 = F x
|
||||
tmp_rows_.setZero();
|
||||
A_->RightMultiplyF(x, tmp_rows_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
|
||||
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
|
||||
|
||||
// y2 = E' y1
|
||||
tmp_e_cols_.setZero();
|
||||
A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
|
||||
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
|
||||
|
||||
// y3 = -(E'E)^-1 y2
|
||||
tmp_e_cols_2_.setZero();
|
||||
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(),
|
||||
tmp_e_cols_2_.data());
|
||||
tmp_e_cols_2_ *= -1.0;
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
|
||||
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
|
||||
tmp_e_cols_2_.data(),
|
||||
options_.context,
|
||||
options_.num_threads);
|
||||
|
||||
ParallelAssign(
|
||||
options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_);
|
||||
|
||||
// y1 = y1 + E y3
|
||||
A_->RightMultiplyE(tmp_e_cols_2_.data(), tmp_rows_.data());
|
||||
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
|
||||
|
||||
// y5 = D^2 * x
|
||||
if (D_ != nullptr) {
|
||||
ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols());
|
||||
VectorRef(y, num_cols()) =
|
||||
(Dref.array().square() * ConstVectorRef(x, num_cols()).array())
|
||||
.matrix();
|
||||
VectorRef y_cols(y, num_cols());
|
||||
ParallelAssign(
|
||||
options_.context,
|
||||
options_.num_threads,
|
||||
y_cols,
|
||||
(Dref.array().square() * ConstVectorRef(x, num_cols()).array()));
|
||||
} else {
|
||||
VectorRef(y, num_cols()).setZero();
|
||||
ParallelSetZero(options_.context, options_.num_threads, y, num_cols());
|
||||
}
|
||||
|
||||
// y = y5 + F' y1
|
||||
A_->LeftMultiplyF(tmp_rows_.data(), y);
|
||||
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y);
|
||||
}
|
||||
|
||||
void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate(
|
||||
const double* x, double* y) const {
|
||||
CHECK(compute_ftf_inverse_);
|
||||
// y1 = F x
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
|
||||
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
|
||||
|
||||
// y2 = E' y1
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
|
||||
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
|
||||
|
||||
// y3 = (E'E)^-1 y2
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
|
||||
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
|
||||
tmp_e_cols_2_.data(),
|
||||
options_.context,
|
||||
options_.num_threads);
|
||||
// y1 = E y3
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
|
||||
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
|
||||
|
||||
// y4 = F' y1
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_);
|
||||
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data());
|
||||
|
||||
// y += (F'F)^-1 y4
|
||||
block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate(
|
||||
tmp_f_cols_.data(), y, options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
// Given a block diagonal matrix and an optional array of diagonal
|
||||
@@ -136,26 +180,31 @@ void ImplicitSchurComplement::AddDiagonalAndInvert(
|
||||
const double* D, BlockSparseMatrix* block_diagonal) {
|
||||
const CompressedRowBlockStructure* block_diagonal_structure =
|
||||
block_diagonal->block_structure();
|
||||
for (const auto& row : block_diagonal_structure->rows) {
|
||||
const int row_block_pos = row.block.position;
|
||||
const int row_block_size = row.block.size;
|
||||
const Cell& cell = row.cells[0];
|
||||
MatrixRef m(block_diagonal->mutable_values() + cell.position,
|
||||
row_block_size,
|
||||
row_block_size);
|
||||
ParallelFor(options_.context,
|
||||
0,
|
||||
block_diagonal_structure->rows.size(),
|
||||
options_.num_threads,
|
||||
[block_diagonal_structure, D, block_diagonal](int row_block_id) {
|
||||
auto& row = block_diagonal_structure->rows[row_block_id];
|
||||
const int row_block_pos = row.block.position;
|
||||
const int row_block_size = row.block.size;
|
||||
const Cell& cell = row.cells[0];
|
||||
MatrixRef m(block_diagonal->mutable_values() + cell.position,
|
||||
row_block_size,
|
||||
row_block_size);
|
||||
|
||||
if (D != nullptr) {
|
||||
ConstVectorRef d(D + row_block_pos, row_block_size);
|
||||
m += d.array().square().matrix().asDiagonal();
|
||||
}
|
||||
if (D != nullptr) {
|
||||
ConstVectorRef d(D + row_block_pos, row_block_size);
|
||||
m += d.array().square().matrix().asDiagonal();
|
||||
}
|
||||
|
||||
m = m.selfadjointView<Eigen::Upper>().llt().solve(
|
||||
Matrix::Identity(row_block_size, row_block_size));
|
||||
}
|
||||
m = m.selfadjointView<Eigen::Upper>().llt().solve(
|
||||
Matrix::Identity(row_block_size, row_block_size));
|
||||
});
|
||||
}
|
||||
|
||||
// Similar to RightMultiply, use the block structure of the matrix A
|
||||
// to compute y = (E'E)^-1 (E'b - E'F x).
|
||||
// Similar to RightMultiplyAndAccumulate, use the block structure of the matrix
|
||||
// A to compute y = (E'E)^-1 (E'b - E'F x).
|
||||
void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
|
||||
const int num_cols_e = A_->num_cols_e();
|
||||
const int num_cols_f = A_->num_cols_f();
|
||||
@@ -163,26 +212,34 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
|
||||
const int num_rows = A_->num_rows();
|
||||
|
||||
// y1 = F x
|
||||
tmp_rows_.setZero();
|
||||
A_->RightMultiplyF(x, tmp_rows_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
|
||||
A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data());
|
||||
|
||||
// y2 = b - y1
|
||||
tmp_rows_ = ConstVectorRef(b_, num_rows) - tmp_rows_;
|
||||
ParallelAssign(options_.context,
|
||||
options_.num_threads,
|
||||
tmp_rows_,
|
||||
ConstVectorRef(b_, num_rows) - tmp_rows_);
|
||||
|
||||
// y3 = E' y2
|
||||
tmp_e_cols_.setZero();
|
||||
A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
|
||||
A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data());
|
||||
|
||||
// y = (E'E)^-1 y3
|
||||
VectorRef(y, num_cols).setZero();
|
||||
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y);
|
||||
ParallelSetZero(options_.context, options_.num_threads, y, num_cols);
|
||||
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(
|
||||
tmp_e_cols_.data(), y, options_.context, options_.num_threads);
|
||||
|
||||
// The full solution vector y has two blocks. The first block of
|
||||
// variables corresponds to the eliminated variables, which we just
|
||||
// computed via back substitution. The second block of variables
|
||||
// corresponds to the Schur complement system, so we just copy those
|
||||
// values from the solution to the Schur complement.
|
||||
VectorRef(y + num_cols_e, num_cols_f) = ConstVectorRef(x, num_cols_f);
|
||||
VectorRef y_cols_f(y + num_cols_e, num_cols_f);
|
||||
ParallelAssign(options_.context,
|
||||
options_.num_threads,
|
||||
y_cols_f,
|
||||
ConstVectorRef(x, num_cols_f));
|
||||
}
|
||||
|
||||
// Compute the RHS of the Schur complement system.
|
||||
@@ -193,24 +250,29 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) {
|
||||
// this using a series of matrix vector products.
|
||||
void ImplicitSchurComplement::UpdateRhs() {
|
||||
// y1 = E'b
|
||||
tmp_e_cols_.setZero();
|
||||
A_->LeftMultiplyE(b_, tmp_e_cols_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_);
|
||||
A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data());
|
||||
|
||||
// y2 = (E'E)^-1 y1
|
||||
Vector y2 = Vector::Zero(A_->num_cols_e());
|
||||
block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y2.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_);
|
||||
block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(),
|
||||
tmp_e_cols_2_.data(),
|
||||
options_.context,
|
||||
options_.num_threads);
|
||||
|
||||
// y3 = E y2
|
||||
tmp_rows_.setZero();
|
||||
A_->RightMultiplyE(y2.data(), tmp_rows_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, tmp_rows_);
|
||||
A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data());
|
||||
|
||||
// y3 = b - y3
|
||||
tmp_rows_ = ConstVectorRef(b_, A_->num_rows()) - tmp_rows_;
|
||||
ParallelAssign(options_.context,
|
||||
options_.num_threads,
|
||||
tmp_rows_,
|
||||
ConstVectorRef(b_, A_->num_rows()) - tmp_rows_);
|
||||
|
||||
// rhs = F' y3
|
||||
rhs_.setZero();
|
||||
A_->LeftMultiplyF(tmp_rows_.data(), rhs_.data());
|
||||
ParallelSetZero(options_.context, options_.num_threads, rhs_);
|
||||
A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data());
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,8 +44,7 @@
|
||||
#include "ceres/partitioned_matrix_view.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
|
||||
@@ -82,13 +81,13 @@ class BlockSparseMatrix;
|
||||
// (which for our purposes is an easily inverted block diagonal
|
||||
// matrix), it can be done in terms of matrix vector products with E,
|
||||
// F and (E'E)^-1. This class implements this functionality and other
|
||||
// auxilliary bits needed to implement a CG solver on the Schur
|
||||
// auxiliary bits needed to implement a CG solver on the Schur
|
||||
// complement using the PartitionedMatrixView object.
|
||||
//
|
||||
// THREAD SAFETY: This class is nqot thread safe. In particular, the
|
||||
// RightMultiply (and the LeftMultiply) methods are not thread safe as
|
||||
// they depend on mutable arrays used for the temporaries needed to
|
||||
// compute the product y += Sx;
|
||||
// THREAD SAFETY: This class is not thread safe. In particular, the
|
||||
// RightMultiplyAndAccumulate (and the LeftMultiplyAndAccumulate) methods are
|
||||
// not thread safe as they depend on mutable arrays used for the temporaries
|
||||
// needed to compute the product y += Sx;
|
||||
class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
|
||||
public:
|
||||
// num_eliminate_blocks is the number of E blocks in the matrix
|
||||
@@ -115,14 +114,20 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
|
||||
void Init(const BlockSparseMatrix& A, const double* D, const double* b);
|
||||
|
||||
// y += Sx, where S is the Schur complement.
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
|
||||
// The Schur complement is a symmetric positive definite matrix,
|
||||
// thus the left and right multiply operators are the same.
|
||||
void LeftMultiply(const double* x, double* y) const final {
|
||||
RightMultiply(x, y);
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final {
|
||||
RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
// Following is useful for approximation of S^-1 via power series expansion.
|
||||
// Z = (F'F)^-1 F'E (E'E)^-1 E'F
|
||||
// y += Zx
|
||||
void InversePowerSeriesOperatorRightMultiplyAccumulate(const double* x,
|
||||
double* y) const;
|
||||
|
||||
// y = (E'E)^-1 (E'b - E'F x). Given an estimate of the solution to
|
||||
// the Schur complement system, this method computes the value of
|
||||
// the e_block variables that were eliminated to form the Schur
|
||||
@@ -138,6 +143,7 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
|
||||
}
|
||||
|
||||
const BlockSparseMatrix* block_diagonal_FtF_inverse() const {
|
||||
CHECK(compute_ftf_inverse_);
|
||||
return block_diagonal_FtF_inverse_.get();
|
||||
}
|
||||
|
||||
@@ -146,25 +152,24 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator {
|
||||
void UpdateRhs();
|
||||
|
||||
const LinearSolver::Options& options_;
|
||||
|
||||
bool compute_ftf_inverse_ = false;
|
||||
std::unique_ptr<PartitionedMatrixViewBase> A_;
|
||||
const double* D_;
|
||||
const double* b_;
|
||||
const double* D_ = nullptr;
|
||||
const double* b_ = nullptr;
|
||||
|
||||
std::unique_ptr<BlockSparseMatrix> block_diagonal_EtE_inverse_;
|
||||
std::unique_ptr<BlockSparseMatrix> block_diagonal_FtF_inverse_;
|
||||
|
||||
Vector rhs_;
|
||||
|
||||
// Temporary storage vectors used to implement RightMultiply.
|
||||
// Temporary storage vectors used to implement RightMultiplyAndAccumulate.
|
||||
mutable Vector tmp_rows_;
|
||||
mutable Vector tmp_e_cols_;
|
||||
mutable Vector tmp_e_cols_2_;
|
||||
mutable Vector tmp_f_cols_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
|
||||
#include "ceres/small_blas.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Create the CompressedRowSparseMatrix matrix that will contain the
|
||||
// inner product.
|
||||
@@ -52,16 +51,9 @@ InnerProductComputer::CreateResultMatrix(
|
||||
auto matrix = std::make_unique<CompressedRowSparseMatrix>(
|
||||
m_.num_cols(), m_.num_cols(), num_nonzeros);
|
||||
matrix->set_storage_type(storage_type);
|
||||
|
||||
const CompressedRowBlockStructure* bs = m_.block_structure();
|
||||
const std::vector<Block>& blocks = bs->cols;
|
||||
matrix->mutable_row_blocks()->resize(blocks.size());
|
||||
matrix->mutable_col_blocks()->resize(blocks.size());
|
||||
for (int i = 0; i < blocks.size(); ++i) {
|
||||
(*(matrix->mutable_row_blocks()))[i] = blocks[i].size;
|
||||
(*(matrix->mutable_col_blocks()))[i] = blocks[i].size;
|
||||
}
|
||||
|
||||
*matrix->mutable_row_blocks() = bs->cols;
|
||||
*matrix->mutable_col_blocks() = bs->cols;
|
||||
return matrix;
|
||||
}
|
||||
|
||||
@@ -78,6 +70,10 @@ int InnerProductComputer::ComputeNonzeros(
|
||||
row_nnz->resize(blocks.size());
|
||||
std::fill(row_nnz->begin(), row_nnz->end(), 0);
|
||||
|
||||
if (product_terms.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// First product term.
|
||||
(*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size;
|
||||
int num_nonzeros =
|
||||
@@ -130,8 +126,10 @@ std::unique_ptr<InnerProductComputer> InnerProductComputer::Create(
|
||||
const int start_row_block,
|
||||
const int end_row_block,
|
||||
CompressedRowSparseMatrix::StorageType product_storage_type) {
|
||||
CHECK(product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR ||
|
||||
product_storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR);
|
||||
CHECK(product_storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR ||
|
||||
product_storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
|
||||
CHECK_GT(m.num_nonzeros(), 0)
|
||||
<< "Congratulations, you found a bug in Ceres. Please report it.";
|
||||
std::unique_ptr<InnerProductComputer> inner_product_computer(
|
||||
@@ -157,7 +155,8 @@ void InnerProductComputer::Init(
|
||||
for (int c1 = 0; c1 < row.cells.size(); ++c1) {
|
||||
const Cell& cell1 = row.cells[c1];
|
||||
int c2_begin, c2_end;
|
||||
if (product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
|
||||
if (product_storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
|
||||
c2_begin = 0;
|
||||
c2_end = c1 + 1;
|
||||
} else {
|
||||
@@ -195,6 +194,10 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
|
||||
*(crsm_rows + 1) = *crsm_rows + row_block_nnz[i];
|
||||
}
|
||||
}
|
||||
result_offsets_.resize(product_terms.size());
|
||||
if (num_nonzeros == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The following macro FILL_CRSM_COL_BLOCK is key to understanding
|
||||
// how this class works.
|
||||
@@ -241,12 +244,11 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
|
||||
} \
|
||||
}
|
||||
|
||||
result_offsets_.resize(product_terms.size());
|
||||
int col_nnz = 0;
|
||||
int nnz = 0;
|
||||
|
||||
// Process the first term.
|
||||
const InnerProductComputer::ProductTerm* current = &product_terms[0];
|
||||
const InnerProductComputer::ProductTerm* current = product_terms.data();
|
||||
FILL_CRSM_COL_BLOCK;
|
||||
|
||||
// Process the rest of the terms.
|
||||
@@ -264,7 +266,7 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
|
||||
if (previous->row == current->row) {
|
||||
// if the current and previous terms are in the same row block,
|
||||
// then they differ in the column block, in which case advance
|
||||
// col_nnz by the column size of the prevous term.
|
||||
// col_nnz by the column size of the previous term.
|
||||
col_nnz += col_blocks[previous->col].size;
|
||||
} else {
|
||||
// If we have moved to a new row-block , then col_nnz is zero,
|
||||
@@ -302,7 +304,8 @@ void InnerProductComputer::Compute() {
|
||||
rows[bs->cols[cell1.block_id].position];
|
||||
|
||||
int c2_begin, c2_end;
|
||||
if (storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
|
||||
if (storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
|
||||
c2_begin = 0;
|
||||
c2_end = c1 + 1;
|
||||
} else {
|
||||
@@ -330,5 +333,4 @@ void InnerProductComputer::Compute() {
|
||||
CHECK_EQ(cursor, result_offsets_.size());
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// This class is used to repeatedly compute the inner product
|
||||
//
|
||||
@@ -153,8 +152,7 @@ class CERES_NO_EXPORT InnerProductComputer {
|
||||
std::vector<int> result_offsets_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Helper routine to compute the inverse or pseudo-inverse of a
|
||||
// symmetric positive semi-definite matrix.
|
||||
@@ -73,7 +72,6 @@ typename EigenTypes<kSize, kSize>::Matrix InvertPSDMatrix(
|
||||
return svd.solve(MType::Identity(size, size));
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_INVERT_PSD_MATRIX_H_
|
||||
|
||||
8
extern/ceres/internal/ceres/is_close.cc
vendored
8
extern/ceres/internal/ceres/is_close.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,8 +33,7 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
bool IsClose(double x,
|
||||
double y,
|
||||
double relative_precision,
|
||||
@@ -57,5 +56,4 @@ bool IsClose(double x,
|
||||
}
|
||||
return *relative_error < std::fabs(relative_precision);
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/is_close.h
vendored
8
extern/ceres/internal/ceres/is_close.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
// Returns true if x and y have a relative (unsigned) difference less than
|
||||
// relative_precision and false otherwise. Stores the relative and absolute
|
||||
// difference in relative/absolute_error if non-nullptr. If one of the two
|
||||
@@ -48,8 +47,7 @@ CERES_NO_EXPORT bool IsClose(double x,
|
||||
double relative_precision,
|
||||
double* relative_error,
|
||||
double* absolute_error);
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
58
extern/ceres/internal/ceres/iterative_refiner.cc
vendored
58
extern/ceres/internal/ceres/iterative_refiner.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,43 +33,69 @@
|
||||
#include <string>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "ceres/dense_cholesky.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
IterativeRefiner::IterativeRefiner(const int max_num_iterations)
|
||||
SparseIterativeRefiner::SparseIterativeRefiner(const int max_num_iterations)
|
||||
: max_num_iterations_(max_num_iterations) {}
|
||||
|
||||
IterativeRefiner::~IterativeRefiner() = default;
|
||||
SparseIterativeRefiner::~SparseIterativeRefiner() = default;
|
||||
|
||||
void IterativeRefiner::Allocate(int num_cols) {
|
||||
void SparseIterativeRefiner::Allocate(int num_cols) {
|
||||
residual_.resize(num_cols);
|
||||
correction_.resize(num_cols);
|
||||
lhs_x_solution_.resize(num_cols);
|
||||
}
|
||||
|
||||
void IterativeRefiner::Refine(const SparseMatrix& lhs,
|
||||
const double* rhs_ptr,
|
||||
SparseCholesky* sparse_cholesky,
|
||||
double* solution_ptr) {
|
||||
void SparseIterativeRefiner::Refine(const SparseMatrix& lhs,
|
||||
const double* rhs_ptr,
|
||||
SparseCholesky* cholesky,
|
||||
double* solution_ptr) {
|
||||
const int num_cols = lhs.num_cols();
|
||||
Allocate(num_cols);
|
||||
ConstVectorRef rhs(rhs_ptr, num_cols);
|
||||
VectorRef solution(solution_ptr, num_cols);
|
||||
std::string ignored_message;
|
||||
for (int i = 0; i < max_num_iterations_; ++i) {
|
||||
// residual = rhs - lhs * solution
|
||||
lhs_x_solution_.setZero();
|
||||
lhs.RightMultiply(solution_ptr, lhs_x_solution_.data());
|
||||
lhs.RightMultiplyAndAccumulate(solution_ptr, lhs_x_solution_.data());
|
||||
residual_ = rhs - lhs_x_solution_;
|
||||
// solution += lhs^-1 residual
|
||||
std::string ignored_message;
|
||||
sparse_cholesky->Solve(
|
||||
residual_.data(), correction_.data(), &ignored_message);
|
||||
cholesky->Solve(residual_.data(), correction_.data(), &ignored_message);
|
||||
solution += correction_;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
DenseIterativeRefiner::DenseIterativeRefiner(const int max_num_iterations)
|
||||
: max_num_iterations_(max_num_iterations) {}
|
||||
|
||||
DenseIterativeRefiner::~DenseIterativeRefiner() = default;
|
||||
|
||||
void DenseIterativeRefiner::Allocate(int num_cols) {
|
||||
residual_.resize(num_cols);
|
||||
correction_.resize(num_cols);
|
||||
}
|
||||
|
||||
void DenseIterativeRefiner::Refine(const int num_cols,
|
||||
const double* lhs_ptr,
|
||||
const double* rhs_ptr,
|
||||
DenseCholesky* cholesky,
|
||||
double* solution_ptr) {
|
||||
Allocate(num_cols);
|
||||
ConstMatrixRef lhs(lhs_ptr, num_cols, num_cols);
|
||||
ConstVectorRef rhs(rhs_ptr, num_cols);
|
||||
VectorRef solution(solution_ptr, num_cols);
|
||||
std::string ignored_message;
|
||||
for (int i = 0; i < max_num_iterations_; ++i) {
|
||||
residual_ = rhs - lhs * solution;
|
||||
// solution += lhs^-1 residual
|
||||
cholesky->Solve(residual_.data(), correction_.data(), &ignored_message);
|
||||
solution += correction_;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
54
extern/ceres/internal/ceres/iterative_refiner.h
vendored
54
extern/ceres/internal/ceres/iterative_refiner.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,9 +39,9 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class DenseCholesky;
|
||||
class SparseCholesky;
|
||||
class SparseMatrix;
|
||||
|
||||
@@ -58,20 +58,20 @@ class SparseMatrix;
|
||||
// Definite linear systems.
|
||||
//
|
||||
// The above iterative loop is run until max_num_iterations is reached.
|
||||
class CERES_NO_EXPORT IterativeRefiner {
|
||||
class CERES_NO_EXPORT SparseIterativeRefiner {
|
||||
public:
|
||||
// max_num_iterations is the number of refinement iterations to
|
||||
// perform.
|
||||
explicit IterativeRefiner(int max_num_iterations);
|
||||
explicit SparseIterativeRefiner(int max_num_iterations);
|
||||
|
||||
// Needed for mocking.
|
||||
virtual ~IterativeRefiner();
|
||||
virtual ~SparseIterativeRefiner();
|
||||
|
||||
// Given an initial estimate of the solution of lhs * x = rhs, use
|
||||
// max_num_iterations rounds of iterative refinement to improve it.
|
||||
//
|
||||
// sparse_cholesky is assumed to contain an already computed
|
||||
// factorization (or approximation thereof) of lhs.
|
||||
// cholesky is assumed to contain an already computed factorization (or
|
||||
// an approximation thereof) of lhs.
|
||||
//
|
||||
// solution is expected to contain a approximation to the solution
|
||||
// to lhs * x = rhs. It can be zero.
|
||||
@@ -79,7 +79,7 @@ class CERES_NO_EXPORT IterativeRefiner {
|
||||
// This method is virtual to facilitate mocking.
|
||||
virtual void Refine(const SparseMatrix& lhs,
|
||||
const double* rhs,
|
||||
SparseCholesky* sparse_cholesky,
|
||||
SparseCholesky* cholesky,
|
||||
double* solution);
|
||||
|
||||
private:
|
||||
@@ -91,7 +91,39 @@ class CERES_NO_EXPORT IterativeRefiner {
|
||||
Vector lhs_x_solution_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
class CERES_NO_EXPORT DenseIterativeRefiner {
|
||||
public:
|
||||
// max_num_iterations is the number of refinement iterations to
|
||||
// perform.
|
||||
explicit DenseIterativeRefiner(int max_num_iterations);
|
||||
|
||||
// Needed for mocking.
|
||||
virtual ~DenseIterativeRefiner();
|
||||
|
||||
// Given an initial estimate of the solution of lhs * x = rhs, use
|
||||
// max_num_iterations rounds of iterative refinement to improve it.
|
||||
//
|
||||
// cholesky is assumed to contain an already computed factorization (or
|
||||
// an approximation thereof) of lhs.
|
||||
//
|
||||
// solution is expected to contain a approximation to the solution
|
||||
// to lhs * x = rhs. It can be zero.
|
||||
//
|
||||
// This method is virtual to facilitate mocking.
|
||||
virtual void Refine(int num_cols,
|
||||
const double* lhs,
|
||||
const double* rhs,
|
||||
DenseCholesky* cholesky,
|
||||
double* solution);
|
||||
|
||||
private:
|
||||
void Allocate(int num_cols);
|
||||
|
||||
int max_num_iterations_;
|
||||
Vector residual_;
|
||||
Vector correction_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_ITERATIVE_REFINER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,7 @@
|
||||
#include "ceres/implicit_schur_complement.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/power_series_expansion_preconditioner.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
#include "ceres/schur_jacobi_preconditioner.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
@@ -51,8 +52,7 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
IterativeSchurComplementSolver::IterativeSchurComplementSolver(
|
||||
LinearSolver::Options options)
|
||||
@@ -68,6 +68,8 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
|
||||
EventLogger event_logger("IterativeSchurComplementSolver::Solve");
|
||||
|
||||
CHECK(A->block_structure() != nullptr);
|
||||
CHECK(A->transpose_block_structure() != nullptr);
|
||||
|
||||
const int num_eliminate_blocks = options_.elimination_groups[0];
|
||||
// Initialize a ImplicitSchurComplement object.
|
||||
if (schur_complement_ == nullptr) {
|
||||
@@ -86,45 +88,66 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
|
||||
VLOG(2) << "No parameter blocks left in the schur complement.";
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
schur_complement_->BackSubstitute(nullptr, x);
|
||||
return summary;
|
||||
}
|
||||
|
||||
// Initialize the solution to the Schur complement system to zero.
|
||||
// Initialize the solution to the Schur complement system.
|
||||
reduced_linear_system_solution_.resize(schur_complement_->num_rows());
|
||||
reduced_linear_system_solution_.setZero();
|
||||
|
||||
LinearSolver::Options cg_options;
|
||||
cg_options.min_num_iterations = options_.min_num_iterations;
|
||||
cg_options.max_num_iterations = options_.max_num_iterations;
|
||||
ConjugateGradientsSolver cg_solver(cg_options);
|
||||
|
||||
LinearSolver::PerSolveOptions cg_per_solve_options;
|
||||
cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
if (options_.use_spse_initialization) {
|
||||
Preconditioner::Options preconditioner_options(options_);
|
||||
preconditioner_options.type = SCHUR_POWER_SERIES_EXPANSION;
|
||||
PowerSeriesExpansionPreconditioner pse_solver(
|
||||
schur_complement_.get(),
|
||||
options_.max_num_spse_iterations,
|
||||
options_.spse_tolerance,
|
||||
preconditioner_options);
|
||||
pse_solver.RightMultiplyAndAccumulate(
|
||||
schur_complement_->rhs().data(),
|
||||
reduced_linear_system_solution_.data());
|
||||
}
|
||||
|
||||
CreatePreconditioner(A);
|
||||
if (preconditioner_.get() != nullptr) {
|
||||
if (preconditioner_ != nullptr) {
|
||||
if (!preconditioner_->Update(*A, per_solve_options.D)) {
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
summary.termination_type = LinearSolverTerminationType::FAILURE;
|
||||
summary.message = "Preconditioner update failed.";
|
||||
return summary;
|
||||
}
|
||||
|
||||
cg_per_solve_options.preconditioner = preconditioner_.get();
|
||||
}
|
||||
|
||||
ConjugateGradientsSolverOptions cg_options;
|
||||
cg_options.min_num_iterations = options_.min_num_iterations;
|
||||
cg_options.max_num_iterations = options_.max_num_iterations;
|
||||
cg_options.residual_reset_period = options_.residual_reset_period;
|
||||
cg_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
cg_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
|
||||
LinearOperatorAdapter lhs(*schur_complement_);
|
||||
LinearOperatorAdapter preconditioner(*preconditioner_);
|
||||
|
||||
Vector scratch[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
scratch[i].resize(schur_complement_->num_cols());
|
||||
}
|
||||
Vector* scratch_ptr[4] = {&scratch[0], &scratch[1], &scratch[2], &scratch[3]};
|
||||
|
||||
event_logger.AddEvent("Setup");
|
||||
|
||||
LinearSolver::Summary summary =
|
||||
cg_solver.Solve(schur_complement_.get(),
|
||||
schur_complement_->rhs().data(),
|
||||
cg_per_solve_options,
|
||||
reduced_linear_system_solution_.data());
|
||||
if (summary.termination_type != LINEAR_SOLVER_FAILURE &&
|
||||
summary.termination_type != LINEAR_SOLVER_FATAL_ERROR) {
|
||||
ConjugateGradientsSolver(cg_options,
|
||||
lhs,
|
||||
schur_complement_->rhs(),
|
||||
preconditioner,
|
||||
scratch_ptr,
|
||||
reduced_linear_system_solution_);
|
||||
|
||||
if (summary.termination_type != LinearSolverTerminationType::FAILURE &&
|
||||
summary.termination_type != LinearSolverTerminationType::FATAL_ERROR) {
|
||||
schur_complement_->BackSubstitute(reduced_linear_system_solution_.data(),
|
||||
x);
|
||||
}
|
||||
@@ -134,29 +157,31 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
|
||||
|
||||
void IterativeSchurComplementSolver::CreatePreconditioner(
|
||||
BlockSparseMatrix* A) {
|
||||
if (options_.preconditioner_type == IDENTITY ||
|
||||
preconditioner_.get() != nullptr) {
|
||||
if (preconditioner_ != nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
Preconditioner::Options preconditioner_options;
|
||||
preconditioner_options.type = options_.preconditioner_type;
|
||||
preconditioner_options.visibility_clustering_type =
|
||||
options_.visibility_clustering_type;
|
||||
preconditioner_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
preconditioner_options.num_threads = options_.num_threads;
|
||||
preconditioner_options.row_block_size = options_.row_block_size;
|
||||
preconditioner_options.e_block_size = options_.e_block_size;
|
||||
preconditioner_options.f_block_size = options_.f_block_size;
|
||||
preconditioner_options.elimination_groups = options_.elimination_groups;
|
||||
Preconditioner::Options preconditioner_options(options_);
|
||||
CHECK(options_.context != nullptr);
|
||||
preconditioner_options.context = options_.context;
|
||||
|
||||
switch (options_.preconditioner_type) {
|
||||
case IDENTITY:
|
||||
preconditioner_ = std::make_unique<IdentityPreconditioner>(
|
||||
schur_complement_->num_cols());
|
||||
break;
|
||||
case JACOBI:
|
||||
preconditioner_ = std::make_unique<SparseMatrixPreconditionerWrapper>(
|
||||
schur_complement_->block_diagonal_FtF_inverse());
|
||||
schur_complement_->block_diagonal_FtF_inverse(),
|
||||
preconditioner_options);
|
||||
break;
|
||||
case SCHUR_POWER_SERIES_EXPANSION:
|
||||
// Ignoring the value of spse_tolerance to ensure preconditioner stays
|
||||
// fixed during the iterations of cg.
|
||||
preconditioner_ = std::make_unique<PowerSeriesExpansionPreconditioner>(
|
||||
schur_complement_.get(),
|
||||
options_.max_num_spse_iterations,
|
||||
0,
|
||||
preconditioner_options);
|
||||
break;
|
||||
case SCHUR_JACOBI:
|
||||
preconditioner_ = std::make_unique<SchurJacobiPreconditioner>(
|
||||
@@ -172,5 +197,4 @@ void IterativeSchurComplementSolver::CreatePreconditioner(
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
class ImplicitSchurComplement;
|
||||
@@ -53,7 +52,7 @@ class Preconditioner;
|
||||
// The algorithm used by this solver was developed in a series of
|
||||
// papers - "Agarwal et al, Bundle Adjustment in the Large, ECCV 2010"
|
||||
// and "Wu et al, Multicore Bundle Adjustment, submitted to CVPR
|
||||
// 2011" at the Univeristy of Washington.
|
||||
// 2011" at the University of Washington.
|
||||
//
|
||||
// The key idea is that one can run Conjugate Gradients on the Schur
|
||||
// Complement system without explicitly forming the Schur Complement
|
||||
@@ -94,8 +93,7 @@ class CERES_NO_EXPORT IterativeSchurComplementSolver final
|
||||
Vector reduced_linear_system_solution_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,13 +38,13 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/linear_least_squares_problems.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
LevenbergMarquardtStrategy::LevenbergMarquardtStrategy(
|
||||
const TrustRegionStrategy::Options& options)
|
||||
@@ -54,7 +54,9 @@ LevenbergMarquardtStrategy::LevenbergMarquardtStrategy(
|
||||
min_diagonal_(options.min_lm_diagonal),
|
||||
max_diagonal_(options.max_lm_diagonal),
|
||||
decrease_factor_(2.0),
|
||||
reuse_diagonal_(false) {
|
||||
reuse_diagonal_(false),
|
||||
context_(options.context),
|
||||
num_threads_(options.num_threads) {
|
||||
CHECK(linear_solver_ != nullptr);
|
||||
CHECK_GT(min_diagonal_, 0.0);
|
||||
CHECK_LE(min_diagonal_, max_diagonal_);
|
||||
@@ -78,14 +80,18 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
|
||||
diagonal_.resize(num_parameters, 1);
|
||||
}
|
||||
|
||||
jacobian->SquaredColumnNorm(diagonal_.data());
|
||||
for (int i = 0; i < num_parameters; ++i) {
|
||||
diagonal_[i] =
|
||||
std::min(std::max(diagonal_[i], min_diagonal_), max_diagonal_);
|
||||
}
|
||||
jacobian->SquaredColumnNorm(diagonal_.data(), context_, num_threads_);
|
||||
ParallelAssign(context_,
|
||||
num_threads_,
|
||||
diagonal_,
|
||||
diagonal_.array().max(min_diagonal_).min(max_diagonal_));
|
||||
}
|
||||
|
||||
lm_diagonal_ = (diagonal_ / radius_).array().sqrt();
|
||||
if (lm_diagonal_.size() == 0) {
|
||||
lm_diagonal_.resize(num_parameters);
|
||||
}
|
||||
ParallelAssign(
|
||||
context_, num_threads_, lm_diagonal_, (diagonal_ / radius_).cwiseSqrt());
|
||||
|
||||
LinearSolver::PerSolveOptions solve_options;
|
||||
solve_options.D = lm_diagonal_.data();
|
||||
@@ -99,7 +105,7 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
|
||||
// Invalidate the output array lm_step, so that we can detect if
|
||||
// the linear solver generated numerical garbage. This is known
|
||||
// to happen for the DENSE_QR and then DENSE_SCHUR solver when
|
||||
// the Jacobin is severely rank deficient and mu is too small.
|
||||
// the Jacobian is severely rank deficient and mu is too small.
|
||||
InvalidateArray(num_parameters, step);
|
||||
|
||||
// Instead of solving Jx = -r, solve Jy = r.
|
||||
@@ -108,17 +114,21 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep(
|
||||
LinearSolver::Summary linear_solver_summary =
|
||||
linear_solver_->Solve(jacobian, residuals, solve_options, step);
|
||||
|
||||
if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
|
||||
if (linear_solver_summary.termination_type ==
|
||||
LinearSolverTerminationType::FATAL_ERROR) {
|
||||
LOG(WARNING) << "Linear solver fatal error: "
|
||||
<< linear_solver_summary.message;
|
||||
} else if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE) {
|
||||
} else if (linear_solver_summary.termination_type ==
|
||||
LinearSolverTerminationType::FAILURE) {
|
||||
LOG(WARNING) << "Linear solver failure. Failed to compute a step: "
|
||||
<< linear_solver_summary.message;
|
||||
} else if (!IsArrayValid(num_parameters, step)) {
|
||||
LOG(WARNING) << "Linear solver failure. Failed to compute a finite step.";
|
||||
linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE;
|
||||
linear_solver_summary.termination_type =
|
||||
LinearSolverTerminationType::FAILURE;
|
||||
} else {
|
||||
VectorRef(step, num_parameters) *= -1.0;
|
||||
VectorRef step_vec(step, num_parameters);
|
||||
ParallelAssign(context_, num_threads_, step_vec, -step_vec);
|
||||
}
|
||||
reuse_diagonal_ = true;
|
||||
|
||||
@@ -153,7 +163,7 @@ void LevenbergMarquardtStrategy::StepAccepted(double step_quality) {
|
||||
reuse_diagonal_ = false;
|
||||
}
|
||||
|
||||
void LevenbergMarquardtStrategy::StepRejected(double step_quality) {
|
||||
void LevenbergMarquardtStrategy::StepRejected(double /*step_quality*/) {
|
||||
radius_ = radius_ / decrease_factor_;
|
||||
decrease_factor_ *= 2.0;
|
||||
reuse_diagonal_ = true;
|
||||
@@ -161,5 +171,4 @@ void LevenbergMarquardtStrategy::StepRejected(double step_quality) {
|
||||
|
||||
// Returns the current trust region radius stored in radius_.
double LevenbergMarquardtStrategy::Radius() const { return radius_; }
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,9 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ContextImpl;
|
||||
|
||||
// Levenberg-Marquardt step computation and trust region sizing
|
||||
// strategy based on "Methods for Nonlinear Least Squares" by
|
||||
@@ -82,10 +83,11 @@ class CERES_NO_EXPORT LevenbergMarquardtStrategy final
|
||||
// allocations in every iteration and reuse when a step fails and
|
||||
// ComputeStep is called again.
|
||||
Vector lm_diagonal_; // lm_diagonal_ = sqrt(diagonal_ / radius_);
|
||||
ContextImpl* context_;
|
||||
int num_threads_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
46
extern/ceres/internal/ceres/line_search.cc
vendored
46
extern/ceres/internal/ceres/line_search.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,8 +33,11 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
#include <iostream> // NOLINT
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <ostream> // NOLINT
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/evaluator.h"
|
||||
#include "ceres/function_sample.h"
|
||||
@@ -45,23 +48,17 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::map;
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
// Precision used for floating point values in error message output.
|
||||
const int kErrorMessageNumericPrecision = 8;
|
||||
} // namespace
|
||||
|
||||
ostream& operator<<(ostream& os, const FunctionSample& sample);
|
||||
std::ostream& operator<<(std::ostream& os, const FunctionSample& sample);
|
||||
|
||||
// Convenience stream operator for pushing FunctionSamples into log messages.
|
||||
ostream& operator<<(ostream& os, const FunctionSample& sample) {
|
||||
std::ostream& operator<<(std::ostream& os, const FunctionSample& sample) {
|
||||
os << sample.ToDebugString();
|
||||
return os;
|
||||
}
|
||||
@@ -74,16 +71,16 @@ LineSearch::LineSearch(const LineSearch::Options& options)
|
||||
std::unique_ptr<LineSearch> LineSearch::Create(
|
||||
const LineSearchType line_search_type,
|
||||
const LineSearch::Options& options,
|
||||
string* error) {
|
||||
std::string* error) {
|
||||
switch (line_search_type) {
|
||||
case ceres::ARMIJO:
|
||||
return std::make_unique<ArmijoLineSearch>(options);
|
||||
case ceres::WOLFE:
|
||||
return std::make_unique<WolfeLineSearch>(options);
|
||||
default:
|
||||
*error = string("Invalid line search algorithm type: ") +
|
||||
*error = std::string("Invalid line search algorithm type: ") +
|
||||
LineSearchTypeToString(line_search_type) +
|
||||
string(", unable to create line search.");
|
||||
std::string(", unable to create line search.");
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -150,7 +147,7 @@ double LineSearchFunction::DirectionInfinityNorm() const {
|
||||
}
|
||||
|
||||
void LineSearchFunction::ResetTimeStatistics() {
|
||||
const map<string, CallStatistics> evaluator_statistics =
|
||||
const std::map<std::string, CallStatistics> evaluator_statistics =
|
||||
evaluator_->Statistics();
|
||||
|
||||
initial_evaluator_residual_time_in_seconds =
|
||||
@@ -166,7 +163,7 @@ void LineSearchFunction::ResetTimeStatistics() {
|
||||
void LineSearchFunction::TimeStatistics(
|
||||
double* cost_evaluation_time_in_seconds,
|
||||
double* gradient_evaluation_time_in_seconds) const {
|
||||
const map<string, CallStatistics> evaluator_time_statistics =
|
||||
const std::map<std::string, CallStatistics> evaluator_time_statistics =
|
||||
evaluator_->Statistics();
|
||||
*cost_evaluation_time_in_seconds =
|
||||
FindWithDefault(
|
||||
@@ -243,7 +240,7 @@ double LineSearch::InterpolatingPolynomialMinimizingStepSize(
|
||||
|
||||
// Select step size by interpolating the function and gradient values
|
||||
// and minimizing the corresponding polynomial.
|
||||
vector<FunctionSample> samples;
|
||||
std::vector<FunctionSample> samples;
|
||||
samples.push_back(lowerbound);
|
||||
|
||||
if (interpolation_type == QUADRATIC) {
|
||||
@@ -427,7 +424,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
|
||||
// shrank the bracket width until it was below our minimum tolerance.
|
||||
// As these are 'artificial' constraints, and we would otherwise fail to
|
||||
// produce a valid point when ArmijoLineSearch would succeed, we return the
|
||||
// point with the lowest cost found thus far which satsifies the Armijo
|
||||
// point with the lowest cost found thus far which satisfies the Armijo
|
||||
// condition (but not the Wolfe conditions).
|
||||
summary->optimal_point = bracket_low;
|
||||
summary->success = true;
|
||||
@@ -449,8 +446,8 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
|
||||
// defined by bracket_low & bracket_high, which satisfy:
|
||||
//
|
||||
// 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x
|
||||
// contains step sizes that satsify the strong Wolfe conditions.
|
||||
// 2. bracket_low.x is of all the step sizes evaluated *which satisifed the
|
||||
// contains step sizes that satisfy the strong Wolfe conditions.
|
||||
// 2. bracket_low.x is of all the step sizes evaluated *which satisfied the
|
||||
// Armijo sufficient decrease condition*, the one which generated the
|
||||
// smallest function value, i.e. bracket_low.value <
|
||||
// f(all other steps satisfying Armijo).
|
||||
@@ -494,7 +491,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate,
|
||||
// Or, searching was stopped due to an 'artificial' constraint, i.e. not
|
||||
// a condition imposed / required by the underlying algorithm, but instead an
|
||||
// engineering / implementation consideration. But a step which exceeds the
|
||||
// minimum step size, and satsifies the Armijo condition was still found,
|
||||
// minimum step size, and satisfies the Armijo condition was still found,
|
||||
// and should thus be used [zoom not required].
|
||||
//
|
||||
// Returns false if no step size > minimum step size was found which
|
||||
@@ -518,7 +515,7 @@ bool WolfeLineSearch::BracketingPhase(const FunctionSample& initial_position,
|
||||
// As we require the gradient to evaluate the Wolfe condition, we always
|
||||
// calculate it together with the value, irrespective of the interpolation
|
||||
// type. As opposed to only calculating the gradient after the Armijo
|
||||
// condition is satisifed, as the computational saving from this approach
|
||||
// condition is satisfied, as the computational saving from this approach
|
||||
// would be slight (perhaps even negative due to the extra call). Also,
|
||||
// always calculating the value & gradient together protects against us
|
||||
// reporting invalid solutions if the cost function returns slightly different
|
||||
@@ -821,7 +818,7 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
|
||||
// As we require the gradient to evaluate the Wolfe condition, we always
|
||||
// calculate it together with the value, irrespective of the interpolation
|
||||
// type. As opposed to only calculating the gradient after the Armijo
|
||||
// condition is satisifed, as the computational saving from this approach
|
||||
// condition is satisfied, as the computational saving from this approach
|
||||
// would be slight (perhaps even negative due to the extra call). Also,
|
||||
// always calculating the value & gradient together protects against us
|
||||
// reporting invalid solutions if the cost function returns slightly
|
||||
@@ -883,5 +880,4 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/line_search.h
vendored
8
extern/ceres/internal/ceres/line_search.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,8 +42,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Evaluator;
|
||||
class LineSearchFunction;
|
||||
@@ -302,7 +301,6 @@ class CERES_NO_EXPORT WolfeLineSearch final : public LineSearch {
|
||||
Summary* summary) const final;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_LINE_SEARCH_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,12 +38,11 @@
|
||||
#include "ceres/low_rank_inverse_hessian.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT SteepestDescent final : public LineSearchDirection {
|
||||
public:
|
||||
bool NextDirection(const LineSearchMinimizer::State& previous,
|
||||
bool NextDirection(const LineSearchMinimizer::State& /*previous*/,
|
||||
const LineSearchMinimizer::State& current,
|
||||
Vector* search_direction) override {
|
||||
*search_direction = -current.gradient;
|
||||
@@ -121,8 +120,8 @@ class CERES_NO_EXPORT LBFGS final : public LineSearchDirection {
|
||||
current.gradient - previous.gradient);
|
||||
|
||||
search_direction->setZero();
|
||||
low_rank_inverse_hessian_.RightMultiply(current.gradient.data(),
|
||||
search_direction->data());
|
||||
low_rank_inverse_hessian_.RightMultiplyAndAccumulate(
|
||||
current.gradient.data(), search_direction->data());
|
||||
*search_direction *= -1.0;
|
||||
|
||||
if (search_direction->dot(current.gradient) >= 0.0) {
|
||||
@@ -242,7 +241,7 @@ class CERES_NO_EXPORT BFGS final : public LineSearchDirection {
|
||||
//
|
||||
// The original origin of this rescaling trick is somewhat unclear, the
|
||||
// earliest reference appears to be Oren [1], however it is widely
|
||||
// discussed without specific attributation in various texts including
|
||||
// discussed without specific attribution in various texts including
|
||||
// [2] (p143).
|
||||
//
|
||||
// [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms
|
||||
@@ -367,5 +366,4 @@ std::unique_ptr<LineSearchDirection> LineSearchDirection::Create(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/line_search_minimizer.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT LineSearchDirection {
|
||||
public:
|
||||
@@ -61,7 +60,6 @@ class CERES_NO_EXPORT LineSearchDirection {
|
||||
Vector* search_direction) = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_LINE_SEARCH_DIRECTION_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,7 +30,7 @@
|
||||
//
|
||||
// Generic loop for line search based optimization algorithms.
|
||||
//
|
||||
// This is primarily inpsired by the minFunc packaged written by Mark
|
||||
// This is primarily inspired by the minFunc package written by Mark
|
||||
// Schmidt.
|
||||
//
|
||||
// http://www.di.ens.fr/~mschmidt/Software/minFunc.html
|
||||
@@ -59,8 +59,7 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
|
||||
bool EvaluateGradientNorms(Evaluator* evaluator,
|
||||
@@ -473,5 +472,4 @@ void LineSearchMinimizer::Minimize(const Minimizer::Options& options,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Generic line search minimization algorithm.
|
||||
//
|
||||
@@ -47,7 +46,7 @@ namespace internal {
|
||||
class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer {
|
||||
public:
|
||||
struct State {
|
||||
State(int num_parameters, int num_effective_parameters)
|
||||
State(int /*num_parameters*/, int num_effective_parameters)
|
||||
: cost(0.0),
|
||||
gradient(num_effective_parameters),
|
||||
gradient_squared_norm(0.0),
|
||||
@@ -69,7 +68,6 @@ class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer {
|
||||
Solver::Summary* summary) final;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_LINE_SEARCH_MINIMIZER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,8 +41,7 @@
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
|
||||
bool IsProgramValid(const Program& program, std::string* error) {
|
||||
@@ -102,5 +101,4 @@ bool LineSearchPreprocessor::Preprocess(const Solver::Options& options,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preprocessor.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor {
|
||||
public:
|
||||
@@ -45,8 +44,7 @@ class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor {
|
||||
PreprocessedProblem* preprocessed_problem) final;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,10 +44,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::string;
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem>
|
||||
CreateLinearLeastSquaresProblemFromId(int id) {
|
||||
@@ -62,6 +59,10 @@ CreateLinearLeastSquaresProblemFromId(int id) {
|
||||
return LinearLeastSquaresProblem3();
|
||||
case 4:
|
||||
return LinearLeastSquaresProblem4();
|
||||
case 5:
|
||||
return LinearLeastSquaresProblem5();
|
||||
case 6:
|
||||
return LinearLeastSquaresProblem6();
|
||||
default:
|
||||
LOG(FATAL) << "Unknown problem id requested " << id;
|
||||
}
|
||||
@@ -87,8 +88,7 @@ x_D = [1.78448275;
|
||||
2.82327586;]
|
||||
*/
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem0() {
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
std::make_unique<LinearLeastSquaresProblem>();
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
auto A = std::make_unique<TripletSparseMatrix>(3, 2, 6);
|
||||
problem->b = std::make_unique<double[]>(3);
|
||||
@@ -161,13 +161,15 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem0() {
|
||||
12 0 1 17 1
|
||||
0 30 1 1 37]
|
||||
|
||||
cond(A'A) = 200.36
|
||||
|
||||
S = [ 42.3419 -1.4000 -11.5806
|
||||
-1.4000 2.6000 1.0000
|
||||
-11.5806 1.0000 31.1935]
|
||||
|
||||
r = [ 4.3032
|
||||
5.4000
|
||||
5.0323]
|
||||
4.0323]
|
||||
|
||||
S\r = [ 0.2102
|
||||
2.1367
|
||||
@@ -187,14 +189,21 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem1() {
|
||||
int num_rows = 6;
|
||||
int num_cols = 5;
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
std::make_unique<LinearLeastSquaresProblem>();
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
auto A = std::make_unique<TripletSparseMatrix>(
|
||||
num_rows, num_cols, num_rows * num_cols);
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 2;
|
||||
|
||||
problem->x = std::make_unique<double[]>(num_cols);
|
||||
problem->x[0] = -2.3061;
|
||||
problem->x[1] = 0.3172;
|
||||
problem->x[2] = 0.2102;
|
||||
problem->x[3] = 2.1367;
|
||||
problem->x[4] = 0.1388;
|
||||
|
||||
int* rows = A->mutable_rows();
|
||||
int* cols = A->mutable_cols();
|
||||
double* values = A->mutable_values();
|
||||
@@ -292,16 +301,21 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem2() {
|
||||
int num_rows = 6;
|
||||
int num_cols = 5;
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
std::make_unique<LinearLeastSquaresProblem>();
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 2;
|
||||
|
||||
problem->x = std::make_unique<double[]>(num_cols);
|
||||
problem->x[0] = -2.3061;
|
||||
problem->x[1] = 0.3172;
|
||||
problem->x[2] = 0.2102;
|
||||
problem->x[3] = 2.1367;
|
||||
problem->x[4] = 0.1388;
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
std::unique_ptr<double[]> values =
|
||||
std::make_unique<double[]>(num_rows * num_cols);
|
||||
auto values = std::make_unique<double[]>(num_rows * num_cols);
|
||||
|
||||
for (int c = 0; c < num_cols; ++c) {
|
||||
bs->cols.emplace_back();
|
||||
@@ -427,16 +441,14 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem3() {
|
||||
int num_rows = 5;
|
||||
int num_cols = 2;
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
std::make_unique<LinearLeastSquaresProblem>();
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 2;
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
std::unique_ptr<double[]> values =
|
||||
std::make_unique<double[]>(num_rows * num_cols);
|
||||
auto values = std::make_unique<double[]>(num_rows * num_cols);
|
||||
|
||||
for (int c = 0; c < num_cols; ++c) {
|
||||
bs->cols.emplace_back();
|
||||
@@ -536,16 +548,14 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4() {
|
||||
int num_rows = 3;
|
||||
int num_cols = 7;
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem> problem =
|
||||
std::make_unique<LinearLeastSquaresProblem>();
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 1;
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
std::unique_ptr<double[]> values =
|
||||
std::make_unique<double[]>(num_rows * num_cols);
|
||||
auto values = std::make_unique<double[]>(num_rows * num_cols);
|
||||
|
||||
// Column block structure
|
||||
bs->cols.emplace_back();
|
||||
@@ -614,12 +624,313 @@ std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4() {
|
||||
return problem;
|
||||
}
|
||||
|
||||
/*
|
||||
A problem with block-diagonal F'F.
|
||||
|
||||
A = [-1 0 | 0 0 2
|
||||
3 0 | 0 0 4
|
||||
0 -1 | 0 1 0
|
||||
0 -3 | 0 1 0
|
||||
0 -1 | 3 0 0
|
||||
0 -2 | 1 0 0]
|
||||
|
||||
b = [0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5]
|
||||
|
||||
c = A'* b = [ 3
|
||||
-25
|
||||
17
|
||||
5
|
||||
4]
|
||||
|
||||
A'A = [10 0 0 0 10
|
||||
0 15 -5 -4 0
|
||||
0 -5 10 0 0
|
||||
0 -4 0 2 0
|
||||
10 0 0 0 20]
|
||||
|
||||
cond(A'A) = 41.402
|
||||
|
||||
S = [ 8.3333 -1.3333 0
|
||||
-1.3333 0.9333 0
|
||||
0 0 10.0000]
|
||||
|
||||
r = [ 8.6667
|
||||
-1.6667
|
||||
1.0000]
|
||||
|
||||
S\r = [ 0.9778
|
||||
-0.3889
|
||||
0.1000]
|
||||
|
||||
A\b = [ 0.2
|
||||
-1.4444
|
||||
0.9777
|
||||
-0.3888
|
||||
0.1]
|
||||
*/
|
||||
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem5() {
|
||||
int num_rows = 6;
|
||||
int num_cols = 5;
|
||||
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 2;
|
||||
|
||||
// TODO: add x
|
||||
problem->x = std::make_unique<double[]>(num_cols);
|
||||
problem->x[0] = 0.2;
|
||||
problem->x[1] = -1.4444;
|
||||
problem->x[2] = 0.9777;
|
||||
problem->x[3] = -0.3888;
|
||||
problem->x[4] = 0.1;
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
auto values = std::make_unique<double[]>(num_rows * num_cols);
|
||||
|
||||
for (int c = 0; c < num_cols; ++c) {
|
||||
bs->cols.emplace_back();
|
||||
bs->cols.back().size = 1;
|
||||
bs->cols.back().position = c;
|
||||
}
|
||||
|
||||
int nnz = 0;
|
||||
|
||||
// Row 1
|
||||
{
|
||||
values[nnz++] = -1;
|
||||
values[nnz++] = 2;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 0;
|
||||
row.cells.emplace_back(0, 0);
|
||||
row.cells.emplace_back(4, 1);
|
||||
}
|
||||
|
||||
// Row 2
|
||||
{
|
||||
values[nnz++] = 3;
|
||||
values[nnz++] = 4;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 1;
|
||||
row.cells.emplace_back(0, 2);
|
||||
row.cells.emplace_back(4, 3);
|
||||
}
|
||||
|
||||
// Row 3
|
||||
{
|
||||
values[nnz++] = -1;
|
||||
values[nnz++] = 1;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 2;
|
||||
row.cells.emplace_back(1, 4);
|
||||
row.cells.emplace_back(3, 5);
|
||||
}
|
||||
|
||||
// Row 4
|
||||
{
|
||||
values[nnz++] = -3;
|
||||
values[nnz++] = 1;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 3;
|
||||
row.cells.emplace_back(1, 6);
|
||||
row.cells.emplace_back(3, 7);
|
||||
}
|
||||
|
||||
// Row 5
|
||||
{
|
||||
values[nnz++] = -1;
|
||||
values[nnz++] = 3;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 4;
|
||||
row.cells.emplace_back(1, 8);
|
||||
row.cells.emplace_back(2, 9);
|
||||
}
|
||||
|
||||
// Row 6
|
||||
{
|
||||
// values[nnz++] = 2;
|
||||
values[nnz++] = -2;
|
||||
values[nnz++] = 1;
|
||||
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 5;
|
||||
// row.cells.emplace_back(0, 10);
|
||||
row.cells.emplace_back(1, 10);
|
||||
row.cells.emplace_back(2, 11);
|
||||
}
|
||||
|
||||
auto A = std::make_unique<BlockSparseMatrix>(bs);
|
||||
memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values()));
|
||||
|
||||
for (int i = 0; i < num_cols; ++i) {
|
||||
problem->D.get()[i] = 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_rows; ++i) {
|
||||
problem->b.get()[i] = i;
|
||||
}
|
||||
|
||||
problem->A = std::move(A);
|
||||
|
||||
return problem;
|
||||
}
|
||||
|
||||
/*
|
||||
A = [1 2 0 0 0 1 1
|
||||
1 4 0 0 0 5 6
|
||||
3 4 0 0 0 7 8
|
||||
5 6 0 0 0 9 0
|
||||
0 0 9 0 0 3 1]
|
||||
|
||||
b = [0
|
||||
1
|
||||
2
|
||||
3
|
||||
4]
|
||||
*/
|
||||
// BlockSparseMatrix version
|
||||
//
|
||||
// This problem has the unique property that it has two different
|
||||
// sized f-blocks, but only one of them occurs in the rows involving
|
||||
// the one e-block. So performing Schur elimination on this problem
|
||||
// tests the Schur Eliminator's ability to handle non-e-block rows
|
||||
// correctly when their structure does not conform to the static
|
||||
// structure determined by DetectStructure.
|
||||
//
|
||||
// Additionally, this problem has the first row of the last row block of E being
|
||||
// larger than the number of row blocks in E
|
||||
//
|
||||
// NOTE: This problem is too small and rank deficient to be solved without
|
||||
// the diagonal regularization.
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem6() {
|
||||
int num_rows = 5;
|
||||
int num_cols = 7;
|
||||
|
||||
auto problem = std::make_unique<LinearLeastSquaresProblem>();
|
||||
|
||||
problem->b = std::make_unique<double[]>(num_rows);
|
||||
problem->D = std::make_unique<double[]>(num_cols);
|
||||
problem->num_eliminate_blocks = 1;
|
||||
|
||||
auto* bs = new CompressedRowBlockStructure;
|
||||
auto values = std::make_unique<double[]>(num_rows * num_cols);
|
||||
|
||||
// Column block structure
|
||||
bs->cols.emplace_back();
|
||||
bs->cols.back().size = 2;
|
||||
bs->cols.back().position = 0;
|
||||
|
||||
bs->cols.emplace_back();
|
||||
bs->cols.back().size = 3;
|
||||
bs->cols.back().position = 2;
|
||||
|
||||
bs->cols.emplace_back();
|
||||
bs->cols.back().size = 2;
|
||||
bs->cols.back().position = 5;
|
||||
|
||||
int nnz = 0;
|
||||
|
||||
// Row 1 & 2
|
||||
{
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 2;
|
||||
row.block.position = 0;
|
||||
|
||||
row.cells.emplace_back(0, nnz);
|
||||
values[nnz++] = 1;
|
||||
values[nnz++] = 2;
|
||||
values[nnz++] = 1;
|
||||
values[nnz++] = 4;
|
||||
|
||||
row.cells.emplace_back(2, nnz);
|
||||
values[nnz++] = 1;
|
||||
values[nnz++] = 1;
|
||||
values[nnz++] = 5;
|
||||
values[nnz++] = 6;
|
||||
}
|
||||
|
||||
// Row 3 & 4
|
||||
{
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 2;
|
||||
row.block.position = 2;
|
||||
|
||||
row.cells.emplace_back(0, nnz);
|
||||
values[nnz++] = 3;
|
||||
values[nnz++] = 4;
|
||||
values[nnz++] = 5;
|
||||
values[nnz++] = 6;
|
||||
|
||||
row.cells.emplace_back(2, nnz);
|
||||
values[nnz++] = 7;
|
||||
values[nnz++] = 8;
|
||||
values[nnz++] = 9;
|
||||
values[nnz++] = 0;
|
||||
}
|
||||
|
||||
// Row 5
|
||||
{
|
||||
bs->rows.emplace_back();
|
||||
CompressedRow& row = bs->rows.back();
|
||||
row.block.size = 1;
|
||||
row.block.position = 4;
|
||||
|
||||
row.cells.emplace_back(1, nnz);
|
||||
values[nnz++] = 9;
|
||||
values[nnz++] = 0;
|
||||
values[nnz++] = 0;
|
||||
|
||||
row.cells.emplace_back(2, nnz);
|
||||
values[nnz++] = 3;
|
||||
values[nnz++] = 1;
|
||||
}
|
||||
|
||||
auto A = std::make_unique<BlockSparseMatrix>(bs);
|
||||
memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values()));
|
||||
|
||||
for (int i = 0; i < num_cols; ++i) {
|
||||
problem->D.get()[i] = (i + 1) * 100;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_rows; ++i) {
|
||||
problem->b.get()[i] = i;
|
||||
}
|
||||
|
||||
problem->A = std::move(A);
|
||||
return problem;
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A,
|
||||
const double* D,
|
||||
const double* b,
|
||||
const double* x,
|
||||
int num_eliminate_blocks) {
|
||||
int /*num_eliminate_blocks*/) {
|
||||
CHECK(A != nullptr);
|
||||
Matrix AA;
|
||||
A->ToDenseMatrix(&AA);
|
||||
@@ -639,7 +950,7 @@ bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A,
|
||||
return true;
|
||||
}
|
||||
|
||||
void WriteArrayToFileOrDie(const string& filename,
|
||||
void WriteArrayToFileOrDie(const std::string& filename,
|
||||
const double* x,
|
||||
const int size) {
|
||||
CHECK(x != nullptr);
|
||||
@@ -652,23 +963,23 @@ void WriteArrayToFileOrDie(const string& filename,
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base,
|
||||
bool DumpLinearLeastSquaresProblemToTextFile(const std::string& filename_base,
|
||||
const SparseMatrix* A,
|
||||
const double* D,
|
||||
const double* b,
|
||||
const double* x,
|
||||
int num_eliminate_blocks) {
|
||||
int /*num_eliminate_blocks*/) {
|
||||
CHECK(A != nullptr);
|
||||
LOG(INFO) << "writing to: " << filename_base << "*";
|
||||
|
||||
string matlab_script;
|
||||
std::string matlab_script;
|
||||
StringAppendF(&matlab_script,
|
||||
"function lsqp = load_trust_region_problem()\n");
|
||||
StringAppendF(&matlab_script, "lsqp.num_rows = %d;\n", A->num_rows());
|
||||
StringAppendF(&matlab_script, "lsqp.num_cols = %d;\n", A->num_cols());
|
||||
|
||||
{
|
||||
string filename = filename_base + "_A.txt";
|
||||
std::string filename = filename_base + "_A.txt";
|
||||
FILE* fptr = fopen(filename.c_str(), "w");
|
||||
CHECK(fptr != nullptr);
|
||||
A->ToTextFile(fptr);
|
||||
@@ -683,33 +994,33 @@ bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base,
|
||||
}
|
||||
|
||||
if (D != nullptr) {
|
||||
string filename = filename_base + "_D.txt";
|
||||
std::string filename = filename_base + "_D.txt";
|
||||
WriteArrayToFileOrDie(filename, D, A->num_cols());
|
||||
StringAppendF(
|
||||
&matlab_script, "lsqp.D = load('%s', '-ascii');\n", filename.c_str());
|
||||
}
|
||||
|
||||
if (b != nullptr) {
|
||||
string filename = filename_base + "_b.txt";
|
||||
std::string filename = filename_base + "_b.txt";
|
||||
WriteArrayToFileOrDie(filename, b, A->num_rows());
|
||||
StringAppendF(
|
||||
&matlab_script, "lsqp.b = load('%s', '-ascii');\n", filename.c_str());
|
||||
}
|
||||
|
||||
if (x != nullptr) {
|
||||
string filename = filename_base + "_x.txt";
|
||||
std::string filename = filename_base + "_x.txt";
|
||||
WriteArrayToFileOrDie(filename, x, A->num_cols());
|
||||
StringAppendF(
|
||||
&matlab_script, "lsqp.x = load('%s', '-ascii');\n", filename.c_str());
|
||||
}
|
||||
|
||||
string matlab_filename = filename_base + ".m";
|
||||
std::string matlab_filename = filename_base + ".m";
|
||||
WriteStringToFileOrDie(matlab_script, matlab_filename);
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool DumpLinearLeastSquaresProblem(const string& filename_base,
|
||||
bool DumpLinearLeastSquaresProblem(const std::string& filename_base,
|
||||
DumpFormatType dump_format_type,
|
||||
const SparseMatrix* A,
|
||||
const double* D,
|
||||
@@ -730,5 +1041,4 @@ bool DumpLinearLeastSquaresProblem(const string& filename_base,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Structure defining a linear least squares problem and if possible
|
||||
// ground truth solutions. To be used by various LinearSolver tests.
|
||||
@@ -74,6 +73,10 @@ CERES_NO_EXPORT
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem3();
|
||||
CERES_NO_EXPORT
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem4();
|
||||
CERES_NO_EXPORT
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem5();
|
||||
CERES_NO_EXPORT
|
||||
std::unique_ptr<LinearLeastSquaresProblem> LinearLeastSquaresProblem6();
|
||||
|
||||
// Write the linear least squares problem to disk. The exact format
|
||||
// depends on dump_format_type.
|
||||
@@ -85,8 +88,7 @@ bool DumpLinearLeastSquaresProblem(const std::string& filename_base,
|
||||
const double* b,
|
||||
const double* x,
|
||||
int num_eliminate_blocks);
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
34
extern/ceres/internal/ceres/linear_operator.cc
vendored
34
extern/ceres/internal/ceres/linear_operator.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,10 +30,34 @@
|
||||
|
||||
#include "ceres/linear_operator.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
#include <glog/logging.h>
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
void LinearOperator::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
(void)context;
|
||||
if (num_threads != 1) {
|
||||
VLOG(3) << "Parallel right product is not supported by linear operator "
|
||||
"implementation";
|
||||
}
|
||||
RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
void LinearOperator::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
(void)context;
|
||||
if (num_threads != 1) {
|
||||
VLOG(3) << "Parallel left product is not supported by linear operator "
|
||||
"implementation";
|
||||
}
|
||||
LeftMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
LinearOperator::~LinearOperator() = default;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
45
extern/ceres/internal/ceres/linear_operator.h
vendored
45
extern/ceres/internal/ceres/linear_operator.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,11 +33,13 @@
|
||||
#ifndef CERES_INTERNAL_LINEAR_OPERATOR_H_
|
||||
#define CERES_INTERNAL_LINEAR_OPERATOR_H_
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ContextImpl;
|
||||
|
||||
// This is an abstract base class for linear operators. It supports
|
||||
// access to size information and left and right multiply operators.
|
||||
@@ -46,15 +48,44 @@ class CERES_NO_EXPORT LinearOperator {
|
||||
virtual ~LinearOperator();
|
||||
|
||||
// y = y + Ax;
|
||||
virtual void RightMultiply(const double* x, double* y) const = 0;
|
||||
virtual void RightMultiplyAndAccumulate(const double* x, double* y) const = 0;
|
||||
virtual void RightMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const;
|
||||
// y = y + A'x;
|
||||
virtual void LeftMultiply(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulate(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const;
|
||||
|
||||
// Vector overload of y = y + Ax: forwards the buffers returned by x.data()
// and y.data() to the raw-pointer overload.
virtual void RightMultiplyAndAccumulate(const Vector& x, Vector& y) const {
|
||||
RightMultiplyAndAccumulate(x.data(), y.data());
|
||||
}
|
||||
|
||||
// Vector overload of y = y + A'x: forwards the buffers returned by x.data()
// and y.data() to the raw-pointer overload.
virtual void LeftMultiplyAndAccumulate(const Vector& x, Vector& y) const {
|
||||
LeftMultiplyAndAccumulate(x.data(), y.data());
|
||||
}
|
||||
|
||||
// Vector overload of the context/thread-aware right product: forwards
// x.data(), y.data(), context and num_threads to the raw-pointer overload.
virtual void RightMultiplyAndAccumulate(const Vector& x,
|
||||
Vector& y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
RightMultiplyAndAccumulate(x.data(), y.data(), context, num_threads);
|
||||
}
|
||||
|
||||
// Vector overload of the context/thread-aware left product: forwards
// x.data(), y.data(), context and num_threads to the raw-pointer overload.
virtual void LeftMultiplyAndAccumulate(const Vector& x,
|
||||
Vector& y,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
LeftMultiplyAndAccumulate(x.data(), y.data(), context, num_threads);
|
||||
}
|
||||
|
||||
virtual int num_rows() const = 0;
|
||||
virtual int num_cols() const = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_LINEAR_OPERATOR_H_
|
||||
|
||||
17
extern/ceres/internal/ceres/linear_solver.cc
vendored
17
extern/ceres/internal/ceres/linear_solver.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
LinearSolver::~LinearSolver() = default;
|
||||
|
||||
@@ -77,8 +76,15 @@ std::unique_ptr<LinearSolver> LinearSolver::Create(
|
||||
CHECK(options.context != nullptr);
|
||||
|
||||
switch (options.type) {
|
||||
case CGNR:
|
||||
case CGNR: {
|
||||
#ifndef CERES_NO_CUDA
|
||||
if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) {
|
||||
std::string error;
|
||||
return CudaCgnrSolver::Create(options, &error);
|
||||
}
|
||||
#endif
|
||||
return std::make_unique<CgnrSolver>(options);
|
||||
} break;
|
||||
|
||||
case SPARSE_NORMAL_CHOLESKY:
|
||||
#if defined(CERES_NO_SPARSE)
|
||||
@@ -120,5 +126,4 @@ std::unique_ptr<LinearSolver> LinearSolver::Create(
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
99
extern/ceres/internal/ceres/linear_solver.h
vendored
99
extern/ceres/internal/ceres/linear_solver.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -52,39 +52,81 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
enum LinearSolverTerminationType {
|
||||
enum class LinearSolverTerminationType {
|
||||
// Termination criterion was met.
|
||||
LINEAR_SOLVER_SUCCESS,
|
||||
SUCCESS,
|
||||
|
||||
// Solver ran for max_num_iterations and terminated before the
|
||||
// termination tolerance could be satisfied.
|
||||
LINEAR_SOLVER_NO_CONVERGENCE,
|
||||
NO_CONVERGENCE,
|
||||
|
||||
// Solver was terminated due to numerical problems, generally due to
|
||||
// the linear system being poorly conditioned.
|
||||
LINEAR_SOLVER_FAILURE,
|
||||
FAILURE,
|
||||
|
||||
// Solver failed with a fatal error that cannot be recovered from,
|
||||
// e.g. CHOLMOD ran out of memory when computing the symbolic or
|
||||
// numeric factorization or an underlying library was called with
|
||||
// the wrong arguments.
|
||||
LINEAR_SOLVER_FATAL_ERROR
|
||||
FATAL_ERROR
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& s,
|
||||
LinearSolverTerminationType type) {
|
||||
switch (type) {
|
||||
case LinearSolverTerminationType::SUCCESS:
|
||||
s << "LINEAR_SOLVER_SUCCESS";
|
||||
break;
|
||||
case LinearSolverTerminationType::NO_CONVERGENCE:
|
||||
s << "LINEAR_SOLVER_NO_CONVERGENCE";
|
||||
break;
|
||||
case LinearSolverTerminationType::FAILURE:
|
||||
s << "LINEAR_SOLVER_FAILURE";
|
||||
break;
|
||||
case LinearSolverTerminationType::FATAL_ERROR:
|
||||
s << "LINEAR_SOLVER_FATAL_ERROR";
|
||||
break;
|
||||
default:
|
||||
s << "UNKNOWN LinearSolverTerminationType";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// This enum controls the fill-reducing ordering a sparse linear
|
||||
// algebra library should use before computing a sparse factorization
|
||||
// (usually Cholesky).
|
||||
enum OrderingType {
|
||||
//
|
||||
// TODO(sameeragarwal): Add support for nested dissection
|
||||
enum class OrderingType {
|
||||
NATURAL, // Do not re-order the matrix. This is useful when the
|
||||
// matrix has been ordered using a fill-reducing ordering
|
||||
// already.
|
||||
AMD // Use the Approximate Minimum Degree algorithm to re-order
|
||||
// the matrix.
|
||||
|
||||
AMD, // Use the Approximate Minimum Degree algorithm to re-order
|
||||
// the matrix.
|
||||
|
||||
NESDIS, // Use the Nested Dissection algorithm to re-order the matrix.
|
||||
};
|
||||
|
||||
// Streams the name of an OrderingType; a value matching no enumerator prints
// a placeholder.
inline std::ostream& operator<<(std::ostream& s, OrderingType type) {
  switch (type) {
    case OrderingType::NATURAL:
      return s << "NATURAL";
    case OrderingType::AMD:
      return s << "AMD";
    case OrderingType::NESDIS:
      return s << "NESDIS";
  }
  // Reached only for a value that is not one of the enumerators above.
  return s << "UNKNOWN OrderingType";
}
|
||||
|
||||
class LinearOperator;
|
||||
|
||||
// Abstract base class for objects that implement algorithms for
|
||||
@@ -112,9 +154,9 @@ class CERES_NO_EXPORT LinearSolver {
|
||||
DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN;
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
|
||||
SUITE_SPARSE;
|
||||
OrderingType ordering_type = OrderingType::NATURAL;
|
||||
|
||||
// See solver.h for information about these flags.
|
||||
bool use_postordering = false;
|
||||
bool dynamic_sparsity = false;
|
||||
bool use_explicit_schur_complement = false;
|
||||
|
||||
@@ -123,6 +165,23 @@ class CERES_NO_EXPORT LinearSolver {
|
||||
int min_num_iterations = 1;
|
||||
int max_num_iterations = 1;
|
||||
|
||||
// Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION.
|
||||
// This value controls the maximum number of iterations whether it is used
|
||||
// as a preconditioner or just to initialize the solution for
|
||||
// ITERATIVE_SCHUR.
|
||||
int max_num_spse_iterations = 5;
|
||||
|
||||
// Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for
|
||||
// ITERATIVE_SCHUR. This option can be set true regardless of what
|
||||
// preconditioner is being used.
|
||||
bool use_spse_initialization = false;
|
||||
|
||||
// When use_spse_initialization is true, this parameter along with
|
||||
// max_num_spse_iterations controls the number of
|
||||
// SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It
|
||||
// is not used to control the preconditioner.
|
||||
double spse_tolerance = 0.1;
|
||||
|
||||
// If possible, how many threads can the solver use.
|
||||
int num_threads = 1;
|
||||
|
||||
@@ -261,7 +320,8 @@ class CERES_NO_EXPORT LinearSolver {
|
||||
struct Summary {
|
||||
double residual_norm = -1.0;
|
||||
int num_iterations = -1;
|
||||
LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE;
|
||||
LinearSolverTerminationType termination_type =
|
||||
LinearSolverTerminationType::FAILURE;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
@@ -329,17 +389,16 @@ class TypedLinearSolver : public LinearSolver {
|
||||
ExecutionSummary execution_summary_;
|
||||
};
|
||||
|
||||
// Linear solvers that depend on acccess to the low level structure of
|
||||
// Linear solvers that depend on access to the low level structure of
|
||||
// a SparseMatrix.
|
||||
// clang-format off
|
||||
typedef TypedLinearSolver<BlockSparseMatrix> BlockSparseMatrixSolver; // NOLINT
|
||||
typedef TypedLinearSolver<CompressedRowSparseMatrix> CompressedRowSparseMatrixSolver; // NOLINT
|
||||
typedef TypedLinearSolver<DenseSparseMatrix> DenseSparseMatrixSolver; // NOLINT
|
||||
typedef TypedLinearSolver<TripletSparseMatrix> TripletSparseMatrixSolver; // NOLINT
|
||||
using BlockSparseMatrixSolver = TypedLinearSolver<BlockSparseMatrix>; // NOLINT
|
||||
using CompressedRowSparseMatrixSolver = TypedLinearSolver<CompressedRowSparseMatrix>; // NOLINT
|
||||
using DenseSparseMatrixSolver = TypedLinearSolver<DenseSparseMatrix>; // NOLINT
|
||||
using TripletSparseMatrixSolver = TypedLinearSolver<TripletSparseMatrix>; // NOLINT
|
||||
// clang-format on
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,349 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#include "ceres/local_parameterization.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Eigen/Geometry"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/fixed_array.h"
|
||||
#include "ceres/internal/householder_vector.h"
|
||||
#include "ceres/rotation.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
|
||||
using std::vector;
|
||||
|
||||
LocalParameterization::~LocalParameterization() = default;
|
||||
|
||||
bool LocalParameterization::MultiplyByJacobian(const double* x,
|
||||
const int num_rows,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const {
|
||||
if (LocalSize() == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Matrix jacobian(GlobalSize(), LocalSize());
|
||||
if (!ComputeJacobian(x, jacobian.data())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MatrixRef(local_matrix, num_rows, LocalSize()) =
|
||||
ConstMatrixRef(global_matrix, num_rows, GlobalSize()) * jacobian;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stores the block size in size_. The size must be strictly positive.
IdentityParameterization::IdentityParameterization(const int size)
|
||||
: size_(size) {
|
||||
// Fails the check on a zero or negative size.
CHECK_GT(size, 0);
|
||||
}
|
||||
|
||||
bool IdentityParameterization::Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const {
|
||||
VectorRef(x_plus_delta, size_) =
|
||||
ConstVectorRef(x, size_) + ConstVectorRef(delta, size_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IdentityParameterization::ComputeJacobian(const double* x,
|
||||
double* jacobian) const {
|
||||
MatrixRef(jacobian, size_, size_).setIdentity();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IdentityParameterization::MultiplyByJacobian(const double* x,
|
||||
const int num_cols,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const {
|
||||
std::copy(
|
||||
global_matrix, global_matrix + num_cols * GlobalSize(), local_matrix);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a parameterization of a `size`-dimensional block in which the
// coordinates listed in constant_parameters are held constant. The indices
// must lie in [0, size) and must not repeat; violations fail a CHECK.
SubsetParameterization::SubsetParameterization(
    int size, const vector<int>& constant_parameters)
    : local_size_(size - constant_parameters.size()), constancy_mask_(size, 0) {
  // With nothing held constant the all-zero mask built above is final.
  if (constant_parameters.empty()) {
    return;
  }

  // Validate on a sorted copy: the extremes give the range check and equal
  // neighbours reveal duplicates.
  vector<int> constant(constant_parameters.begin(), constant_parameters.end());
  std::sort(constant.begin(), constant.end());
  CHECK_GE(constant.front(), 0) << "Indices indicating constant parameter must "
                                   "be greater than equal to zero.";
  CHECK_LT(constant.back(), size)
      << "Indices indicating constant parameter must be less than the size "
      << "of the parameter block.";
  CHECK(std::adjacent_find(constant.begin(), constant.end()) == constant.end())
      << "The set of constant parameters cannot contain duplicates";

  // Flag every constant coordinate in the mask.
  for (auto it = constant_parameters.begin(); it != constant_parameters.end();
       ++it) {
    constancy_mask_[*it] = 1;
  }
}
|
||||
|
||||
bool SubsetParameterization::Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const {
|
||||
const int global_size = GlobalSize();
|
||||
for (int i = 0, j = 0; i < global_size; ++i) {
|
||||
if (constancy_mask_[i]) {
|
||||
x_plus_delta[i] = x[i];
|
||||
} else {
|
||||
x_plus_delta[i] = x[i] + delta[j++];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SubsetParameterization::ComputeJacobian(const double* x,
|
||||
double* jacobian) const {
|
||||
if (local_size_ == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const int global_size = GlobalSize();
|
||||
MatrixRef m(jacobian, global_size, local_size_);
|
||||
m.setZero();
|
||||
for (int i = 0, j = 0; i < global_size; ++i) {
|
||||
if (!constancy_mask_[i]) {
|
||||
m(i, j++) = 1.0;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SubsetParameterization::MultiplyByJacobian(const double* x,
|
||||
const int num_cols,
|
||||
const double* global_matrix,
|
||||
double* local_matrix) const {
|
||||
if (local_size_ == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const int global_size = GlobalSize();
|
||||
for (int col = 0; col < num_cols; ++col) {
|
||||
for (int i = 0, j = 0; i < global_size; ++i) {
|
||||
if (!constancy_mask_[i]) {
|
||||
local_matrix[col * local_size_ + j++] =
|
||||
global_matrix[col * global_size + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool QuaternionParameterization::Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const {
|
||||
const double norm_delta =
|
||||
sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
|
||||
if (norm_delta > 0.0) {
|
||||
const double sin_delta_by_delta = (sin(norm_delta) / norm_delta);
|
||||
double q_delta[4];
|
||||
q_delta[0] = cos(norm_delta);
|
||||
q_delta[1] = sin_delta_by_delta * delta[0];
|
||||
q_delta[2] = sin_delta_by_delta * delta[1];
|
||||
q_delta[3] = sin_delta_by_delta * delta[2];
|
||||
QuaternionProduct(q_delta, x, x_plus_delta);
|
||||
} else {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
x_plus_delta[i] = x[i];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool QuaternionParameterization::ComputeJacobian(const double* x,
|
||||
double* jacobian) const {
|
||||
// clang-format off
|
||||
jacobian[0] = -x[1]; jacobian[1] = -x[2]; jacobian[2] = -x[3];
|
||||
jacobian[3] = x[0]; jacobian[4] = x[3]; jacobian[5] = -x[2];
|
||||
jacobian[6] = -x[3]; jacobian[7] = x[0]; jacobian[8] = x[1];
|
||||
jacobian[9] = x[2]; jacobian[10] = -x[1]; jacobian[11] = x[0];
|
||||
// clang-format on
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EigenQuaternionParameterization::Plus(const double* x_ptr,
|
||||
const double* delta,
|
||||
double* x_plus_delta_ptr) const {
|
||||
Eigen::Map<Eigen::Quaterniond> x_plus_delta(x_plus_delta_ptr);
|
||||
Eigen::Map<const Eigen::Quaterniond> x(x_ptr);
|
||||
|
||||
const double norm_delta =
|
||||
sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
|
||||
if (norm_delta > 0.0) {
|
||||
const double sin_delta_by_delta = sin(norm_delta) / norm_delta;
|
||||
|
||||
// Note, in the constructor w is first.
|
||||
Eigen::Quaterniond delta_q(cos(norm_delta),
|
||||
sin_delta_by_delta * delta[0],
|
||||
sin_delta_by_delta * delta[1],
|
||||
sin_delta_by_delta * delta[2]);
|
||||
x_plus_delta = delta_q * x;
|
||||
} else {
|
||||
x_plus_delta = x;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool EigenQuaternionParameterization::ComputeJacobian(const double* x,
|
||||
double* jacobian) const {
|
||||
// clang-format off
|
||||
jacobian[0] = x[3]; jacobian[1] = x[2]; jacobian[2] = -x[1];
|
||||
jacobian[3] = -x[2]; jacobian[4] = x[3]; jacobian[5] = x[0];
|
||||
jacobian[6] = x[1]; jacobian[7] = -x[0]; jacobian[8] = x[3];
|
||||
jacobian[9] = -x[0]; jacobian[10] = -x[1]; jacobian[11] = -x[2];
|
||||
// clang-format on
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stores the size of the homogeneous vector in size_. The size must be
// greater than 1.
HomogeneousVectorParameterization::HomogeneousVectorParameterization(int size)
|
||||
: size_(size) {
|
||||
// Fails the check, with the message below, when size_ is 1 or less.
CHECK_GT(size_, 1) << "The size of the homogeneous vector needs to be "
|
||||
<< "greater than 1.";
|
||||
}
|
||||
|
||||
// Applies a tangent-space step to a homogeneous vector.
//
// x_ptr and x_plus_delta_ptr are viewed as size_ doubles, delta_ptr as
// size_ - 1 doubles. A delta whose norm is exactly zero copies x through
// unchanged. Otherwise delta is mapped to a size_-vector y, y is reflected
// with the Householder vector computed from x, and the result is scaled by
// x.norm(). Always returns true.
bool HomogeneousVectorParameterization::Plus(const double* x_ptr,
|
||||
const double* delta_ptr,
|
||||
double* x_plus_delta_ptr) const {
|
||||
ConstVectorRef x(x_ptr, size_);
|
||||
ConstVectorRef delta(delta_ptr, size_ - 1);
|
||||
VectorRef x_plus_delta(x_plus_delta_ptr, size_);
|
||||
|
||||
const double norm_delta = delta.norm();
|
||||
|
||||
// Exact comparison: only an exactly zero step takes this shortcut, which
// also keeps the division by norm_delta_div_2 below away from zero.
if (norm_delta == 0.0) {
|
||||
x_plus_delta = x;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map the delta from the minimum representation to the over parameterized
|
||||
// homogeneous vector. See section A6.9.2 on page 624 of Hartley & Zisserman
|
||||
// (2nd Edition) for a detailed description. Note there is a typo on Page
|
||||
// 625, line 4 so check the book errata.
|
||||
const double norm_delta_div_2 = 0.5 * norm_delta;
|
||||
// Despite the name, this is sin(|delta| / 2) / (|delta| / 2).
const double sin_delta_by_delta =
|
||||
std::sin(norm_delta_div_2) / norm_delta_div_2;
|
||||
|
||||
// y: first size_ - 1 entries are the scaled delta, last entry is
// cos(|delta| / 2).
Vector y(size_);
|
||||
y.head(size_ - 1) = 0.5 * sin_delta_by_delta * delta;
|
||||
y(size_ - 1) = std::cos(norm_delta_div_2);
|
||||
|
||||
// v and beta are outputs of ComputeHouseholderVector below.
Vector v(size_);
|
||||
double beta;
|
||||
|
||||
// NOTE: The explicit template arguments are needed here because
|
||||
// ComputeHouseholderVector is templated and some versions of MSVC
|
||||
// have trouble deducing the type of v automatically.
|
||||
internal::ComputeHouseholderVector<ConstVectorRef, double, Eigen::Dynamic>(
|
||||
x, &v, &beta);
|
||||
|
||||
// Apply the delta update to remain on the unit sphere. See section A6.9.3
|
||||
// on page 625 of Hartley & Zisserman (2nd Edition) for a detailed
|
||||
// description.
|
||||
// (y - v * beta * v' * y) is the Householder reflection (I - beta v v') y,
// evaluated without forming the matrix.
x_plus_delta = x.norm() * (y - v * (beta * (v.transpose() * y)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fills the size_ x (size_ - 1) matrix viewed over jacobian_ptr.
//
// Column i is set to -0.5 * beta * v(i) * v with 0.5 added to its i-th entry,
// where v and beta come from ComputeHouseholderVector(x); the whole matrix is
// then scaled by x.norm(). Always returns true.
bool HomogeneousVectorParameterization::ComputeJacobian(
|
||||
const double* x_ptr, double* jacobian_ptr) const {
|
||||
ConstVectorRef x(x_ptr, size_);
|
||||
MatrixRef jacobian(jacobian_ptr, size_, size_ - 1);
|
||||
|
||||
// v and beta are outputs of ComputeHouseholderVector below.
Vector v(size_);
|
||||
double beta;
|
||||
|
||||
// NOTE: The explicit template arguments are needed here because
|
||||
// ComputeHouseholderVector is templated and some versions of MSVC
|
||||
// have trouble deducing the type of v automatically.
|
||||
internal::ComputeHouseholderVector<ConstVectorRef, double, Eigen::Dynamic>(
|
||||
x, &v, &beta);
|
||||
|
||||
// The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the
|
||||
// Householder matrix (H = I - beta * v * v').
|
||||
// Built one column at a time so H itself is never materialized.
for (int i = 0; i < size_ - 1; ++i) {
|
||||
jacobian.col(i) = -0.5 * beta * v(i) * v;
|
||||
jacobian.col(i)(i) += 0.5;
|
||||
}
|
||||
jacobian *= x.norm();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Applies Plus() of every entry of local_params_ in order, each on its own
// slice of x, delta and x_plus_delta.
//
// The slice offset into x / x_plus_delta advances by the component's
// GlobalSize(), the offset into delta by its LocalSize(). Returns false as
// soon as one component's Plus() fails (later components are not evaluated),
// true otherwise.
bool ProductParameterization::Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const {
|
||||
int x_cursor = 0;
|
||||
int delta_cursor = 0;
|
||||
for (const auto& param : local_params_) {
|
||||
if (!param->Plus(
|
||||
x + x_cursor, delta + delta_cursor, x_plus_delta + x_cursor)) {
|
||||
return false;
|
||||
}
|
||||
delta_cursor += param->LocalSize();
|
||||
x_cursor += param->GlobalSize();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Assembles a block-diagonal Jacobian from the component parameterizations.
//
// The GlobalSize() x LocalSize() matrix viewed over jacobian_ptr is zeroed,
// then each component's Jacobian is computed into a scratch buffer and copied
// into the block at (x_cursor, delta_cursor). Returns false as soon as one
// component's ComputeJacobian() fails, true otherwise.
bool ProductParameterization::ComputeJacobian(const double* x,
|
||||
double* jacobian_ptr) const {
|
||||
MatrixRef jacobian(jacobian_ptr, GlobalSize(), LocalSize());
|
||||
jacobian.setZero();
|
||||
// Scratch space reused for every component.
// NOTE(review): assumes buffer_size_ covers the largest
// global_size * local_size of any component - confirm where it is set.
internal::FixedArray<double> buffer(buffer_size_);
|
||||
|
||||
int x_cursor = 0;
|
||||
int delta_cursor = 0;
|
||||
for (const auto& param : local_params_) {
|
||||
const int local_size = param->LocalSize();
|
||||
const int global_size = param->GlobalSize();
|
||||
|
||||
if (!param->ComputeJacobian(x + x_cursor, buffer.data())) {
|
||||
return false;
|
||||
}
|
||||
// Copy the component Jacobian into its diagonal block.
jacobian.block(x_cursor, delta_cursor, global_size, local_size) =
|
||||
MatrixRef(buffer.data(), global_size, local_size);
|
||||
|
||||
delta_cursor += local_size;
|
||||
x_cursor += global_size;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ceres
|
||||
2
extern/ceres/internal/ceres/loss_function.cc
vendored
2
extern/ceres/internal/ceres/loss_function.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,10 +35,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::list;
|
||||
namespace ceres::internal {
|
||||
|
||||
// The (L)BFGS algorithm explicitly requires that the secant equation:
|
||||
//
|
||||
@@ -117,8 +114,8 @@ bool LowRankInverseHessian::Update(const Vector& delta_x,
|
||||
return true;
|
||||
}
|
||||
|
||||
void LowRankInverseHessian::RightMultiply(const double* x_ptr,
|
||||
double* y_ptr) const {
|
||||
void LowRankInverseHessian::RightMultiplyAndAccumulate(const double* x_ptr,
|
||||
double* y_ptr) const {
|
||||
ConstVectorRef gradient(x_ptr, num_parameters_);
|
||||
VectorRef search_direction(y_ptr, num_parameters_);
|
||||
|
||||
@@ -159,7 +156,7 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr,
|
||||
//
|
||||
// The original origin of this rescaling trick is somewhat unclear, the
|
||||
// earliest reference appears to be Oren [1], however it is widely discussed
|
||||
// without specific attributation in various texts including [2] (p143/178).
|
||||
// without specific attribution in various texts including [2] (p143/178).
|
||||
//
|
||||
// [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms Part II:
|
||||
// Implementation and experiments, Management Science,
|
||||
@@ -179,5 +176,4 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_operator.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// LowRankInverseHessian is a positive definite approximation to the
|
||||
// Hessian using the limited memory variant of the
|
||||
@@ -65,7 +64,7 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
|
||||
// num_parameters is the row/column size of the Hessian.
|
||||
// max_num_corrections is the rank of the Hessian approximation.
|
||||
// use_approximate_eigenvalue_scaling controls whether the initial
|
||||
// inverse Hessian used during Right/LeftMultiply() is scaled by
|
||||
// inverse Hessian used during Right/LeftMultiplyAndAccumulate() is scaled by
|
||||
// the approximate eigenvalue of the true inverse Hessian at the
|
||||
// current operating point.
|
||||
// The approximation uses:
|
||||
@@ -84,9 +83,9 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
|
||||
bool Update(const Vector& delta_x, const Vector& delta_gradient);
|
||||
|
||||
// LinearOperator interface
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void LeftMultiply(const double* x, double* y) const final {
|
||||
RightMultiply(x, y);
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final {
|
||||
RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
int num_rows() const final { return num_parameters_; }
|
||||
int num_cols() const final { return num_parameters_; }
|
||||
@@ -102,7 +101,6 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator {
|
||||
std::list<int> indices_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_LOW_RANK_INVERSE_HESSIAN_H_
|
||||
|
||||
29
extern/ceres/internal/ceres/manifold.cc
vendored
29
extern/ceres/internal/ceres/manifold.cc
vendored
@@ -30,13 +30,11 @@ inline void QuaternionPlusImpl(const double* x,
|
||||
double* x_plus_delta) {
|
||||
// x_plus_delta = QuaternionProduct(q_delta, x), where q_delta is the
|
||||
// quaternion constructed from delta.
|
||||
const double norm_delta = std::sqrt(
|
||||
delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]);
|
||||
const double norm_delta = std::hypot(delta[0], delta[1], delta[2]);
|
||||
|
||||
if (norm_delta == 0.0) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
x_plus_delta[i] = x[i];
|
||||
}
|
||||
if (std::fpclassify(norm_delta) == FP_ZERO) {
|
||||
// No change in rotation: return the quaternion as is.
|
||||
std::copy_n(x, 4, x_plus_delta);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -100,19 +98,16 @@ inline void QuaternionMinusImpl(const double* y,
|
||||
-y[Order::kW] * x[Order::kZ] - y[Order::kX] * x[Order::kY] +
|
||||
y[Order::kY] * x[Order::kX] + y[Order::kZ] * x[Order::kW];
|
||||
|
||||
const double u_norm =
|
||||
std::sqrt(ambient_y_minus_x[Order::kX] * ambient_y_minus_x[Order::kX] +
|
||||
ambient_y_minus_x[Order::kY] * ambient_y_minus_x[Order::kY] +
|
||||
ambient_y_minus_x[Order::kZ] * ambient_y_minus_x[Order::kZ]);
|
||||
if (u_norm > 0.0) {
|
||||
const double u_norm = std::hypot(ambient_y_minus_x[Order::kX],
|
||||
ambient_y_minus_x[Order::kY],
|
||||
ambient_y_minus_x[Order::kZ]);
|
||||
if (std::fpclassify(u_norm) != FP_ZERO) {
|
||||
const double theta = std::atan2(u_norm, ambient_y_minus_x[Order::kW]);
|
||||
y_minus_x[0] = theta * ambient_y_minus_x[Order::kX] / u_norm;
|
||||
y_minus_x[1] = theta * ambient_y_minus_x[Order::kY] / u_norm;
|
||||
y_minus_x[2] = theta * ambient_y_minus_x[Order::kZ] / u_norm;
|
||||
} else {
|
||||
y_minus_x[0] = 0.0;
|
||||
y_minus_x[1] = 0.0;
|
||||
y_minus_x[2] = 0.0;
|
||||
std::fill_n(y_minus_x, 3, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -201,7 +196,7 @@ bool SubsetManifold::Plus(const double* x,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SubsetManifold::PlusJacobian(const double* x,
|
||||
bool SubsetManifold::PlusJacobian(const double* /*x*/,
|
||||
double* plus_jacobian) const {
|
||||
if (tangent_size_ == 0) {
|
||||
return true;
|
||||
@@ -218,7 +213,7 @@ bool SubsetManifold::PlusJacobian(const double* x,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SubsetManifold::RightMultiplyByPlusJacobian(const double* x,
|
||||
bool SubsetManifold::RightMultiplyByPlusJacobian(const double* /*x*/,
|
||||
const int num_rows,
|
||||
const double* ambient_matrix,
|
||||
double* tangent_matrix) const {
|
||||
@@ -254,7 +249,7 @@ bool SubsetManifold::Minus(const double* y,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SubsetManifold::MinusJacobian(const double* x,
|
||||
bool SubsetManifold::MinusJacobian(const double* /*x*/,
|
||||
double* minus_jacobian) const {
|
||||
const int ambient_size = AmbientSize();
|
||||
MatrixRef m(minus_jacobian, tangent_size_, ambient_size);
|
||||
|
||||
60
extern/ceres/internal/ceres/manifold_adapter.h
vendored
60
extern/ceres/internal/ceres/manifold_adapter.h
vendored
@@ -1,60 +0,0 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/local_parameterization.h"
|
||||
#include "ceres/manifold.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Adapter to wrap LocalParameterization and make them look like Manifolds.
|
||||
//
|
||||
// ManifoldAdapter NEVER takes ownership of local_parameterization.
|
||||
// Manifold implementation that forwards to a wrapped LocalParameterization.
// The wrapped pointer is stored as given and is not deleted by this class.
class CERES_NO_EXPORT ManifoldAdapter final : public Manifold {
|
||||
public:
|
||||
// CHECK-fails when local_parameterization is null.
explicit ManifoldAdapter(const LocalParameterization* local_parameterization)
|
||||
: local_parameterization_(local_parameterization) {
|
||||
CHECK(local_parameterization != nullptr);
|
||||
}
|
||||
|
||||
// Forwards to LocalParameterization::Plus.
bool Plus(const double* x,
|
||||
const double* delta,
|
||||
double* x_plus_delta) const override {
|
||||
return local_parameterization_->Plus(x, delta, x_plus_delta);
|
||||
}
|
||||
|
||||
// Forwards to LocalParameterization::ComputeJacobian.
bool PlusJacobian(const double* x, double* jacobian) const override {
|
||||
return local_parameterization_->ComputeJacobian(x, jacobian);
|
||||
}
|
||||
|
||||
// Forwards to LocalParameterization::MultiplyByJacobian with the same
// arguments in the same order.
bool RightMultiplyByPlusJacobian(const double* x,
|
||||
const int num_rows,
|
||||
const double* ambient_matrix,
|
||||
double* tangent_matrix) const override {
|
||||
return local_parameterization_->MultiplyByJacobian(
|
||||
x, num_rows, ambient_matrix, tangent_matrix);
|
||||
}
|
||||
|
||||
// Not supported by the adapter: logs a fatal error. The return statement
// presumably only satisfies the signature, as LOG(FATAL) is expected to abort.
bool Minus(const double* y, const double* x, double* delta) const override {
|
||||
LOG(FATAL) << "This should never be called.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Not supported by the adapter: logs a fatal error, same as Minus().
bool MinusJacobian(const double* x, double* jacobian) const override {
|
||||
LOG(FATAL) << "This should never be called.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ambient size is the wrapped parameterization's GlobalSize().
int AmbientSize() const override {
|
||||
return local_parameterization_->GlobalSize();
|
||||
}
|
||||
|
||||
// Tangent size is the wrapped parameterization's LocalSize().
int TangentSize() const override {
|
||||
return local_parameterization_->LocalSize();
|
||||
}
|
||||
|
||||
private:
|
||||
// Non-owning pointer to the wrapped parameterization; never null after
// construction (checked in the constructor).
const LocalParameterization* local_parameterization_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
2
extern/ceres/internal/ceres/map_util.h
vendored
2
extern/ceres/internal/ceres/map_util.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
8
extern/ceres/internal/ceres/minimizer.cc
vendored
8
extern/ceres/internal/ceres/minimizer.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<Minimizer> Minimizer::Create(MinimizerType minimizer_type) {
|
||||
if (minimizer_type == TRUST_REGION) {
|
||||
@@ -89,5 +88,4 @@ bool Minimizer::RunCallbacks(const Minimizer::Options& options,
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
10
extern/ceres/internal/ceres/minimizer.h
vendored
10
extern/ceres/internal/ceres/minimizer.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,14 +40,14 @@
|
||||
#include "ceres/iteration_callback.h"
|
||||
#include "ceres/solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Evaluator;
|
||||
class SparseMatrix;
|
||||
class TrustRegionStrategy;
|
||||
class CoordinateDescentMinimizer;
|
||||
class LinearSolver;
|
||||
class ContextImpl;
|
||||
|
||||
// Interface for non-linear least squares solvers.
|
||||
class CERES_NO_EXPORT Minimizer {
|
||||
@@ -114,6 +114,7 @@ class CERES_NO_EXPORT Minimizer {
|
||||
int max_num_iterations;
|
||||
double max_solver_time_in_seconds;
|
||||
int num_threads;
|
||||
ContextImpl* context = nullptr;
|
||||
|
||||
// Number of times the linear solver should be retried in case of
|
||||
// numerical failure. The retries are done by exponentially scaling up
|
||||
@@ -193,8 +194,7 @@ class CERES_NO_EXPORT Minimizer {
|
||||
Solver::Summary* summary) = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
7
extern/ceres/internal/ceres/normal_prior.cc
vendored
7
extern/ceres/internal/ceres/normal_prior.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "ceres/normal_prior.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
@@ -39,7 +40,7 @@
|
||||
|
||||
namespace ceres {
|
||||
|
||||
NormalPrior::NormalPrior(const Matrix& A, const Vector& b) : A_(A), b_(b) {
|
||||
NormalPrior::NormalPrior(const Matrix& A, Vector b) : A_(A), b_(std::move(b)) {
|
||||
CHECK_GT(b_.rows(), 0);
|
||||
CHECK_GT(A_.rows(), 0);
|
||||
CHECK_EQ(b_.rows(), A.cols());
|
||||
@@ -54,7 +55,7 @@ bool NormalPrior::Evaluate(double const* const* parameters,
|
||||
VectorRef r(residuals, num_residuals());
|
||||
// The following line should read
|
||||
// r = A_ * (p - b_);
|
||||
// The extra eval is to get around a bug in the eigen library.
|
||||
// The extra eval is to get around a bug in the Eigen library.
|
||||
r = A_ * (p - b_).eval();
|
||||
if ((jacobians != nullptr) && (jacobians[0] != nullptr)) {
|
||||
MatrixRef(jacobians[0], num_residuals(), parameter_block_sizes()[0]) = A_;
|
||||
|
||||
8
extern/ceres/internal/ceres/pair_hash.h
vendored
8
extern/ceres/internal/ceres/pair_hash.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
#if defined(_WIN32) && !defined(__MINGW64__) && !defined(__MINGW32__)
|
||||
#define GG_LONGLONG(x) x##I64
|
||||
@@ -112,7 +111,6 @@ struct pair_hash {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_PAIR_HASH_H_
|
||||
|
||||
173
extern/ceres/internal/ceres/parallel_for.h
vendored
173
extern/ceres/internal/ceres/parallel_for.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,48 +26,161 @@
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: vitus@google.com (Michael Vitus)
|
||||
// Authors: vitus@google.com (Michael Vitus),
|
||||
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_PARALLEL_FOR_H_
|
||||
#define CERES_INTERNAL_PARALLEL_FOR_H_
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/parallel_invoke.h"
|
||||
#include "ceres/partition_range_for_parallel_for.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Returns the maximum number of threads supported by the threading backend
|
||||
// Ceres was compiled with.
|
||||
CERES_NO_EXPORT
|
||||
int MaxNumThreadsAvailable();
|
||||
// Use a dummy mutex if num_threads = 1.
|
||||
// Returns a std::unique_lock<std::mutex>. With num_threads == 1 the lock is
// default-constructed, so it is associated with no mutex and m is left
// untouched; otherwise it is constructed from m, which locks m until the
// returned object is destroyed or unlocked.
inline decltype(auto) MakeConditionalLock(const int num_threads,
|
||||
std::mutex& m) {
|
||||
return (num_threads == 1) ? std::unique_lock<std::mutex>{}
|
||||
: std::unique_lock<std::mutex>{m};
|
||||
}
|
||||
|
||||
// Execute the function for every element in the range [start, end) with at most
|
||||
// num_threads. It will execute all the work on the calling thread if
|
||||
// num_threads is 1.
|
||||
CERES_NO_EXPORT void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
const std::function<void(int)>& function);
|
||||
// num_threads or (end - start) is equal to 1.
|
||||
// Depending on function signature, it will be supplied with either loop index
|
||||
// or a range of loop indices; function can also be supplied with thread_id.
|
||||
// The following function signatures are supported:
|
||||
// - Functions accepting a single loop index:
|
||||
// - [](int index) { ... }
|
||||
// - [](int thread_id, int index) { ... }
|
||||
// - Functions accepting a range of loop index:
|
||||
// - [](std::tuple<int, int> index) { ... }
|
||||
// - [](int thread_id, std::tuple<int, int> index) { ... }
|
||||
//
|
||||
// When distributing workload between threads, it is assumed that each loop
|
||||
// iteration takes approximately equal time to complete.
|
||||
template <typename F>
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
F&& function,
|
||||
int min_block_size = 1) {
|
||||
CHECK_GT(num_threads, 0);
|
||||
if (start >= end) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Execute the function for every element in the range [start, end) with at most
|
||||
// num_threads. It will execute all the work on the calling thread if
|
||||
// num_threads is 1. Each invocation of function() will be passed a thread_id
|
||||
// in [0, num_threads) that is guaranteed to be distinct from the value passed
|
||||
// to any concurrent execution of function().
|
||||
CERES_NO_EXPORT void ParallelFor(
|
||||
ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
const std::function<void(int thread_id, int i)>& function);
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
if (num_threads == 1 || end - start < min_block_size * 2) {
|
||||
InvokeOnSegment(0, std::make_tuple(start, end), std::forward<F>(function));
|
||||
return;
|
||||
}
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
CHECK(context != nullptr);
|
||||
ParallelInvoke(context,
|
||||
start,
|
||||
end,
|
||||
num_threads,
|
||||
std::forward<F>(function),
|
||||
min_block_size);
|
||||
}
|
||||
|
||||
// Execute function for every element in the range [start, end) with at most
|
||||
// num_threads, using user-provided partitions array.
|
||||
// When distributing workload between threads, it is assumed that each segment
|
||||
// bounded by adjacent elements of partitions array takes approximately equal
|
||||
// time to process.
|
||||
// ParallelFor overload driven by a caller-supplied partitions array.
//
// CHECK-fails when num_threads <= 0; returns immediately when start >= end.
// For a non-empty range, partitions.front() must equal start and
// partitions.back() must equal end (both CHECKed). Adjacent entries of
// partitions delimit the segments handed to function.
template <typename F>
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
F&& function,
|
||||
const std::vector<int>& partitions) {
|
||||
CHECK_GT(num_threads, 0);
|
||||
if (start >= end) {
|
||||
return;
|
||||
}
|
||||
CHECK_EQ(partitions.front(), start);
|
||||
CHECK_EQ(partitions.back(), end);
|
||||
// Single-threaded runs and ranges with no more indices than threads ignore
// the partitions and forward to the overload that takes none.
if (num_threads == 1 || end - start <= num_threads) {
|
||||
ParallelFor(context, start, end, num_threads, std::forward<F>(function));
|
||||
return;
|
||||
}
|
||||
CHECK_GT(partitions.size(), 1);
|
||||
// N + 1 boundaries describe N partitions.
const int num_partitions = partitions.size() - 1;
|
||||
// Parallelize over partition ids [0, num_partitions); the lambda maps each
// range of partition ids back to a range of original loop indices.
ParallelFor(context,
|
||||
0,
|
||||
num_partitions,
|
||||
num_threads,
|
||||
[&function, &partitions](int thread_id,
|
||||
std::tuple<int, int> partition_ids) {
|
||||
// partition_ids is a range of partition indices
|
||||
const auto [partition_start, partition_end] = partition_ids;
|
||||
// Execution over several adjacent segments is equivalent
|
||||
// to execution over union of those segments (which is also a
|
||||
// contiguous segment)
|
||||
const int range_start = partitions[partition_start];
|
||||
const int range_end = partitions[partition_end];
|
||||
// Range of original loop indices
|
||||
const auto range = std::make_tuple(range_start, range_end);
|
||||
InvokeOnSegment(thread_id, range, function);
|
||||
});
|
||||
}
|
||||
|
||||
// Execute function for every element in the range [start, end) with at most
|
||||
// num_threads, taking into account user-provided integer cumulative costs of
|
||||
// iterations. Cumulative costs of iteration for indices in range [0, end) are
|
||||
// stored in objects from cumulative_cost_data. User-provided
|
||||
// cumulative_cost_fun returns non-decreasing integer values corresponding to
|
||||
// inclusive cumulative cost of loop iterations, provided with a reference to
|
||||
// user-defined object. Only indices from [start, end) will be referenced. This
|
||||
// routine assumes that cumulative_cost_fun is non-decreasing (in other words,
|
||||
// all costs are non-negative);
|
||||
// When distributing workload between threads, input range of loop indices will
|
||||
// be partitioned into disjoint contiguous intervals, with the maximal cost
|
||||
// being minimized.
|
||||
// For example, with iteration costs of [1, 1, 5, 3, 1, 4] cumulative_cost_fun
|
||||
// should return [1, 2, 7, 10, 11, 15], and with num_threads = 4 this range
|
||||
// will be split into segments [0, 2) [2, 3) [3, 5) [5, 6) with costs
|
||||
// [2, 5, 4, 4].
|
||||
// ParallelFor overload that derives its partitions from cumulative costs.
//
// CHECK-fails when num_threads <= 0; returns immediately when start >= end.
// Otherwise the range is partitioned with PartitionRangeForParallelFor and the
// work is delegated to the overload that accepts a partitions array.
template <typename F, typename CumulativeCostData, typename CumulativeCostFun>
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
F&& function,
|
||||
const CumulativeCostData* cumulative_cost_data,
|
||||
CumulativeCostFun&& cumulative_cost_fun) {
|
||||
CHECK_GT(num_threads, 0);
|
||||
if (start >= end) {
|
||||
return;
|
||||
}
|
||||
// Single-threaded runs and ranges with no more indices than threads skip
// the cost-based partitioning entirely.
if (num_threads == 1 || end - start <= num_threads) {
|
||||
ParallelFor(context, start, end, num_threads, std::forward<F>(function));
|
||||
return;
|
||||
}
|
||||
// Creating several partitions allows us to tolerate imperfections of
|
||||
// partitioning and user-supplied iteration costs up to a certain extent
|
||||
constexpr int kNumPartitionsPerThread = 4;
|
||||
// Upper bound on the number of partitions requested: four per thread.
const int kMaxPartitions = num_threads * kNumPartitionsPerThread;
|
||||
const auto& partitions = PartitionRangeForParallelFor(
|
||||
start,
|
||||
end,
|
||||
kMaxPartitions,
|
||||
cumulative_cost_data,
|
||||
std::forward<CumulativeCostFun>(cumulative_cost_fun));
|
||||
CHECK_GT(partitions.size(), 1);
|
||||
ParallelFor(
|
||||
context, start, end, num_threads, std::forward<F>(function), partitions);
|
||||
}
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_PARALLEL_FOR_H_
|
||||
|
||||
245
extern/ceres/internal/ceres/parallel_for_cxx.cc
vendored
245
extern/ceres/internal/ceres/parallel_for_cxx.cc
vendored
@@ -1,245 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: vitus@google.com (Michael Vitus)
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
|
||||
#include <cmath>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include "ceres/concurrent_queue.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/scoped_thread_token.h"
|
||||
#include "ceres/thread_token_provider.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace {
|
||||
// This class creates a thread safe barrier which will block until a
|
||||
// pre-specified number of threads call Finished. This allows us to block the
|
||||
// main thread until all the parallel threads are finished processing all the
|
||||
// work.
|
||||
// Counting barrier: Block() returns once Finished() has been called
// num_total times. Both the counter and the wait are guarded by mutex_.
class BlockUntilFinished {
|
||||
public:
|
||||
// num_total is the number of Finished() calls Block() waits for.
explicit BlockUntilFinished(int num_total)
|
||||
: num_finished_(0), num_total_(num_total) {}
|
||||
|
||||
// Increment the number of jobs that have finished and signal the blocking
|
||||
// thread if all jobs have finished.
|
||||
// CHECK-fails if called more than num_total_ times.
void Finished() {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
++num_finished_;
|
||||
CHECK_LE(num_finished_, num_total_);
|
||||
if (num_finished_ == num_total_) {
|
||||
condition_.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// Block until all threads have signaled they are finished.
|
||||
// The predicate form of wait() re-checks the counter after every wake-up,
// so the call also returns at once if all jobs have already finished.
void Block() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
condition_.wait(lock, [&]() { return num_finished_ == num_total_; });
|
||||
}
|
||||
|
||||
private:
|
||||
// Guards num_finished_ and is the mutex condition_ waits on.
std::mutex mutex_;
|
||||
std::condition_variable condition_;
|
||||
// The current number of jobs finished.
|
||||
int num_finished_;
|
||||
// The total number of jobs.
|
||||
int num_total_;
|
||||
};
|
||||
|
||||
// Shared state between the parallel tasks. Each thread will use this
|
||||
// information to get the next block of work to be performed.
|
||||
// State shared by all tasks of one parallel-for invocation.
struct SharedState {
|
||||
// Stores the loop bounds and work-item count, starts the work counter i at
// 0, and passes num_work_items to both the thread token provider and the
// completion barrier.
SharedState(int start, int end, int num_work_items)
|
||||
: start(start),
|
||||
end(end),
|
||||
num_work_items(num_work_items),
|
||||
i(0),
|
||||
thread_token_provider(num_work_items),
|
||||
block_until_finished(num_work_items) {}
|
||||
|
||||
// The start and end index of the for loop.
|
||||
const int start;
|
||||
const int end;
|
||||
// The number of blocks that need to be processed.
|
||||
const int num_work_items;
|
||||
|
||||
// The next block of work to be assigned to a worker. The parallel for loop
|
||||
// range is split into num_work_items blocks of work, i.e. a single block of
|
||||
// work is:
|
||||
// for (int j = start + i; j < end; j += num_work_items) { ... }.
|
||||
int i;
|
||||
// NOTE(review): presumably the mutex guarding i - confirm at the use sites,
// which are outside this view.
std::mutex mutex_i;
|
||||
|
||||
// Provides a unique thread ID among all active threads working on the same
|
||||
// group of tasks. Thread-safe.
|
||||
ThreadTokenProvider thread_token_provider;
|
||||
|
||||
// Used to signal when all the work has been completed. Thread safe.
|
||||
BlockUntilFinished block_until_finished;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Forwards to ThreadPool::MaxNumThreadsAvailable().
int MaxNumThreadsAvailable() {
  const int max_threads = ThreadPool::MaxNumThreadsAvailable();
  return max_threads;
}
|
||||
|
||||
// See ParallelFor (below) for more details.
|
||||
// Calls function(i) for every i in [start, end) using up to num_threads
// threads. An empty range is a no-op; num_threads must be positive and
// context must not be null.
void ParallelFor(ContextImpl* context,
                 int start,
                 int end,
                 int num_threads,
                 const std::function<void(int)>& function) {
  CHECK_GT(num_threads, 0);
  CHECK(context != nullptr);
  if (end <= start) {
    return;
  }

  // With a single thread there is nothing to schedule: run the loop inline.
  if (num_threads == 1) {
    int index = start;
    while (index < end) {
      function(index);
      ++index;
    }
    return;
  }

  // Delegate to the overload that also supplies a thread id, and drop the id.
  const std::function<void(int, int)> ignore_thread_id =
      [&function](int /*thread_id*/, int index) { function(index); };
  ParallelFor(context, start, end, num_threads, ignore_thread_id);
}
|
||||
|
||||
// This implementation uses a fixed size max worker pool with a shared task
|
||||
// queue. The problem of executing the function for the interval of [start, end)
|
||||
// is broken up into at most num_threads blocks and added to the thread pool. To
|
||||
// avoid deadlocks, the calling thread is allowed to steal work from the worker
|
||||
// pool. This is implemented via a shared state between the tasks. In order for
|
||||
// the calling thread or thread pool to get a block of work, it will query the
|
||||
// shared state for the next block of work to be done. If there is nothing left,
|
||||
// it will return. We will exit the ParallelFor call when all of the work has
|
||||
// been done, not when all of the tasks have been popped off the task queue.
|
||||
//
|
||||
// A unique thread ID among all active tasks will be acquired once for each
|
||||
// block of work. This avoids the significant performance penalty for acquiring
|
||||
// it on every iteration of the for loop. The thread ID is guaranteed to be in
|
||||
// [0, num_threads).
|
||||
//
|
||||
// A performance analysis has shown this implementation is on par with OpenMP and
|
||||
// TBB.
|
||||
// Calls function(thread_id, i) for every i in [start, end). The range is cut
// into min(end - start, num_threads) strided blocks; each block is queued on
// the context's thread pool and may also be claimed by the calling thread.
void ParallelFor(ContextImpl* context,
                 int start,
                 int end,
                 int num_threads,
                 const std::function<void(int thread_id, int i)>& function) {
  CHECK_GT(num_threads, 0);
  CHECK(context != nullptr);
  if (end <= start) {
    return;
  }

  // Single-threaded fast path. A token is still drawn from a provider so the
  // thread id passed to the callback is obtained the same way as below.
  if (num_threads == 1) {
    ThreadTokenProvider single_thread_tokens(num_threads);
    const ScopedThreadToken token_holder(&single_thread_tokens);
    const int thread_id = token_holder.token();
    int index = start;
    while (index < end) {
      function(thread_id, index);
      ++index;
    }
    return;
  }

  // The state is reference counted because the calling thread may complete
  // all the work and return while queued tasks are still waiting to run; each
  // queued task keeps the state alive through its own copy of the pointer.
  const int num_work_items = std::min((end - start), num_threads);
  auto shared_state =
      std::make_shared<SharedState>(start, end, num_work_items);

  // Claims one block and processes it. Returns false when no block is left.
  auto task_function = [shared_state, &function]() {
    int claimed_block = 0;
    {
      // Claim the next block under the lock, or bail out if none remain.
      std::lock_guard<std::mutex> guard(shared_state->mutex_i);
      if (shared_state->i >= shared_state->num_work_items) {
        return false;
      }
      claimed_block = shared_state->i++;
    }

    // One thread id is acquired per block rather than per iteration.
    const ScopedThreadToken token_holder(&shared_state->thread_token_provider);
    const int thread_id = token_holder.token();

    // Walk the strided indices belonging to this block.
    const int stride = shared_state->num_work_items;
    const int last = shared_state->end;
    for (int j = shared_state->start + claimed_block; j < last; j += stride) {
      function(thread_id, j);
    }
    shared_state->block_until_finished.Finished();
    return true;
  };

  // Queue one task per block. The closure is captured by value so that each
  // queued task holds its own reference to shared_state.
  for (int queued = 0; queued < num_work_items; ++queued) {
    context->thread_pool.AddTask([task_function]() { task_function(); });
  }

  // Help out from the calling thread until no unclaimed block remains; pool
  // tasks that start afterwards find nothing to do and return immediately.
  for (;;) {
    if (!task_function()) {
      break;
    }
  }

  // Wait for blocks that other threads are still processing.
  shared_state->block_until_finished.Block();
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_USE_CXX_THREADS
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -28,58 +28,50 @@
|
||||
//
|
||||
// Author: vitus@google.com (Michael Vitus)
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <tuple>
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#if defined(CERES_USE_OPENMP)
|
||||
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/scoped_thread_token.h"
|
||||
#include "ceres/thread_token_provider.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "glog/logging.h"
|
||||
#include "omp.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
int MaxNumThreadsAvailable() { return omp_get_max_threads(); }
|
||||
BlockUntilFinished::BlockUntilFinished(int num_total_jobs)
|
||||
: num_total_jobs_finished_(0), num_total_jobs_(num_total_jobs) {}
|
||||
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
const std::function<void(int)>& function) {
|
||||
CHECK_GT(num_threads, 0);
|
||||
CHECK(context != nullptr);
|
||||
if (end <= start) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
#pragma omp parallel for num_threads(num_threads) \
|
||||
schedule(dynamic) if (num_threads > 1)
|
||||
#endif // CERES_USE_OPENMP
|
||||
for (int i = start; i < end; ++i) {
|
||||
function(i);
|
||||
void BlockUntilFinished::Finished(int num_jobs_finished) {
|
||||
if (num_jobs_finished == 0) return;
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
num_total_jobs_finished_ += num_jobs_finished;
|
||||
CHECK_LE(num_total_jobs_finished_, num_total_jobs_);
|
||||
if (num_total_jobs_finished_ == num_total_jobs_) {
|
||||
condition_.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelFor(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
const std::function<void(int thread_id, int i)>& function) {
|
||||
CHECK(context != nullptr);
|
||||
|
||||
ThreadTokenProvider thread_token_provider(num_threads);
|
||||
ParallelFor(context, start, end, num_threads, [&](int i) {
|
||||
const ScopedThreadToken scoped_thread_token(&thread_token_provider);
|
||||
const int thread_id = scoped_thread_token.token();
|
||||
function(thread_id, i);
|
||||
});
|
||||
void BlockUntilFinished::Block() {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
condition_.wait(
|
||||
lock, [this]() { return num_total_jobs_finished_ == num_total_jobs_; });
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
ParallelInvokeState::ParallelInvokeState(int start,
|
||||
int end,
|
||||
int num_work_blocks)
|
||||
: start(start),
|
||||
end(end),
|
||||
num_work_blocks(num_work_blocks),
|
||||
base_block_size((end - start) / num_work_blocks),
|
||||
num_base_p1_sized_blocks((end - start) % num_work_blocks),
|
||||
block_id(0),
|
||||
thread_id(0),
|
||||
block_until_finished(num_work_blocks) {}
|
||||
|
||||
#endif // defined(CERES_USE_OPENMP)
|
||||
} // namespace ceres::internal
|
||||
272
extern/ceres/internal/ceres/parallel_invoke.h
vendored
Normal file
272
extern/ceres/internal/ceres/parallel_invoke.h
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: vitus@google.com (Michael Vitus),
|
||||
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_PARALLEL_INVOKE_H_
|
||||
#define CERES_INTERNAL_PARALLEL_INVOKE_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// InvokeWithThreadId handles passing thread_id to the function
|
||||
// Calls `function` with the forwarded `args`, prepending `thread_id` only when
// `function` is callable with a leading int followed by those arguments.
template <typename F, typename... Args>
void InvokeWithThreadId(int thread_id, F&& function, Args&&... args) {
  if constexpr (!std::is_invocable_v<F, int, Args...>) {
    // The callable has no slot for a thread id: pass the arguments alone.
    function(std::forward<Args>(args)...);
  } else {
    function(thread_id, std::forward<Args>(args)...);
  }
}
|
||||
|
||||
// InvokeOnSegment either runs a loop over segment indices or passes it to the
|
||||
// function
|
||||
// Applies `function` to the half-open index range `range`. A callable that
// accepts an index (optionally preceded by a thread id) is called once per
// index; any other callable receives the whole range in a single call.
template <typename F>
void InvokeOnSegment(int thread_id, std::tuple<int, int> range, F&& function) {
  constexpr bool kTakesIndex = std::is_invocable_v<F, int>;
  constexpr bool kTakesThreadIdAndIndex = std::is_invocable_v<F, int, int>;

  if constexpr (!(kTakesIndex || kTakesThreadIdAndIndex)) {
    // Range-based callable: hand over the segment as is.
    InvokeWithThreadId(thread_id, std::forward<F>(function), range);
  } else {
    const int first = std::get<0>(range);
    const int last = std::get<1>(range);
    int index = first;
    while (index != last) {
      InvokeWithThreadId(thread_id, std::forward<F>(function), index);
      ++index;
    }
  }
}
|
||||
|
||||
// This class creates a thread safe barrier which will block until a
|
||||
// pre-specified number of threads call Finished. This allows us to block the
|
||||
// main thread until all the parallel threads are finished processing all the
|
||||
// work.
|
||||
// Counting barrier used to hold the caller until a known number of jobs has
// been reported as done.
class BlockUntilFinished {
 public:
  // num_total_jobs is the number of jobs that must be reported via Finished()
  // before Block() returns.
  explicit BlockUntilFinished(int num_total_jobs);

  // Reports that the caller completed num_jobs_finished jobs; the blocked
  // thread is signalled once all jobs have been accounted for.
  void Finished(int num_jobs_finished);

  // Waits until all jobs have been reported as finished.
  void Block();

 private:
  std::mutex mutex_;
  std::condition_variable condition_;
  // Running total of jobs reported through Finished().
  int num_total_jobs_finished_;
  // Number of jobs expected in total.
  const int num_total_jobs_;
};
|
||||
|
||||
// Shared state between the parallel tasks. Each thread will use this
|
||||
// information to get the next block of work to be performed.
|
||||
struct ParallelInvokeState {
|
||||
// The entire range [start, end) is split into num_work_blocks contiguous
|
||||
// disjoint intervals (blocks), which are as equal as possible given
|
||||
// total index count and requested number of blocks.
|
||||
//
|
||||
// Those num_work_blocks blocks are then processed in parallel.
|
||||
//
|
||||
// Total number of integer indices in interval [start, end) is
|
||||
// end - start, and when splitting them into num_work_blocks blocks
|
||||
// we can either
|
||||
// - Split into equal blocks when (end - start) is divisible by
|
||||
// num_work_blocks
|
||||
// - Split into blocks with size difference at most 1:
|
||||
// - Size of the smallest block(s) is (end - start) / num_work_blocks
|
||||
// - (end - start) % num_work_blocks will need to be 1 index larger
|
||||
//
|
||||
// Note that this splitting is optimal in the sense of maximal difference
|
||||
// between block sizes, since splitting into equal blocks is possible
|
||||
// if and only if number of indices is divisible by number of blocks.
|
||||
ParallelInvokeState(int start, int end, int num_work_blocks);
|
||||
|
||||
// The start and end index of the for loop.
|
||||
const int start;
|
||||
const int end;
|
||||
// The number of blocks that need to be processed.
|
||||
const int num_work_blocks;
|
||||
// Size of the smallest block
|
||||
const int base_block_size;
|
||||
// Number of blocks of size base_block_size + 1
|
||||
const int num_base_p1_sized_blocks;
|
||||
|
||||
// The next block of work to be assigned to a worker. The parallel for loop
|
||||
// range is split into num_work_blocks blocks of work, with a single block of
|
||||
// work being of size
|
||||
// - base_block_size + 1 for the first num_base_p1_sized_blocks blocks
|
||||
// - base_block_size for the rest of the blocks
|
||||
// blocks of indices are contiguous and disjoint
|
||||
std::atomic<int> block_id;
|
||||
|
||||
// Provides a unique thread ID among all active threads
|
||||
// We do not schedule more than num_threads threads via thread pool
|
||||
// and caller thread might steal one ID
|
||||
std::atomic<int> thread_id;
|
||||
|
||||
// Used to signal when all the work has been completed. Thread safe.
|
||||
BlockUntilFinished block_until_finished;
|
||||
};
|
||||
|
||||
// This implementation uses a fixed size max worker pool with a shared task
|
||||
// queue. The problem of executing the function for the interval of [start, end)
|
||||
// is broken up into at most num_threads * kWorkBlocksPerThread blocks (each of
|
||||
// size at least min_block_size) and added to the thread pool. To avoid
|
||||
// deadlocks, the calling thread is allowed to steal work from the worker pool.
|
||||
// This is implemented via a shared state between the tasks. In order for
|
||||
// the calling thread or thread pool to get a block of work, it will query the
|
||||
// shared state for the next block of work to be done. If there is nothing left,
|
||||
// it will return. We will exit the ParallelFor call when all of the work has
|
||||
// been done, not when all of the tasks have been popped off the task queue.
|
||||
//
|
||||
// A unique thread ID among all active tasks will be acquired once for each
|
||||
// block of work. This avoids the significant performance penalty for acquiring
|
||||
// it on every iteration of the for loop. The thread ID is guaranteed to be in
|
||||
// [0, num_threads).
|
||||
//
|
||||
// A performance analysis has shown this implementation is on par with OpenMP
|
||||
// and TBB.
|
||||
template <typename F>
|
||||
void ParallelInvoke(ContextImpl* context,
|
||||
int start,
|
||||
int end,
|
||||
int num_threads,
|
||||
F&& function,
|
||||
int min_block_size) {
|
||||
CHECK(context != nullptr);
|
||||
|
||||
// Maximal number of work items scheduled for a single thread
|
||||
// - Lower number of work items results in larger runtimes on unequal tasks
|
||||
// - Higher number of work items results in larger losses for synchronization
|
||||
constexpr int kWorkBlocksPerThread = 4;
|
||||
|
||||
// Interval [start, end) is being split into
|
||||
// num_threads * kWorkBlocksPerThread contiguous disjoint blocks.
|
||||
//
|
||||
// In order to avoid creating empty blocks of work, we need to limit
|
||||
// number of work blocks by a total number of indices.
|
||||
const int num_work_blocks = std::min((end - start) / min_block_size,
|
||||
num_threads * kWorkBlocksPerThread);
|
||||
|
||||
// We use a std::shared_ptr because the main thread can finish all
|
||||
// the work before the tasks have been popped off the queue. So the
|
||||
// shared state needs to exist for the duration of all the tasks.
|
||||
auto shared_state =
|
||||
std::make_shared<ParallelInvokeState>(start, end, num_work_blocks);
|
||||
|
||||
// A function which tries to schedule another task in the thread pool and
|
||||
// perform several chunks of work. Function expects itself as the argument in
|
||||
// order to schedule next task in the thread pool.
|
||||
auto task = [context, shared_state, num_threads, &function](auto& task_copy) {
|
||||
int num_jobs_finished = 0;
|
||||
const int thread_id = shared_state->thread_id.fetch_add(1);
|
||||
// In order to avoid dead-locks in nested parallel for loops, task() will be
|
||||
// invoked num_threads + 1 times:
|
||||
// - num_threads times via enqueueing task into thread pool
|
||||
// - one more time in the main thread
|
||||
// Tasks enqueued to thread pool might take some time before execution, and
|
||||
// the last task being executed will be terminated here in order to avoid
|
||||
// having more than num_threads active threads
|
||||
if (thread_id >= num_threads) return;
|
||||
const int num_work_blocks = shared_state->num_work_blocks;
|
||||
if (thread_id + 1 < num_threads &&
|
||||
shared_state->block_id < num_work_blocks) {
|
||||
// Add another thread to the thread pool.
|
||||
// Note we are taking the task as value so the copy of shared_state shared
|
||||
// pointer (captured by value at declaration of task lambda-function) is
|
||||
// copied and the ref count is increased. This is to prevent it from being
|
||||
// deleted when the main thread finishes all the work and exits before the
|
||||
// threads finish.
|
||||
context->thread_pool.AddTask([task_copy]() { task_copy(task_copy); });
|
||||
}
|
||||
|
||||
const int start = shared_state->start;
|
||||
const int base_block_size = shared_state->base_block_size;
|
||||
const int num_base_p1_sized_blocks = shared_state->num_base_p1_sized_blocks;
|
||||
|
||||
while (true) {
|
||||
// Get the next available chunk of work to be performed. If there is no
|
||||
// work, return.
|
||||
int block_id = shared_state->block_id.fetch_add(1);
|
||||
if (block_id >= num_work_blocks) {
|
||||
break;
|
||||
}
|
||||
++num_jobs_finished;
|
||||
|
||||
// For-loop interval [start, end) was split into num_work_blocks,
|
||||
// with num_base_p1_sized_blocks of size base_block_size + 1 and remaining
|
||||
// num_work_blocks - num_base_p1_sized_blocks of size base_block_size
|
||||
//
|
||||
// Then, start index of the block #block_id is given by a total
|
||||
// length of preceeding blocks:
|
||||
// * Total length of preceeding blocks of size base_block_size + 1:
|
||||
// min(block_id, num_base_p1_sized_blocks) * (base_block_size + 1)
|
||||
//
|
||||
// * Total length of preceeding blocks of size base_block_size:
|
||||
// (block_id - min(block_id, num_base_p1_sized_blocks)) *
|
||||
// base_block_size
|
||||
//
|
||||
// Simplifying sum of those quantities yields a following
|
||||
// expression for start index of the block #block_id
|
||||
const int curr_start = start + block_id * base_block_size +
|
||||
std::min(block_id, num_base_p1_sized_blocks);
|
||||
// First num_base_p1_sized_blocks have size base_block_size + 1
|
||||
//
|
||||
// Note that it is guaranteed that all blocks are within
|
||||
// [start, end) interval
|
||||
const int curr_end = curr_start + base_block_size +
|
||||
(block_id < num_base_p1_sized_blocks ? 1 : 0);
|
||||
// Perform each task in current block
|
||||
const auto range = std::make_tuple(curr_start, curr_end);
|
||||
InvokeOnSegment(thread_id, range, function);
|
||||
}
|
||||
shared_state->block_until_finished.Finished(num_jobs_finished);
|
||||
};
|
||||
|
||||
// Start scheduling threads and doing work. We might end up with less threads
|
||||
// scheduled than expected, if scheduling overhead is larger than the amount
|
||||
// of work to be done.
|
||||
task(task);
|
||||
|
||||
// Wait until all tasks have finished.
|
||||
shared_state->block_until_finished.Block();
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,8 +30,7 @@
|
||||
|
||||
#include "ceres/parallel_utils.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) {
|
||||
// This works by unfolding a rectangle into a triangle.
|
||||
@@ -86,5 +85,4 @@ void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/parallel_utils.h
vendored
8
extern/ceres/internal/ceres/parallel_utils.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,8 +33,7 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Converts a linear iteration order into a triangular iteration order.
|
||||
// Suppose you have nested loops that look like
|
||||
@@ -66,7 +65,6 @@ CERES_NO_EXPORT void LinearIndexToUpperTriangularIndex(int k,
|
||||
int* i,
|
||||
int* j);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_PARALLEL_UTILS_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -25,35 +25,30 @@
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_INTERNAL_FLOAT_CXSPARSE_H_
|
||||
#define CERES_INTERNAL_FLOAT_CXSPARSE_H_
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
#include <algorithm>
|
||||
#include <tuple>
|
||||
|
||||
#if !defined(CERES_NO_CXSPARSE)
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
|
||||
#include <memory>
|
||||
namespace ceres::internal {
|
||||
// Writes 0.0 into values[0 .. num_values). The work is handed to ParallelFor
// as index ranges, with kMinBlockSizeParallelVectorOps as the minimum block
// size.
void ParallelSetZero(ContextImpl* context,
                     int num_threads,
                     double* values,
                     int num_values) {
  ParallelFor(
      context,
      0,
      num_values,
      num_threads,
      [values](std::tuple<int, int> range) {
        double* const first = values + std::get<0>(range);
        double* const last = values + std::get<1>(range);
        std::fill(first, last, 0.0);
      },
      kMinBlockSizeParallelVectorOps);
}
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Fake implementation of a single precision Sparse Cholesky using
|
||||
// CXSparse.
|
||||
class CERES_NO_EXPORT FloatCXSparseCholesky : public SparseCholesky {
|
||||
public:
|
||||
static std::unique_ptr<SparseCholesky> Create(OrderingType ordering_type);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // !defined(CERES_NO_CXSPARSE)
|
||||
|
||||
#endif // CERES_INTERNAL_FLOAT_CXSPARSE_H_
|
||||
} // namespace ceres::internal
|
||||
90
extern/ceres/internal/ceres/parallel_vector_ops.h
vendored
Normal file
90
extern/ceres/internal/ceres/parallel_vector_ops.h
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: vitus@google.com (Michael Vitus),
|
||||
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
|
||||
#define CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Lower bound on block size for parallel vector operations.
|
||||
// Operations with vectors of less than kMinBlockSizeParallelVectorOps elements
|
||||
// will be executed in a single thread.
|
||||
constexpr int kMinBlockSizeParallelVectorOps = 1 << 16;
|
||||
// Evaluate vector expression in parallel
|
||||
// Assuming LhsExpression and RhsExpression are some sort of column-vector
|
||||
// expression, assignment lhs = rhs is evaluated over a set of contiguous blocks
|
||||
// in parallel. This is expected to work well in the case of vector-based
|
||||
// expressions (since they typically do not result into temporaries). This
|
||||
// method expects lhs to be size-compatible with rhs
|
||||
template <typename LhsExpression, typename RhsExpression>
|
||||
void ParallelAssign(ContextImpl* context,
|
||||
int num_threads,
|
||||
LhsExpression& lhs,
|
||||
const RhsExpression& rhs) {
|
||||
static_assert(LhsExpression::ColsAtCompileTime == 1);
|
||||
static_assert(RhsExpression::ColsAtCompileTime == 1);
|
||||
CHECK_EQ(lhs.rows(), rhs.rows());
|
||||
const int num_rows = lhs.rows();
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
num_rows,
|
||||
num_threads,
|
||||
[&lhs, &rhs](const std::tuple<int, int>& range) {
|
||||
auto [start, end] = range;
|
||||
lhs.segment(start, end - start) = rhs.segment(start, end - start);
|
||||
},
|
||||
kMinBlockSizeParallelVectorOps);
|
||||
}
|
||||
|
||||
// Set vector to zero using num_threads
|
||||
template <typename VectorType>
|
||||
void ParallelSetZero(ContextImpl* context,
|
||||
int num_threads,
|
||||
VectorType& vector) {
|
||||
ParallelSetZero(context, num_threads, vector.data(), vector.rows());
|
||||
}
|
||||
void ParallelSetZero(ContextImpl* context,
|
||||
int num_threads,
|
||||
double* values,
|
||||
int num_values);
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ProblemImpl;
|
||||
class ResidualBlock;
|
||||
@@ -382,8 +381,7 @@ class CERES_NO_EXPORT ParameterBlock {
|
||||
friend class ProblemImpl;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,8 +30,11 @@
|
||||
|
||||
#include "ceres/parameter_block_ordering.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/graph.h"
|
||||
#include "ceres/graph_algorithms.h"
|
||||
@@ -42,22 +45,18 @@
|
||||
#include "ceres/wall_time.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
int ComputeStableSchurOrdering(const Program& program,
|
||||
vector<ParameterBlock*>* ordering) {
|
||||
std::vector<ParameterBlock*>* ordering) {
|
||||
CHECK(ordering != nullptr);
|
||||
ordering->clear();
|
||||
EventLogger event_logger("ComputeStableSchurOrdering");
|
||||
auto graph = CreateHessianGraph(program);
|
||||
event_logger.AddEvent("CreateHessianGraph");
|
||||
|
||||
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
const std::unordered_set<ParameterBlock*>& vertices = graph->vertices();
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
if (vertices.count(parameter_block) > 0) {
|
||||
@@ -81,13 +80,14 @@ int ComputeStableSchurOrdering(const Program& program,
|
||||
}
|
||||
|
||||
int ComputeSchurOrdering(const Program& program,
|
||||
vector<ParameterBlock*>* ordering) {
|
||||
std::vector<ParameterBlock*>* ordering) {
|
||||
CHECK(ordering != nullptr);
|
||||
ordering->clear();
|
||||
|
||||
auto graph = CreateHessianGraph(program);
|
||||
int independent_set_size = IndependentSetOrdering(*graph, ordering);
|
||||
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
|
||||
// Add the excluded blocks to back of the ordering vector.
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
@@ -103,13 +103,14 @@ void ComputeRecursiveIndependentSetOrdering(const Program& program,
|
||||
ParameterBlockOrdering* ordering) {
|
||||
CHECK(ordering != nullptr);
|
||||
ordering->Clear();
|
||||
const vector<ParameterBlock*> parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*> parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
auto graph = CreateHessianGraph(program);
|
||||
|
||||
int num_covered = 0;
|
||||
int round = 0;
|
||||
while (num_covered < parameter_blocks.size()) {
|
||||
vector<ParameterBlock*> independent_set_ordering;
|
||||
std::vector<ParameterBlock*> independent_set_ordering;
|
||||
const int independent_set_size =
|
||||
IndependentSetOrdering(*graph, &independent_set_ordering);
|
||||
for (int i = 0; i < independent_set_size; ++i) {
|
||||
@@ -126,14 +127,16 @@ std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
|
||||
const Program& program) {
|
||||
auto graph = std::make_unique<Graph<ParameterBlock*>>();
|
||||
CHECK(graph != nullptr);
|
||||
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
if (!parameter_block->IsConstant()) {
|
||||
graph->AddVertex(parameter_block);
|
||||
}
|
||||
}
|
||||
|
||||
const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
|
||||
const std::vector<ResidualBlock*>& residual_blocks =
|
||||
program.residual_blocks();
|
||||
for (auto* residual_block : residual_blocks) {
|
||||
const int num_parameter_blocks = residual_block->NumParameterBlocks();
|
||||
ParameterBlock* const* parameter_blocks =
|
||||
@@ -157,19 +160,20 @@ std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
|
||||
}
|
||||
|
||||
void OrderingToGroupSizes(const ParameterBlockOrdering* ordering,
|
||||
vector<int>* group_sizes) {
|
||||
std::vector<int>* group_sizes) {
|
||||
CHECK(group_sizes != nullptr);
|
||||
group_sizes->clear();
|
||||
if (ordering == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
const map<int, set<double*>>& group_to_elements =
|
||||
// TODO(sameeragarwal): Investigate if this should be a set or an
|
||||
// unordered_set.
|
||||
const std::map<int, std::set<double*>>& group_to_elements =
|
||||
ordering->group_to_elements();
|
||||
for (const auto& g_t_e : group_to_elements) {
|
||||
group_sizes->push_back(g_t_e.second.size());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,15 +40,14 @@
|
||||
#include "ceres/ordered_groups.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
class ParameterBlock;
|
||||
|
||||
// Uses an approximate independent set ordering to order the parameter
|
||||
// blocks of a problem so that it is suitable for use with Schur
|
||||
// complement based solvers. The output variable ordering contains an
|
||||
// blocks of a problem so that it is suitable for use with Schur-
|
||||
// complement-based solvers. The output variable ordering contains an
|
||||
// ordering of the parameter blocks and the return value is size of
|
||||
// the independent set or the number of e_blocks (see
|
||||
// schur_complement_solver.h for an explanation). Constant parameters
|
||||
@@ -88,8 +87,7 @@ CERES_NO_EXPORT std::unique_ptr<Graph<ParameterBlock*>> CreateHessianGraph(
|
||||
CERES_NO_EXPORT void OrderingToGroupSizes(
|
||||
const ParameterBlockOrdering* ordering, std::vector<int>* group_sizes);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
150
extern/ceres/internal/ceres/partition_range_for_parallel_for.h
vendored
Normal file
150
extern/ceres/internal/ceres/partition_range_for_parallel_for.h
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: vitus@google.com (Michael Vitus),
|
||||
// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
|
||||
|
||||
#ifndef CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_
|
||||
#define CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace ceres::internal {

// Checks whether the range [start, end) can be split into at most
// max_num_partitions contiguous partitions, each of cost not greater than
// max_partition_cost.
//
// Costs are described by inclusive integer cumulative costs:
// cumulative_cost_fun(cumulative_cost_data[i]) is the total cost of indices
// 0..i and must be non-decreasing over [start, end). cumulative_cost_offset is
// the total cost of all indices preceding start (zero when start is zero).
//
// The partition is built greedily: every partition is extended as far as
// max_partition_cost allows. Its boundaries are written to *partition as
// {start, b1, ..., end}. The function returns false - leaving *partition in a
// partially filled state - when more than max_num_partitions partitions would
// be needed, or when a single element already exceeds max_partition_cost.
template <typename CumulativeCostData, typename CumulativeCostFun>
bool MaxPartitionCostIsFeasible(int start,
                                int end,
                                int max_num_partitions,
                                int max_partition_cost,
                                int cumulative_cost_offset,
                                const CumulativeCostData* cumulative_cost_data,
                                CumulativeCostFun&& cumulative_cost_fun,
                                std::vector<int>* partition) {
  partition->clear();
  partition->push_back(start);
  int partition_start = start;
  int cost_offset = cumulative_cost_offset;

  while (partition_start < end) {
    // partition holds one more boundary than there are partitions, so a size
    // above max_num_partitions means the budget is already used up. The
    // comparison is done on signed values so that a non-positive
    // max_num_partitions is rejected instead of wrapping around to a huge
    // unsigned limit.
    if (static_cast<int>(partition->size()) > max_num_partitions) {
      return false;
    }

    // Largest cumulative cost the current partition may reach.
    const int target = max_partition_cost + cost_offset;
    const CumulativeCostData* const first_too_expensive = std::partition_point(
        cumulative_cost_data + partition_start,
        cumulative_cost_data + end,
        [&cumulative_cost_fun, target](const CumulativeCostData& item) {
          return cumulative_cost_fun(item) <= target;
        });
    const int partition_end =
        static_cast<int>(first_too_expensive - cumulative_cost_data);

    // Unable to make a partition from a single element
    if (partition_end == partition_start) {
      return false;
    }

    // The next partition's budget is measured from the cumulative cost at the
    // end of the partition that was just closed.
    cost_offset = cumulative_cost_fun(cumulative_cost_data[partition_end - 1]);
    partition->push_back(partition_end);
    partition_start = partition_end;
  }
  return true;
}

// Splits the integer interval [start, end) into at most max_num_partitions
// contiguous intervals, minimizing the maximal total cost of a single
// interval.
//
// Inclusive integer cumulative costs for each (zero-based) index are provided
// by cumulative_cost_data objects, and are obtained by calling
// cumulative_cost_fun with a reference to one of the objects from the range
// [start, end) (and to the object at start - 1 when start is non-zero).
//
// Returns the partition boundaries {start, b1, ..., end}. An empty range
// (start >= end) or a partition budget of one or less yields the single
// interval {start, end} without reading any cost data.
template <typename CumulativeCostData, typename CumulativeCostFun>
std::vector<int> PartitionRangeForParallelFor(
    int start,
    int end,
    int max_num_partitions,
    const CumulativeCostData* cumulative_cost_data,
    CumulativeCostFun&& cumulative_cost_fun) {
  // Nothing to split: an empty range has no element at end - 1 to read the
  // total cost from, and a budget below two partitions cannot do better than
  // the whole interval (it would also divide by zero below when it is zero).
  if (start >= end || max_num_partitions <= 1) {
    return {start, end};
  }

  // Given maximal partition cost, it is possible to verify if it is admissible
  // and obtain corresponding partition using MaxPartitionCostIsFeasible
  // function. In order to find the lowest admissible value, a binary search
  // over all potentially optimal cost values is being performed
  const int cumulative_cost_last =
      cumulative_cost_fun(cumulative_cost_data[end - 1]);
  const int cumulative_cost_offset =
      start ? cumulative_cost_fun(cumulative_cost_data[start - 1]) : 0;
  const int total_cost = cumulative_cost_last - cumulative_cost_offset;

  // Minimal maximal partition cost is not smaller than the average
  // We will use non-inclusive lower bound
  int partition_cost_lower_bound = total_cost / max_num_partitions - 1;
  // Minimal maximal partition cost is not larger than the total cost
  // Upper bound is inclusive
  int partition_cost_upper_bound = total_cost;

  // Scratch partition filled by every feasibility check.
  std::vector<int> partition;
  // Range partition corresponding to the latest evaluated upper bound.
  // A single segment covering the whole input interval [start, end) corresponds
  // to minimal maximal partition cost of total_cost.
  std::vector<int> partition_upper_bound = {start, end};

  // Binary search over partition cost, returning the lowest admissible cost
  while (partition_cost_upper_bound - partition_cost_lower_bound > 1) {
    // Re-reserve on every iteration: after a swap the scratch vector may be
    // the small initial {start, end} buffer.
    partition.reserve(max_num_partitions + 1);
    const int partition_cost =
        partition_cost_lower_bound +
        (partition_cost_upper_bound - partition_cost_lower_bound) / 2;
    // The functor is passed as an lvalue on purpose: it is reused on every
    // iteration, so it must never be forwarded (and potentially moved from).
    const bool admissible =
        MaxPartitionCostIsFeasible(start,
                                   end,
                                   max_num_partitions,
                                   partition_cost,
                                   cumulative_cost_offset,
                                   cumulative_cost_data,
                                   cumulative_cost_fun,
                                   &partition);
    if (admissible) {
      partition_cost_upper_bound = partition_cost;
      partition.swap(partition_upper_bound);
    } else {
      partition_cost_lower_bound = partition_cost;
    }
  }

  return partition_upper_bound;
}

}  // namespace ceres::internal
|
||||
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,8 +44,7 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/partitioned_matrix_view.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default;
|
||||
|
||||
@@ -56,121 +55,121 @@ std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
|
||||
(options.e_block_size == 2) &&
|
||||
(options.f_block_size == 2)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,2, 2>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 2) &&
|
||||
(options.f_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,2, 3>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 2) &&
|
||||
(options.f_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,2, 4>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 2)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,2, Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 3) &&
|
||||
(options.f_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,3, 3>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 3) &&
|
||||
(options.f_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,3, 4>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 3) &&
|
||||
(options.f_block_size == 6)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,3, 6>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 3) &&
|
||||
(options.f_block_size == 9)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,3, 9>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,3, Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, 3>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, 4>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 6)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, 6>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 8)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, 8>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 9)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, 9>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 2) &&
|
||||
(options.e_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<2,4, Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if (options.row_block_size == 2) {
|
||||
return std::make_unique<PartitionedMatrixView<2,Eigen::Dynamic, Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 3) &&
|
||||
(options.e_block_size == 3) &&
|
||||
(options.f_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<3,3, 3>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 4) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 2)) {
|
||||
return std::make_unique<PartitionedMatrixView<4,4, 2>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 4) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 3)) {
|
||||
return std::make_unique<PartitionedMatrixView<4,4, 3>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 4) &&
|
||||
(options.e_block_size == 4) &&
|
||||
(options.f_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<4,4, 4>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
if ((options.row_block_size == 4) &&
|
||||
(options.e_block_size == 4)) {
|
||||
return std::make_unique<PartitionedMatrixView<4,4, Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -180,8 +179,7 @@ std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
|
||||
return std::make_unique<PartitionedMatrixView<Eigen::Dynamic,
|
||||
Eigen::Dynamic,
|
||||
Eigen::Dynamic>>(
|
||||
matrix, options.elimination_groups[0]);
|
||||
options, matrix);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -50,12 +50,13 @@
|
||||
#include "ceres/small_blas.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ContextImpl;
|
||||
|
||||
// Given generalized bi-partite matrix A = [E F], with the same block
|
||||
// structure as required by the Schur complement based solver, found
|
||||
// in explicit_schur_complement_solver.h, provide access to the
|
||||
// in schur_complement_solver.h, provide access to the
|
||||
// matrices E and F and their outer products E'E and F'F with
|
||||
// themselves.
|
||||
//
|
||||
@@ -68,16 +69,26 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase {
|
||||
virtual ~PartitionedMatrixViewBase();
|
||||
|
||||
// y += E'x
|
||||
virtual void LeftMultiplyE(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateE(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x,
|
||||
double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x,
|
||||
double* y) const = 0;
|
||||
|
||||
// y += F'x
|
||||
virtual void LeftMultiplyF(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateF(const double* x, double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x,
|
||||
double* y) const = 0;
|
||||
virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x,
|
||||
double* y) const = 0;
|
||||
|
||||
// y += Ex
|
||||
virtual void RightMultiplyE(const double* x, double* y) const = 0;
|
||||
virtual void RightMultiplyAndAccumulateE(const double* x,
|
||||
double* y) const = 0;
|
||||
|
||||
// y += Fx
|
||||
virtual void RightMultiplyF(const double* x, double* y) const = 0;
|
||||
virtual void RightMultiplyAndAccumulateF(const double* x,
|
||||
double* y) const = 0;
|
||||
|
||||
// Create and return the block diagonal of the matrix E'E.
|
||||
virtual std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const = 0;
|
||||
@@ -109,6 +120,8 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase {
|
||||
virtual int num_cols_f() const = 0;
|
||||
virtual int num_rows() const = 0;
|
||||
virtual int num_cols() const = 0;
|
||||
virtual const std::vector<int>& e_cols_partition() const = 0;
|
||||
virtual const std::vector<int>& f_cols_partition() const = 0;
|
||||
// clang-format on
|
||||
|
||||
static std::unique_ptr<PartitionedMatrixViewBase> Create(
|
||||
@@ -122,17 +135,46 @@ class CERES_NO_EXPORT PartitionedMatrixView final
|
||||
: public PartitionedMatrixViewBase {
|
||||
public:
|
||||
// matrix = [E F], where the matrix E contains the first
|
||||
// num_col_blocks_a column blocks.
|
||||
PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e);
|
||||
// options.elimination_groups[0] column blocks.
|
||||
PartitionedMatrixView(const LinearSolver::Options& options,
|
||||
const BlockSparseMatrix& matrix);
|
||||
|
||||
// y += E'x
|
||||
virtual void LeftMultiplyAndAccumulateE(const double* x,
|
||||
double* y) const final;
|
||||
virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x,
|
||||
double* y) const final;
|
||||
virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x,
|
||||
double* y) const final;
|
||||
|
||||
// y += F'x
|
||||
virtual void LeftMultiplyAndAccumulateF(const double* x,
|
||||
double* y) const final;
|
||||
virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x,
|
||||
double* y) const final;
|
||||
virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x,
|
||||
double* y) const final;
|
||||
|
||||
// y += Ex
|
||||
virtual void RightMultiplyAndAccumulateE(const double* x,
|
||||
double* y) const final;
|
||||
|
||||
// y += Fx
|
||||
virtual void RightMultiplyAndAccumulateF(const double* x,
|
||||
double* y) const final;
|
||||
|
||||
void LeftMultiplyE(const double* x, double* y) const final;
|
||||
void LeftMultiplyF(const double* x, double* y) const final;
|
||||
void RightMultiplyE(const double* x, double* y) const final;
|
||||
void RightMultiplyF(const double* x, double* y) const final;
|
||||
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalEtE() const final;
|
||||
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalFtF() const final;
|
||||
void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final;
|
||||
void UpdateBlockDiagonalEtESingleThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const;
|
||||
void UpdateBlockDiagonalEtEMultiThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const;
|
||||
void UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const final;
|
||||
void UpdateBlockDiagonalFtFSingleThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const;
|
||||
void UpdateBlockDiagonalFtFMultiThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const;
|
||||
// clang-format off
|
||||
int num_col_blocks_e() const final { return num_col_blocks_e_; }
|
||||
int num_col_blocks_f() const final { return num_col_blocks_f_; }
|
||||
@@ -141,21 +183,29 @@ class CERES_NO_EXPORT PartitionedMatrixView final
|
||||
int num_rows() const final { return matrix_.num_rows(); }
|
||||
int num_cols() const final { return matrix_.num_cols(); }
|
||||
// clang-format on
|
||||
const std::vector<int>& e_cols_partition() const final {
|
||||
return e_cols_partition_;
|
||||
}
|
||||
const std::vector<int>& f_cols_partition() const final {
|
||||
return f_cols_partition_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<BlockSparseMatrix> CreateBlockDiagonalMatrixLayout(
|
||||
int start_col_block, int end_col_block) const;
|
||||
|
||||
const LinearSolver::Options options_;
|
||||
const BlockSparseMatrix& matrix_;
|
||||
int num_row_blocks_e_;
|
||||
int num_col_blocks_e_;
|
||||
int num_col_blocks_f_;
|
||||
int num_cols_e_;
|
||||
int num_cols_f_;
|
||||
std::vector<int> e_cols_partition_;
|
||||
std::vector<int> f_cols_partition_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,27 +36,31 @@
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/partition_range_for_parallel_for.h"
|
||||
#include "ceres/partitioned_matrix_view.h"
|
||||
#include "ceres/small_blas.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e)
|
||||
: matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) {
|
||||
PartitionedMatrixView(const LinearSolver::Options& options,
|
||||
const BlockSparseMatrix& matrix)
|
||||
|
||||
: options_(options), matrix_(matrix) {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
CHECK(bs != nullptr);
|
||||
|
||||
num_col_blocks_e_ = options_.elimination_groups[0];
|
||||
num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;
|
||||
|
||||
// Compute the number of row blocks in E. The number of row blocks
|
||||
// in E maybe less than the number of row blocks in the input matrix
|
||||
// as some of the row blocks at the bottom may not have any
|
||||
// e_blocks. For a definition of what an e_block is, please see
|
||||
// explicit_schur_complement_solver.h
|
||||
// schur_complement_solver.h
|
||||
num_row_blocks_e_ = 0;
|
||||
for (const auto& row : bs->rows) {
|
||||
const std::vector<Cell>& cells = row.cells;
|
||||
@@ -79,6 +83,25 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
|
||||
CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
|
||||
|
||||
auto transpose_bs = matrix_.transpose_block_structure();
|
||||
const int num_threads = options_.num_threads;
|
||||
if (transpose_bs != nullptr && num_threads > 1) {
|
||||
int kMaxPartitions = num_threads * 4;
|
||||
e_cols_partition_ = PartitionRangeForParallelFor(
|
||||
0,
|
||||
num_col_blocks_e_,
|
||||
kMaxPartitions,
|
||||
transpose_bs->rows.data(),
|
||||
[](const CompressedRow& row) { return row.cumulative_nnz; });
|
||||
|
||||
f_cols_partition_ = PartitionRangeForParallelFor(
|
||||
num_col_blocks_e_,
|
||||
num_col_blocks_e_ + num_col_blocks_f_,
|
||||
kMaxPartitions,
|
||||
transpose_bs->rows.data(),
|
||||
[](const CompressedRow& row) { return row.cumulative_nnz; });
|
||||
}
|
||||
}
|
||||
|
||||
// The next four methods don't seem to be particularly cache
|
||||
@@ -88,77 +111,101 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
RightMultiplyE(const double* x, double* y) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
|
||||
RightMultiplyAndAccumulateE(const double* x, double* y) const {
|
||||
// Iterate over the first num_row_blocks_e_ row blocks, and multiply
|
||||
// by the first cell in each row block.
|
||||
auto bs = matrix_.block_structure();
|
||||
const double* values = matrix_.values();
|
||||
for (int r = 0; r < num_row_blocks_e_; ++r) {
|
||||
const Cell& cell = bs->rows[r].cells[0];
|
||||
const int row_block_pos = bs->rows[r].block.position;
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
}
|
||||
ParallelFor(options_.context,
|
||||
0,
|
||||
num_row_blocks_e_,
|
||||
options_.num_threads,
|
||||
[values, bs, x, y](int row_block_id) {
|
||||
const Cell& cell = bs->rows[row_block_id].cells[0];
|
||||
const int row_block_pos = bs->rows[row_block_id].block.position;
|
||||
const int row_block_size = bs->rows[row_block_id].block.size;
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
});
|
||||
}
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
RightMultiplyF(const double* x, double* y) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
|
||||
RightMultiplyAndAccumulateF(const double* x, double* y) const {
|
||||
// Iterate over row blocks, and if the row block is in E, then
|
||||
// multiply by all the cells except the first one which is of type
|
||||
// E. If the row block is not in E (i.e its in the bottom
|
||||
// num_row_blocks - num_row_blocks_e row blocks), then all the cells
|
||||
// are of type F and multiply by them all.
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
const int num_row_blocks = bs->rows.size();
|
||||
const int num_cols_e = num_cols_e_;
|
||||
const double* values = matrix_.values();
|
||||
for (int r = 0; r < num_row_blocks_e_; ++r) {
|
||||
const int row_block_pos = bs->rows[r].block.position;
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
const std::vector<Cell>& cells = bs->rows[r].cells;
|
||||
for (int c = 1; c < cells.size(); ++c) {
|
||||
const int col_block_id = cells[c].block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||
values + cells[c].position, row_block_size, col_block_size,
|
||||
x + col_block_pos - num_cols_e_,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
}
|
||||
}
|
||||
ParallelFor(options_.context,
|
||||
0,
|
||||
num_row_blocks_e_,
|
||||
options_.num_threads,
|
||||
[values, bs, num_cols_e, x, y](int row_block_id) {
|
||||
const int row_block_pos = bs->rows[row_block_id].block.position;
|
||||
const int row_block_size = bs->rows[row_block_id].block.size;
|
||||
const auto& cells = bs->rows[row_block_id].cells;
|
||||
for (int c = 1; c < cells.size(); ++c) {
|
||||
const int col_block_id = cells[c].block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||
values + cells[c].position, row_block_size, col_block_size,
|
||||
x + col_block_pos - num_cols_e,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
}
|
||||
});
|
||||
ParallelFor(options_.context,
|
||||
num_row_blocks_e_,
|
||||
num_row_blocks,
|
||||
options_.num_threads,
|
||||
[values, bs, num_cols_e, x, y](int row_block_id) {
|
||||
const int row_block_pos = bs->rows[row_block_id].block.position;
|
||||
const int row_block_size = bs->rows[row_block_id].block.size;
|
||||
const auto& cells = bs->rows[row_block_id].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
x + col_block_pos - num_cols_e,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
|
||||
const int row_block_pos = bs->rows[r].block.position;
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
const std::vector<Cell>& cells = bs->rows[r].cells;
|
||||
for (const auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
// clang-format off
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
x + col_block_pos - num_cols_e_,
|
||||
y + row_block_pos);
|
||||
// clang-format on
|
||||
}
|
||||
// Entry point for the E-part left (transpose) product: dispatches to the
// single-threaded or the multi-threaded implementation.
//
// x, y: raw input and output buffers, forwarded unchanged to the selected
//       implementation.
//
// Returns immediately, leaving y untouched, when the view has no E column
// blocks or no E row blocks.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyAndAccumulateE(const double* x, double* y) const {
|
||||
// Nothing to do for an empty E partition.
if (!num_col_blocks_e_) return;
|
||||
if (!num_row_blocks_e_) return;
|
||||
if (options_.num_threads == 1) {
|
||||
LeftMultiplyAndAccumulateESingleThreaded(x, y);
|
||||
} else {
|
||||
// The multi-threaded path passes options_.context to ParallelFor, so it
// must be set.
CHECK(options_.context != nullptr);
|
||||
LeftMultiplyAndAccumulateEMultiThreaded(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyE(const double* x, double* y) const {
|
||||
LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
|
||||
// Iterate over the first num_row_blocks_e_ row blocks, and multiply
|
||||
@@ -182,7 +229,55 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyF(const double* x, double* y) const {
|
||||
LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const {
|
||||
auto transpose_bs = matrix_.transpose_block_structure();
|
||||
CHECK(transpose_bs != nullptr);
|
||||
|
||||
// Local copies of class members in order to avoid capturing pointer to the
|
||||
// whole object in lambda function
|
||||
auto values = matrix_.values();
|
||||
const int num_row_blocks_e = num_row_blocks_e_;
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
0,
|
||||
num_col_blocks_e_,
|
||||
options_.num_threads,
|
||||
[values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) {
|
||||
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
|
||||
int row_block_size = transpose_bs->rows[row_block_id].block.size;
|
||||
auto& cells = transpose_bs->rows[row_block_id].cells;
|
||||
|
||||
for (auto& cell : cells) {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size = transpose_bs->cols[col_block_id].size;
|
||||
const int col_block_pos = transpose_bs->cols[col_block_id].position;
|
||||
if (col_block_id >= num_row_blocks_e) break;
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||
values + cell.position,
|
||||
col_block_size,
|
||||
row_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
},
|
||||
e_cols_partition());
|
||||
}
|
||||
|
||||
// Entry point for the F-part left (transpose) product: dispatches to the
// single-threaded or the multi-threaded implementation.
//
// x, y: raw input and output buffers, forwarded unchanged to the selected
//       implementation.
//
// Returns immediately, leaving y untouched, when the view has no F column
// blocks.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyAndAccumulateF(const double* x, double* y) const {
|
||||
// Nothing to do for an empty F partition.
if (!num_col_blocks_f_) return;
|
||||
if (options_.num_threads == 1) {
|
||||
LeftMultiplyAndAccumulateFSingleThreaded(x, y);
|
||||
} else {
|
||||
// The multi-threaded path passes options_.context to ParallelFor, so it
// must be set.
CHECK(options_.context != nullptr);
|
||||
LeftMultiplyAndAccumulateFMultiThreaded(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
|
||||
// Iterate over row blocks, and if the row block is in E, then
|
||||
@@ -226,10 +321,63 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-threaded F-part left (transpose) product.
//
// Works on the transposed block structure of the matrix, which must be
// available (CHECKed below). ParallelFor visits the transposed row blocks with
// ids in [num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_), so each
// invocation of the lambda handles a single F block and writes only to that
// block's slice of y.
//
// x: input buffer, indexed by the position of each cell's block in
//    transpose_bs->cols.
// y: output buffer, indexed by the transposed row-block position shifted down
//    by num_cols_e_ (i.e. y holds only the F part).
//
// NOTE(review): MatrixTransposeVectorMultiply is defined elsewhere; the trailing
// template argument 1 presumably selects "accumulate into y" -- confirm.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const {
|
||||
auto transpose_bs = matrix_.transpose_block_structure();
|
||||
CHECK(transpose_bs != nullptr);
|
||||
// Local copies of class members in order to avoid capturing pointer to the
|
||||
// whole object in lambda function
|
||||
auto values = matrix_.values();
|
||||
const int num_row_blocks_e = num_row_blocks_e_;
|
||||
const int num_cols_e = num_cols_e_;
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
num_col_blocks_e_,
|
||||
num_col_blocks_e_ + num_col_blocks_f_,
|
||||
options_.num_threads,
|
||||
[values, transpose_bs, num_row_blocks_e, num_cols_e, x, y](
|
||||
int row_block_id) {
|
||||
int row_block_pos = transpose_bs->rows[row_block_id].block.position;
|
||||
int row_block_size = transpose_bs->rows[row_block_id].block.size;
|
||||
auto& cells = transpose_bs->rows[row_block_id].cells;
|
||||
|
||||
const int num_cells = cells.size();
|
||||
int cell_idx = 0;
|
||||
// First pass: cells whose block id is below num_row_blocks_e use the
// statically sized kernel. The loop stops at the first cell at or past
// that bound and leaves cell_idx pointing at it.
// NOTE(review): this relies on cells being ordered by block_id -- confirm.
for (; cell_idx < num_cells; ++cell_idx) {
|
||||
auto& cell = cells[cell_idx];
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size = transpose_bs->cols[col_block_id].size;
|
||||
const int col_block_pos = transpose_bs->cols[col_block_id].position;
|
||||
if (col_block_id >= num_row_blocks_e) break;
|
||||
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||
values + cell.position,
|
||||
col_block_size,
|
||||
row_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos - num_cols_e);
|
||||
}
|
||||
// Second pass: the remaining cells (continuing from cell_idx) go through
// the dynamically sized kernel.
for (; cell_idx < num_cells; ++cell_idx) {
|
||||
auto& cell = cells[cell_idx];
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_size = transpose_bs->cols[col_block_id].size;
|
||||
const int col_block_pos = transpose_bs->cols[col_block_id].position;
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position,
|
||||
col_block_size,
|
||||
row_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos - num_cols_e);
|
||||
}
|
||||
},
|
||||
// NOTE(review): presumably a precomputed split of the F column blocks used
// by ParallelFor to balance work -- confirm against the class definition.
f_cols_partition());
|
||||
}
|
||||
|
||||
// Given a range of columns blocks of a matrix m, compute the block
|
||||
// structure of the block diagonal of the matrix m(:,
|
||||
// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
|
||||
// and return a BlockSparseMatrix with the this block structure. The
|
||||
// and return a BlockSparseMatrix with this block structure. The
|
||||
// caller owns the result.
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
std::unique_ptr<BlockSparseMatrix>
|
||||
@@ -290,17 +438,17 @@ PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
return block_diagonal;
|
||||
}
|
||||
|
||||
// Similar to the code in RightMultiplyE, except instead of the matrix
|
||||
// vector multiply its an outer product.
|
||||
// Similar to the code in RightMultiplyAndAccumulateE, except instead of the
|
||||
// matrix vector multiply its an outer product.
|
||||
//
|
||||
// block_diagonal = block_diagonal(E'E)
|
||||
//
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
const CompressedRowBlockStructure* block_diagonal_structure =
|
||||
block_diagonal->block_structure();
|
||||
UpdateBlockDiagonalEtESingleThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const {
|
||||
auto bs = matrix_.block_structure();
|
||||
auto block_diagonal_structure = block_diagonal->block_structure();
|
||||
|
||||
block_diagonal->SetZero();
|
||||
const double* values = matrix_.values();
|
||||
@@ -323,17 +471,68 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
}
|
||||
|
||||
// Similar to the code in RightMultiplyF, except instead of the matrix
|
||||
// vector multiply its an outer product.
|
||||
// Multi-threaded recomputation of the E-part block diagonal.
//
// Works on the transposed block structure of the matrix, which must be
// available (CHECKed below). ParallelFor visits block ids in
// [0, num_col_blocks_e_); each invocation zeroes one diagonal block of
// block_diagonal and then adds one MatrixTransposeMatrixMultiply(cell, cell)
// contribution per cell of the corresponding transposed row block.
//
// block_diagonal: output matrix; its values are overwritten in place through
//                 mutable_values(). Each diagonal block is located via
//                 cells[0] of the matching row in its block structure.
//
// NOTE(review): MatrixTransposeMatrixMultiply is defined elsewhere; the trailing
// template argument 1 presumably selects "accumulate" -- confirm.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalEtEMultiThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const {
|
||||
auto transpose_block_structure = matrix_.transpose_block_structure();
|
||||
CHECK(transpose_block_structure != nullptr);
|
||||
auto block_diagonal_structure = block_diagonal->block_structure();
|
||||
|
||||
const double* values = matrix_.values();
|
||||
double* values_diagonal = block_diagonal->mutable_values();
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
0,
|
||||
num_col_blocks_e_,
|
||||
options_.num_threads,
|
||||
[values,
|
||||
transpose_block_structure,
|
||||
values_diagonal,
|
||||
block_diagonal_structure](int col_block_id) {
|
||||
int cell_position =
|
||||
block_diagonal_structure->rows[col_block_id].cells[0].position;
|
||||
double* cell_values = values_diagonal + cell_position;
|
||||
int col_block_size =
|
||||
transpose_block_structure->rows[col_block_id].block.size;
|
||||
auto& cells = transpose_block_structure->rows[col_block_id].cells;
|
||||
// Clear this diagonal block before summing into it; unlike the
// single-threaded variant there is no whole-matrix SetZero() here.
MatrixRef(cell_values, col_block_size, col_block_size).setZero();
|
||||
|
||||
for (auto& c : cells) {
|
||||
int row_block_size = transpose_block_structure->cols[c.block_id].size;
|
||||
// clang-format off
|
||||
// Both operands are the same cell, i.e. the product of the cell's
// transpose with itself.
MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
|
||||
values + c.position, row_block_size, col_block_size,
|
||||
values + c.position, row_block_size, col_block_size,
|
||||
cell_values, 0, 0, col_block_size, col_block_size);
|
||||
// clang-format on
|
||||
}
|
||||
},
|
||||
e_cols_partition_);
|
||||
}
|
||||
|
||||
// Entry point for recomputing the E-part block diagonal: dispatches to the
// single-threaded or the multi-threaded implementation depending on
// options_.num_threads.
//
// block_diagonal: output matrix, forwarded unchanged to the selected
//                 implementation.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
|
||||
if (options_.num_threads == 1) {
|
||||
UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
|
||||
} else {
|
||||
// The multi-threaded path passes options_.context to ParallelFor, so it
// must be set.
CHECK(options_.context != nullptr);
|
||||
UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
|
||||
}
|
||||
}
|
||||
|
||||
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
|
||||
// matrix vector multiply its an outer product.
|
||||
//
|
||||
// block_diagonal = block_diagonal(F'F)
|
||||
//
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
|
||||
const CompressedRowBlockStructure* bs = matrix_.block_structure();
|
||||
const CompressedRowBlockStructure* block_diagonal_structure =
|
||||
block_diagonal->block_structure();
|
||||
UpdateBlockDiagonalFtFSingleThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const {
|
||||
auto bs = matrix_.block_structure();
|
||||
auto block_diagonal_structure = block_diagonal->block_structure();
|
||||
|
||||
block_diagonal->SetZero();
|
||||
const double* values = matrix_.values();
|
||||
@@ -380,5 +579,82 @@ void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
// Multi-threaded recomputation of the F-part block diagonal.
//
// Works on the transposed block structure of the matrix, which must be
// available (CHECKed below). ParallelFor visits block ids in
// [num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_); each invocation
// zeroes one diagonal block of block_diagonal and then adds one
// MatrixTransposeMatrixMultiply(cell, cell) contribution per cell of the
// corresponding transposed row block.
//
// block_diagonal: output matrix; its values are overwritten in place through
//                 mutable_values(). Its rows are indexed by the F block id
//                 relative to the first F block (col_block_id minus
//                 num_col_blocks_e).
//
// NOTE(review): MatrixTransposeMatrixMultiply is defined elsewhere; the trailing
// template argument 1 presumably selects "accumulate" -- confirm.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalFtFMultiThreaded(
|
||||
BlockSparseMatrix* block_diagonal) const {
|
||||
auto transpose_block_structure = matrix_.transpose_block_structure();
|
||||
CHECK(transpose_block_structure != nullptr);
|
||||
auto block_diagonal_structure = block_diagonal->block_structure();
|
||||
|
||||
const double* values = matrix_.values();
|
||||
double* values_diagonal = block_diagonal->mutable_values();
|
||||
|
||||
// Local copies of the members so that the lambda captures plain ints rather
// than `this`.
const int num_col_blocks_e = num_col_blocks_e_;
|
||||
const int num_row_blocks_e = num_row_blocks_e_;
|
||||
ParallelFor(
|
||||
options_.context,
|
||||
num_col_blocks_e_,
|
||||
num_col_blocks_e + num_col_blocks_f_,
|
||||
options_.num_threads,
|
||||
[transpose_block_structure,
|
||||
block_diagonal_structure,
|
||||
num_col_blocks_e,
|
||||
num_row_blocks_e,
|
||||
values,
|
||||
values_diagonal](int col_block_id) {
|
||||
const int col_block_size =
|
||||
transpose_block_structure->rows[col_block_id].block.size;
|
||||
// The diagonal matrix only has F blocks, so shift the id past the E
// blocks.
const int diagonal_block_id = col_block_id - num_col_blocks_e;
|
||||
const int cell_position =
|
||||
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
|
||||
double* cell_values = values_diagonal + cell_position;
|
||||
|
||||
// Clear this diagonal block before summing into it.
MatrixRef(cell_values, col_block_size, col_block_size).setZero();
|
||||
|
||||
auto& cells = transpose_block_structure->rows[col_block_id].cells;
|
||||
const int num_cells = cells.size();
|
||||
int i = 0;
|
||||
// First pass: cells whose block id is below num_row_blocks_e use the
// statically sized kernel. The loop stops at the first cell at or past
// that bound and leaves i pointing at it.
// NOTE(review): this relies on cells being ordered by block_id -- confirm.
for (; i < num_cells; ++i) {
|
||||
auto& cell = cells[i];
|
||||
const int row_block_id = cell.block_id;
|
||||
if (row_block_id >= num_row_blocks_e) break;
|
||||
const int row_block_size =
|
||||
transpose_block_structure->cols[row_block_id].size;
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
cell_values, 0, 0, col_block_size, col_block_size);
|
||||
// clang-format on
|
||||
}
|
||||
// Second pass: the remaining cells (continuing from i) go through the
// dynamically sized kernel.
for (; i < num_cells; ++i) {
|
||||
auto& cell = cells[i];
|
||||
const int row_block_id = cell.block_id;
|
||||
const int row_block_size =
|
||||
transpose_block_structure->cols[row_block_id].size;
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
values + cell.position, row_block_size, col_block_size,
|
||||
cell_values, 0, 0, col_block_size, col_block_size);
|
||||
// clang-format on
|
||||
}
|
||||
},
|
||||
f_cols_partition_);
|
||||
}
|
||||
|
||||
// Entry point for recomputing the F-part block diagonal: dispatches to the
// single-threaded or the multi-threaded implementation depending on
// options_.num_threads.
//
// block_diagonal: output matrix, forwarded unchanged to the selected
//                 implementation.
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
|
||||
if (options_.num_threads == 1) {
|
||||
UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
|
||||
} else {
|
||||
// The multi-threaded path passes options_.context to ParallelFor, so it
// must be set.
CHECK(options_.context != nullptr);
|
||||
UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
149
extern/ceres/internal/ceres/partitioned_matrix_view_template.py
vendored
Normal file
149
extern/ceres/internal/ceres/partitioned_matrix_view_template.py
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
# Ceres Solver - A fast non-linear least squares minimizer
|
||||
# Copyright 2023 Google Inc. All rights reserved.
|
||||
# http://ceres-solver.org/
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
# used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
#
|
||||
# Script for explicitly generating template specialization of the
|
||||
# PartitionedMatrixView class. Explicitly generating these
|
||||
# instantiations in separate .cc files breaks the compilation into
|
||||
# separate compilation unit rather than one large cc file.
|
||||
#
|
||||
# This script creates two sets of files.
|
||||
#
|
||||
# 1. partitioned_matrix_view_x_x_x.cc
|
||||
# where the x indicates the template parameters and
|
||||
#
|
||||
# 2. partitioned_matrix_view.cc
|
||||
#
|
||||
# that contains a factory function for instantiating these classes
|
||||
# based on runtime parameters.
|
||||
#
|
||||
# The list of tuples, specializations indicates the set of
|
||||
# specializations that is generated.
|
||||
|
||||
HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
//
|
||||
// Template specialization of PartitionedMatrixView.
|
||||
//
|
||||
// ========================================
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
//=========================================
|
||||
//
|
||||
// This file is generated using generate_template_specializations.py.
|
||||
"""
|
||||
|
||||
DYNAMIC_FILE = """
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<%s,
|
||||
%s,
|
||||
%s>;
|
||||
|
||||
} // namespace ceres::internal
|
||||
"""
|
||||
|
||||
SPECIALIZATION_FILE = """
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
#include "ceres/partitioned_matrix_view_impl.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
template class PartitionedMatrixView<%s, %s, %s>;
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
"""
|
||||
|
||||
FACTORY_FILE_HEADER = """
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/partitioned_matrix_view.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default;
|
||||
|
||||
std::unique_ptr<PartitionedMatrixViewBase> PartitionedMatrixViewBase::Create(
|
||||
const LinearSolver::Options& options, const BlockSparseMatrix& matrix) {
|
||||
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
"""
|
||||
FACTORY = """ return std::make_unique<PartitionedMatrixView<%s,%s, %s>>(
|
||||
options, matrix);"""
|
||||
|
||||
FACTORY_FOOTER = """
|
||||
#endif
|
||||
VLOG(1) << "Template specializations not found for <"
|
||||
<< options.row_block_size << "," << options.e_block_size << ","
|
||||
<< options.f_block_size << ">";
|
||||
return std::make_unique<PartitionedMatrixView<Eigen::Dynamic,
|
||||
Eigen::Dynamic,
|
||||
Eigen::Dynamic>>(
|
||||
options, matrix);
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
"""
|
||||
14
extern/ceres/internal/ceres/polynomial.cc
vendored
14
extern/ceres/internal/ceres/polynomial.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,10 +40,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -326,7 +323,7 @@ void MinimizePolynomial(const Vector& polynomial,
|
||||
}
|
||||
}
|
||||
|
||||
Vector FindInterpolatingPolynomial(const vector<FunctionSample>& samples) {
|
||||
Vector FindInterpolatingPolynomial(const std::vector<FunctionSample>& samples) {
|
||||
const int num_samples = samples.size();
|
||||
int num_constraints = 0;
|
||||
for (int i = 0; i < num_samples; ++i) {
|
||||
@@ -369,7 +366,7 @@ Vector FindInterpolatingPolynomial(const vector<FunctionSample>& samples) {
|
||||
return lu.setThreshold(0.0).solve(rhs);
|
||||
}
|
||||
|
||||
void MinimizeInterpolatingPolynomial(const vector<FunctionSample>& samples,
|
||||
void MinimizeInterpolatingPolynomial(const std::vector<FunctionSample>& samples,
|
||||
double x_min,
|
||||
double x_max,
|
||||
double* optimal_x,
|
||||
@@ -389,5 +386,4 @@ void MinimizeInterpolatingPolynomial(const vector<FunctionSample>& samples,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/polynomial.h
vendored
8
extern/ceres/internal/ceres/polynomial.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct FunctionSample;
|
||||
|
||||
@@ -116,8 +115,7 @@ CERES_NO_EXPORT void MinimizeInterpolatingPolynomial(
|
||||
double* optimal_x,
|
||||
double* optimal_value);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
88
extern/ceres/internal/ceres/power_series_expansion_preconditioner.cc
vendored
Normal file
88
extern/ceres/internal/ceres/power_series_expansion_preconditioner.cc
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: markshachkov@gmail.com (Mark Shachkov)
|
||||
|
||||
#include "ceres/power_series_expansion_preconditioner.h"
|
||||
|
||||
#include "ceres/eigen_vector_ops.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// Stores the arguments for later use; no computation happens here.
//
// isc:                     implicit Schur complement queried by
//                          RightMultiplyAndAccumulate() and num_rows(). Only the
//                          pointer is stored.
// max_num_spse_iterations: upper bound on the number of series terms added.
// spse_tolerance:          relative tolerance used for early termination of
//                          the series.
// options:                 copied; supplies context and num_threads for the
//                          parallel vector operations.
PowerSeriesExpansionPreconditioner::PowerSeriesExpansionPreconditioner(
|
||||
const ImplicitSchurComplement* isc,
|
||||
const int max_num_spse_iterations,
|
||||
const double spse_tolerance,
|
||||
const Preconditioner::Options& options)
|
||||
: isc_(isc),
|
||||
max_num_spse_iterations_(max_num_spse_iterations),
|
||||
spse_tolerance_(spse_tolerance),
|
||||
options_(options) {}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
PowerSeriesExpansionPreconditioner::~PowerSeriesExpansionPreconditioner() =
|
||||
default;
|
||||
|
||||
// No-op update: both arguments are ignored and the call always reports
// success. All work is done in RightMultiplyAndAccumulate() using the
// ImplicitSchurComplement supplied at construction.
bool PowerSeriesExpansionPreconditioner::Update(const LinearOperator& /*A*/,
|
||||
const double* /*D*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Applies the preconditioner to x and stores the result in y as a truncated
// series:
//
//   term_0 = block_diagonal_FtF_inverse * x
//   term_i = InversePowerSeriesOperator(term_{i-1})
//   y      = term_0 + term_1 + ...
//
// x: input buffer.
// y: output buffer of num_rows() entries. NOTE: despite the "AndAccumulate"
//    name, y is zeroed first, so its previous contents are discarded.
//
// The series stops after max_num_spse_iterations_ terms beyond term_0, or as
// soon as a term's norm drops below spse_tolerance_ * ||term_0||. At least one
// term beyond term_0 is always added.
void PowerSeriesExpansionPreconditioner::RightMultiplyAndAccumulate(
|
||||
const double* x, double* y) const {
|
||||
VectorRef yref(y, num_rows());
|
||||
Vector series_term(num_rows());
|
||||
Vector previous_series_term(num_rows());
|
||||
// Start from y = 0 so that the accumulate call below yields exactly term_0.
ParallelSetZero(options_.context, options_.num_threads, yref);
|
||||
isc_->block_diagonal_FtF_inverse()->RightMultiplyAndAccumulate(
|
||||
x, y, options_.context, options_.num_threads);
|
||||
ParallelAssign(
|
||||
options_.context, options_.num_threads, previous_series_term, yref);
|
||||
|
||||
// Termination threshold is relative to the norm of the first term.
const double norm_threshold =
|
||||
spse_tolerance_ * Norm(yref, options_.context, options_.num_threads);
|
||||
|
||||
for (int i = 1;; i++) {
|
||||
// The operator accumulates into its output, so clear it first.
ParallelSetZero(options_.context, options_.num_threads, series_term);
|
||||
isc_->InversePowerSeriesOperatorRightMultiplyAccumulate(
|
||||
previous_series_term.data(), series_term.data());
|
||||
ParallelAssign(
|
||||
options_.context, options_.num_threads, yref, yref + series_term);
|
||||
// The new term has already been added to y when the stop test runs.
if (i >= max_num_spse_iterations_ || series_term.norm() < norm_threshold) {
|
||||
break;
|
||||
}
|
||||
// Reuse the two buffers: the term just computed becomes the input of the
// next iteration.
std::swap(previous_series_term, series_term);
|
||||
}
|
||||
}
|
||||
|
||||
int PowerSeriesExpansionPreconditioner::num_rows() const {
|
||||
return isc_->num_rows();
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
71
extern/ceres/internal/ceres/power_series_expansion_preconditioner.h
vendored
Normal file
71
extern/ceres/internal/ceres/power_series_expansion_preconditioner.h
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: markshachkov@gmail.com (Mark Shachkov)
|
||||
|
||||
#ifndef CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_
|
||||
#define CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_
|
||||
|
||||
#include "ceres/implicit_schur_complement.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
// This is a preconditioner via power series expansion of Schur
|
||||
// complement inverse based on "Weber et al, Power Bundle Adjustment for
|
||||
// Large-Scale 3D Reconstruction".
|
||||
class CERES_NO_EXPORT PowerSeriesExpansionPreconditioner
|
||||
: public Preconditioner {
|
||||
public:
|
||||
// TODO: Consider moving max_num_spse_iterations and spse_tolerance to
|
||||
// Preconditioner::Options
|
||||
PowerSeriesExpansionPreconditioner(const ImplicitSchurComplement* isc,
|
||||
const int max_num_spse_iterations,
|
||||
const double spse_tolerance,
|
||||
const Preconditioner::Options& options);
|
||||
PowerSeriesExpansionPreconditioner(
|
||||
const PowerSeriesExpansionPreconditioner&) = delete;
|
||||
void operator=(const PowerSeriesExpansionPreconditioner&) = delete;
|
||||
~PowerSeriesExpansionPreconditioner() override;
|
||||
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
bool Update(const LinearOperator& A, const double* D) final;
|
||||
int num_rows() const final;
|
||||
|
||||
private:
|
||||
const ImplicitSchurComplement* isc_;
|
||||
const int max_num_spse_iterations_;
|
||||
const double spse_tolerance_;
|
||||
const Preconditioner::Options options_;
|
||||
};
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_
|
||||
23
extern/ceres/internal/ceres/preconditioner.cc
vendored
23
extern/ceres/internal/ceres/preconditioner.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
Preconditioner::~Preconditioner() = default;
|
||||
|
||||
@@ -48,27 +47,27 @@ PreconditionerType Preconditioner::PreconditionerForZeroEBlocks(
|
||||
}
|
||||
|
||||
SparseMatrixPreconditionerWrapper::SparseMatrixPreconditionerWrapper(
|
||||
const SparseMatrix* matrix)
|
||||
: matrix_(matrix) {
|
||||
const SparseMatrix* matrix, const Preconditioner::Options& options)
|
||||
: matrix_(matrix), options_(options) {
|
||||
CHECK(matrix != nullptr);
|
||||
}
|
||||
|
||||
SparseMatrixPreconditionerWrapper::~SparseMatrixPreconditionerWrapper() =
|
||||
default;
|
||||
|
||||
bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& A,
|
||||
const double* D) {
|
||||
bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& /*A*/,
|
||||
const double* /*D*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void SparseMatrixPreconditionerWrapper::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
matrix_->RightMultiply(x, y);
|
||||
void SparseMatrixPreconditionerWrapper::RightMultiplyAndAccumulate(
|
||||
const double* x, double* y) const {
|
||||
matrix_->RightMultiplyAndAccumulate(
|
||||
x, y, options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
int SparseMatrixPreconditionerWrapper::num_rows() const {
|
||||
return matrix_->num_rows();
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
64
extern/ceres/internal/ceres/preconditioner.h
vendored
64
extern/ceres/internal/ceres/preconditioner.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,11 +39,11 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_operator.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
class SparseMatrix;
|
||||
@@ -51,10 +51,25 @@ class SparseMatrix;
|
||||
class CERES_NO_EXPORT Preconditioner : public LinearOperator {
|
||||
public:
|
||||
struct Options {
|
||||
Options() = default;
|
||||
Options(const LinearSolver::Options& linear_solver_options)
|
||||
: type(linear_solver_options.preconditioner_type),
|
||||
visibility_clustering_type(
|
||||
linear_solver_options.visibility_clustering_type),
|
||||
sparse_linear_algebra_library_type(
|
||||
linear_solver_options.sparse_linear_algebra_library_type),
|
||||
num_threads(linear_solver_options.num_threads),
|
||||
row_block_size(linear_solver_options.row_block_size),
|
||||
e_block_size(linear_solver_options.e_block_size),
|
||||
f_block_size(linear_solver_options.f_block_size),
|
||||
elimination_groups(linear_solver_options.elimination_groups),
|
||||
context(linear_solver_options.context) {}
|
||||
|
||||
PreconditionerType type = JACOBI;
|
||||
VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS;
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
|
||||
SUITE_SPARSE;
|
||||
OrderingType ordering_type = OrderingType::NATURAL;
|
||||
|
||||
// When using the subset preconditioner, all row blocks starting
|
||||
// from this row block are used to construct the preconditioner.
|
||||
@@ -68,9 +83,6 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator {
|
||||
// and the preconditioner is the inverse of the matrix Q'Q.
|
||||
int subset_preconditioner_start_row_block = -1;
|
||||
|
||||
// See solver.h for information about these flags.
|
||||
bool use_postordering = false;
|
||||
|
||||
// If possible, how many threads the preconditioner can use.
|
||||
int num_threads = 1;
|
||||
|
||||
@@ -132,18 +144,37 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator {
|
||||
virtual bool Update(const LinearOperator& A, const double* D) = 0;
|
||||
|
||||
// LinearOperator interface. Since the operator is symmetric,
|
||||
// LeftMultiply and num_cols are just calls to RightMultiply and
|
||||
// num_rows respectively. Update() must be called before
|
||||
// RightMultiply can be called.
|
||||
void RightMultiply(const double* x, double* y) const override = 0;
|
||||
void LeftMultiply(const double* x, double* y) const override {
|
||||
return RightMultiply(x, y);
|
||||
// LeftMultiplyAndAccumulate and num_cols are just calls to
|
||||
// RightMultiplyAndAccumulate and num_rows respectively. Update() must be
|
||||
// called before RightMultiplyAndAccumulate can be called.
|
||||
void RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const override = 0;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const override {
|
||||
return RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
int num_rows() const override = 0;
|
||||
int num_cols() const override { return num_rows(); }
|
||||
};
|
||||
|
||||
// Preconditioner that behaves like the identity matrix of a fixed size:
// applying it adds the input vector to the output vector unchanged.
class CERES_NO_EXPORT IdentityPreconditioner : public Preconditioner {
 public:
  // num_rows is the length of the vectors this operator is applied to.
  IdentityPreconditioner(int num_rows) : num_rows_(num_rows) {}

  // Both arguments are ignored; the call always reports success.
  bool Update(const LinearOperator& /*A*/, const double* /*D*/) final {
    return true;
  }

  // y += x over the first num_rows() entries.
  void RightMultiplyAndAccumulate(const double* x, double* y) const final {
    const ConstVectorRef input(x, num_rows_);
    VectorRef output(y, num_rows_);
    output += input;
  }

  int num_rows() const final { return num_rows_; }

 private:
  int num_rows_ = -1;
};
|
||||
|
||||
// This templated subclass of Preconditioner serves as a base class for
|
||||
// other preconditioners that depend on the particular matrix layout of
|
||||
// the underlying linear operator.
|
||||
@@ -171,20 +202,21 @@ class CERES_NO_EXPORT SparseMatrixPreconditionerWrapper final
|
||||
: public SparseMatrixPreconditioner {
|
||||
public:
|
||||
// Wrapper does NOT take ownership of the matrix pointer.
|
||||
explicit SparseMatrixPreconditionerWrapper(const SparseMatrix* matrix);
|
||||
explicit SparseMatrixPreconditionerWrapper(
|
||||
const SparseMatrix* matrix, const Preconditioner::Options& options);
|
||||
~SparseMatrixPreconditionerWrapper() override;
|
||||
|
||||
// Preconditioner interface
|
||||
void RightMultiply(const double* x, double* y) const override;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const override;
|
||||
int num_rows() const override;
|
||||
|
||||
private:
|
||||
bool UpdateImpl(const SparseMatrix& A, const double* D) override;
|
||||
const SparseMatrix* matrix_;
|
||||
const Preconditioner::Options options_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
14
extern/ceres/internal/ceres/preprocessor.cc
vendored
14
extern/ceres/internal/ceres/preprocessor.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,13 +35,12 @@
|
||||
#include "ceres/callbacks.h"
|
||||
#include "ceres/gradient_checking_cost_function.h"
|
||||
#include "ceres/line_search_preprocessor.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/problem_impl.h"
|
||||
#include "ceres/solver.h"
|
||||
#include "ceres/thread_pool.h"
|
||||
#include "ceres/trust_region_preprocessor.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<Preprocessor> Preprocessor::Create(
|
||||
MinimizerType minimizer_type) {
|
||||
@@ -63,7 +62,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options) {
|
||||
if (options->num_threads == 1) {
|
||||
return;
|
||||
}
|
||||
const int num_threads_available = MaxNumThreadsAvailable();
|
||||
const int num_threads_available = ThreadPool::MaxNumThreadsAvailable();
|
||||
if (options->num_threads > num_threads_available) {
|
||||
LOG(WARNING) << "Specified options.num_threads: " << options->num_threads
|
||||
<< " exceeds maximum available from the threading model Ceres "
|
||||
@@ -83,9 +82,11 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
|
||||
double* reduced_parameters = pp->reduced_parameters.data();
|
||||
program->ParameterBlocksToStateVector(reduced_parameters);
|
||||
|
||||
auto context = pp->problem->context();
|
||||
Minimizer::Options& minimizer_options = pp->minimizer_options;
|
||||
minimizer_options = Minimizer::Options(options);
|
||||
minimizer_options.evaluator = pp->evaluator;
|
||||
minimizer_options.context = context;
|
||||
|
||||
if (options.logging_type != SILENT) {
|
||||
pp->logging_callback = std::make_unique<LoggingCallback>(
|
||||
@@ -104,5 +105,4 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/preprocessor.h
vendored
8
extern/ceres/internal/ceres/preprocessor.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct PreprocessedProblem;
|
||||
|
||||
@@ -118,8 +117,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options);
|
||||
CERES_NO_EXPORT
|
||||
void SetupCommonMinimizerOptions(PreprocessedProblem* pp);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
46
extern/ceres/internal/ceres/problem.cc
vendored
46
extern/ceres/internal/ceres/problem.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,6 @@
|
||||
|
||||
namespace ceres {
|
||||
|
||||
using std::vector;
|
||||
|
||||
Problem::Problem() : impl_(new internal::ProblemImpl) {}
|
||||
Problem::Problem(const Problem::Options& options)
|
||||
: impl_(new internal::ProblemImpl(options)) {}
|
||||
@@ -52,7 +50,7 @@ Problem::~Problem() = default;
|
||||
ResidualBlockId Problem::AddResidualBlock(
|
||||
CostFunction* cost_function,
|
||||
LossFunction* loss_function,
|
||||
const vector<double*>& parameter_blocks) {
|
||||
const std::vector<double*>& parameter_blocks) {
|
||||
return impl_->AddResidualBlock(cost_function,
|
||||
loss_function,
|
||||
parameter_blocks.data(),
|
||||
@@ -71,12 +69,6 @@ void Problem::AddParameterBlock(double* values, int size) {
|
||||
impl_->AddParameterBlock(values, size);
|
||||
}
|
||||
|
||||
void Problem::AddParameterBlock(double* values,
|
||||
int size,
|
||||
LocalParameterization* local_parameterization) {
|
||||
impl_->AddParameterBlock(values, size, local_parameterization);
|
||||
}
|
||||
|
||||
void Problem::AddParameterBlock(double* values, int size, Manifold* manifold) {
|
||||
impl_->AddParameterBlock(values, size, manifold);
|
||||
}
|
||||
@@ -101,20 +93,6 @@ bool Problem::IsParameterBlockConstant(const double* values) const {
|
||||
return impl_->IsParameterBlockConstant(values);
|
||||
}
|
||||
|
||||
void Problem::SetParameterization(
|
||||
double* values, LocalParameterization* local_parameterization) {
|
||||
impl_->SetParameterization(values, local_parameterization);
|
||||
}
|
||||
|
||||
const LocalParameterization* Problem::GetParameterization(
|
||||
const double* values) const {
|
||||
return impl_->GetParameterization(values);
|
||||
}
|
||||
|
||||
bool Problem::HasParameterization(const double* values) const {
|
||||
return impl_->HasParameterization(values);
|
||||
}
|
||||
|
||||
void Problem::SetManifold(double* values, Manifold* manifold) {
|
||||
impl_->SetManifold(values, manifold);
|
||||
}
|
||||
@@ -149,8 +127,8 @@ double Problem::GetParameterLowerBound(const double* values, int index) const {
|
||||
|
||||
bool Problem::Evaluate(const EvaluateOptions& evaluate_options,
|
||||
double* cost,
|
||||
vector<double>* residuals,
|
||||
vector<double>* gradient,
|
||||
std::vector<double>* residuals,
|
||||
std::vector<double>* gradient,
|
||||
CRSMatrix* jacobian) {
|
||||
return impl_->Evaluate(evaluate_options, cost, residuals, gradient, jacobian);
|
||||
}
|
||||
@@ -194,10 +172,6 @@ int Problem::ParameterBlockSize(const double* values) const {
|
||||
return impl_->ParameterBlockSize(values);
|
||||
}
|
||||
|
||||
int Problem::ParameterBlockLocalSize(const double* values) const {
|
||||
return impl_->ParameterBlockTangentSize(values);
|
||||
}
|
||||
|
||||
int Problem::ParameterBlockTangentSize(const double* values) const {
|
||||
return impl_->ParameterBlockTangentSize(values);
|
||||
}
|
||||
@@ -206,18 +180,18 @@ bool Problem::HasParameterBlock(const double* values) const {
|
||||
return impl_->HasParameterBlock(values);
|
||||
}
|
||||
|
||||
void Problem::GetParameterBlocks(vector<double*>* parameter_blocks) const {
|
||||
void Problem::GetParameterBlocks(std::vector<double*>* parameter_blocks) const {
|
||||
impl_->GetParameterBlocks(parameter_blocks);
|
||||
}
|
||||
|
||||
void Problem::GetResidualBlocks(
|
||||
vector<ResidualBlockId>* residual_blocks) const {
|
||||
std::vector<ResidualBlockId>* residual_blocks) const {
|
||||
impl_->GetResidualBlocks(residual_blocks);
|
||||
}
|
||||
|
||||
void Problem::GetParameterBlocksForResidualBlock(
|
||||
const ResidualBlockId residual_block,
|
||||
vector<double*>* parameter_blocks) const {
|
||||
std::vector<double*>* parameter_blocks) const {
|
||||
impl_->GetParameterBlocksForResidualBlock(residual_block, parameter_blocks);
|
||||
}
|
||||
|
||||
@@ -232,8 +206,12 @@ const LossFunction* Problem::GetLossFunctionForResidualBlock(
|
||||
}
|
||||
|
||||
void Problem::GetResidualBlocksForParameterBlock(
|
||||
const double* values, vector<ResidualBlockId>* residual_blocks) const {
|
||||
const double* values, std::vector<ResidualBlockId>* residual_blocks) const {
|
||||
impl_->GetResidualBlocksForParameterBlock(values, residual_blocks);
|
||||
}
|
||||
|
||||
const Problem::Options& Problem::options() const { return impl_->options(); }
|
||||
|
||||
internal::ProblemImpl* Problem::mutable_impl() { return impl_.get(); }
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
79
extern/ceres/internal/ceres/problem_impl.cc
vendored
79
extern/ceres/internal/ceres/problem_impl.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -53,7 +53,6 @@
|
||||
#include "ceres/internal/fixed_array.h"
|
||||
#include "ceres/loss_function.h"
|
||||
#include "ceres/manifold.h"
|
||||
#include "ceres/manifold_adapter.h"
|
||||
#include "ceres/map_util.h"
|
||||
#include "ceres/parameter_block.h"
|
||||
#include "ceres/program.h"
|
||||
@@ -64,8 +63,7 @@
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
// Returns true if two regions of memory, a and b, with sizes size_a and size_b
|
||||
// respectively, overlap.
|
||||
@@ -257,10 +255,6 @@ ProblemImpl::~ProblemImpl() {
|
||||
DeleteBlock(parameter_block);
|
||||
}
|
||||
|
||||
// Delete the owned parameterizations.
|
||||
STLDeleteUniqueContainerPointers(local_parameterizations_to_delete_.begin(),
|
||||
local_parameterizations_to_delete_.end());
|
||||
|
||||
// Delete the owned manifolds.
|
||||
STLDeleteUniqueContainerPointers(manifolds_to_delete_.begin(),
|
||||
manifolds_to_delete_.end());
|
||||
@@ -365,45 +359,15 @@ void ProblemImpl::AddParameterBlock(double* values, int size) {
|
||||
InternalAddParameterBlock(values, size);
|
||||
}
|
||||
|
||||
void ProblemImpl::InternalSetParameterization(
|
||||
double* values,
|
||||
ParameterBlock* parameter_block,
|
||||
LocalParameterization* local_parameterization) {
|
||||
parameter_block_to_local_param_[values] = local_parameterization;
|
||||
Manifold* manifold = nullptr;
|
||||
if (local_parameterization != nullptr) {
|
||||
if (options_.local_parameterization_ownership == TAKE_OWNERSHIP) {
|
||||
local_parameterizations_to_delete_.push_back(local_parameterization);
|
||||
}
|
||||
|
||||
manifold = new ManifoldAdapter(local_parameterization);
|
||||
// Add the manifold to manifolds_to_delete_ unconditionally since
|
||||
// we own it and it will need to be deleted.
|
||||
manifolds_to_delete_.push_back(manifold);
|
||||
}
|
||||
|
||||
parameter_block->SetManifold(manifold);
|
||||
}
|
||||
|
||||
void ProblemImpl::InternalSetManifold(double* values,
|
||||
void ProblemImpl::InternalSetManifold(double* /*values*/,
|
||||
ParameterBlock* parameter_block,
|
||||
Manifold* manifold) {
|
||||
// Reset any association between this parameter block and a local
|
||||
// parameterization. This only needs done while we are in the transition from
|
||||
// LocalParameterization to Manifold.
|
||||
parameter_block_to_local_param_[values] = nullptr;
|
||||
if (manifold != nullptr && options_.manifold_ownership == TAKE_OWNERSHIP) {
|
||||
manifolds_to_delete_.push_back(manifold);
|
||||
}
|
||||
parameter_block->SetManifold(manifold);
|
||||
}
|
||||
|
||||
void ProblemImpl::AddParameterBlock(
|
||||
double* values, int size, LocalParameterization* local_parameterization) {
|
||||
ParameterBlock* parameter_block = InternalAddParameterBlock(values, size);
|
||||
InternalSetParameterization(values, parameter_block, local_parameterization);
|
||||
}
|
||||
|
||||
void ProblemImpl::AddParameterBlock(double* values,
|
||||
int size,
|
||||
Manifold* manifold) {
|
||||
@@ -539,19 +503,6 @@ void ProblemImpl::SetParameterBlockVariable(double* values) {
|
||||
parameter_block->SetVarying();
|
||||
}
|
||||
|
||||
void ProblemImpl::SetParameterization(
|
||||
double* values, LocalParameterization* local_parameterization) {
|
||||
ParameterBlock* parameter_block =
|
||||
FindWithDefault(parameter_block_map_, values, nullptr);
|
||||
if (parameter_block == nullptr) {
|
||||
LOG(FATAL) << "Parameter block not found: " << values
|
||||
<< ". You must add the parameter block to the problem before "
|
||||
<< "you can set its local parameterization.";
|
||||
}
|
||||
|
||||
InternalSetParameterization(values, parameter_block, local_parameterization);
|
||||
}
|
||||
|
||||
void ProblemImpl::SetManifold(double* values, Manifold* manifold) {
|
||||
ParameterBlock* parameter_block =
|
||||
FindWithDefault(parameter_block_map_, values, nullptr);
|
||||
@@ -564,22 +515,13 @@ void ProblemImpl::SetManifold(double* values, Manifold* manifold) {
|
||||
InternalSetManifold(values, parameter_block, manifold);
|
||||
}
|
||||
|
||||
const LocalParameterization* ProblemImpl::GetParameterization(
|
||||
const double* values) const {
|
||||
return FindWithDefault(parameter_block_to_local_param_, values, nullptr);
|
||||
}
|
||||
|
||||
bool ProblemImpl::HasParameterization(const double* values) const {
|
||||
return GetParameterization(values) != nullptr;
|
||||
}
|
||||
|
||||
const Manifold* ProblemImpl::GetManifold(const double* values) const {
|
||||
ParameterBlock* parameter_block = FindWithDefault(
|
||||
parameter_block_map_, const_cast<double*>(values), nullptr);
|
||||
if (parameter_block == nullptr) {
|
||||
LOG(FATAL) << "Parameter block not found: " << values
|
||||
<< ". You must add the parameter block to the problem before "
|
||||
<< "you can get its local parameterization.";
|
||||
<< "you can get its manifold.";
|
||||
}
|
||||
|
||||
return parameter_block->manifold();
|
||||
@@ -730,17 +672,7 @@ bool ProblemImpl::Evaluate(const Problem::EvaluateOptions& evaluate_options,
|
||||
// the Evaluator decides the storage for the Jacobian based on the
|
||||
// type of linear solver being used.
|
||||
evaluator_options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
|
||||
#ifdef CERES_NO_THREADS
|
||||
if (evaluate_options.num_threads > 1) {
|
||||
LOG(WARNING)
|
||||
<< "No threading support is compiled into this binary; "
|
||||
<< "only evaluate_options.num_threads = 1 is supported. Switching "
|
||||
<< "to single threaded mode.";
|
||||
}
|
||||
evaluator_options.num_threads = 1;
|
||||
#else
|
||||
evaluator_options.num_threads = evaluate_options.num_threads;
|
||||
#endif // CERES_NO_THREADS
|
||||
|
||||
// The main thread also does work so we only need to launch num_threads - 1.
|
||||
context_impl_->EnsureMinimumThreads(evaluator_options.num_threads - 1);
|
||||
@@ -968,5 +900,4 @@ void ProblemImpl::GetResidualBlocksForParameterBlock(
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
40
extern/ceres/internal/ceres/problem_impl.h
vendored
40
extern/ceres/internal/ceres/problem_impl.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -59,7 +59,6 @@ namespace ceres {
|
||||
class CostFunction;
|
||||
class EvaluationCallback;
|
||||
class LossFunction;
|
||||
class LocalParameterization;
|
||||
struct CRSMatrix;
|
||||
|
||||
namespace internal {
|
||||
@@ -100,10 +99,6 @@ class CERES_NO_EXPORT ProblemImpl {
|
||||
}
|
||||
|
||||
void AddParameterBlock(double* values, int size);
|
||||
void AddParameterBlock(double* values,
|
||||
int size,
|
||||
LocalParameterization* local_parameterization);
|
||||
|
||||
void AddParameterBlock(double* values, int size, Manifold* manifold);
|
||||
|
||||
void RemoveResidualBlock(ResidualBlock* residual_block);
|
||||
@@ -113,11 +108,6 @@ class CERES_NO_EXPORT ProblemImpl {
|
||||
void SetParameterBlockVariable(double* values);
|
||||
bool IsParameterBlockConstant(const double* values) const;
|
||||
|
||||
void SetParameterization(double* values,
|
||||
LocalParameterization* local_parameterization);
|
||||
const LocalParameterization* GetParameterization(const double* values) const;
|
||||
bool HasParameterization(const double* values) const;
|
||||
|
||||
void SetManifold(double* values, Manifold* manifold);
|
||||
const Manifold* GetManifold(const double* values) const;
|
||||
bool HasManifold(const double* values) const;
|
||||
@@ -176,14 +166,12 @@ class CERES_NO_EXPORT ProblemImpl {
|
||||
return residual_block_set_;
|
||||
}
|
||||
|
||||
const Problem::Options& options() const { return options_; }
|
||||
|
||||
ContextImpl* context() { return context_impl_; }
|
||||
|
||||
private:
|
||||
ParameterBlock* InternalAddParameterBlock(double* values, int size);
|
||||
void InternalSetParameterization(
|
||||
double* values,
|
||||
ParameterBlock* parameter_block,
|
||||
LocalParameterization* local_parameterization);
|
||||
void InternalSetManifold(double* values,
|
||||
ParameterBlock* parameter_block,
|
||||
Manifold* manifold);
|
||||
@@ -214,15 +202,8 @@ class CERES_NO_EXPORT ProblemImpl {
|
||||
std::unique_ptr<internal::Program> program_;
|
||||
|
||||
// TODO(sameeragarwal): Unify the shared object handling across object types.
|
||||
// Right now we are using vectors for LocalParameterization and Manifold
|
||||
// objects and reference counting for CostFunctions and LossFunctions. Ideally
|
||||
// this should be done uniformly.
|
||||
|
||||
// When removing parameter blocks, parameterizations have ambiguous
|
||||
// ownership. Instead of scanning the entire problem to see if the
|
||||
// parameterization is shared with other parameter blocks, buffer
|
||||
// them until destruction.
|
||||
std::vector<LocalParameterization*> local_parameterizations_to_delete_;
|
||||
// Right now we are using vectors for Manifold objects and reference counting
|
||||
// for CostFunctions and LossFunctions. Ideally this should be done uniformly.
|
||||
|
||||
// When removing parameter blocks, manifolds have ambiguous
|
||||
// ownership. Instead of scanning the entire problem to see if the
|
||||
@@ -236,17 +217,6 @@ class CERES_NO_EXPORT ProblemImpl {
|
||||
// destroyed.
|
||||
CostFunctionRefCount cost_function_ref_count_;
|
||||
LossFunctionRefCount loss_function_ref_count_;
|
||||
|
||||
// Because we wrap LocalParameterization objects using a ManifoldAdapter, when
|
||||
// the user calls GetParameterization we cannot use the same logic as
|
||||
// GetManifold as the ParameterBlock object only returns a Manifold object. So
|
||||
// this map stores the association between parameter blocks and local
|
||||
// parameterizations.
|
||||
//
|
||||
// This is a temporary object which will be removed once the
|
||||
// LocalParameterization to Manifold transition is complete.
|
||||
std::unordered_map<const double*, LocalParameterization*>
|
||||
parameter_block_to_local_param_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
45
extern/ceres/internal/ceres/program.cc
vendored
45
extern/ceres/internal/ceres/program.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -45,14 +45,14 @@
|
||||
#include "ceres/loss_function.h"
|
||||
#include "ceres/manifold.h"
|
||||
#include "ceres/map_util.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parameter_block.h"
|
||||
#include "ceres/problem.h"
|
||||
#include "ceres/residual_block.h"
|
||||
#include "ceres/stl_util.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
const std::vector<ParameterBlock*>& Program::parameter_blocks() const {
|
||||
return parameter_blocks_;
|
||||
@@ -109,16 +109,32 @@ bool Program::SetParameterBlockStatePtrsToUserStatePtrs() {
|
||||
|
||||
bool Program::Plus(const double* state,
|
||||
const double* delta,
|
||||
double* state_plus_delta) const {
|
||||
for (auto* parameter_block : parameter_blocks_) {
|
||||
if (!parameter_block->Plus(state, delta, state_plus_delta)) {
|
||||
return false;
|
||||
}
|
||||
state += parameter_block->Size();
|
||||
delta += parameter_block->TangentSize();
|
||||
state_plus_delta += parameter_block->Size();
|
||||
}
|
||||
return true;
|
||||
double* state_plus_delta,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
std::atomic<bool> abort(false);
|
||||
auto* parameter_blocks = parameter_blocks_.data();
|
||||
ParallelFor(
|
||||
context,
|
||||
0,
|
||||
parameter_blocks_.size(),
|
||||
num_threads,
|
||||
[&abort, state, delta, state_plus_delta, parameter_blocks](int block_id) {
|
||||
if (abort) {
|
||||
return;
|
||||
}
|
||||
auto parameter_block = parameter_blocks[block_id];
|
||||
|
||||
auto block_state = state + parameter_block->state_offset();
|
||||
auto block_delta = delta + parameter_block->delta_offset();
|
||||
auto block_state_plus_delta =
|
||||
state_plus_delta + parameter_block->state_offset();
|
||||
if (!parameter_block->Plus(
|
||||
block_state, block_delta, block_state_plus_delta)) {
|
||||
abort = true;
|
||||
}
|
||||
});
|
||||
return abort == false;
|
||||
}
|
||||
|
||||
void Program::SetParameterOffsetsAndIndex() {
|
||||
@@ -545,5 +561,4 @@ std::string Program::ToString() const {
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
13
extern/ceres/internal/ceres/program.h
vendored
13
extern/ceres/internal/ceres/program.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,13 +40,13 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ParameterBlock;
|
||||
class ProblemImpl;
|
||||
class ResidualBlock;
|
||||
class TripletSparseMatrix;
|
||||
class ContextImpl;
|
||||
|
||||
// A nonlinear least squares optimization problem. This is different from the
|
||||
// similarly-named "Problem" object, which offers a mutation interface for
|
||||
@@ -87,7 +87,9 @@ class CERES_NO_EXPORT Program {
|
||||
// Update a state vector for the program given a delta.
|
||||
bool Plus(const double* state,
|
||||
const double* delta,
|
||||
double* state_plus_delta) const;
|
||||
double* state_plus_delta,
|
||||
ContextImpl* context,
|
||||
int num_threads) const;
|
||||
|
||||
// Set the parameter indices and offsets. This permits mapping backward
|
||||
// from a ParameterBlock* to an index in the parameter_blocks() vector. For
|
||||
@@ -192,8 +194,7 @@ class CERES_NO_EXPORT Program {
|
||||
friend class ProblemImpl;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
103
extern/ceres/internal/ceres/program_evaluator.h
vendored
103
extern/ceres/internal/ceres/program_evaluator.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,7 +43,7 @@
|
||||
// residual jacobians are written directly into their final position in the
|
||||
// block sparse matrix by the user's CostFunction; there is no copying.
|
||||
//
|
||||
// The evaluation is threaded with OpenMP or C++ threads.
|
||||
// The evaluation is threaded with C++ threads.
|
||||
//
|
||||
// The EvaluatePreparer and JacobianWriter interfaces are as follows:
|
||||
//
|
||||
@@ -96,6 +96,7 @@
|
||||
#include "ceres/execution_summary.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/parallel_vector_ops.h"
|
||||
#include "ceres/parameter_block.h"
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/residual_block.h"
|
||||
@@ -105,7 +106,7 @@ namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
struct NullJacobianFinalizer {
|
||||
void operator()(SparseMatrix* jacobian, int num_parameters) {}
|
||||
void operator()(SparseMatrix* /*jacobian*/, int /*num_parameters*/) {}
|
||||
};
|
||||
|
||||
template <typename EvaluatePreparer,
|
||||
@@ -118,19 +119,11 @@ class ProgramEvaluator final : public Evaluator {
|
||||
program_(program),
|
||||
jacobian_writer_(options, program),
|
||||
evaluate_preparers_(std::move(
|
||||
jacobian_writer_.CreateEvaluatePreparers(options.num_threads))) {
|
||||
#ifdef CERES_NO_THREADS
|
||||
if (options_.num_threads > 1) {
|
||||
LOG(WARNING) << "No threading support is compiled into this binary; "
|
||||
<< "only options.num_threads = 1 is supported. Switching "
|
||||
<< "to single threaded mode.";
|
||||
options_.num_threads = 1;
|
||||
}
|
||||
#endif // CERES_NO_THREADS
|
||||
|
||||
jacobian_writer_.CreateEvaluatePreparers(options.num_threads))),
|
||||
num_parameters_(program->NumEffectiveParameters()) {
|
||||
BuildResidualLayout(*program, &residual_layout_);
|
||||
evaluate_scratch_ =
|
||||
std::move(CreateEvaluatorScratch(*program, options.num_threads));
|
||||
evaluate_scratch_ = std::move(CreateEvaluatorScratch(
|
||||
*program, static_cast<unsigned>(options.num_threads)));
|
||||
}
|
||||
|
||||
// Implementation of Evaluator interface.
|
||||
@@ -164,20 +157,24 @@ class ProgramEvaluator final : public Evaluator {
|
||||
}
|
||||
|
||||
if (residuals != nullptr) {
|
||||
VectorRef(residuals, program_->NumResiduals()).setZero();
|
||||
ParallelSetZero(options_.context,
|
||||
options_.num_threads,
|
||||
residuals,
|
||||
program_->NumResiduals());
|
||||
}
|
||||
|
||||
if (jacobian != nullptr) {
|
||||
jacobian->SetZero();
|
||||
jacobian->SetZero(options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
// Each thread gets its own cost and evaluate scratch space.
|
||||
for (int i = 0; i < options_.num_threads; ++i) {
|
||||
evaluate_scratch_[i].cost = 0.0;
|
||||
if (gradient != nullptr) {
|
||||
VectorRef(evaluate_scratch_[i].gradient.get(),
|
||||
program_->NumEffectiveParameters())
|
||||
.setZero();
|
||||
ParallelSetZero(options_.context,
|
||||
options_.num_threads,
|
||||
evaluate_scratch_[i].gradient.get(),
|
||||
num_parameters_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,38 +256,55 @@ class ProgramEvaluator final : public Evaluator {
|
||||
}
|
||||
});
|
||||
|
||||
if (!abort) {
|
||||
const int num_parameters = program_->NumEffectiveParameters();
|
||||
if (abort) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Sum the cost and gradient (if requested) from each thread.
|
||||
(*cost) = 0.0;
|
||||
// Sum the cost and gradient (if requested) from each thread.
|
||||
(*cost) = 0.0;
|
||||
if (gradient != nullptr) {
|
||||
auto gradient_vector = VectorRef(gradient, num_parameters_);
|
||||
ParallelSetZero(options_.context, options_.num_threads, gradient_vector);
|
||||
}
|
||||
|
||||
for (int i = 0; i < options_.num_threads; ++i) {
|
||||
(*cost) += evaluate_scratch_[i].cost;
|
||||
if (gradient != nullptr) {
|
||||
VectorRef(gradient, num_parameters).setZero();
|
||||
}
|
||||
for (int i = 0; i < options_.num_threads; ++i) {
|
||||
(*cost) += evaluate_scratch_[i].cost;
|
||||
if (gradient != nullptr) {
|
||||
VectorRef(gradient, num_parameters) +=
|
||||
VectorRef(evaluate_scratch_[i].gradient.get(), num_parameters);
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize the Jacobian if it is available.
|
||||
// `num_parameters` is passed to the finalizer so that additional
|
||||
// storage can be reserved for additional diagonal elements if
|
||||
// necessary.
|
||||
if (jacobian != nullptr) {
|
||||
JacobianFinalizer f;
|
||||
f(jacobian, num_parameters);
|
||||
auto gradient_vector = VectorRef(gradient, num_parameters_);
|
||||
ParallelAssign(
|
||||
options_.context,
|
||||
options_.num_threads,
|
||||
gradient_vector,
|
||||
gradient_vector + VectorRef(evaluate_scratch_[i].gradient.get(),
|
||||
num_parameters_));
|
||||
}
|
||||
}
|
||||
return !abort;
|
||||
|
||||
// It is possible that after accumulation the cost has become infinite
|
||||
// or a nan.
|
||||
if (!std::isfinite(*cost)) {
|
||||
LOG(ERROR) << "Accumulated cost = " << *cost
|
||||
<< " is not a finite number. Evaluation failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Finalize the Jacobian if it is available.
|
||||
// `num_parameters` is passed to the finalizer so that additional
|
||||
// storage can be reserved for additional diagonal elements if
|
||||
// necessary.
|
||||
if (jacobian != nullptr) {
|
||||
JacobianFinalizer f;
|
||||
f(jacobian, num_parameters_);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Plus(const double* state,
|
||||
const double* delta,
|
||||
double* state_plus_delta) const final {
|
||||
return program_->Plus(state, delta, state_plus_delta);
|
||||
return program_->Plus(
|
||||
state, delta, state_plus_delta, options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
int NumParameters() const final { return program_->NumParameters(); }
|
||||
@@ -345,7 +359,7 @@ class ProgramEvaluator final : public Evaluator {
|
||||
|
||||
// Create scratch space for each thread evaluating the program.
|
||||
static std::unique_ptr<EvaluateScratch[]> CreateEvaluatorScratch(
|
||||
const Program& program, int num_threads) {
|
||||
const Program& program, unsigned num_threads) {
|
||||
int max_parameters_per_residual_block =
|
||||
program.MaxParametersPerResidualBlock();
|
||||
int max_scratch_doubles_needed_for_evaluate =
|
||||
@@ -370,6 +384,7 @@ class ProgramEvaluator final : public Evaluator {
|
||||
std::unique_ptr<EvaluatePreparer[]> evaluate_preparers_;
|
||||
std::unique_ptr<EvaluateScratch[]> evaluate_scratch_;
|
||||
std::vector<int> residual_layout_;
|
||||
int num_parameters_;
|
||||
::ceres::internal::ExecutionSummary execution_summary_;
|
||||
};
|
||||
|
||||
|
||||
73
extern/ceres/internal/ceres/random.h
vendored
73
extern/ceres/internal/ceres/random.h
vendored
@@ -1,73 +0,0 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: keir@google.com (Keir Mierle)
|
||||
// sameeragarwal@google.com (Sameer Agarwal)
|
||||
|
||||
#ifndef CERES_INTERNAL_RANDOM_H_
|
||||
#define CERES_INTERNAL_RANDOM_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
|
||||
// Seeds the C library random number generator by forwarding `state` to
// srand(). The other helpers in this header draw from rand(), so this fixes
// the sequence they produce.
inline void SetRandomState(int state) { srand(state); }
|
||||
|
||||
// Returns rand() % n when n is non-zero, and 0 when n is zero (the explicit
// branch avoids a modulo by zero).
// NOTE(review): reducing rand() with % is presumably slightly biased unless n
// evenly divides the number of values rand() can return -- confirm whether any
// caller depends on exact uniformity.
inline int Uniform(int n) {
|
||||
if (n) {
|
||||
return rand() % n;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns rand() converted to double and divided by RAND_MAX. Since rand()
// yields values from 0 to RAND_MAX, the result lies in [0, 1] with both
// endpoints reachable.
inline double RandDouble() {
|
||||
// Convert before dividing so the division is done in floating point.
auto r = static_cast<double>(rand());
|
||||
return r / RAND_MAX;
|
||||
}
|
||||
|
||||
// Box-Muller algorithm for normal random number generation.
|
||||
// http://en.wikipedia.org/wiki/Box-Muller_transform
|
||||
//
// Returns one sample per call. The sample is built from RandDouble() draws,
// so it follows the rand() stream seeded via SetRandomState(). The loop below
// is the polar (rejection) form of the transform: no sin/cos is evaluated.
inline double RandNormal() {
|
||||
double x1, x2, w;
|
||||
do {
|
||||
// Draw a candidate point in the square [-1, 1] x [-1, 1].
x1 = 2.0 * RandDouble() - 1.0;
|
||||
x2 = 2.0 * RandDouble() - 1.0;
|
||||
w = x1 * x1 + x2 * x2;
|
||||
// Retry unless the squared radius is strictly between 0 and 1; w == 0 would
// feed log(0) and a division by zero into the scaling step below.
} while (w >= 1.0 || w == 0.0);
|
||||
|
||||
// Scale factor applied to the accepted point. Only the x1 coordinate is
// returned; the companion value x2 * w is not computed.
w = sqrt((-2.0 * log(w)) / w);
|
||||
return x1 * w;
|
||||
}
|
||||
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_INTERNAL_RANDOM_H_
|
||||
301
extern/ceres/internal/ceres/reorder_program.cc
vendored
301
extern/ceres/internal/ceres/reorder_program.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,12 +31,14 @@
|
||||
#include "ceres/reorder_program.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/SparseCore"
|
||||
#include "ceres/cxsparse.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/ordered_groups.h"
|
||||
@@ -51,18 +53,19 @@
|
||||
#include "ceres/types.h"
|
||||
|
||||
#ifdef CERES_USE_EIGEN_SPARSE
|
||||
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
#include <iostream> // Need this because MetisSupport refers to std::cerr.
|
||||
|
||||
#include "Eigen/MetisSupport"
|
||||
#endif
|
||||
|
||||
#include "Eigen/OrderingMethods"
|
||||
#endif
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -86,7 +89,6 @@ static int MinParameterBlock(const ResidualBlock* residual_block,
|
||||
return min_parameter_block_position;
|
||||
}
|
||||
|
||||
#if defined(CERES_USE_EIGEN_SPARSE)
|
||||
Eigen::SparseMatrix<int> CreateBlockJacobian(
|
||||
const TripletSparseMatrix& block_jacobian_transpose) {
|
||||
using SparseMatrix = Eigen::SparseMatrix<int>;
|
||||
@@ -95,7 +97,7 @@ Eigen::SparseMatrix<int> CreateBlockJacobian(
|
||||
const int* rows = block_jacobian_transpose.rows();
|
||||
const int* cols = block_jacobian_transpose.cols();
|
||||
int num_nonzeros = block_jacobian_transpose.num_nonzeros();
|
||||
vector<Triplet> triplets;
|
||||
std::vector<Triplet> triplets;
|
||||
triplets.reserve(num_nonzeros);
|
||||
for (int i = 0; i < num_nonzeros; ++i) {
|
||||
triplets.emplace_back(cols[i], rows[i], 1);
|
||||
@@ -106,14 +108,20 @@ Eigen::SparseMatrix<int> CreateBlockJacobian(
|
||||
block_jacobian.setFromTriplets(triplets.begin(), triplets.end());
|
||||
return block_jacobian;
|
||||
}
|
||||
#endif
|
||||
|
||||
void OrderingForSparseNormalCholeskyUsingSuiteSparse(
|
||||
const LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const TripletSparseMatrix& tsm_block_jacobian_transpose,
|
||||
const vector<ParameterBlock*>& parameter_blocks,
|
||||
const std::vector<ParameterBlock*>& parameter_blocks,
|
||||
const ParameterBlockOrdering& parameter_block_ordering,
|
||||
int* ordering) {
|
||||
#ifdef CERES_NO_SUITESPARSE
|
||||
// "Void"ing values to avoid compiler warnings about unused parameters
|
||||
(void)linear_solver_ordering_type;
|
||||
(void)tsm_block_jacobian_transpose;
|
||||
(void)parameter_blocks;
|
||||
(void)parameter_block_ordering;
|
||||
(void)ordering;
|
||||
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
|
||||
<< "Please report this error to the developers.";
|
||||
#else
|
||||
@@ -121,61 +129,47 @@ void OrderingForSparseNormalCholeskyUsingSuiteSparse(
|
||||
cholmod_sparse* block_jacobian_transpose = ss.CreateSparseMatrix(
|
||||
const_cast<TripletSparseMatrix*>(&tsm_block_jacobian_transpose));
|
||||
|
||||
// No CAMD or the user did not supply a useful ordering, then just
|
||||
// use regular AMD.
|
||||
if (parameter_block_ordering.NumGroups() <= 1 ||
|
||||
!SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) {
|
||||
ss.ApproximateMinimumDegreeOrdering(block_jacobian_transpose, &ordering[0]);
|
||||
} else {
|
||||
vector<int> constraints;
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
constraints.push_back(parameter_block_ordering.GroupId(
|
||||
parameter_block->mutable_user_state()));
|
||||
if (linear_solver_ordering_type == ceres::AMD) {
|
||||
if (parameter_block_ordering.NumGroups() <= 1) {
|
||||
// The user did not supply a useful ordering so just go ahead
|
||||
// and use AMD.
|
||||
ss.Ordering(block_jacobian_transpose, OrderingType::AMD, ordering);
|
||||
} else {
|
||||
// The user supplied an ordering, so use CAMD.
|
||||
std::vector<int> constraints;
|
||||
constraints.reserve(parameter_blocks.size());
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
constraints.push_back(parameter_block_ordering.GroupId(
|
||||
parameter_block->mutable_user_state()));
|
||||
}
|
||||
|
||||
// Renumber the entries of constraints to be contiguous integers
|
||||
// as CAMD requires that the group ids be in the range [0,
|
||||
// parameter_blocks.size() - 1].
|
||||
MapValuesToContiguousRange(constraints.size(), constraints.data());
|
||||
ss.ConstrainedApproximateMinimumDegreeOrdering(
|
||||
block_jacobian_transpose, constraints.data(), ordering);
|
||||
}
|
||||
|
||||
// Renumber the entries of constraints to be contiguous integers
|
||||
// as CAMD requires that the group ids be in the range [0,
|
||||
// parameter_blocks.size() - 1].
|
||||
MapValuesToContiguousRange(constraints.size(), &constraints[0]);
|
||||
ss.ConstrainedApproximateMinimumDegreeOrdering(
|
||||
block_jacobian_transpose, &constraints[0], ordering);
|
||||
} else if (linear_solver_ordering_type == ceres::NESDIS) {
|
||||
// If nested dissection is chosen as an ordering algorithm, then
|
||||
// ignore any user provided linear_solver_ordering.
|
||||
CHECK(SuiteSparse::IsNestedDissectionAvailable())
|
||||
<< "Congratulations, you found a Ceres bug! "
|
||||
<< "Please report this error to the developers.";
|
||||
ss.Ordering(block_jacobian_transpose, OrderingType::NESDIS, ordering);
|
||||
} else {
|
||||
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
|
||||
<< "Please report this error to the developers.";
|
||||
}
|
||||
|
||||
VLOG(2) << "Block ordering stats: "
|
||||
<< " flops: " << ss.mutable_cc()->fl
|
||||
<< " lnz : " << ss.mutable_cc()->lnz
|
||||
<< " anz : " << ss.mutable_cc()->anz;
|
||||
|
||||
ss.Free(block_jacobian_transpose);
|
||||
#endif // CERES_NO_SUITESPARSE
|
||||
}
|
||||
|
||||
void OrderingForSparseNormalCholeskyUsingCXSparse(
|
||||
const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
|
||||
#ifdef CERES_NO_CXSPARSE
|
||||
LOG(FATAL) << "Congratulations, you found a Ceres bug! "
|
||||
<< "Please report this error to the developers.";
|
||||
#else
|
||||
// CXSparse works with J'J instead of J'. So compute the block
|
||||
// sparsity for J'J and compute an approximate minimum degree
|
||||
// ordering.
|
||||
CXSparse cxsparse;
|
||||
cs_di* block_jacobian_transpose;
|
||||
block_jacobian_transpose = cxsparse.CreateSparseMatrix(
|
||||
const_cast<TripletSparseMatrix*>(&tsm_block_jacobian_transpose));
|
||||
cs_di* block_jacobian = cxsparse.TransposeMatrix(block_jacobian_transpose);
|
||||
cs_di* block_hessian =
|
||||
cxsparse.MatrixMatrixMultiply(block_jacobian_transpose, block_jacobian);
|
||||
cxsparse.Free(block_jacobian);
|
||||
cxsparse.Free(block_jacobian_transpose);
|
||||
|
||||
cxsparse.ApproximateMinimumDegreeOrdering(block_hessian, ordering);
|
||||
cxsparse.Free(block_hessian);
|
||||
#endif // CERES_NO_CXSPARSE
|
||||
}
|
||||
|
||||
void OrderingForSparseNormalCholeskyUsingEigenSparse(
|
||||
const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) {
|
||||
const LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const TripletSparseMatrix& tsm_block_jacobian_transpose,
|
||||
int* ordering) {
|
||||
#ifndef CERES_USE_EIGEN_SPARSE
|
||||
LOG(FATAL) << "SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE "
|
||||
"because Ceres was not built with support for "
|
||||
@@ -183,12 +177,12 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
|
||||
"This requires enabling building with -DEIGENSPARSE=ON.";
|
||||
#else
|
||||
|
||||
// This conversion from a TripletSparseMatrix to a Eigen::Triplet
|
||||
// matrix is unfortunate, but unavoidable for now. It is not a
|
||||
// significant performance penalty in the grand scheme of
|
||||
// things. The right thing to do here would be to get a compressed
|
||||
// row sparse matrix representation of the jacobian and go from
|
||||
// there. But that is a project for another day.
|
||||
// TODO(sameeragarwal): This conversion from a TripletSparseMatrix
|
||||
// to a Eigen::Triplet matrix is unfortunate, but unavoidable for
|
||||
// now. It is not a significant performance penalty in the grand
|
||||
// scheme of things. The right thing to do here would be to get a
|
||||
// compressed row sparse matrix representation of the jacobian and
|
||||
// go from there. But that is a project for another day.
|
||||
using SparseMatrix = Eigen::SparseMatrix<int>;
|
||||
|
||||
const SparseMatrix block_jacobian =
|
||||
@@ -196,9 +190,19 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
|
||||
const SparseMatrix block_hessian =
|
||||
block_jacobian.transpose() * block_jacobian;
|
||||
|
||||
Eigen::AMDOrdering<int> amd_ordering;
|
||||
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
|
||||
amd_ordering(block_hessian, perm);
|
||||
if (linear_solver_ordering_type == ceres::AMD) {
|
||||
Eigen::AMDOrdering<int> amd_ordering;
|
||||
amd_ordering(block_hessian, perm);
|
||||
} else {
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
Eigen::MetisOrdering<int> metis_ordering;
|
||||
metis_ordering(block_hessian, perm);
|
||||
#else
|
||||
perm.setIdentity(block_hessian.rows());
|
||||
#endif
|
||||
}
|
||||
|
||||
for (int i = 0; i < block_hessian.rows(); ++i) {
|
||||
ordering[i] = perm.indices()[i];
|
||||
}
|
||||
@@ -210,7 +214,7 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse(
|
||||
bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
|
||||
const ParameterBlockOrdering& ordering,
|
||||
Program* program,
|
||||
string* error) {
|
||||
std::string* error) {
|
||||
const int num_parameter_blocks = program->NumParameterBlocks();
|
||||
if (ordering.NumElements() != num_parameter_blocks) {
|
||||
*error = StringPrintf(
|
||||
@@ -222,13 +226,15 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<ParameterBlock*>* parameter_blocks =
|
||||
std::vector<ParameterBlock*>* parameter_blocks =
|
||||
program->mutable_parameter_blocks();
|
||||
parameter_blocks->clear();
|
||||
|
||||
const map<int, set<double*>>& groups = ordering.group_to_elements();
|
||||
// TODO(sameeragarwal): Investigate whether this should be a set or an
|
||||
// unordered_set.
|
||||
const std::map<int, std::set<double*>>& groups = ordering.group_to_elements();
|
||||
for (const auto& p : groups) {
|
||||
const set<double*>& group = p.second;
|
||||
const std::set<double*>& group = p.second;
|
||||
for (double* parameter_block_ptr : group) {
|
||||
auto it = parameter_map.find(parameter_block_ptr);
|
||||
if (it == parameter_map.end()) {
|
||||
@@ -248,16 +254,18 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map,
|
||||
bool LexicographicallyOrderResidualBlocks(
|
||||
const int size_of_first_elimination_group,
|
||||
Program* program,
|
||||
string* error) {
|
||||
std::string* /*error*/) {
|
||||
CHECK_GE(size_of_first_elimination_group, 1)
|
||||
<< "Congratulations, you found a Ceres bug! Please report this error "
|
||||
<< "to the developers.";
|
||||
|
||||
// Create a histogram of the number of residuals for each E block. There is an
|
||||
// extra bucket at the end to catch all non-eliminated F blocks.
|
||||
vector<int> residual_blocks_per_e_block(size_of_first_elimination_group + 1);
|
||||
vector<ResidualBlock*>* residual_blocks = program->mutable_residual_blocks();
|
||||
vector<int> min_position_per_residual(residual_blocks->size());
|
||||
std::vector<int> residual_blocks_per_e_block(size_of_first_elimination_group +
|
||||
1);
|
||||
std::vector<ResidualBlock*>* residual_blocks =
|
||||
program->mutable_residual_blocks();
|
||||
std::vector<int> min_position_per_residual(residual_blocks->size());
|
||||
for (int i = 0; i < residual_blocks->size(); ++i) {
|
||||
ResidualBlock* residual_block = (*residual_blocks)[i];
|
||||
int position =
|
||||
@@ -270,7 +278,7 @@ bool LexicographicallyOrderResidualBlocks(
|
||||
// Run a cumulative sum on the histogram, to obtain offsets to the start of
|
||||
// each histogram bucket (where each bucket is for the residuals for that
|
||||
// E-block).
|
||||
vector<int> offsets(size_of_first_elimination_group + 1);
|
||||
std::vector<int> offsets(size_of_first_elimination_group + 1);
|
||||
std::partial_sum(residual_blocks_per_e_block.begin(),
|
||||
residual_blocks_per_e_block.end(),
|
||||
offsets.begin());
|
||||
@@ -289,9 +297,9 @@ bool LexicographicallyOrderResidualBlocks(
|
||||
// of the bucket. The filling order among the buckets is dictated by the
|
||||
// residual blocks. This loop uses the offsets as counters; subtracting one
|
||||
// from each offset as a residual block is placed in the bucket. When the
|
||||
// filling is finished, the offset pointerts should have shifted down one
|
||||
// filling is finished, the offset pointers should have shifted down one
|
||||
// entry (this is verified below).
|
||||
vector<ResidualBlock*> reordered_residual_blocks(
|
||||
std::vector<ResidualBlock*> reordered_residual_blocks(
|
||||
(*residual_blocks).size(), static_cast<ResidualBlock*>(nullptr));
|
||||
for (int i = 0; i < residual_blocks->size(); ++i) {
|
||||
int bucket = min_position_per_residual[i];
|
||||
@@ -326,18 +334,18 @@ bool LexicographicallyOrderResidualBlocks(
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pre-order the columns corresponding to the schur complement if
|
||||
// Pre-order the columns corresponding to the Schur complement if
|
||||
// possible.
|
||||
static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
static void ReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
const ParameterBlockOrdering& parameter_block_ordering, Program* program) {
|
||||
#ifndef CERES_NO_SUITESPARSE
|
||||
#ifdef CERES_NO_SUITESPARSE
|
||||
// "Void"ing values to avoid compiler warnings about unused parameters
|
||||
(void)parameter_block_ordering;
|
||||
(void)program;
|
||||
#else
|
||||
SuiteSparse ss;
|
||||
if (!SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<int> constraints;
|
||||
vector<ParameterBlock*>& parameter_blocks =
|
||||
std::vector<int> constraints;
|
||||
std::vector<ParameterBlock*>& parameter_blocks =
|
||||
*(program->mutable_parameter_blocks());
|
||||
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
@@ -348,7 +356,7 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
// Renumber the entries of constraints to be contiguous integers as
|
||||
// CAMD requires that the group ids be in the range [0,
|
||||
// parameter_blocks.size() - 1].
|
||||
MapValuesToContiguousRange(constraints.size(), &constraints[0]);
|
||||
MapValuesToContiguousRange(constraints.size(), constraints.data());
|
||||
|
||||
// Compute a block sparse representation of J'.
|
||||
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
|
||||
@@ -357,12 +365,12 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
cholmod_sparse* block_jacobian_transpose =
|
||||
ss.CreateSparseMatrix(tsm_block_jacobian_transpose.get());
|
||||
|
||||
vector<int> ordering(parameter_blocks.size(), 0);
|
||||
std::vector<int> ordering(parameter_blocks.size(), 0);
|
||||
ss.ConstrainedApproximateMinimumDegreeOrdering(
|
||||
block_jacobian_transpose, &constraints[0], &ordering[0]);
|
||||
block_jacobian_transpose, constraints.data(), ordering.data());
|
||||
ss.Free(block_jacobian_transpose);
|
||||
|
||||
const vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
|
||||
const std::vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
|
||||
for (int i = 0; i < program->NumParameterBlocks(); ++i) {
|
||||
parameter_blocks[i] = parameter_blocks_copy[ordering[i]];
|
||||
}
|
||||
@@ -371,14 +379,14 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
#endif
|
||||
}
|
||||
|
||||
static void MaybeReorderSchurComplementColumnsUsingEigen(
|
||||
static void ReorderSchurComplementColumnsUsingEigen(
|
||||
LinearSolverOrderingType ordering_type,
|
||||
const int size_of_first_elimination_group,
|
||||
const ProblemImpl::ParameterMap& parameter_map,
|
||||
const ProblemImpl::ParameterMap& /*parameter_map*/,
|
||||
Program* program) {
|
||||
#if defined(CERES_USE_EIGEN_SPARSE)
|
||||
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
|
||||
program->CreateJacobianBlockSparsityTranspose());
|
||||
|
||||
using SparseMatrix = Eigen::SparseMatrix<int>;
|
||||
const SparseMatrix block_jacobian =
|
||||
CreateBlockJacobian(*tsm_block_jacobian_transpose);
|
||||
@@ -399,12 +407,22 @@ static void MaybeReorderSchurComplementColumnsUsingEigen(
|
||||
const SparseMatrix block_schur_complement =
|
||||
F.transpose() * F - F.transpose() * E * E.transpose() * F;
|
||||
|
||||
Eigen::AMDOrdering<int> amd_ordering;
|
||||
Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int> perm;
|
||||
amd_ordering(block_schur_complement, perm);
|
||||
if (ordering_type == ceres::AMD) {
|
||||
Eigen::AMDOrdering<int> amd_ordering;
|
||||
amd_ordering(block_schur_complement, perm);
|
||||
} else {
|
||||
#ifndef CERES_NO_EIGEN_METIS
|
||||
Eigen::MetisOrdering<int> metis_ordering;
|
||||
metis_ordering(block_schur_complement, perm);
|
||||
#else
|
||||
perm.setIdentity(block_schur_complement.rows());
|
||||
#endif
|
||||
}
|
||||
|
||||
const vector<ParameterBlock*>& parameter_blocks = program->parameter_blocks();
|
||||
vector<ParameterBlock*> ordering(num_cols);
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program->parameter_blocks();
|
||||
std::vector<ParameterBlock*> ordering(num_cols);
|
||||
|
||||
// The ordering of the first size_of_first_elimination_group does
|
||||
// not matter, so we preserve the existing ordering.
|
||||
@@ -426,10 +444,11 @@ static void MaybeReorderSchurComplementColumnsUsingEigen(
|
||||
bool ReorderProgramForSchurTypeLinearSolver(
|
||||
const LinearSolverType linear_solver_type,
|
||||
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
const LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const ProblemImpl::ParameterMap& parameter_map,
|
||||
ParameterBlockOrdering* parameter_block_ordering,
|
||||
Program* program,
|
||||
string* error) {
|
||||
std::string* error) {
|
||||
if (parameter_block_ordering->NumElements() !=
|
||||
program->NumParameterBlocks()) {
|
||||
*error = StringPrintf(
|
||||
@@ -447,7 +466,7 @@ bool ReorderProgramForSchurTypeLinearSolver(
|
||||
// parameter block ordering as it sees fit. For Schur type solvers,
|
||||
// this means that the user wishes for Ceres to identify the
|
||||
// e_blocks, which we do by computing a maximal independent set.
|
||||
vector<ParameterBlock*> schur_ordering;
|
||||
std::vector<ParameterBlock*> schur_ordering;
|
||||
const int size_of_first_elimination_group =
|
||||
ComputeStableSchurOrdering(*program, &schur_ordering);
|
||||
|
||||
@@ -470,7 +489,10 @@ bool ReorderProgramForSchurTypeLinearSolver(
|
||||
// group.
|
||||
|
||||
// Verify that the first elimination group is an independent set.
|
||||
const set<double*>& first_elimination_group =
|
||||
|
||||
// TODO(sameeragarwal): Investigate if this should be a set or an
|
||||
// unordered_set.
|
||||
const std::set<double*>& first_elimination_group =
|
||||
parameter_block_ordering->group_to_elements().begin()->second;
|
||||
if (!program->IsParameterBlockSetIndependent(first_elimination_group)) {
|
||||
*error = StringPrintf(
|
||||
@@ -492,12 +514,20 @@ bool ReorderProgramForSchurTypeLinearSolver(
|
||||
parameter_block_ordering->group_to_elements().begin()->second.size();
|
||||
|
||||
if (linear_solver_type == SPARSE_SCHUR) {
|
||||
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
|
||||
MaybeReorderSchurComplementColumnsUsingSuiteSparse(
|
||||
*parameter_block_ordering, program);
|
||||
if (sparse_linear_algebra_library_type == SUITE_SPARSE &&
|
||||
linear_solver_ordering_type == ceres::AMD) {
|
||||
// Preordering support for schur complement only works with AMD
|
||||
// for now, since we are using CAMD.
|
||||
//
|
||||
// TODO(sameeragarwal): It maybe worth adding pre-ordering support for
|
||||
// nested dissection too.
|
||||
ReorderSchurComplementColumnsUsingSuiteSparse(*parameter_block_ordering,
|
||||
program);
|
||||
} else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
|
||||
MaybeReorderSchurComplementColumnsUsingEigen(
|
||||
size_of_first_elimination_group, parameter_map, program);
|
||||
ReorderSchurComplementColumnsUsingEigen(linear_solver_ordering_type,
|
||||
size_of_first_elimination_group,
|
||||
parameter_map,
|
||||
program);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -509,10 +539,11 @@ bool ReorderProgramForSchurTypeLinearSolver(
|
||||
|
||||
bool ReorderProgramForSparseCholesky(
|
||||
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
const LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const ParameterBlockOrdering& parameter_block_ordering,
|
||||
int start_row_block,
|
||||
Program* program,
|
||||
string* error) {
|
||||
std::string* error) {
|
||||
if (parameter_block_ordering.NumElements() != program->NumParameterBlocks()) {
|
||||
*error = StringPrintf(
|
||||
"The program has %d parameter blocks, but the parameter block "
|
||||
@@ -526,19 +557,17 @@ bool ReorderProgramForSparseCholesky(
|
||||
std::unique_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose(
|
||||
program->CreateJacobianBlockSparsityTranspose(start_row_block));
|
||||
|
||||
vector<int> ordering(program->NumParameterBlocks(), 0);
|
||||
vector<ParameterBlock*>& parameter_blocks =
|
||||
std::vector<int> ordering(program->NumParameterBlocks(), 0);
|
||||
std::vector<ParameterBlock*>& parameter_blocks =
|
||||
*(program->mutable_parameter_blocks());
|
||||
|
||||
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
|
||||
OrderingForSparseNormalCholeskyUsingSuiteSparse(
|
||||
linear_solver_ordering_type,
|
||||
*tsm_block_jacobian_transpose,
|
||||
parameter_blocks,
|
||||
parameter_block_ordering,
|
||||
&ordering[0]);
|
||||
} else if (sparse_linear_algebra_library_type == CX_SPARSE) {
|
||||
OrderingForSparseNormalCholeskyUsingCXSparse(*tsm_block_jacobian_transpose,
|
||||
&ordering[0]);
|
||||
ordering.data());
|
||||
} else if (sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
|
||||
// Accelerate does not provide a function to perform reordering without
|
||||
// performing a full symbolic factorisation. As such, we have nothing
|
||||
@@ -550,11 +579,13 @@ bool ReorderProgramForSparseCholesky(
|
||||
|
||||
} else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) {
|
||||
OrderingForSparseNormalCholeskyUsingEigenSparse(
|
||||
*tsm_block_jacobian_transpose, &ordering[0]);
|
||||
linear_solver_ordering_type,
|
||||
*tsm_block_jacobian_transpose,
|
||||
ordering.data());
|
||||
}
|
||||
|
||||
// Apply ordering.
|
||||
const vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
|
||||
const std::vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks);
|
||||
for (int i = 0; i < program->NumParameterBlocks(); ++i) {
|
||||
parameter_blocks[i] = parameter_blocks_copy[ordering[i]];
|
||||
}
|
||||
@@ -575,5 +606,39 @@ int ReorderResidualBlocksByPartition(
|
||||
return it - residual_blocks->begin();
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
bool AreJacobianColumnsOrdered(
|
||||
const LinearSolverType linear_solver_type,
|
||||
const PreconditionerType preconditioner_type,
|
||||
const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
const LinearSolverOrderingType linear_solver_ordering_type) {
|
||||
if (sparse_linear_algebra_library_type == SUITE_SPARSE) {
|
||||
if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
|
||||
(linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
|
||||
return true;
|
||||
}
|
||||
if (linear_solver_type == SPARSE_SCHUR &&
|
||||
linear_solver_ordering_type == ceres::AMD) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sparse_linear_algebra_library_type == ceres::EIGEN_SPARSE) {
|
||||
if (linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
|
||||
linear_solver_type == SPARSE_SCHUR ||
|
||||
(linear_solver_type == CGNR && preconditioner_type == SUBSET)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sparse_linear_algebra_library_type == ceres::ACCELERATE_SPARSE) {
|
||||
// Apple's accelerate framework does not allow direct access to
|
||||
// ordering algorithms, so jacobian columns are never pre-ordered.
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
19
extern/ceres/internal/ceres/reorder_program.h
vendored
19
extern/ceres/internal/ceres/reorder_program.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,12 +35,12 @@
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/parameter_block_ordering.h"
|
||||
#include "ceres/problem_impl.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
|
||||
@@ -76,6 +76,7 @@ CERES_NO_EXPORT bool LexicographicallyOrderResidualBlocks(
|
||||
CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver(
|
||||
LinearSolverType linear_solver_type,
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const ProblemImpl::ParameterMap& parameter_map,
|
||||
ParameterBlockOrdering* parameter_block_ordering,
|
||||
Program* program,
|
||||
@@ -93,6 +94,7 @@ CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver(
|
||||
// ordering will take it into account, otherwise it will be ignored.
|
||||
CERES_NO_EXPORT bool ReorderProgramForSparseCholesky(
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
LinearSolverOrderingType linear_solver_ordering_type,
|
||||
const ParameterBlockOrdering& parameter_block_ordering,
|
||||
int start_row_block,
|
||||
Program* program,
|
||||
@@ -112,8 +114,15 @@ CERES_NO_EXPORT int ReorderResidualBlocksByPartition(
|
||||
const std::unordered_set<ResidualBlockId>& bottom_residual_blocks,
|
||||
Program* program);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
// The return value of this function indicates whether the columns of
|
||||
// the Jacobian can be reordered using a fill reducing ordering.
|
||||
CERES_NO_EXPORT bool AreJacobianColumnsOrdered(
|
||||
LinearSolverType linear_solver_type,
|
||||
PreconditionerType preconditioner_type,
|
||||
SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
|
||||
LinearSolverOrderingType linear_solver_ordering_type);
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
11
extern/ceres/internal/ceres/residual_block.cc
vendored
11
extern/ceres/internal/ceres/residual_block.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
|
||||
using Eigen::Dynamic;
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
ResidualBlock::ResidualBlock(
|
||||
const CostFunction* cost_function,
|
||||
@@ -114,8 +113,7 @@ bool ResidualBlock::Evaluate(const bool apply_loss_function,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!IsEvaluationValid(
|
||||
*this, parameters.data(), cost, residuals, eval_jacobians)) {
|
||||
if (!IsEvaluationValid(*this, parameters.data(), residuals, eval_jacobians)) {
|
||||
// clang-format off
|
||||
std::string message =
|
||||
"\n\n"
|
||||
@@ -216,5 +214,4 @@ int ResidualBlock::NumScratchDoublesForEvaluate() const {
|
||||
return scratch_doubles;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
2
extern/ceres/internal/ceres/residual_block.h
vendored
2
extern/ceres/internal/ceres/residual_block.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "ceres/array_utils.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
@@ -42,10 +43,7 @@
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::string;
|
||||
namespace ceres::internal {
|
||||
|
||||
void InvalidateEvaluation(const ResidualBlock& block,
|
||||
double* cost,
|
||||
@@ -64,17 +62,17 @@ void InvalidateEvaluation(const ResidualBlock& block,
|
||||
}
|
||||
}
|
||||
|
||||
string EvaluationToString(const ResidualBlock& block,
|
||||
double const* const* parameters,
|
||||
double* cost,
|
||||
double* residuals,
|
||||
double** jacobians) {
|
||||
std::string EvaluationToString(const ResidualBlock& block,
|
||||
double const* const* parameters,
|
||||
double* cost,
|
||||
double* residuals,
|
||||
double** jacobians) {
|
||||
CHECK(cost != nullptr);
|
||||
CHECK(residuals != nullptr);
|
||||
|
||||
const int num_parameter_blocks = block.NumParameterBlocks();
|
||||
const int num_residuals = block.NumResiduals();
|
||||
string result = "";
|
||||
std::string result = "";
|
||||
|
||||
// clang-format off
|
||||
StringAppendF(&result,
|
||||
@@ -89,7 +87,7 @@ string EvaluationToString(const ResidualBlock& block,
|
||||
"to Inf or NaN is also an error. \n\n"; // NOLINT
|
||||
// clang-format on
|
||||
|
||||
string space = "Residuals: ";
|
||||
std::string space = "Residuals: ";
|
||||
result += space;
|
||||
AppendArrayToString(num_residuals, residuals, &result);
|
||||
StringAppendF(&result, "\n\n");
|
||||
@@ -117,9 +115,11 @@ string EvaluationToString(const ResidualBlock& block,
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO(sameeragarwal) Check cost value validness here
|
||||
// Cost value is a part of evaluation but not checked here since according to
|
||||
// residual_block.cc cost is not valid at the time this method is called
|
||||
bool IsEvaluationValid(const ResidualBlock& block,
|
||||
double const* const* parameters,
|
||||
double* cost,
|
||||
double const* const* /*parameters*/,
|
||||
double* residuals,
|
||||
double** jacobians) {
|
||||
const int num_parameter_blocks = block.NumParameterBlocks();
|
||||
@@ -141,5 +141,4 @@ bool IsEvaluationValid(const ResidualBlock& block,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class ResidualBlock;
|
||||
|
||||
@@ -64,7 +63,6 @@ void InvalidateEvaluation(const ResidualBlock& block,
|
||||
CERES_NO_EXPORT
|
||||
bool IsEvaluationValid(const ResidualBlock& block,
|
||||
double const* const* parameters,
|
||||
double* cost,
|
||||
double* residuals,
|
||||
double** jacobians);
|
||||
|
||||
@@ -78,7 +76,6 @@ std::string EvaluationToString(const ResidualBlock& block,
|
||||
double* residuals,
|
||||
double** jacobians);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_RESIDUAL_BLOCK_UTILS_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,6 +34,7 @@
|
||||
#include <ctime>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
@@ -52,58 +53,36 @@
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::make_pair;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
|
||||
class BlockRandomAccessSparseMatrixAdapter final : public LinearOperator {
|
||||
class BlockRandomAccessSparseMatrixAdapter final
|
||||
: public ConjugateGradientsLinearOperator<Vector> {
|
||||
public:
|
||||
explicit BlockRandomAccessSparseMatrixAdapter(
|
||||
const BlockRandomAccessSparseMatrix& m)
|
||||
: m_(m) {}
|
||||
|
||||
// y = y + Ax;
|
||||
void RightMultiply(const double* x, double* y) const final {
|
||||
m_.SymmetricRightMultiply(x, y);
|
||||
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
|
||||
m_.SymmetricRightMultiplyAndAccumulate(x.data(), y.data());
|
||||
}
|
||||
|
||||
// y = y + A'x;
|
||||
void LeftMultiply(const double* x, double* y) const final {
|
||||
m_.SymmetricRightMultiply(x, y);
|
||||
}
|
||||
|
||||
int num_rows() const final { return m_.num_rows(); }
|
||||
int num_cols() const final { return m_.num_rows(); }
|
||||
|
||||
private:
|
||||
const BlockRandomAccessSparseMatrix& m_;
|
||||
};
|
||||
|
||||
class BlockRandomAccessDiagonalMatrixAdapter final : public LinearOperator {
|
||||
class BlockRandomAccessDiagonalMatrixAdapter final
|
||||
: public ConjugateGradientsLinearOperator<Vector> {
|
||||
public:
|
||||
explicit BlockRandomAccessDiagonalMatrixAdapter(
|
||||
const BlockRandomAccessDiagonalMatrix& m)
|
||||
: m_(m) {}
|
||||
|
||||
// y = y + Ax;
|
||||
void RightMultiply(const double* x, double* y) const final {
|
||||
m_.RightMultiply(x, y);
|
||||
void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
|
||||
m_.RightMultiplyAndAccumulate(x.data(), y.data());
|
||||
}
|
||||
|
||||
// y = y + A'x;
|
||||
void LeftMultiply(const double* x, double* y) const final {
|
||||
m_.RightMultiply(x, y);
|
||||
}
|
||||
|
||||
int num_rows() const final { return m_.num_rows(); }
|
||||
int num_cols() const final { return m_.num_rows(); }
|
||||
|
||||
private:
|
||||
const BlockRandomAccessDiagonalMatrix& m_;
|
||||
};
|
||||
@@ -126,7 +105,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
|
||||
EventLogger event_logger("SchurComplementSolver::Solve");
|
||||
|
||||
const CompressedRowBlockStructure* bs = A->block_structure();
|
||||
if (eliminator_.get() == nullptr) {
|
||||
if (eliminator_ == nullptr) {
|
||||
const int num_eliminate_blocks = options_.elimination_groups[0];
|
||||
const int num_f_blocks = bs->cols.size() - num_eliminate_blocks;
|
||||
|
||||
@@ -161,7 +140,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
|
||||
b,
|
||||
per_solve_options.D,
|
||||
lhs_.get(),
|
||||
rhs_.get());
|
||||
rhs_.data());
|
||||
event_logger.AddEvent("Eliminate");
|
||||
|
||||
double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
|
||||
@@ -169,7 +148,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl(
|
||||
SolveReducedLinearSystem(per_solve_options, reduced_solution);
|
||||
event_logger.AddEvent("ReducedSolve");
|
||||
|
||||
if (summary.termination_type == LINEAR_SOLVER_SUCCESS) {
|
||||
if (summary.termination_type == LinearSolverTerminationType::SUCCESS) {
|
||||
eliminator_->BackSubstitute(
|
||||
BlockSparseMatrixData(*A), b, per_solve_options.D, reduced_solution, x);
|
||||
event_logger.AddEvent("BackSubstitute");
|
||||
@@ -190,24 +169,21 @@ void DenseSchurComplementSolver::InitStorage(
|
||||
const CompressedRowBlockStructure* bs) {
|
||||
const int num_eliminate_blocks = options().elimination_groups[0];
|
||||
const int num_col_blocks = bs->cols.size();
|
||||
|
||||
vector<int> blocks(num_col_blocks - num_eliminate_blocks, 0);
|
||||
for (int i = num_eliminate_blocks, j = 0; i < num_col_blocks; ++i, ++j) {
|
||||
blocks[j] = bs->cols[i].size;
|
||||
}
|
||||
|
||||
set_lhs(std::make_unique<BlockRandomAccessDenseMatrix>(blocks));
|
||||
set_rhs(std::make_unique<double[]>(lhs()->num_rows()));
|
||||
auto blocks = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
|
||||
set_lhs(std::make_unique<BlockRandomAccessDenseMatrix>(
|
||||
blocks, options().context, options().num_threads));
|
||||
ResizeRhs(lhs()->num_rows());
|
||||
}
|
||||
|
||||
// Solve the system Sx = r, assuming that the matrix S is stored in a
|
||||
// BlockRandomAccessDenseMatrix. The linear system is solved using
|
||||
// Eigen's Cholesky factorization.
|
||||
LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem(
|
||||
const LinearSolver::PerSolveOptions& per_solve_options, double* solution) {
|
||||
const LinearSolver::PerSolveOptions& /*per_solve_options*/,
|
||||
double* solution) {
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Success.";
|
||||
|
||||
auto* m = down_cast<BlockRandomAccessDenseMatrix*>(mutable_lhs());
|
||||
@@ -221,7 +197,7 @@ LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem(
|
||||
|
||||
summary.num_iterations = 1;
|
||||
summary.termination_type = cholesky_->FactorAndSolve(
|
||||
num_rows, m->mutable_values(), rhs(), solution, &summary.message);
|
||||
num_rows, m->mutable_values(), rhs().data(), solution, &summary.message);
|
||||
return summary;
|
||||
}
|
||||
|
||||
@@ -233,7 +209,14 @@ SparseSchurComplementSolver::SparseSchurComplementSolver(
|
||||
}
|
||||
}
|
||||
|
||||
SparseSchurComplementSolver::~SparseSchurComplementSolver() = default;
|
||||
SparseSchurComplementSolver::~SparseSchurComplementSolver() {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (scratch_[i]) {
|
||||
delete scratch_[i];
|
||||
scratch_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine the non-zero blocks in the Schur Complement matrix, and
|
||||
// initialize a BlockRandomAccessSparseMatrix object.
|
||||
@@ -243,14 +226,11 @@ void SparseSchurComplementSolver::InitStorage(
|
||||
const int num_col_blocks = bs->cols.size();
|
||||
const int num_row_blocks = bs->rows.size();
|
||||
|
||||
blocks_.resize(num_col_blocks - num_eliminate_blocks, 0);
|
||||
for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) {
|
||||
blocks_[i - num_eliminate_blocks] = bs->cols[i].size;
|
||||
}
|
||||
blocks_ = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
|
||||
|
||||
set<pair<int, int>> block_pairs;
|
||||
std::set<std::pair<int, int>> block_pairs;
|
||||
for (int i = 0; i < blocks_.size(); ++i) {
|
||||
block_pairs.insert(make_pair(i, i));
|
||||
block_pairs.emplace(i, i);
|
||||
}
|
||||
|
||||
int r = 0;
|
||||
@@ -259,7 +239,7 @@ void SparseSchurComplementSolver::InitStorage(
|
||||
if (e_block_id >= num_eliminate_blocks) {
|
||||
break;
|
||||
}
|
||||
vector<int> f_blocks;
|
||||
std::vector<int> f_blocks;
|
||||
|
||||
// Add to the chunk until the first block in the row is
|
||||
// different than the one in the first row for the chunk.
|
||||
@@ -281,7 +261,7 @@ void SparseSchurComplementSolver::InitStorage(
|
||||
f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
|
||||
for (int i = 0; i < f_blocks.size(); ++i) {
|
||||
for (int j = i + 1; j < f_blocks.size(); ++j) {
|
||||
block_pairs.insert(make_pair(f_blocks[i], f_blocks[j]));
|
||||
block_pairs.emplace(f_blocks[i], f_blocks[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -296,15 +276,15 @@ void SparseSchurComplementSolver::InitStorage(
|
||||
for (const auto& cell : row.cells) {
|
||||
int r_block2_id = cell.block_id - num_eliminate_blocks;
|
||||
if (r_block1_id <= r_block2_id) {
|
||||
block_pairs.insert(make_pair(r_block1_id, r_block2_id));
|
||||
block_pairs.emplace(r_block1_id, r_block2_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
set_lhs(
|
||||
std::make_unique<BlockRandomAccessSparseMatrix>(blocks_, block_pairs));
|
||||
set_rhs(std::make_unique<double[]>(lhs()->num_rows()));
|
||||
set_lhs(std::make_unique<BlockRandomAccessSparseMatrix>(
|
||||
blocks_, block_pairs, options().context, options().num_threads));
|
||||
ResizeRhs(lhs()->num_rows());
|
||||
}
|
||||
|
||||
LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem(
|
||||
@@ -316,32 +296,39 @@ LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem(
|
||||
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Success.";
|
||||
|
||||
const TripletSparseMatrix* tsm =
|
||||
const BlockSparseMatrix* bsm =
|
||||
down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix();
|
||||
if (tsm->num_rows() == 0) {
|
||||
if (bsm->num_rows() == 0) {
|
||||
return summary;
|
||||
}
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix> lhs;
|
||||
const CompressedRowSparseMatrix::StorageType storage_type =
|
||||
sparse_cholesky_->StorageType();
|
||||
if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
|
||||
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
|
||||
lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR);
|
||||
if (storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
|
||||
if (!crs_lhs_) {
|
||||
crs_lhs_ = bsm->ToCompressedRowSparseMatrix();
|
||||
crs_lhs_->set_storage_type(
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
|
||||
} else {
|
||||
bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get());
|
||||
}
|
||||
} else {
|
||||
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
|
||||
lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR);
|
||||
if (!crs_lhs_) {
|
||||
crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose();
|
||||
crs_lhs_->set_storage_type(
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
|
||||
} else {
|
||||
bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get());
|
||||
}
|
||||
}
|
||||
|
||||
*lhs->mutable_col_blocks() = blocks_;
|
||||
*lhs->mutable_row_blocks() = blocks_;
|
||||
|
||||
summary.num_iterations = 1;
|
||||
summary.termination_type = sparse_cholesky_->FactorAndSolve(
|
||||
lhs.get(), rhs(), solution, &summary.message);
|
||||
crs_lhs_.get(), rhs().data(), solution, &summary.message);
|
||||
return summary;
|
||||
}
|
||||
|
||||
@@ -355,7 +342,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
|
||||
if (num_rows == 0) {
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 0;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Success.";
|
||||
return summary;
|
||||
}
|
||||
@@ -363,9 +350,9 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
|
||||
// Only SCHUR_JACOBI is supported over here right now.
|
||||
CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI);
|
||||
|
||||
if (preconditioner_.get() == nullptr) {
|
||||
preconditioner_ =
|
||||
std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks_);
|
||||
if (preconditioner_ == nullptr) {
|
||||
preconditioner_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
|
||||
blocks_, options().context, options().num_threads);
|
||||
}
|
||||
|
||||
auto* sc = down_cast<BlockRandomAccessSparseMatrix*>(mutable_lhs());
|
||||
@@ -373,7 +360,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
|
||||
// Extract block diagonal from the Schur complement to construct the
|
||||
// schur_jacobi preconditioner.
|
||||
for (int i = 0; i < blocks_.size(); ++i) {
|
||||
const int block_size = blocks_[i];
|
||||
const int block_size = blocks_[i].size;
|
||||
|
||||
int sc_r, sc_c, sc_row_stride, sc_col_stride;
|
||||
CellInfo* sc_cell_info =
|
||||
@@ -394,25 +381,28 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
|
||||
|
||||
VectorRef(solution, num_rows).setZero();
|
||||
|
||||
std::unique_ptr<LinearOperator> lhs_adapter =
|
||||
std::make_unique<BlockRandomAccessSparseMatrixAdapter>(*sc);
|
||||
std::unique_ptr<LinearOperator> preconditioner_adapter =
|
||||
auto lhs = std::make_unique<BlockRandomAccessSparseMatrixAdapter>(*sc);
|
||||
auto preconditioner =
|
||||
std::make_unique<BlockRandomAccessDiagonalMatrixAdapter>(
|
||||
*preconditioner_);
|
||||
|
||||
LinearSolver::Options cg_options;
|
||||
ConjugateGradientsSolverOptions cg_options;
|
||||
cg_options.min_num_iterations = options().min_num_iterations;
|
||||
cg_options.max_num_iterations = options().max_num_iterations;
|
||||
ConjugateGradientsSolver cg_solver(cg_options);
|
||||
cg_options.residual_reset_period = options().residual_reset_period;
|
||||
cg_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
cg_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
|
||||
LinearSolver::PerSolveOptions cg_per_solve_options;
|
||||
cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance;
|
||||
cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance;
|
||||
cg_per_solve_options.preconditioner = preconditioner_adapter.get();
|
||||
|
||||
return cg_solver.Solve(
|
||||
lhs_adapter.get(), rhs(), cg_per_solve_options, solution);
|
||||
cg_solution_ = Vector::Zero(sc->num_rows());
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (scratch_[i] == nullptr) {
|
||||
scratch_[i] = new Vector(sc->num_rows());
|
||||
}
|
||||
}
|
||||
auto summary = ConjugateGradientsSolver<Vector>(
|
||||
cg_options, *lhs, rhs(), *preconditioner, scratch_, cg_solution_);
|
||||
VectorRef(solution, sc->num_rows()) = cg_solution_;
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -54,8 +54,7 @@
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
class SparseCholesky;
|
||||
@@ -66,7 +65,7 @@ class SparseCholesky;
|
||||
//
|
||||
// E y + F z = b
|
||||
//
|
||||
// Where x = [y;z] is a partition of the variables. The paritioning
|
||||
// Where x = [y;z] is a partition of the variables. The partitioning
|
||||
// of the variables is such that, E'E is a block diagonal
|
||||
// matrix. Further, the rows of A are ordered so that for every
|
||||
// variable block in y, all the rows containing that variable block
|
||||
@@ -131,9 +130,8 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver {
|
||||
}
|
||||
const BlockRandomAccessMatrix* lhs() const { return lhs_.get(); }
|
||||
BlockRandomAccessMatrix* mutable_lhs() { return lhs_.get(); }
|
||||
|
||||
void set_rhs(std::unique_ptr<double[]> rhs) { rhs_ = std::move(rhs); }
|
||||
const double* rhs() const { return rhs_.get(); }
|
||||
void ResizeRhs(int n) { rhs_.resize(n); }
|
||||
const Vector& rhs() const { return rhs_; }
|
||||
|
||||
private:
|
||||
virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0;
|
||||
@@ -145,7 +143,7 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver {
|
||||
|
||||
std::unique_ptr<SchurEliminatorBase> eliminator_;
|
||||
std::unique_ptr<BlockRandomAccessMatrix> lhs_;
|
||||
std::unique_ptr<double[]> rhs_;
|
||||
Vector rhs_;
|
||||
};
|
||||
|
||||
// Dense Cholesky factorization based solver.
|
||||
@@ -185,14 +183,15 @@ class CERES_NO_EXPORT SparseSchurComplementSolver final
|
||||
LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients(
|
||||
const LinearSolver::PerSolveOptions& per_solve_options, double* solution);
|
||||
|
||||
// Size of the blocks in the Schur complement.
|
||||
std::vector<int> blocks_;
|
||||
std::vector<Block> blocks_;
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky_;
|
||||
std::unique_ptr<BlockRandomAccessDiagonalMatrix> preconditioner_;
|
||||
std::unique_ptr<CompressedRowSparseMatrix> crs_lhs_;
|
||||
Vector cg_solution_;
|
||||
Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,8 +44,7 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/schur_eliminator.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
SchurEliminatorBase::~SchurEliminatorBase() = default;
|
||||
|
||||
@@ -161,5 +160,4 @@ std::unique_ptr<SchurEliminatorBase> SchurEliminatorBase::Create(
|
||||
Eigen::Dynamic>>(options);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
21
extern/ceres/internal/ceres/schur_eliminator.h
vendored
21
extern/ceres/internal/ceres/schur_eliminator.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2019 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,8 +46,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Classes implementing the SchurEliminatorBase interface implement
|
||||
// variable elimination for linear least squares problems. Assuming
|
||||
@@ -169,9 +168,9 @@ class CERES_NO_EXPORT SchurEliminatorBase {
|
||||
public:
|
||||
virtual ~SchurEliminatorBase();
|
||||
|
||||
// Initialize the eliminator. It is the user's responsibilty to call
|
||||
// Initialize the eliminator. It is the user's responsibility to call
|
||||
// this function before calling Eliminate or BackSubstitute. It is
|
||||
// also the caller's responsibilty to ensure that the
|
||||
// also the caller's responsibility to ensure that the
|
||||
// CompressedRowBlockStructure object passed to this method is the
|
||||
// same one (or is equivalent to) the one associated with the
|
||||
// BlockSparseMatrix objects below.
|
||||
@@ -383,8 +382,9 @@ template <int kRowBlockSize = Eigen::Dynamic,
|
||||
class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
|
||||
: public SchurEliminatorBase {
|
||||
public:
|
||||
// TODO(sameeragarwal): Find out why "assume_full_rank_ete" is not used here.
|
||||
void Init(int num_eliminate_blocks,
|
||||
bool assume_full_rank_ete,
|
||||
bool /*assume_full_rank_ete*/,
|
||||
const CompressedRowBlockStructure* bs) override {
|
||||
CHECK_GT(num_eliminate_blocks, 0)
|
||||
<< "SchurComplementSolver cannot be initialized with "
|
||||
@@ -447,7 +447,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
|
||||
const CompressedRowBlockStructure* bs = A.block_structure();
|
||||
const double* values = A.values();
|
||||
|
||||
// Add the diagonal to the schur complement.
|
||||
// Add the diagonal to the Schur complement.
|
||||
if (D != nullptr) {
|
||||
typename EigenTypes<kFBlockSize>::ConstVectorRef diag(
|
||||
D + bs->cols[num_eliminate_blocks_].position, kFBlockSize);
|
||||
@@ -479,7 +479,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
|
||||
const Chunk& chunk = chunks_[i];
|
||||
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
|
||||
|
||||
// Naming covention, e_t_e = e_block.transpose() * e_block;
|
||||
// Naming convention, e_t_e = e_block.transpose() * e_block;
|
||||
Eigen::Matrix<double, kEBlockSize, kEBlockSize> e_t_e;
|
||||
Eigen::Matrix<double, kEBlockSize, kFBlockSize> e_t_f;
|
||||
Eigen::Matrix<double, kEBlockSize, 1> e_t_b;
|
||||
@@ -570,7 +570,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
|
||||
// y_i = e_t_e_inverse * sum_i e_i^T * (b_i - f_i * z);
|
||||
void BackSubstitute(const BlockSparseMatrixData& A,
|
||||
const double* b,
|
||||
const double* D,
|
||||
const double* /*D*/,
|
||||
const double* z_ptr,
|
||||
double* y) override {
|
||||
typename EigenTypes<kFBlockSize>::ConstVectorRef z(z_ptr, kFBlockSize);
|
||||
@@ -623,8 +623,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final
|
||||
std::vector<double> e_t_e_inverse_matrices_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -69,8 +69,7 @@
|
||||
#include "ceres/thread_token_provider.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::~SchurEliminator() {
|
||||
@@ -107,7 +106,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Init(
|
||||
}
|
||||
|
||||
// TODO(sameeragarwal): Now that we may have subset block structure,
|
||||
// we need to make sure that we account for the fact that somep
|
||||
// we need to make sure that we account for the fact that some
|
||||
// point blocks only have a "diagonal" row and nothing more.
|
||||
//
|
||||
// This likely requires a slightly different algorithm, which works
|
||||
@@ -206,8 +205,6 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Eliminate(
|
||||
const int block_size = bs->cols[i].size;
|
||||
typename EigenTypes<Eigen::Dynamic>::ConstVectorRef diag(
|
||||
D + bs->cols[i].position, block_size);
|
||||
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block_size, block_size).diagonal() +=
|
||||
diag.array().square().matrix();
|
||||
@@ -301,7 +298,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::Eliminate(
|
||||
thread_id, bs, inverse_ete, buffer, chunk.buffer_layout, lhs);
|
||||
});
|
||||
|
||||
// For rows with no e_blocks, the schur complement update reduces to
|
||||
// For rows with no e_blocks, the Schur complement update reduces to
|
||||
// S += F'F.
|
||||
NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs);
|
||||
}
|
||||
@@ -410,7 +407,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::UpdateRhs(
|
||||
const int block_id = row.cells[c].block_id;
|
||||
const int block_size = bs->cols[block_id].size;
|
||||
const int block = block_id - num_eliminate_blocks_;
|
||||
std::lock_guard<std::mutex> l(*rhs_locks_[block]);
|
||||
auto lock = MakeConditionalLock(num_threads_, *rhs_locks_[block]);
|
||||
// clang-format off
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||
values + row.cells[c].position,
|
||||
@@ -433,7 +430,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::UpdateRhs(
|
||||
//
|
||||
// ete = y11 * y11' + y12 * y12'
|
||||
//
|
||||
// and the off diagonal blocks in the Guass Newton Hessian.
|
||||
// and the off diagonal blocks in the Gauss Newton Hessian.
|
||||
//
|
||||
// buffer = [y11'(z11 + z12), y12' * z22, y11' * z51]
|
||||
//
|
||||
@@ -550,7 +547,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
|
||||
if (cell_info != nullptr) {
|
||||
const int block2_size = bs->cols[it2->first].size;
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
|
||||
// clang-format off
|
||||
MatrixMatrixMultiply
|
||||
<kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
|
||||
@@ -563,7 +560,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
}
|
||||
|
||||
// For rows with no e_blocks, the schur complement update reduces to S
|
||||
// For rows with no e_blocks, the Schur complement update reduces to S
|
||||
// += F'F. This function iterates over the rows of A with no e_block,
|
||||
// and calls NoEBlockRowOuterProduct on each row.
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
@@ -596,7 +593,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
|
||||
// A row r of A, which has no e_blocks, gets added to the Schur
|
||||
// Complement as S += r r'. This function is responsible for computing
|
||||
// complement as S += r r'. This function is responsible for computing
|
||||
// the contribution of a single row r to the Schur complement. It is
|
||||
// very similar in structure to EBlockRowOuterProduct except for
|
||||
// one difference. It does not use any of the template
|
||||
@@ -627,7 +624,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
CellInfo* cell_info =
|
||||
lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride);
|
||||
if (cell_info != nullptr) {
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
|
||||
// This multiply currently ignores the fact that this is a
|
||||
// symmetric outer product.
|
||||
// clang-format off
|
||||
@@ -648,7 +645,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
|
||||
if (cell_info != nullptr) {
|
||||
const int block2_size = bs->cols[row.cells[j].block_id].size;
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
@@ -682,7 +679,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
CellInfo* cell_info =
|
||||
lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride);
|
||||
if (cell_info != nullptr) {
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
|
||||
// block += b1.transpose() * b1;
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply
|
||||
@@ -703,7 +700,7 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride);
|
||||
if (cell_info != nullptr) {
|
||||
// block += b1.transpose() * b2;
|
||||
std::lock_guard<std::mutex> l(cell_info->m);
|
||||
auto lock = MakeConditionalLock(num_threads_, cell_info->m);
|
||||
// clang-format off
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||
@@ -716,7 +713,6 @@ void SchurEliminator<kRowBlockSize, kEBlockSize, kFBlockSize>::
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
||||
|
||||
150
extern/ceres/internal/ceres/schur_eliminator_template.py
vendored
Normal file
150
extern/ceres/internal/ceres/schur_eliminator_template.py
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
# Ceres Solver - A fast non-linear least squares minimizer
|
||||
# Copyright 2023 Google Inc. All rights reserved.
|
||||
# http://ceres-solver.org/
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
# used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
#
|
||||
# Script for explicitly generating template specialization of the
|
||||
# SchurEliminator class. It is a rather large class
|
||||
# and the number of explicit instantiations is also large. Explicitly
|
||||
# generating these instantiations in separate .cc files breaks the
|
||||
# compilation into separate compilation units rather than one large cc
|
||||
# file which takes 2+GB of RAM to compile.
|
||||
#
|
||||
# This script creates two sets of files.
|
||||
#
|
||||
# 1. schur_eliminator_x_x_x.cc
|
||||
# where, the x indicates the template parameters and
|
||||
#
|
||||
# 2. schur_eliminator.cc
|
||||
#
|
||||
# that contains a factory function for instantiating these classes
|
||||
# based on runtime parameters.
|
||||
#
|
||||
# The list of tuples, specializations, indicates the set of
|
||||
# specializations that is generated.
|
||||
|
||||
# Set of template specializations to generate
|
||||
|
||||
HEADER = """// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
//
|
||||
// Template specialization of SchurEliminator.
|
||||
//
|
||||
// ========================================
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
// THIS FILE IS AUTOGENERATED. DO NOT EDIT.
|
||||
//=========================================
|
||||
//
|
||||
// This file is generated using generate_template_specializations.py.
|
||||
"""
|
||||
|
||||
DYNAMIC_FILE = """
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<%s, %s, %s>;
|
||||
|
||||
} // namespace ceres::internal
|
||||
"""
|
||||
|
||||
SPECIALIZATION_FILE = """
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
|
||||
#include "ceres/schur_eliminator_impl.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
template class SchurEliminator<%s, %s, %s>;
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
"""
|
||||
|
||||
FACTORY_FILE_HEADER = """
|
||||
#include <memory>
|
||||
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/schur_eliminator.h"
|
||||
|
||||
namespace ceres::internal {
|
||||
|
||||
SchurEliminatorBase::~SchurEliminatorBase() = default;
|
||||
|
||||
std::unique_ptr<SchurEliminatorBase> SchurEliminatorBase::Create(
|
||||
const LinearSolver::Options& options) {
|
||||
#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION
|
||||
"""
|
||||
|
||||
FACTORY = """ return std::make_unique<SchurEliminator<%s, %s, %s>>(options);"""
|
||||
|
||||
FACTORY_FOOTER = """
|
||||
#endif
|
||||
VLOG(1) << "Template specializations not found for <"
|
||||
<< options.row_block_size << "," << options.e_block_size << ","
|
||||
<< options.f_block_size << ">";
|
||||
return std::make_unique<SchurEliminator<Eigen::Dynamic,
|
||||
Eigen::Dynamic,
|
||||
Eigen::Dynamic>>(options);
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
"""
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include "ceres/schur_jacobi_preconditioner.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -39,8 +40,7 @@
|
||||
#include "ceres/schur_eliminator.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
SchurJacobiPreconditioner::SchurJacobiPreconditioner(
|
||||
const CompressedRowBlockStructure& bs, Preconditioner::Options options)
|
||||
@@ -52,12 +52,16 @@ SchurJacobiPreconditioner::SchurJacobiPreconditioner(
|
||||
<< "SCHUR_JACOBI preconditioner.";
|
||||
CHECK(options_.context != nullptr);
|
||||
|
||||
std::vector<int> blocks(num_blocks);
|
||||
std::vector<Block> blocks(num_blocks);
|
||||
int position = 0;
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
blocks[i] = bs.cols[i + options_.elimination_groups[0]].size;
|
||||
blocks[i] =
|
||||
Block(bs.cols[i + options_.elimination_groups[0]].size, position);
|
||||
position += blocks[i].size;
|
||||
}
|
||||
|
||||
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(blocks);
|
||||
m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
|
||||
blocks, options_.context, options_.num_threads);
|
||||
InitEliminator(bs);
|
||||
}
|
||||
|
||||
@@ -92,12 +96,11 @@ bool SchurJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
return true;
|
||||
}
|
||||
|
||||
void SchurJacobiPreconditioner::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
m_->RightMultiply(x, y);
|
||||
void SchurJacobiPreconditioner::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
m_->RightMultiplyAndAccumulate(x, y);
|
||||
}
|
||||
|
||||
int SchurJacobiPreconditioner::num_rows() const { return m_->num_rows(); }
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,8 +47,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockRandomAccessDiagonalMatrix;
|
||||
class BlockSparseMatrix;
|
||||
@@ -72,8 +71,10 @@ class SchurEliminatorBase;
|
||||
// SchurJacobiPreconditioner preconditioner(
|
||||
// *A.block_structure(), options);
|
||||
// preconditioner.Update(A, nullptr);
|
||||
// preconditioner.RightMultiply(x, y);
|
||||
// preconditioner.RightMultiplyAndAccumulate(x, y);
|
||||
//
|
||||
// TODO(https://github.com/ceres-solver/ceres-solver/issues/935):
|
||||
// SchurJacobiPreconditioner::RightMultiplyAndAccumulate will benefit from multithreading
|
||||
class CERES_NO_EXPORT SchurJacobiPreconditioner
|
||||
: public BlockSparseMatrixPreconditioner {
|
||||
public:
|
||||
@@ -91,7 +92,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner
|
||||
~SchurJacobiPreconditioner() override;
|
||||
|
||||
// Preconditioner interface.
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
int num_rows() const final;
|
||||
|
||||
private:
|
||||
@@ -104,8 +105,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner
|
||||
std::unique_ptr<BlockRandomAccessDiagonalMatrix> m_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,14 +36,12 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
CERES_NO_EXPORT
|
||||
void GetBestSchurTemplateSpecialization(int* row_block_size,
|
||||
int* e_block_size,
|
||||
int* f_block_size);
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SCHUR_TEMPLATES_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,8 +34,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/thread_token_provider.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Helper class for ThreadTokenProvider. This object acquires a token in its
|
||||
// constructor and puts that token back with destruction.
|
||||
@@ -55,7 +54,6 @@ class CERES_NO_EXPORT ScopedThreadToken {
|
||||
int token_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SCOPED_THREAD_TOKEN_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,23 +36,22 @@
|
||||
#include "ceres/program.h"
|
||||
#include "ceres/residual_block.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<ScratchEvaluatePreparer[]> ScratchEvaluatePreparer::Create(
|
||||
const Program& program, int num_threads) {
|
||||
const Program& program, unsigned num_threads) {
|
||||
auto preparers = std::make_unique<ScratchEvaluatePreparer[]>(num_threads);
|
||||
int max_derivatives_per_residual_block =
|
||||
program.MaxDerivativesPerResidualBlock();
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
for (unsigned i = 0; i < num_threads; i++) {
|
||||
preparers[i].Init(max_derivatives_per_residual_block);
|
||||
}
|
||||
return preparers;
|
||||
}
|
||||
|
||||
void ScratchEvaluatePreparer::Init(int max_derivatives_per_residual_block) {
|
||||
jacobian_scratch_ =
|
||||
std::make_unique<double[]>(max_derivatives_per_residual_block);
|
||||
jacobian_scratch_ = std::make_unique<double[]>(
|
||||
static_cast<std::size_t>(max_derivatives_per_residual_block));
|
||||
}
|
||||
|
||||
// Point the jacobian blocks into the scratch area of this evaluate preparer.
|
||||
@@ -75,5 +74,4 @@ void ScratchEvaluatePreparer::Prepare(const ResidualBlock* residual_block,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class Program;
|
||||
class ResidualBlock;
|
||||
@@ -51,7 +50,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer {
|
||||
public:
|
||||
// Create num_threads ScratchEvaluatePreparers.
|
||||
static std::unique_ptr<ScratchEvaluatePreparer[]> Create(
|
||||
const Program& program, int num_threads);
|
||||
const Program& program, unsigned num_threads);
|
||||
|
||||
// EvaluatePreparer interface
|
||||
void Init(int max_derivatives_per_residual_block);
|
||||
@@ -66,8 +65,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer {
|
||||
std::unique_ptr<double[]> jacobian_scratch_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -36,8 +36,7 @@
|
||||
#include "ceres/graph.h"
|
||||
#include "ceres/graph_algorithms.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
int ComputeSingleLinkageClustering(
|
||||
const SingleLinkageClusteringOptions& options,
|
||||
@@ -91,5 +90,4 @@ int ComputeSingleLinkageClustering(
|
||||
return num_clusters;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct SingleLinkageClusteringOptions {
|
||||
// Graph edges with edge weight less than min_similarity are ignored
|
||||
@@ -61,8 +60,7 @@ CERES_NO_EXPORT int ComputeSingleLinkageClustering(
|
||||
const WeightedGraph<int>& graph,
|
||||
std::unordered_map<int, int>* membership);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
8
extern/ceres/internal/ceres/small_blas.h
vendored
8
extern/ceres/internal/ceres/small_blas.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "glog/logging.h"
|
||||
#include "small_blas_generic.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// The following three macros are used to share code and reduce
|
||||
// template junk across the various GEMM variants.
|
||||
@@ -561,7 +560,6 @@ inline void MatrixTransposeVectorMultiply(const double* A,
|
||||
#undef CERES_GEMM_STORE_SINGLE
|
||||
#undef CERES_GEMM_STORE_PAIR
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SMALL_BLAS_H_
|
||||
|
||||
90
extern/ceres/internal/ceres/small_blas_generic.h
vendored
90
extern/ceres/internal/ceres/small_blas_generic.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,38 +35,35 @@
|
||||
#ifndef CERES_INTERNAL_SMALL_BLAS_GENERIC_H_
|
||||
#define CERES_INTERNAL_SMALL_BLAS_GENERIC_H_
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// The following macros are used to share code
|
||||
#define CERES_GEMM_OPT_NAIVE_HEADER \
|
||||
double c0 = 0.0; \
|
||||
double c1 = 0.0; \
|
||||
double c2 = 0.0; \
|
||||
double c3 = 0.0; \
|
||||
const double* pa = a; \
|
||||
const double* pb = b; \
|
||||
const int span = 4; \
|
||||
int col_r = col_a & (span - 1); \
|
||||
#define CERES_GEMM_OPT_NAIVE_HEADER \
|
||||
double cvec4[4] = {0.0, 0.0, 0.0, 0.0}; \
|
||||
const double* pa = a; \
|
||||
const double* pb = b; \
|
||||
const int span = 4; \
|
||||
int col_r = col_a & (span - 1); \
|
||||
int col_m = col_a - col_r;
|
||||
|
||||
#define CERES_GEMM_OPT_STORE_MAT1X4 \
|
||||
if (kOperation > 0) { \
|
||||
*c++ += c0; \
|
||||
*c++ += c1; \
|
||||
*c++ += c2; \
|
||||
*c++ += c3; \
|
||||
c[0] += cvec4[0]; \
|
||||
c[1] += cvec4[1]; \
|
||||
c[2] += cvec4[2]; \
|
||||
c[3] += cvec4[3]; \
|
||||
} else if (kOperation < 0) { \
|
||||
*c++ -= c0; \
|
||||
*c++ -= c1; \
|
||||
*c++ -= c2; \
|
||||
*c++ -= c3; \
|
||||
c[0] -= cvec4[0]; \
|
||||
c[1] -= cvec4[1]; \
|
||||
c[2] -= cvec4[2]; \
|
||||
c[3] -= cvec4[3]; \
|
||||
} else { \
|
||||
*c++ = c0; \
|
||||
*c++ = c1; \
|
||||
*c++ = c2; \
|
||||
*c++ = c3; \
|
||||
}
|
||||
c[0] = cvec4[0]; \
|
||||
c[1] = cvec4[1]; \
|
||||
c[2] = cvec4[2]; \
|
||||
c[3] = cvec4[3]; \
|
||||
} \
|
||||
c += 4;
|
||||
|
||||
// Matrix-Matrix Multiplication
|
||||
// Figure out 1x4 of Matrix C in one batch
|
||||
@@ -100,10 +97,10 @@ static inline void MMM_mat1x4(const int col_a,
|
||||
#define CERES_GEMM_OPT_MMM_MAT1X4_MUL \
|
||||
av = pa[k]; \
|
||||
pb = b + bi; \
|
||||
c0 += av * pb[0]; \
|
||||
c1 += av * pb[1]; \
|
||||
c2 += av * pb[2]; \
|
||||
c3 += av * pb[3]; \
|
||||
cvec4[0] += av * pb[0]; \
|
||||
cvec4[1] += av * pb[1]; \
|
||||
cvec4[2] += av * pb[2]; \
|
||||
cvec4[3] += av * pb[3]; \
|
||||
pb += 4; \
|
||||
bi += col_stride_b; \
|
||||
k++;
|
||||
@@ -168,10 +165,10 @@ static inline void MTM_mat1x4(const int col_a,
|
||||
#define CERES_GEMM_OPT_MTM_MAT1X4_MUL \
|
||||
av = pa[ai]; \
|
||||
pb = b + bi; \
|
||||
c0 += av * pb[0]; \
|
||||
c1 += av * pb[1]; \
|
||||
c2 += av * pb[2]; \
|
||||
c3 += av * pb[3]; \
|
||||
cvec4[0] += av * pb[0]; \
|
||||
cvec4[1] += av * pb[1]; \
|
||||
cvec4[2] += av * pb[2]; \
|
||||
cvec4[3] += av * pb[3]; \
|
||||
pb += 4; \
|
||||
ai += col_stride_a; \
|
||||
bi += col_stride_b;
|
||||
@@ -221,13 +218,13 @@ static inline void MVM_mat4x1(const int col_a,
|
||||
double bv = 0.0;
|
||||
|
||||
// clang-format off
|
||||
#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \
|
||||
bv = *pb; \
|
||||
c0 += *(pa ) * bv; \
|
||||
c1 += *(pa + col_stride_a ) * bv; \
|
||||
c2 += *(pa + col_stride_a * 2) * bv; \
|
||||
c3 += *(pa + col_stride_a * 3) * bv; \
|
||||
pa++; \
|
||||
#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \
|
||||
bv = *pb; \
|
||||
cvec4[0] += *(pa ) * bv; \
|
||||
cvec4[1] += *(pa + col_stride_a ) * bv; \
|
||||
cvec4[2] += *(pa + col_stride_a * 2) * bv; \
|
||||
cvec4[3] += *(pa + col_stride_a * 3) * bv; \
|
||||
pa++; \
|
||||
pb++;
|
||||
// clang-format on
|
||||
|
||||
@@ -285,16 +282,14 @@ static inline void MTV_mat4x1(const int col_a,
|
||||
CERES_GEMM_OPT_NAIVE_HEADER
|
||||
double bv = 0.0;
|
||||
|
||||
// clang-format off
|
||||
#define CERES_GEMM_OPT_MTV_MAT4X1_MUL \
|
||||
bv = *pb; \
|
||||
c0 += *(pa ) * bv; \
|
||||
c1 += *(pa + 1) * bv; \
|
||||
c2 += *(pa + 2) * bv; \
|
||||
c3 += *(pa + 3) * bv; \
|
||||
cvec4[0] += pa[0] * bv; \
|
||||
cvec4[1] += pa[1] * bv; \
|
||||
cvec4[2] += pa[2] * bv; \
|
||||
cvec4[3] += pa[3] * bv; \
|
||||
pa += col_stride_a; \
|
||||
pb++;
|
||||
// clang-format on
|
||||
|
||||
for (int k = 0; k < col_m; k += span) {
|
||||
CERES_GEMM_OPT_MTV_MAT4X1_MUL
|
||||
@@ -315,7 +310,6 @@ static inline void MTV_mat4x1(const int col_a,
|
||||
#undef CERES_GEMM_OPT_NAIVE_HEADER
|
||||
#undef CERES_GEMM_OPT_STORE_MAT1X4
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SMALL_BLAS_GENERIC_H_
|
||||
|
||||
585
extern/ceres/internal/ceres/solver.cc
vendored
585
extern/ceres/internal/ceres/solver.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,14 +32,17 @@
|
||||
#include "ceres/solver.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <sstream> // NOLINT
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/casts.h"
|
||||
#include "ceres/context.h"
|
||||
#include "ceres/context_impl.h"
|
||||
#include "ceres/detect_structure.h"
|
||||
#include "ceres/eigensparse.h"
|
||||
#include "ceres/gradient_checking_cost_function.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/parameter_block_ordering.h"
|
||||
@@ -50,6 +53,7 @@
|
||||
#include "ceres/schur_templates.h"
|
||||
#include "ceres/solver_utils.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/suitesparse.h"
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
@@ -58,32 +62,29 @@ namespace {
|
||||
|
||||
using internal::StringAppendF;
|
||||
using internal::StringPrintf;
|
||||
using std::map;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
#define OPTION_OP(x, y, OP) \
|
||||
if (!(options.x OP y)) { \
|
||||
std::stringstream ss; \
|
||||
ss << "Invalid configuration. "; \
|
||||
ss << string("Solver::Options::" #x " = ") << options.x << ". "; \
|
||||
ss << "Violated constraint: "; \
|
||||
ss << string("Solver::Options::" #x " " #OP " " #y); \
|
||||
*error = ss.str(); \
|
||||
return false; \
|
||||
#define OPTION_OP(x, y, OP) \
|
||||
if (!(options.x OP y)) { \
|
||||
std::stringstream ss; \
|
||||
ss << "Invalid configuration. "; \
|
||||
ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \
|
||||
ss << "Violated constraint: "; \
|
||||
ss << std::string("Solver::Options::" #x " " #OP " " #y); \
|
||||
*error = ss.str(); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
#define OPTION_OP_OPTION(x, y, OP) \
|
||||
if (!(options.x OP options.y)) { \
|
||||
std::stringstream ss; \
|
||||
ss << "Invalid configuration. "; \
|
||||
ss << string("Solver::Options::" #x " = ") << options.x << ". "; \
|
||||
ss << string("Solver::Options::" #y " = ") << options.y << ". "; \
|
||||
ss << "Violated constraint: "; \
|
||||
ss << string("Solver::Options::" #x); \
|
||||
ss << string(#OP " Solver::Options::" #y "."); \
|
||||
*error = ss.str(); \
|
||||
return false; \
|
||||
#define OPTION_OP_OPTION(x, y, OP) \
|
||||
if (!(options.x OP options.y)) { \
|
||||
std::stringstream ss; \
|
||||
ss << "Invalid configuration. "; \
|
||||
ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \
|
||||
ss << std::string("Solver::Options::" #y " = ") << options.y << ". "; \
|
||||
ss << "Violated constraint: "; \
|
||||
ss << std::string("Solver::Options::" #x); \
|
||||
ss << std::string(#OP " Solver::Options::" #y "."); \
|
||||
*error = ss.str(); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
#define OPTION_GE(x, y) OPTION_OP(x, y, >=);
|
||||
@@ -93,7 +94,7 @@ using std::vector;
|
||||
#define OPTION_LE_OPTION(x, y) OPTION_OP_OPTION(x, y, <=)
|
||||
#define OPTION_LT_OPTION(x, y) OPTION_OP_OPTION(x, y, <)
|
||||
|
||||
bool CommonOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
bool CommonOptionsAreValid(const Solver::Options& options, std::string* error) {
|
||||
OPTION_GE(max_num_iterations, 0);
|
||||
OPTION_GE(max_solver_time_in_seconds, 0.0);
|
||||
OPTION_GE(function_tolerance, 0.0);
|
||||
@@ -107,7 +108,286 @@ bool CommonOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
// Reports whether nested dissection ordering can be used with the given
// sparse linear algebra library.
//
// SUITE_SPARSE and EIGEN_SPARSE defer to the corresponding backend's own
// IsNestedDissectionAvailable() query, ACCELERATE_SPARSE is always reported
// as supporting it, and every other library type yields false.
bool IsNestedDissectionAvailable(SparseLinearAlgebraLibraryType type) {
  switch (type) {
    case SUITE_SPARSE:
      return internal::SuiteSparse::IsNestedDissectionAvailable();
    case ACCELERATE_SPARSE:
      return true;
    case EIGEN_SPARSE:
      return internal::EigenSparse::IsNestedDissectionAvailable();
    default:
      return false;
  }
}
|
||||
|
||||
// Returns true for the linear solver types treated as iterative here
// (CGNR and ITERATIVE_SCHUR), and false for every other type.
bool IsIterativeSolver(LinearSolverType type) {
  switch (type) {
    case CGNR:
    case ITERATIVE_SCHUR:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Checks that the dense linear algebra library selected in `options` is
// available according to IsDenseLinearAlgebraLibraryTypeAvailable.
//
// Returns true when it is. Otherwise stores a message naming the linear
// solver type and the dense library in *error and returns false.
bool OptionsAreValidForDenseSolver(const Solver::Options& options,
                                   std::string* error) {
  const char* dense_library = DenseLinearAlgebraLibraryTypeToString(
      options.dense_linear_algebra_library_type);
  const char* linear_solver =
      LinearSolverTypeToString(options.linear_solver_type);

  const bool backend_available = IsDenseLinearAlgebraLibraryTypeAvailable(
      options.dense_linear_algebra_library_type);
  if (backend_available) {
    return true;
  }

  *error = StringPrintf(
      "Can't use %s with dense_linear_algebra_library_type = %s "
      "because support not enabled when Ceres was built.",
      linear_solver,
      dense_library);
  return false;
}
|
||||
|
||||
// Validates the options shared by every solver or preconditioner that needs
// a sparse Cholesky factorization.
//
// Returns true when the configuration is usable. Otherwise writes a
// human-readable reason into *error and returns false. The checks run in
// order and the first failing one determines the message:
//   1. sparse_linear_algebra_library_type must not be NO_SPARSE.
//   2. The selected library must be reported as available by
//      IsSparseLinearAlgebraLibraryTypeAvailable.
//   3. NESDIS ordering requires IsNestedDissectionAvailable for the library.
//   4. use_mixed_precision_solves is rejected together with SUITE_SPARSE.
//   5. dynamic_sparsity is rejected together with ACCELERATE_SPARSE.
bool OptionsAreValidForSparseCholeskyBasedSolver(const Solver::Options& options,
                                                 std::string* error) {
  const char* library_name = SparseLinearAlgebraLibraryTypeToString(
      options.sparse_linear_algebra_library_type);
  // Sparse factorization based solvers and some preconditioners require a
  // sparse Cholesky factorization. For iterative solvers the factorization is
  // needed by the preconditioner, so that is the name reported in messages.
  const char* solver_name =
      IsIterativeSolver(options.linear_solver_type)
          ? PreconditionerTypeToString(options.preconditioner_type)
          : LinearSolverTypeToString(options.linear_solver_type);

  constexpr char kNoSparseFormat[] =
      "Can't use %s with sparse_linear_algebra_library_type = %s.";
  // Fixed: the message previously read "Can't use %s sparse_..." and was
  // missing the word "with".
  constexpr char kNoLibraryFormat[] =
      "Can't use %s with sparse_linear_algebra_library_type = %s, because "
      "support was not enabled when Ceres Solver was built.";
  constexpr char kNoNesdisFormat[] =
      "NESDIS is not available with sparse_linear_algebra_library_type = %s.";
  constexpr char kMixedFormat[] =
      "use_mixed_precision_solves with %s is not supported with "
      "sparse_linear_algebra_library_type = %s";
  constexpr char kDynamicSparsityFormat[] =
      "dynamic sparsity is not supported with "
      "sparse_linear_algebra_library_type = %s";

  if (options.sparse_linear_algebra_library_type == NO_SPARSE) {
    *error = StringPrintf(kNoSparseFormat, solver_name, library_name);
    return false;
  }

  if (!IsSparseLinearAlgebraLibraryTypeAvailable(
          options.sparse_linear_algebra_library_type)) {
    *error = StringPrintf(kNoLibraryFormat, solver_name, library_name);
    return false;
  }

  if (options.linear_solver_ordering_type == ceres::NESDIS &&
      !IsNestedDissectionAvailable(
          options.sparse_linear_algebra_library_type)) {
    *error = StringPrintf(kNoNesdisFormat, library_name);
    return false;
  }

  if (options.use_mixed_precision_solves &&
      options.sparse_linear_algebra_library_type == SUITE_SPARSE) {
    *error = StringPrintf(kMixedFormat, solver_name, library_name);
    return false;
  }

  if (options.dynamic_sparsity &&
      options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
    *error = StringPrintf(kDynamicSparsityFormat, library_name);
    return false;
  }

  return true;
}
|
||||
|
||||
// Validation for DENSE_NORMAL_CHOLESKY: only the generic dense solver checks
// apply. CHECK-fails if called with any other linear_solver_type.
bool OptionsAreValidForDenseNormalCholesky(const Solver::Options& options,
                                           std::string* error) {
  CHECK_EQ(options.linear_solver_type, DENSE_NORMAL_CHOLESKY);
  const bool dense_solver_ok = OptionsAreValidForDenseSolver(options, error);
  return dense_solver_ok;
}
|
||||
|
||||
// Validation for DENSE_QR. CHECK-fails if called with any other
// linear_solver_type.
//
// The generic dense solver checks run first; if they pass,
// use_mixed_precision_solves is additionally rejected. On failure *error
// holds the reason and false is returned.
bool OptionsAreValidForDenseQr(const Solver::Options& options,
                               std::string* error) {
  CHECK_EQ(options.linear_solver_type, DENSE_QR);

  const bool dense_solver_ok = OptionsAreValidForDenseSolver(options, error);
  if (dense_solver_ok && options.use_mixed_precision_solves) {
    *error = "Can't use use_mixed_precision_solves with DENSE_QR.";
    return false;
  }

  // Either the dense checks failed (the callee already set *error) or
  // everything passed.
  return dense_solver_ok;
}
|
||||
|
||||
// Validation for SPARSE_NORMAL_CHOLESKY: only the shared sparse Cholesky
// checks apply. CHECK-fails if called with any other linear_solver_type.
bool OptionsAreValidForSparseNormalCholesky(const Solver::Options& options,
                                            std::string* error) {
  CHECK_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
  const bool sparse_cholesky_ok =
      OptionsAreValidForSparseCholeskyBasedSolver(options, error);
  return sparse_cholesky_ok;
}
|
||||
|
||||
// Validation for DENSE_SCHUR. CHECK-fails if called with any other
// linear_solver_type.
//
// Rejects dynamic_sparsity, then applies the generic dense solver checks.
// On failure *error holds the reason and false is returned.
bool OptionsAreValidForDenseSchur(const Solver::Options& options,
                                  std::string* error) {
  CHECK_EQ(options.linear_solver_type, DENSE_SCHUR);

  if (options.dynamic_sparsity) {
    *error = "dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY";
    return false;
  }

  // Nothing else is specific to DENSE_SCHUR; the dense backend check decides.
  return OptionsAreValidForDenseSolver(options, error);
}
|
||||
|
||||
// Validation for SPARSE_SCHUR. CHECK-fails if called with any other
// linear_solver_type.
//
// Rejects dynamic_sparsity; otherwise the shared sparse Cholesky checks
// decide the result. On failure *error holds the reason.
bool OptionsAreValidForSparseSchur(const Solver::Options& options,
                                   std::string* error) {
  CHECK_EQ(options.linear_solver_type, SPARSE_SCHUR);

  if (!options.dynamic_sparsity) {
    return OptionsAreValidForSparseCholeskyBasedSolver(options, error);
  }

  *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
  return false;
}
|
||||
|
||||
// Validation for ITERATIVE_SCHUR. CHECK-fails if called with any other
// linear_solver_type.
//
// Returns true when the configuration is usable; otherwise writes the reason
// into *error and returns false. Checks, in order:
//   - dynamic_sparsity is rejected.
//   - use_explicit_schur_complement requires the SCHUR_JACOBI preconditioner
//     and is incompatible with use_spse_initialization.
//   - When use_spse_initialization is set or the preconditioner is
//     SCHUR_POWER_SERIES_EXPANSION: max_num_spse_iterations >= 1 and
//     spse_tolerance >= 0.
//   - use_mixed_precision_solves is rejected.
//   - The SUBSET preconditioner is rejected.
//   - CLUSTER_JACOBI / CLUSTER_TRIDIAGONAL additionally go through the shared
//     sparse Cholesky checks.
bool OptionsAreValidForIterativeSchur(const Solver::Options& options,
                                      std::string* error) {
  CHECK_EQ(options.linear_solver_type, ITERATIVE_SCHUR);

  // Checked exactly once: a second identical test further down could never
  // fire because this one returns first, so it has been removed.
  if (options.dynamic_sparsity) {
    *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
    return false;
  }

  if (options.use_explicit_schur_complement) {
    if (options.preconditioner_type != SCHUR_JACOBI) {
      *error =
          "use_explicit_schur_complement only supports "
          "SCHUR_JACOBI as the preconditioner.";
      return false;
    }
    if (options.use_spse_initialization) {
      *error =
          "use_explicit_schur_complement does not support "
          "use_spse_initialization.";
      return false;
    }
  }

  // The power series expansion parameters only matter when the expansion is
  // actually used, either for initialization or as the preconditioner.
  if (options.use_spse_initialization ||
      options.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION) {
    // OPTION_GE sets *error and returns false from this function on failure.
    OPTION_GE(max_num_spse_iterations, 1);
    OPTION_GE(spse_tolerance, 0.0);
  }

  if (options.use_mixed_precision_solves) {
    *error = "Can't use use_mixed_precision_solves with ITERATIVE_SCHUR";
    return false;
  }

  if (options.preconditioner_type == SUBSET) {
    *error = "Can't use SUBSET preconditioner with ITERATIVE_SCHUR";
    return false;
  }

  // CLUSTER_JACOBI and CLUSTER_TRIDIAGONAL require sparse Cholesky
  // factorization.
  if (options.preconditioner_type == CLUSTER_JACOBI ||
      options.preconditioner_type == CLUSTER_TRIDIAGONAL) {
    return OptionsAreValidForSparseCholeskyBasedSolver(options, error);
  }

  return true;
}
|
||||
|
||||
// Validation for CGNR. CHECK-fails if called with any other
// linear_solver_type.
//
// Returns true when the configuration is usable; otherwise writes the reason
// into *error and returns false. Checks, in order:
//   - preconditioner_type must be IDENTITY, JACOBI or SUBSET.
//   - use_mixed_precision_solves and dynamic_sparsity are rejected.
//   - With SUBSET: CUDA_SPARSE is rejected,
//     residual_blocks_for_subset_preconditioner must be non-empty, and the
//     shared sparse Cholesky checks must pass.
//   - With CUDA_SPARSE: the library must be reported as available.
bool OptionsAreValidForCgnr(const Solver::Options& options,
                            std::string* error) {
  CHECK_EQ(options.linear_solver_type, CGNR);

  switch (options.preconditioner_type) {
    case IDENTITY:
    case JACOBI:
    case SUBSET:
      break;
    default:
      *error =
          StringPrintf("Can't use CGNR with preconditioner_type = %s.",
                       PreconditionerTypeToString(options.preconditioner_type));
      return false;
  }

  if (options.use_mixed_precision_solves) {
    *error = "use_mixed_precision_solves cannot be used with CGNR";
    return false;
  }

  if (options.dynamic_sparsity) {
    *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
    return false;
  }

  const bool uses_subset = (options.preconditioner_type == SUBSET);
  const bool uses_cuda_sparse =
      (options.sparse_linear_algebra_library_type == CUDA_SPARSE);

  if (uses_subset) {
    if (uses_cuda_sparse) {
      *error =
          "Can't use CGNR with preconditioner_type = SUBSET when "
          "sparse_linear_algebra_library_type = CUDA_SPARSE.";
      return false;
    }

    if (options.residual_blocks_for_subset_preconditioner.empty()) {
      *error =
          "When using SUBSET preconditioner, "
          "residual_blocks_for_subset_preconditioner cannot be empty";
      return false;
    }

    // SUBSET preconditioner requires sparse Cholesky factorization.
    const bool sparse_cholesky_ok =
        OptionsAreValidForSparseCholeskyBasedSolver(options, error);
    if (!sparse_cholesky_ok) {
      return false;
    }
  }

  // Check options for CGNR with CUDA_SPARSE. The availability query is only
  // made when CUDA_SPARSE is actually selected.
  if (uses_cuda_sparse &&
      !IsSparseLinearAlgebraLibraryTypeAvailable(CUDA_SPARSE)) {
    *error =
        "Can't use CGNR with sparse_linear_algebra_library_type = "
        "CUDA_SPARSE because support was not enabled when Ceres was built.";
    return false;
  }

  return true;
}
|
||||
|
||||
// Dispatches to the validation routine that matches
// options.linear_solver_type and returns its verdict, with *error filled in
// by that routine on failure.
//
// An unrecognized solver type is reported through LOG(FATAL); false is
// returned should execution continue past that.
bool OptionsAreValidForLinearSolver(const Solver::Options& options,
                                    std::string* error) {
  using Validator = bool (*)(const Solver::Options&, std::string*);

  Validator validate = nullptr;
  switch (options.linear_solver_type) {
    case DENSE_NORMAL_CHOLESKY:
      validate = &OptionsAreValidForDenseNormalCholesky;
      break;
    case DENSE_QR:
      validate = &OptionsAreValidForDenseQr;
      break;
    case SPARSE_NORMAL_CHOLESKY:
      validate = &OptionsAreValidForSparseNormalCholesky;
      break;
    case DENSE_SCHUR:
      validate = &OptionsAreValidForDenseSchur;
      break;
    case SPARSE_SCHUR:
      validate = &OptionsAreValidForSparseSchur;
      break;
    case ITERATIVE_SCHUR:
      validate = &OptionsAreValidForIterativeSchur;
      break;
    case CGNR:
      validate = &OptionsAreValidForCgnr;
      break;
    default:
      break;
  }

  if (validate == nullptr) {
    LOG(FATAL) << "Congratulations you have found a bug. Please report "
                  "this to the "
                  "Ceres Solver developers. Unknown linear solver type: "
               << LinearSolverTypeToString(options.linear_solver_type);
    return false;
  }

  return validate(options, error);
}
|
||||
|
||||
bool TrustRegionOptionsAreValid(const Solver::Options& options,
|
||||
std::string* error) {
|
||||
OPTION_GT(initial_trust_region_radius, 0.0);
|
||||
OPTION_GT(min_trust_region_radius, 0.0);
|
||||
OPTION_GT(max_trust_region_radius, 0.0);
|
||||
@@ -121,7 +401,7 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
OPTION_GE(max_num_consecutive_invalid_steps, 0);
|
||||
OPTION_GT(eta, 0.0);
|
||||
OPTION_GE(min_linear_solver_iterations, 0);
|
||||
OPTION_GE(max_linear_solver_iterations, 1);
|
||||
OPTION_GE(max_linear_solver_iterations, 0);
|
||||
OPTION_LE_OPTION(min_linear_solver_iterations, max_linear_solver_iterations);
|
||||
|
||||
if (options.use_inner_iterations) {
|
||||
@@ -132,80 +412,19 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
OPTION_GT(max_consecutive_nonmonotonic_steps, 0);
|
||||
}
|
||||
|
||||
if (options.linear_solver_type == ITERATIVE_SCHUR &&
|
||||
options.use_explicit_schur_complement &&
|
||||
options.preconditioner_type != SCHUR_JACOBI) {
|
||||
if ((options.trust_region_strategy_type == DOGLEG) &&
|
||||
IsIterativeSolver(options.linear_solver_type)) {
|
||||
*error =
|
||||
"use_explicit_schur_complement only supports "
|
||||
"SCHUR_JACOBI as the preconditioner.";
|
||||
"DOGLEG only supports exact factorization based linear "
|
||||
"solvers. If you want to use an iterative solver please "
|
||||
"use LEVENBERG_MARQUARDT as the trust_region_strategy_type";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!IsDenseLinearAlgebraLibraryTypeAvailable(
|
||||
options.dense_linear_algebra_library_type) &&
|
||||
(options.linear_solver_type == DENSE_NORMAL_CHOLESKY ||
|
||||
options.linear_solver_type == DENSE_QR ||
|
||||
options.linear_solver_type == DENSE_SCHUR)) {
|
||||
*error = StringPrintf(
|
||||
"Can't use %s with "
|
||||
"Solver::Options::dense_linear_algebra_library_type = %s "
|
||||
"because %s was not enabled when Ceres was built.",
|
||||
LinearSolverTypeToString(options.linear_solver_type),
|
||||
DenseLinearAlgebraLibraryTypeToString(
|
||||
options.dense_linear_algebra_library_type),
|
||||
DenseLinearAlgebraLibraryTypeToString(
|
||||
options.dense_linear_algebra_library_type));
|
||||
if (!OptionsAreValidForLinearSolver(options, error)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
const char* sparse_linear_algebra_library_name =
|
||||
SparseLinearAlgebraLibraryTypeToString(
|
||||
options.sparse_linear_algebra_library_type);
|
||||
const char* name = nullptr;
|
||||
if (options.linear_solver_type == SPARSE_NORMAL_CHOLESKY ||
|
||||
options.linear_solver_type == SPARSE_SCHUR) {
|
||||
name = LinearSolverTypeToString(options.linear_solver_type);
|
||||
} else if ((options.linear_solver_type == ITERATIVE_SCHUR &&
|
||||
(options.preconditioner_type == CLUSTER_JACOBI ||
|
||||
options.preconditioner_type == CLUSTER_TRIDIAGONAL)) ||
|
||||
(options.linear_solver_type == CGNR &&
|
||||
options.preconditioner_type == SUBSET)) {
|
||||
name = PreconditionerTypeToString(options.preconditioner_type);
|
||||
}
|
||||
|
||||
if (name) {
|
||||
if (options.sparse_linear_algebra_library_type == NO_SPARSE) {
|
||||
*error = StringPrintf(
|
||||
"Can't use %s with "
|
||||
"Solver::Options::sparse_linear_algebra_library_type = %s.",
|
||||
name,
|
||||
sparse_linear_algebra_library_name);
|
||||
return false;
|
||||
} else if (!IsSparseLinearAlgebraLibraryTypeAvailable(
|
||||
options.sparse_linear_algebra_library_type)) {
|
||||
*error = StringPrintf(
|
||||
"Can't use %s with "
|
||||
"Solver::Options::sparse_linear_algebra_library_type = %s, "
|
||||
"because support was not enabled when Ceres Solver was built.",
|
||||
name,
|
||||
sparse_linear_algebra_library_name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (options.trust_region_strategy_type == DOGLEG) {
|
||||
if (options.linear_solver_type == ITERATIVE_SCHUR ||
|
||||
options.linear_solver_type == CGNR) {
|
||||
*error =
|
||||
"DOGLEG only supports exact factorization based linear "
|
||||
"solvers. If you want to use an iterative solver please "
|
||||
"use LEVENBERG_MARQUARDT as the trust_region_strategy_type";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!options.trust_region_minimizer_iterations_to_dump.empty() &&
|
||||
options.trust_region_problem_dump_format_type != CONSOLE &&
|
||||
options.trust_region_problem_dump_directory.empty()) {
|
||||
@@ -213,33 +432,11 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (options.dynamic_sparsity) {
|
||||
if (options.linear_solver_type != SPARSE_NORMAL_CHOLESKY) {
|
||||
*error =
|
||||
"Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY.";
|
||||
return false;
|
||||
}
|
||||
if (options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) {
|
||||
*error =
|
||||
"ACCELERATE_SPARSE is not currently supported with dynamic sparsity.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (options.linear_solver_type == CGNR &&
|
||||
options.preconditioner_type == SUBSET &&
|
||||
options.residual_blocks_for_subset_preconditioner.empty()) {
|
||||
*error =
|
||||
"When using SUBSET preconditioner, "
|
||||
"Solver::Options::residual_blocks_for_subset_preconditioner cannot be "
|
||||
"empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
bool LineSearchOptionsAreValid(const Solver::Options& options,
|
||||
std::string* error) {
|
||||
OPTION_GT(max_lbfgs_rank, 0);
|
||||
OPTION_GT(min_line_search_step_size, 0.0);
|
||||
OPTION_GT(max_line_search_step_contraction, 0.0);
|
||||
@@ -259,9 +456,10 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
options.line_search_direction_type == ceres::LBFGS) &&
|
||||
options.line_search_type != ceres::WOLFE) {
|
||||
*error =
|
||||
string("Invalid configuration: Solver::Options::line_search_type = ") +
|
||||
string(LineSearchTypeToString(options.line_search_type)) +
|
||||
string(
|
||||
std::string(
|
||||
"Invalid configuration: Solver::Options::line_search_type = ") +
|
||||
std::string(LineSearchTypeToString(options.line_search_type)) +
|
||||
std::string(
|
||||
". When using (L)BFGS, "
|
||||
"Solver::Options::line_search_type must be set to WOLFE.");
|
||||
return false;
|
||||
@@ -269,8 +467,8 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
|
||||
// Warn user if they have requested BISECTION interpolation, but constraints
|
||||
// on max/min step size change during line search prevent bisection scaling
|
||||
// from occurring. Warn only, as this is likely a user mistake, but one which
|
||||
// does not prevent us from continuing.
|
||||
// from occurring. Warn only, as this is likely a user mistake, but one
|
||||
// which does not prevent us from continuing.
|
||||
if (options.line_search_interpolation_type == ceres::BISECTION &&
|
||||
(options.max_line_search_step_contraction > 0.5 ||
|
||||
options.min_line_search_step_contraction < 0.5)) {
|
||||
@@ -295,7 +493,7 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) {
|
||||
#undef OPTION_LE_OPTION
|
||||
#undef OPTION_LT_OPTION
|
||||
|
||||
void StringifyOrdering(const vector<int>& ordering, string* report) {
|
||||
void StringifyOrdering(const std::vector<int>& ordering, std::string* report) {
|
||||
if (ordering.empty()) {
|
||||
internal::StringAppendF(report, "AUTOMATIC");
|
||||
return;
|
||||
@@ -339,7 +537,7 @@ void PreSolveSummarize(const Solver::Options& options,
|
||||
&(summary->inner_iteration_ordering_given));
|
||||
|
||||
// clang-format off
|
||||
summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; // NOLINT
|
||||
summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type;
|
||||
summary->dogleg_type = options.dogleg_type;
|
||||
summary->inner_iteration_time_in_seconds = 0.0;
|
||||
summary->num_line_search_steps = 0;
|
||||
@@ -348,18 +546,19 @@ void PreSolveSummarize(const Solver::Options& options,
|
||||
summary->line_search_polynomial_minimization_time_in_seconds = 0.0;
|
||||
summary->line_search_total_time_in_seconds = 0.0;
|
||||
summary->inner_iterations_given = options.use_inner_iterations;
|
||||
summary->line_search_direction_type = options.line_search_direction_type; // NOLINT
|
||||
summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT
|
||||
summary->line_search_direction_type = options.line_search_direction_type;
|
||||
summary->line_search_interpolation_type = options.line_search_interpolation_type;
|
||||
summary->line_search_type = options.line_search_type;
|
||||
summary->linear_solver_type_given = options.linear_solver_type;
|
||||
summary->max_lbfgs_rank = options.max_lbfgs_rank;
|
||||
summary->minimizer_type = options.minimizer_type;
|
||||
summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type; // NOLINT
|
||||
summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type;
|
||||
summary->num_threads_given = options.num_threads;
|
||||
summary->preconditioner_type_given = options.preconditioner_type;
|
||||
summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; // NOLINT
|
||||
summary->trust_region_strategy_type = options.trust_region_strategy_type; // NOLINT
|
||||
summary->visibility_clustering_type = options.visibility_clustering_type; // NOLINT
|
||||
summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type;
|
||||
summary->linear_solver_ordering_type = options.linear_solver_ordering_type;
|
||||
summary->trust_region_strategy_type = options.trust_region_strategy_type;
|
||||
summary->visibility_clustering_type = options.visibility_clustering_type;
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
@@ -367,19 +566,23 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
|
||||
Solver::Summary* summary) {
|
||||
internal::OrderingToGroupSizes(pp.options.linear_solver_ordering.get(),
|
||||
&(summary->linear_solver_ordering_used));
|
||||
// TODO(sameeragarwal): Update the preprocessor to collapse the
|
||||
// second and higher groups into one group when nested dissection is
|
||||
// used.
|
||||
internal::OrderingToGroupSizes(pp.options.inner_iteration_ordering.get(),
|
||||
&(summary->inner_iteration_ordering_used));
|
||||
|
||||
// clang-format off
|
||||
summary->inner_iterations_used = pp.inner_iteration_minimizer.get() != nullptr; // NOLINT
|
||||
summary->inner_iterations_used = pp.inner_iteration_minimizer != nullptr;
|
||||
summary->linear_solver_type_used = pp.linear_solver_options.type;
|
||||
summary->mixed_precision_solves_used = pp.options.use_mixed_precision_solves;
|
||||
summary->num_threads_used = pp.options.num_threads;
|
||||
summary->preconditioner_type_used = pp.options.preconditioner_type;
|
||||
// clang-format on
|
||||
|
||||
internal::SetSummaryFinalCost(summary);
|
||||
|
||||
if (pp.reduced_program.get() != nullptr) {
|
||||
if (pp.reduced_program != nullptr) {
|
||||
SummarizeReducedProgram(*pp.reduced_program, summary);
|
||||
}
|
||||
|
||||
@@ -389,8 +592,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
|
||||
// case if the preprocessor failed, or if the reduced problem did
|
||||
// not contain any parameter blocks. Thus, only extract the
|
||||
// evaluator statistics if one exists.
|
||||
if (pp.evaluator.get() != nullptr) {
|
||||
const map<string, CallStatistics>& evaluator_statistics =
|
||||
if (pp.evaluator != nullptr) {
|
||||
const std::map<std::string, CallStatistics>& evaluator_statistics =
|
||||
pp.evaluator->Statistics();
|
||||
{
|
||||
const CallStatistics& call_stats = FindWithDefault(
|
||||
@@ -411,8 +614,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp,
|
||||
// Again, like the evaluator, there may or may not be a linear
|
||||
// solver from which we can extract run time statistics. In
|
||||
// particular the line search solver does not use a linear solver.
|
||||
if (pp.linear_solver.get() != nullptr) {
|
||||
const map<string, CallStatistics>& linear_solver_statistics =
|
||||
if (pp.linear_solver != nullptr) {
|
||||
const std::map<std::string, CallStatistics>& linear_solver_statistics =
|
||||
pp.linear_solver->Statistics();
|
||||
const CallStatistics& call_stats = FindWithDefault(
|
||||
linear_solver_statistics, "LinearSolver::Solve", CallStatistics());
|
||||
@@ -468,9 +671,23 @@ std::string SchurStructureToString(const int row_block_size,
|
||||
return internal::StringPrintf("%s,%s,%s", row.c_str(), e.c_str(), f.c_str());
|
||||
}
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
// Decides whether the given options select a CUDA backend.
//
// For the dense solvers (DENSE_NORMAL_CHOLESKY, DENSE_SCHUR, DENSE_QR) this
// is the case when dense_linear_algebra_library_type is CUDA; for CGNR when
// sparse_linear_algebra_library_type is CUDA_SPARSE. Every other linear
// solver type yields false.
bool IsCudaRequired(const Solver::Options& options) {
  switch (options.linear_solver_type) {
    case DENSE_NORMAL_CHOLESKY:
    case DENSE_SCHUR:
    case DENSE_QR:
      return options.dense_linear_algebra_library_type == CUDA;
    case CGNR:
      return options.sparse_linear_algebra_library_type == CUDA_SPARSE;
    default:
      return false;
  }
}
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
bool Solver::Options::IsValid(string* error) const {
|
||||
bool Solver::Options::IsValid(std::string* error) const {
|
||||
if (!CommonOptionsAreValid(*this, error)) {
|
||||
return false;
|
||||
}
|
||||
@@ -509,10 +726,19 @@ void Solver::Solve(const Solver::Options& options,
|
||||
return;
|
||||
}
|
||||
|
||||
ProblemImpl* problem_impl = problem->impl_.get();
|
||||
ProblemImpl* problem_impl = problem->mutable_impl();
|
||||
Program* program = problem_impl->mutable_program();
|
||||
PreSolveSummarize(options, problem_impl, summary);
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
if (IsCudaRequired(options)) {
|
||||
if (!problem_impl->context()->InitCuda(&summary->message)) {
|
||||
LOG(ERROR) << "Terminating: " << summary->message;
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
// If gradient_checking is enabled, wrap all cost functions in a
|
||||
// gradient checker and install a callback that terminates if any gradient
|
||||
// error is detected.
|
||||
@@ -582,7 +808,7 @@ void Solver::Solve(const Solver::Options& options,
|
||||
}
|
||||
|
||||
const double postprocessor_start_time = WallTimeInSeconds();
|
||||
problem_impl = problem->impl_.get();
|
||||
problem_impl = problem->mutable_impl();
|
||||
program = problem_impl->mutable_program();
|
||||
// On exit, ensure that the parameter blocks again point at the user
|
||||
// provided values and the parameter blocks are numbered according
|
||||
@@ -610,7 +836,7 @@ void Solve(const Solver::Options& options,
|
||||
solver.Solve(options, problem, summary);
|
||||
}
|
||||
|
||||
string Solver::Summary::BriefReport() const {
|
||||
std::string Solver::Summary::BriefReport() const {
|
||||
return StringPrintf(
|
||||
"Ceres Solver Report: "
|
||||
"Iterations: %d, "
|
||||
@@ -623,10 +849,12 @@ string Solver::Summary::BriefReport() const {
|
||||
TerminationTypeToString(termination_type));
|
||||
}
|
||||
|
||||
string Solver::Summary::FullReport() const {
|
||||
std::string Solver::Summary::FullReport() const {
|
||||
using internal::VersionString;
|
||||
|
||||
string report = string("\nSolver Summary (v " + VersionString() + ")\n\n");
|
||||
// NOTE operator+ is not usable for concatenating a string and a string_view.
|
||||
std::string report =
|
||||
std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n";
|
||||
|
||||
StringAppendF(&report, "%45s %21s\n", "Original", "Reduced");
|
||||
StringAppendF(&report,
|
||||
@@ -660,21 +888,13 @@ string Solver::Summary::FullReport() const {
|
||||
if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY ||
|
||||
linear_solver_type_used == DENSE_SCHUR ||
|
||||
linear_solver_type_used == DENSE_QR) {
|
||||
const char* mixed_precision_suffix =
|
||||
(mixed_precision_solves_used ? "(Mixed Precision)" : "");
|
||||
StringAppendF(&report,
|
||||
"\nDense linear algebra library %15s\n",
|
||||
"\nDense linear algebra library %15s %s\n",
|
||||
DenseLinearAlgebraLibraryTypeToString(
|
||||
dense_linear_algebra_library_type));
|
||||
}
|
||||
|
||||
if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
|
||||
linear_solver_type_used == SPARSE_SCHUR ||
|
||||
(linear_solver_type_used == ITERATIVE_SCHUR &&
|
||||
(preconditioner_type_used == CLUSTER_JACOBI ||
|
||||
preconditioner_type_used == CLUSTER_TRIDIAGONAL))) {
|
||||
StringAppendF(&report,
|
||||
"\nSparse linear algebra library %15s\n",
|
||||
SparseLinearAlgebraLibraryTypeToString(
|
||||
sparse_linear_algebra_library_type));
|
||||
dense_linear_algebra_library_type),
|
||||
mixed_precision_suffix);
|
||||
}
|
||||
|
||||
StringAppendF(&report,
|
||||
@@ -687,17 +907,50 @@ string Solver::Summary::FullReport() const {
|
||||
StringAppendF(&report, " (SUBSPACE)");
|
||||
}
|
||||
}
|
||||
StringAppendF(&report, "\n");
|
||||
StringAppendF(&report, "\n");
|
||||
|
||||
const bool used_sparse_linear_algebra_library =
|
||||
linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
|
||||
linear_solver_type_used == SPARSE_SCHUR ||
|
||||
linear_solver_type_used == CGNR ||
|
||||
(linear_solver_type_used == ITERATIVE_SCHUR &&
|
||||
(preconditioner_type_used == CLUSTER_JACOBI ||
|
||||
preconditioner_type_used == CLUSTER_TRIDIAGONAL));
|
||||
|
||||
const bool linear_solver_ordering_required =
|
||||
linear_solver_type_used == SPARSE_SCHUR ||
|
||||
(linear_solver_type_used == ITERATIVE_SCHUR &&
|
||||
(preconditioner_type_used == CLUSTER_JACOBI ||
|
||||
preconditioner_type_used == CLUSTER_TRIDIAGONAL)) ||
|
||||
(linear_solver_type_used == CGNR && preconditioner_type_used == SUBSET);
|
||||
|
||||
if (used_sparse_linear_algebra_library) {
|
||||
const char* mixed_precision_suffix =
|
||||
(mixed_precision_solves_used ? "(Mixed Precision)" : "");
|
||||
if (linear_solver_ordering_required) {
|
||||
StringAppendF(
|
||||
&report,
|
||||
"\nSparse linear algebra library %15s + %s %s\n",
|
||||
SparseLinearAlgebraLibraryTypeToString(
|
||||
sparse_linear_algebra_library_type),
|
||||
LinearSolverOrderingTypeToString(linear_solver_ordering_type),
|
||||
mixed_precision_suffix);
|
||||
} else {
|
||||
StringAppendF(&report,
|
||||
"\nSparse linear algebra library %15s %s\n",
|
||||
SparseLinearAlgebraLibraryTypeToString(
|
||||
sparse_linear_algebra_library_type),
|
||||
mixed_precision_suffix);
|
||||
}
|
||||
}
|
||||
|
||||
StringAppendF(&report, "\n");
|
||||
StringAppendF(&report, "%45s %21s\n", "Given", "Used");
|
||||
StringAppendF(&report,
|
||||
"Linear solver %25s%25s\n",
|
||||
LinearSolverTypeToString(linear_solver_type_given),
|
||||
LinearSolverTypeToString(linear_solver_type_used));
|
||||
|
||||
if (linear_solver_type_given == CGNR ||
|
||||
linear_solver_type_given == ITERATIVE_SCHUR) {
|
||||
if (IsIterativeSolver(linear_solver_type_given)) {
|
||||
StringAppendF(&report,
|
||||
"Preconditioner %25s%25s\n",
|
||||
PreconditionerTypeToString(preconditioner_type_given),
|
||||
@@ -717,9 +970,9 @@ string Solver::Summary::FullReport() const {
|
||||
num_threads_given,
|
||||
num_threads_used);
|
||||
|
||||
string given;
|
||||
std::string given;
|
||||
StringifyOrdering(linear_solver_ordering_given, &given);
|
||||
string used;
|
||||
std::string used;
|
||||
StringifyOrdering(linear_solver_ordering_used, &used);
|
||||
StringAppendF(&report,
|
||||
"Linear solver ordering %22s %24s\n",
|
||||
@@ -740,9 +993,9 @@ string Solver::Summary::FullReport() const {
|
||||
}
|
||||
|
||||
if (inner_iterations_used) {
|
||||
string given;
|
||||
std::string given;
|
||||
StringifyOrdering(inner_iteration_ordering_given, &given);
|
||||
string used;
|
||||
std::string used;
|
||||
StringifyOrdering(inner_iteration_ordering_used, &used);
|
||||
StringAppendF(&report,
|
||||
"Inner iteration ordering %20s %24s\n",
|
||||
@@ -753,7 +1006,7 @@ string Solver::Summary::FullReport() const {
|
||||
// LINE_SEARCH HEADER
|
||||
StringAppendF(&report, "\nMinimizer %19s\n", "LINE_SEARCH");
|
||||
|
||||
string line_search_direction_string;
|
||||
std::string line_search_direction_string;
|
||||
if (line_search_direction_type == LBFGS) {
|
||||
line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank);
|
||||
} else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) {
|
||||
@@ -768,7 +1021,7 @@ string Solver::Summary::FullReport() const {
|
||||
"Line search direction %19s\n",
|
||||
line_search_direction_string.c_str());
|
||||
|
||||
const string line_search_type_string = StringPrintf(
|
||||
const std::string line_search_type_string = StringPrintf(
|
||||
"%s %s",
|
||||
LineSearchInterpolationTypeToString(line_search_interpolation_type),
|
||||
LineSearchTypeToString(line_search_type));
|
||||
|
||||
48
extern/ceres/internal/ceres/solver_utils.cc
vendored
48
extern/ceres/internal/ceres/solver_utils.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,8 +30,6 @@
|
||||
|
||||
#include "ceres/solver_utils.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/export.h"
|
||||
@@ -40,8 +38,7 @@
|
||||
#include "cuda_runtime.h"
|
||||
#endif // CERES_NO_CUDA
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// clang-format off
|
||||
#define CERES_EIGEN_VERSION \
|
||||
@@ -50,52 +47,47 @@ namespace internal {
|
||||
CERES_TO_STRING(EIGEN_MINOR_VERSION)
|
||||
// clang-format on
|
||||
|
||||
std::string VersionString() {
|
||||
std::string value = std::string(CERES_VERSION_STRING);
|
||||
value += "-eigen-(" + std::string(CERES_EIGEN_VERSION) + ")";
|
||||
constexpr char kVersion[] =
|
||||
// clang-format off
|
||||
CERES_VERSION_STRING
|
||||
"-eigen-(" CERES_EIGEN_VERSION ")"
|
||||
|
||||
#ifdef CERES_NO_LAPACK
|
||||
value += "-no_lapack";
|
||||
"-no_lapack"
|
||||
#else
|
||||
value += "-lapack";
|
||||
"-lapack"
|
||||
#endif
|
||||
|
||||
#ifndef CERES_NO_SUITESPARSE
|
||||
value += "-suitesparse-(" + std::string(CERES_SUITESPARSE_VERSION) + ")";
|
||||
"-suitesparse-(" CERES_SUITESPARSE_VERSION ")"
|
||||
#endif
|
||||
|
||||
#ifndef CERES_NO_CXSPARSE
|
||||
value += "-cxsparse-(" + std::string(CERES_CXSPARSE_VERSION) + ")";
|
||||
#if !defined(CERES_NO_EIGEN_METIS) || !defined(CERES_NO_CHOLMOD_PARTITION)
|
||||
"-metis-(" CERES_METIS_VERSION ")"
|
||||
#endif
|
||||
|
||||
#ifndef CERES_NO_ACCELERATE_SPARSE
|
||||
value += "-acceleratesparse";
|
||||
"-acceleratesparse"
|
||||
#endif
|
||||
|
||||
#ifdef CERES_USE_EIGEN_SPARSE
|
||||
value += "-eigensparse";
|
||||
"-eigensparse"
|
||||
#endif
|
||||
|
||||
#ifdef CERES_RESTRUCT_SCHUR_SPECIALIZATIONS
|
||||
value += "-no_schur_specializations";
|
||||
#endif
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
value += "-openmp";
|
||||
#else
|
||||
value += "-no_openmp";
|
||||
"-no_schur_specializations"
|
||||
#endif
|
||||
|
||||
#ifdef CERES_NO_CUSTOM_BLAS
|
||||
value += "-no_custom_blas";
|
||||
"-no_custom_blas"
|
||||
#endif
|
||||
|
||||
#ifndef CERES_NO_CUDA
|
||||
value += "-cuda-(" + std::to_string(CUDART_VERSION) + ")";
|
||||
"-cuda-(" CERES_TO_STRING(CUDART_VERSION) ")"
|
||||
#endif
|
||||
;
|
||||
// clang-format on
|
||||
|
||||
return value;
|
||||
}
|
||||
std::string_view VersionString() noexcept { return kVersion; }
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
12
extern/ceres/internal/ceres/solver_utils.h
vendored
12
extern/ceres/internal/ceres/solver_utils.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,15 +32,14 @@
|
||||
#define CERES_INTERNAL_SOLVER_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/iteration_callback.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
template <typename SummaryType>
|
||||
bool IsSolutionUsable(const SummaryType& summary) {
|
||||
@@ -61,10 +60,9 @@ void SetSummaryFinalCost(SummaryType* summary) {
|
||||
}
|
||||
|
||||
CERES_NO_EXPORT
|
||||
std::string VersionString();
|
||||
std::string_view VersionString() noexcept;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
55
extern/ceres/internal/ceres/sparse_cholesky.cc
vendored
55
extern/ceres/internal/ceres/sparse_cholesky.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,30 +31,28 @@
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "ceres/accelerate_sparse.h"
|
||||
#include "ceres/cxsparse.h"
|
||||
#include "ceres/eigensparse.h"
|
||||
#include "ceres/float_cxsparse.h"
|
||||
#include "ceres/float_suitesparse.h"
|
||||
#include "ceres/iterative_refiner.h"
|
||||
#include "ceres/suitesparse.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
std::unique_ptr<SparseCholesky> SparseCholesky::Create(
|
||||
const LinearSolver::Options& options) {
|
||||
const OrderingType ordering_type = options.use_postordering ? AMD : NATURAL;
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky;
|
||||
|
||||
switch (options.sparse_linear_algebra_library_type) {
|
||||
case SUITE_SPARSE:
|
||||
#ifndef CERES_NO_SUITESPARSE
|
||||
if (options.use_mixed_precision_solves) {
|
||||
sparse_cholesky = FloatSuiteSparseCholesky::Create(ordering_type);
|
||||
sparse_cholesky =
|
||||
FloatSuiteSparseCholesky::Create(options.ordering_type);
|
||||
} else {
|
||||
sparse_cholesky = SuiteSparseCholesky::Create(ordering_type);
|
||||
sparse_cholesky = SuiteSparseCholesky::Create(options.ordering_type);
|
||||
}
|
||||
break;
|
||||
#else
|
||||
@@ -64,9 +62,10 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
|
||||
case EIGEN_SPARSE:
|
||||
#ifdef CERES_USE_EIGEN_SPARSE
|
||||
if (options.use_mixed_precision_solves) {
|
||||
sparse_cholesky = FloatEigenSparseCholesky::Create(ordering_type);
|
||||
sparse_cholesky =
|
||||
FloatEigenSparseCholesky::Create(options.ordering_type);
|
||||
} else {
|
||||
sparse_cholesky = EigenSparseCholesky::Create(ordering_type);
|
||||
sparse_cholesky = EigenSparseCholesky::Create(options.ordering_type);
|
||||
}
|
||||
break;
|
||||
#else
|
||||
@@ -74,25 +73,14 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
|
||||
<< "Eigen's sparse Cholesky factorization routines.";
|
||||
#endif
|
||||
|
||||
case CX_SPARSE:
|
||||
#ifndef CERES_NO_CXSPARSE
|
||||
if (options.use_mixed_precision_solves) {
|
||||
sparse_cholesky = FloatCXSparseCholesky::Create(ordering_type);
|
||||
} else {
|
||||
sparse_cholesky = CXSparseCholesky::Create(ordering_type);
|
||||
}
|
||||
break;
|
||||
#else
|
||||
LOG(FATAL) << "Ceres was compiled without support for CXSparse.";
|
||||
#endif
|
||||
|
||||
case ACCELERATE_SPARSE:
|
||||
#ifndef CERES_NO_ACCELERATE_SPARSE
|
||||
if (options.use_mixed_precision_solves) {
|
||||
sparse_cholesky = AppleAccelerateCholesky<float>::Create(ordering_type);
|
||||
sparse_cholesky =
|
||||
AppleAccelerateCholesky<float>::Create(options.ordering_type);
|
||||
} else {
|
||||
sparse_cholesky =
|
||||
AppleAccelerateCholesky<double>::Create(ordering_type);
|
||||
AppleAccelerateCholesky<double>::Create(options.ordering_type);
|
||||
}
|
||||
break;
|
||||
#else
|
||||
@@ -107,10 +95,10 @@ std::unique_ptr<SparseCholesky> SparseCholesky::Create(
|
||||
}
|
||||
|
||||
if (options.max_num_refinement_iterations > 0) {
|
||||
std::unique_ptr<IterativeRefiner> refiner(
|
||||
new IterativeRefiner(options.max_num_refinement_iterations));
|
||||
sparse_cholesky = std::unique_ptr<SparseCholesky>(new RefinedSparseCholesky(
|
||||
std::move(sparse_cholesky), std::move(refiner)));
|
||||
auto refiner = std::make_unique<SparseIterativeRefiner>(
|
||||
options.max_num_refinement_iterations);
|
||||
sparse_cholesky = std::make_unique<RefinedSparseCholesky>(
|
||||
std::move(sparse_cholesky), std::move(refiner));
|
||||
}
|
||||
return sparse_cholesky;
|
||||
}
|
||||
@@ -123,7 +111,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve(
|
||||
double* solution,
|
||||
std::string* message) {
|
||||
LinearSolverTerminationType termination_type = Factorize(lhs, message);
|
||||
if (termination_type == LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type == LinearSolverTerminationType::SUCCESS) {
|
||||
termination_type = Solve(rhs, solution, message);
|
||||
}
|
||||
return termination_type;
|
||||
@@ -131,7 +119,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve(
|
||||
|
||||
RefinedSparseCholesky::RefinedSparseCholesky(
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky,
|
||||
std::unique_ptr<IterativeRefiner> iterative_refiner)
|
||||
std::unique_ptr<SparseIterativeRefiner> iterative_refiner)
|
||||
: sparse_cholesky_(std::move(sparse_cholesky)),
|
||||
iterative_refiner_(std::move(iterative_refiner)) {}
|
||||
|
||||
@@ -153,13 +141,12 @@ LinearSolverTerminationType RefinedSparseCholesky::Solve(const double* rhs,
|
||||
std::string* message) {
|
||||
CHECK(lhs_ != nullptr);
|
||||
auto termination_type = sparse_cholesky_->Solve(rhs, solution, message);
|
||||
if (termination_type != LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type != LinearSolverTerminationType::SUCCESS) {
|
||||
return termination_type;
|
||||
}
|
||||
|
||||
iterative_refiner_->Refine(*lhs_, rhs, sparse_cholesky_.get(), solution);
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
26
extern/ceres/internal/ceres/sparse_cholesky.h
vendored
26
extern/ceres/internal/ceres/sparse_cholesky.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// An interface that abstracts away the internal details of various
|
||||
// sparse linear algebra libraries and offers a simple API for solving
|
||||
@@ -63,11 +62,12 @@ namespace internal {
|
||||
//
|
||||
// CompressedRowSparseMatrix lhs = ...;
|
||||
// std::string message;
|
||||
// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message), LINEAR_SOLVER_SUCCESS);
|
||||
// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message),
|
||||
// LinearSolverTerminationType::SUCCESS);
|
||||
// Vector rhs = ...;
|
||||
// Vector solution = ...;
|
||||
// CHECK_EQ(sparse_cholesky->Solve(rhs.data(), solution.data(), &message),
|
||||
// LINEAR_SOLVER_SUCCESS);
|
||||
// LinearSolverTerminationType::SUCCESS);
|
||||
|
||||
class CERES_NO_EXPORT SparseCholesky {
|
||||
public:
|
||||
@@ -105,21 +105,22 @@ class CERES_NO_EXPORT SparseCholesky {
|
||||
|
||||
// Convenience method which combines a call to Factorize and
|
||||
// Solve. Solve is only called if Factorize returns
|
||||
// LINEAR_SOLVER_SUCCESS.
|
||||
// LinearSolverTerminationType::SUCCESS.
|
||||
LinearSolverTerminationType FactorAndSolve(CompressedRowSparseMatrix* lhs,
|
||||
const double* rhs,
|
||||
double* solution,
|
||||
std::string* message);
|
||||
};
|
||||
|
||||
class IterativeRefiner;
|
||||
class SparseIterativeRefiner;
|
||||
|
||||
// Computes an initial solution using the given instance of
|
||||
// SparseCholesky, and then refines it using the IterativeRefiner.
|
||||
// SparseCholesky, and then refines it using the SparseIterativeRefiner.
|
||||
class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky {
|
||||
public:
|
||||
RefinedSparseCholesky(std::unique_ptr<SparseCholesky> sparse_cholesky,
|
||||
std::unique_ptr<IterativeRefiner> iterative_refiner);
|
||||
RefinedSparseCholesky(
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky,
|
||||
std::unique_ptr<SparseIterativeRefiner> iterative_refiner);
|
||||
~RefinedSparseCholesky() override;
|
||||
|
||||
CompressedRowSparseMatrix::StorageType StorageType() const override;
|
||||
@@ -131,12 +132,11 @@ class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky {
|
||||
|
||||
private:
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky_;
|
||||
std::unique_ptr<IterativeRefiner> iterative_refiner_;
|
||||
std::unique_ptr<SparseIterativeRefiner> iterative_refiner_;
|
||||
CompressedRowSparseMatrix* lhs_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
24
extern/ceres/internal/ceres/sparse_matrix.cc
vendored
24
extern/ceres/internal/ceres/sparse_matrix.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,10 +30,24 @@
|
||||
|
||||
#include "ceres/sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
SparseMatrix::~SparseMatrix() = default;
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
void SparseMatrix::SquaredColumnNorm(double* x,
|
||||
ContextImpl* context,
|
||||
int num_threads) const {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
SquaredColumnNorm(x);
|
||||
}
|
||||
|
||||
void SparseMatrix::ScaleColumns(const double* scale,
|
||||
ContextImpl* context,
|
||||
int num_threads) {
|
||||
(void)context;
|
||||
(void)num_threads;
|
||||
ScaleColumns(scale);
|
||||
}
|
||||
|
||||
} // namespace ceres::internal
|
||||
|
||||
25
extern/ceres/internal/ceres/sparse_matrix.h
vendored
25
extern/ceres/internal/ceres/sparse_matrix.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,8 @@
|
||||
#include "ceres/linear_operator.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
class ContextImpl;
|
||||
|
||||
// This class defines the interface for storing and manipulating
|
||||
// sparse matrices. The key property that differentiates different
|
||||
@@ -69,18 +69,30 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator {
|
||||
~SparseMatrix() override;
|
||||
|
||||
// y += Ax;
|
||||
void RightMultiply(const double* x, double* y) const override = 0;
|
||||
using LinearOperator::RightMultiplyAndAccumulate;
|
||||
void RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const override = 0;
|
||||
|
||||
// y += A'x;
|
||||
void LeftMultiply(const double* x, double* y) const override = 0;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const override = 0;
|
||||
|
||||
// In MATLAB notation sum(A.*A, 1)
|
||||
virtual void SquaredColumnNorm(double* x) const = 0;
|
||||
virtual void SquaredColumnNorm(double* x,
|
||||
ContextImpl* context,
|
||||
int num_threads) const;
|
||||
// A = A * diag(scale)
|
||||
virtual void ScaleColumns(const double* scale) = 0;
|
||||
virtual void ScaleColumns(const double* scale,
|
||||
ContextImpl* context,
|
||||
int num_threads);
|
||||
|
||||
// A = 0. A->num_nonzeros() == 0 is true after this call. The
|
||||
// sparsity pattern is preserved.
|
||||
virtual void SetZero() = 0;
|
||||
virtual void SetZero(ContextImpl* /*context*/, int /*num_threads*/) {
|
||||
SetZero();
|
||||
}
|
||||
|
||||
// Resize and populate dense_matrix with a dense version of the
|
||||
// sparse matrix.
|
||||
@@ -103,7 +115,6 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator {
|
||||
virtual int num_nonzeros() const = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SPARSE_MATRIX_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -45,8 +45,7 @@
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
SparseNormalCholeskySolver::SparseNormalCholeskySolver(
|
||||
const LinearSolver::Options& options)
|
||||
@@ -64,7 +63,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
|
||||
EventLogger event_logger("SparseNormalCholeskySolver::Solve");
|
||||
LinearSolver::Summary summary;
|
||||
summary.num_iterations = 1;
|
||||
summary.termination_type = LINEAR_SOLVER_SUCCESS;
|
||||
summary.termination_type = LinearSolverTerminationType::SUCCESS;
|
||||
summary.message = "Success.";
|
||||
|
||||
const int num_cols = A->num_cols();
|
||||
@@ -72,7 +71,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
|
||||
xref.setZero();
|
||||
rhs_.resize(num_cols);
|
||||
rhs_.setZero();
|
||||
A->LeftMultiply(b, rhs_.data());
|
||||
A->LeftMultiplyAndAccumulate(b, rhs_.data());
|
||||
event_logger.AddEvent("Compute RHS");
|
||||
|
||||
if (per_solve_options.D != nullptr) {
|
||||
@@ -110,5 +109,4 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
|
||||
return summary;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -45,8 +45,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
class InnerProductComputer;
|
||||
@@ -75,7 +74,6 @@ class CERES_NO_EXPORT SparseNormalCholeskySolver
|
||||
std::unique_ptr<InnerProductComputer> inner_product_computer_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_
|
||||
|
||||
2
extern/ceres/internal/ceres/stl_util.h
vendored
2
extern/ceres/internal/ceres/stl_util.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
||||
20
extern/ceres/internal/ceres/stringprintf.cc
vendored
20
extern/ceres/internal/ceres/stringprintf.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,12 +38,9 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
using std::string;
|
||||
|
||||
void StringAppendV(string* dst, const char* format, va_list ap) {
|
||||
void StringAppendV(std::string* dst, const char* format, va_list ap) {
|
||||
// First try with a small fixed size buffer
|
||||
char space[1024];
|
||||
|
||||
@@ -93,16 +90,16 @@ void StringAppendV(string* dst, const char* format, va_list ap) {
|
||||
delete[] buf;
|
||||
}
|
||||
|
||||
string StringPrintf(const char* format, ...) {
|
||||
std::string StringPrintf(const char* format, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
string result;
|
||||
std::string result;
|
||||
StringAppendV(&result, format, ap);
|
||||
va_end(ap);
|
||||
return result;
|
||||
}
|
||||
|
||||
const string& SStringPrintf(string* dst, const char* format, ...) {
|
||||
const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
dst->clear();
|
||||
@@ -111,12 +108,11 @@ const string& SStringPrintf(string* dst, const char* format, ...) {
|
||||
return *dst;
|
||||
}
|
||||
|
||||
void StringAppendF(string* dst, const char* format, ...) {
|
||||
void StringAppendF(std::string* dst, const char* format, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
StringAppendV(dst, format, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/stringprintf.h
vendored
8
extern/ceres/internal/ceres/stringprintf.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,8 +44,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
#if (defined(__GNUC__) || defined(__clang__))
|
||||
// Tell the compiler to do printf format string checking if the compiler
|
||||
@@ -90,8 +89,7 @@ CERES_NO_EXPORT extern void StringAppendV(std::string* dst,
|
||||
|
||||
#undef CERES_PRINTF_ATTRIBUTE
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,8 +40,7 @@
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options,
|
||||
const BlockSparseMatrix& A)
|
||||
@@ -52,13 +51,14 @@ SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options,
|
||||
LinearSolver::Options sparse_cholesky_options;
|
||||
sparse_cholesky_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
sparse_cholesky_options.use_postordering = options_.use_postordering;
|
||||
sparse_cholesky_options.ordering_type = options_.ordering_type;
|
||||
sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options);
|
||||
}
|
||||
|
||||
SubsetPreconditioner::~SubsetPreconditioner() = default;
|
||||
|
||||
void SubsetPreconditioner::RightMultiply(const double* x, double* y) const {
|
||||
void SubsetPreconditioner::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
std::string message;
|
||||
@@ -106,7 +106,7 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
const LinearSolverTerminationType termination_type =
|
||||
sparse_cholesky_->Factorize(inner_product_computer_->mutable_result(),
|
||||
&message);
|
||||
if (termination_type != LINEAR_SOLVER_SUCCESS) {
|
||||
if (termination_type != LinearSolverTerminationType::SUCCESS) {
|
||||
LOG(ERROR) << "Preconditioner factorization failed: " << message;
|
||||
return false;
|
||||
}
|
||||
@@ -114,5 +114,4 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockSparseMatrix;
|
||||
class SparseCholesky;
|
||||
@@ -76,7 +75,7 @@ class CERES_NO_EXPORT SubsetPreconditioner
|
||||
~SubsetPreconditioner() override;
|
||||
|
||||
// Preconditioner interface
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
int num_rows() const final { return num_cols_; }
|
||||
int num_cols() const final { return num_cols_; }
|
||||
|
||||
@@ -89,8 +88,7 @@ class CERES_NO_EXPORT SubsetPreconditioner
|
||||
std::unique_ptr<InnerProductComputer> inner_product_computer_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
250
extern/ceres/internal/ceres/suitesparse.cc
vendored
250
extern/ceres/internal/ceres/suitesparse.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,7 +32,9 @@
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifndef CERES_NO_SUITESPARSE
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/compressed_col_sparse_matrix_utils.h"
|
||||
@@ -42,11 +44,24 @@
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
#include "cholmod.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
int OrderingTypeToCHOLMODEnum(OrderingType ordering_type) {
|
||||
if (ordering_type == OrderingType::AMD) {
|
||||
return CHOLMOD_AMD;
|
||||
}
|
||||
if (ordering_type == OrderingType::NESDIS) {
|
||||
return CHOLMOD_NESDIS;
|
||||
}
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
if (ordering_type == OrderingType::NATURAL) {
|
||||
return CHOLMOD_NATURAL;
|
||||
}
|
||||
LOG(FATAL) << "Congratulations you have discovered a bug in Ceres Solver."
|
||||
<< "Please report it to the developers. " << ordering_type;
|
||||
return -1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
SuiteSparse::SuiteSparse() { cholmod_start(&cc_); }
|
||||
|
||||
@@ -103,9 +118,11 @@ cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView(
|
||||
m.x = reinterpret_cast<void*>(A->mutable_values());
|
||||
m.z = nullptr;
|
||||
|
||||
if (A->storage_type() == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
|
||||
if (A->storage_type() ==
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) {
|
||||
m.stype = 1;
|
||||
} else if (A->storage_type() == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
|
||||
} else if (A->storage_type() ==
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
|
||||
m.stype = -1;
|
||||
} else {
|
||||
m.stype = 0;
|
||||
@@ -144,19 +161,18 @@ cholmod_dense* SuiteSparse::CreateDenseVector(const double* x,
|
||||
}
|
||||
|
||||
cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A,
|
||||
string* message) {
|
||||
// Cholmod can try multiple re-ordering strategies to find a fill
|
||||
// reducing ordering. Here we just tell it use AMD with automatic
|
||||
// matrix dependence choice of supernodal versus simplicial
|
||||
// factorization.
|
||||
OrderingType ordering_type,
|
||||
std::string* message) {
|
||||
cc_.nmethods = 1;
|
||||
cc_.method[0].ordering = CHOLMOD_AMD;
|
||||
cc_.supernodal = CHOLMOD_AUTO;
|
||||
cc_.method[0].ordering = OrderingTypeToCHOLMODEnum(ordering_type);
|
||||
|
||||
// Postordering with a NATURAL ordering leads to a significant regression in
|
||||
// performance. See https://github.com/ceres-solver/ceres-solver/issues/905
|
||||
if (ordering_type == OrderingType::NATURAL) {
|
||||
cc_.postorder = 0;
|
||||
}
|
||||
|
||||
cholmod_factor* factor = cholmod_analyze(A, &cc_);
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
|
||||
if (cc_.status != CHOLMOD_OK) {
|
||||
*message =
|
||||
@@ -165,32 +181,22 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A,
|
||||
}
|
||||
|
||||
CHECK(factor != nullptr);
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
|
||||
return factor;
|
||||
}
|
||||
|
||||
cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(cholmod_sparse* A,
|
||||
const vector<int>& row_blocks,
|
||||
const vector<int>& col_blocks,
|
||||
string* message) {
|
||||
vector<int> ordering;
|
||||
if (!BlockAMDOrdering(A, row_blocks, col_blocks, &ordering)) {
|
||||
return nullptr;
|
||||
}
|
||||
return AnalyzeCholeskyWithUserOrdering(A, ordering, message);
|
||||
}
|
||||
|
||||
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
|
||||
cholmod_sparse* A, const vector<int>& ordering, string* message) {
|
||||
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithGivenOrdering(
|
||||
cholmod_sparse* A, const std::vector<int>& ordering, std::string* message) {
|
||||
CHECK_EQ(ordering.size(), A->nrow);
|
||||
|
||||
cc_.nmethods = 1;
|
||||
cc_.method[0].ordering = CHOLMOD_GIVEN;
|
||||
|
||||
cholmod_factor* factor =
|
||||
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), nullptr, 0, &cc_);
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
cholmod_analyze_p(A, const_cast<int*>(ordering.data()), nullptr, 0, &cc_);
|
||||
|
||||
if (cc_.status != CHOLMOD_OK) {
|
||||
*message =
|
||||
StringPrintf("cholmod_analyze failed. error code: %d", cc_.status);
|
||||
@@ -198,40 +204,33 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
|
||||
}
|
||||
|
||||
CHECK(factor != nullptr);
|
||||
return factor;
|
||||
}
|
||||
|
||||
cholmod_factor* SuiteSparse::AnalyzeCholeskyWithNaturalOrdering(
|
||||
cholmod_sparse* A, string* message) {
|
||||
cc_.nmethods = 1;
|
||||
cc_.method[0].ordering = CHOLMOD_NATURAL;
|
||||
cc_.postorder = 0;
|
||||
|
||||
cholmod_factor* factor = cholmod_analyze(A, &cc_);
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
if (cc_.status != CHOLMOD_OK) {
|
||||
*message =
|
||||
StringPrintf("cholmod_analyze failed. error code: %d", cc_.status);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CHECK(factor != nullptr);
|
||||
return factor;
|
||||
}
|
||||
|
||||
bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
|
||||
const vector<int>& row_blocks,
|
||||
const vector<int>& col_blocks,
|
||||
vector<int>* ordering) {
|
||||
bool SuiteSparse::BlockOrdering(const cholmod_sparse* A,
|
||||
OrderingType ordering_type,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::vector<int>* ordering) {
|
||||
if (ordering_type == OrderingType::NATURAL) {
|
||||
ordering->resize(A->nrow);
|
||||
for (int i = 0; i < A->nrow; ++i) {
|
||||
(*ordering)[i] = i;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const int num_row_blocks = row_blocks.size();
|
||||
const int num_col_blocks = col_blocks.size();
|
||||
|
||||
// Arrays storing the compressed column structure of the matrix
|
||||
// incoding the block sparsity of A.
|
||||
vector<int> block_cols;
|
||||
vector<int> block_rows;
|
||||
// encoding the block sparsity of A.
|
||||
std::vector<int> block_cols;
|
||||
std::vector<int> block_rows;
|
||||
|
||||
CompressedColumnScalarMatrixToBlockMatrix(reinterpret_cast<const int*>(A->i),
|
||||
reinterpret_cast<const int*>(A->p),
|
||||
@@ -243,8 +242,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
|
||||
block_matrix.nrow = num_row_blocks;
|
||||
block_matrix.ncol = num_col_blocks;
|
||||
block_matrix.nzmax = block_rows.size();
|
||||
block_matrix.p = reinterpret_cast<void*>(&block_cols[0]);
|
||||
block_matrix.i = reinterpret_cast<void*>(&block_rows[0]);
|
||||
block_matrix.p = reinterpret_cast<void*>(block_cols.data());
|
||||
block_matrix.i = reinterpret_cast<void*>(block_rows.data());
|
||||
block_matrix.x = nullptr;
|
||||
block_matrix.stype = A->stype;
|
||||
block_matrix.itype = CHOLMOD_INT;
|
||||
@@ -253,8 +252,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
|
||||
block_matrix.sorted = 1;
|
||||
block_matrix.packed = 1;
|
||||
|
||||
vector<int> block_ordering(num_row_blocks);
|
||||
if (!cholmod_amd(&block_matrix, nullptr, 0, &block_ordering[0], &cc_)) {
|
||||
std::vector<int> block_ordering(num_row_blocks);
|
||||
if (!Ordering(&block_matrix, ordering_type, block_ordering.data())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -262,9 +261,22 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A,
|
||||
return true;
|
||||
}
|
||||
|
||||
cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(
|
||||
cholmod_sparse* A,
|
||||
OrderingType ordering_type,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::string* message) {
|
||||
std::vector<int> ordering;
|
||||
if (!BlockOrdering(A, ordering_type, row_blocks, col_blocks, &ordering)) {
|
||||
return nullptr;
|
||||
}
|
||||
return AnalyzeCholeskyWithGivenOrdering(A, ordering, message);
|
||||
}
|
||||
|
||||
LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A,
|
||||
cholmod_factor* L,
|
||||
string* message) {
|
||||
std::string* message) {
|
||||
CHECK(A != nullptr);
|
||||
CHECK(L != nullptr);
|
||||
|
||||
@@ -282,48 +294,48 @@ LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A,
|
||||
switch (cc_.status) {
|
||||
case CHOLMOD_NOT_INSTALLED:
|
||||
*message = "CHOLMOD failure: Method not installed.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
case CHOLMOD_OUT_OF_MEMORY:
|
||||
*message = "CHOLMOD failure: Out of memory.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
case CHOLMOD_TOO_LARGE:
|
||||
*message = "CHOLMOD failure: Integer overflow occurred.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
case CHOLMOD_INVALID:
|
||||
*message = "CHOLMOD failure: Invalid input.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
case CHOLMOD_NOT_POSDEF:
|
||||
*message = "CHOLMOD warning: Matrix not positive definite.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
case CHOLMOD_DSMALL:
|
||||
*message =
|
||||
"CHOLMOD warning: D for LDL' or diag(L) or "
|
||||
"LL' has tiny absolute value.";
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
case CHOLMOD_OK:
|
||||
if (cholmod_status != 0) {
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
*message =
|
||||
"CHOLMOD failure: cholmod_factorize returned false "
|
||||
"but cholmod_common::status is CHOLMOD_OK."
|
||||
"Please report this to ceres-solver@googlegroups.com.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
default:
|
||||
*message = StringPrintf(
|
||||
"Unknown cholmod return code: %d. "
|
||||
"Please report this to ceres-solver@googlegroups.com.",
|
||||
cc_.status);
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
cholmod_dense* SuiteSparse::Solve(cholmod_factor* L,
|
||||
cholmod_dense* b,
|
||||
string* message) {
|
||||
std::string* message) {
|
||||
if (cc_.status != CHOLMOD_OK) {
|
||||
*message = "cholmod_solve failed. CHOLMOD status is not CHOLMOD_OK";
|
||||
return nullptr;
|
||||
@@ -332,22 +344,34 @@ cholmod_dense* SuiteSparse::Solve(cholmod_factor* L,
|
||||
return cholmod_solve(CHOLMOD_A, L, b, &cc_);
|
||||
}
|
||||
|
||||
bool SuiteSparse::ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix,
|
||||
int* ordering) {
|
||||
return cholmod_amd(matrix, nullptr, 0, ordering, &cc_);
|
||||
bool SuiteSparse::Ordering(cholmod_sparse* matrix,
|
||||
OrderingType ordering_type,
|
||||
int* ordering) {
|
||||
CHECK_NE(ordering_type, OrderingType::NATURAL);
|
||||
if (ordering_type == OrderingType::AMD) {
|
||||
return cholmod_amd(matrix, nullptr, 0, ordering, &cc_);
|
||||
}
|
||||
|
||||
#ifdef CERES_NO_CHOLMOD_PARTITION
|
||||
return false;
|
||||
#else
|
||||
std::vector<int> CParent(matrix->nrow, 0);
|
||||
std::vector<int> CMember(matrix->nrow, 0);
|
||||
return cholmod_nested_dissection(
|
||||
matrix, nullptr, 0, ordering, CParent.data(), CMember.data(), &cc_);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool SuiteSparse::ConstrainedApproximateMinimumDegreeOrdering(
|
||||
cholmod_sparse* matrix, int* constraints, int* ordering) {
|
||||
#ifndef CERES_NO_CAMD
|
||||
return cholmod_camd(matrix, nullptr, 0, constraints, ordering, &cc_);
|
||||
#else
|
||||
LOG(FATAL) << "Congratulations you have found a bug in Ceres."
|
||||
<< "Ceres Solver was compiled with SuiteSparse "
|
||||
<< "version 4.1.0 or less. Calling this function "
|
||||
<< "in that case is a bug. Please contact the"
|
||||
<< "the Ceres Solver developers.";
|
||||
}
|
||||
|
||||
bool SuiteSparse::IsNestedDissectionAvailable() {
|
||||
#ifdef CERES_NO_CHOLMOD_PARTITION
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -367,48 +391,61 @@ SuiteSparseCholesky::~SuiteSparseCholesky() {
|
||||
}
|
||||
|
||||
LinearSolverTerminationType SuiteSparseCholesky::Factorize(
|
||||
CompressedRowSparseMatrix* lhs, string* message) {
|
||||
CompressedRowSparseMatrix* lhs, std::string* message) {
|
||||
if (lhs == nullptr) {
|
||||
*message = "Failure: Input lhs is nullptr.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
cholmod_sparse cholmod_lhs = ss_.CreateSparseMatrixTransposeView(lhs);
|
||||
|
||||
// If a factorization does not exist, compute the symbolic
|
||||
// factorization first.
|
||||
//
|
||||
// If the ordering type is NATURAL, then there is no fill reducing
|
||||
// ordering to be computed, regardless of block structure, so we can
|
||||
// just call the scalar version of symbolic factorization. For
|
||||
// SuiteSparse this is the common case since we have already
|
||||
// pre-ordered the columns of the Jacobian.
|
||||
//
|
||||
// Similarly regardless of ordering type, if there is no block
|
||||
// structure in the matrix we call the scalar version of symbolic
|
||||
// factorization.
|
||||
if (factor_ == nullptr) {
|
||||
if (ordering_type_ == NATURAL) {
|
||||
factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(&cholmod_lhs, message);
|
||||
if (ordering_type_ == OrderingType::NATURAL ||
|
||||
(lhs->col_blocks().empty() || lhs->row_blocks().empty())) {
|
||||
factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, ordering_type_, message);
|
||||
} else {
|
||||
if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) {
|
||||
factor_ = ss_.BlockAnalyzeCholesky(
|
||||
&cholmod_lhs, lhs->col_blocks(), lhs->row_blocks(), message);
|
||||
} else {
|
||||
factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, message);
|
||||
}
|
||||
}
|
||||
|
||||
if (factor_ == nullptr) {
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
factor_ = ss_.BlockAnalyzeCholesky(&cholmod_lhs,
|
||||
ordering_type_,
|
||||
lhs->col_blocks(),
|
||||
lhs->row_blocks(),
|
||||
message);
|
||||
}
|
||||
}
|
||||
|
||||
if (factor_ == nullptr) {
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
// Compute and return the numeric factorization.
|
||||
return ss_.Cholesky(&cholmod_lhs, factor_, message);
|
||||
}
|
||||
|
||||
CompressedRowSparseMatrix::StorageType SuiteSparseCholesky::StorageType()
|
||||
const {
|
||||
return ((ordering_type_ == NATURAL)
|
||||
? CompressedRowSparseMatrix::UPPER_TRIANGULAR
|
||||
: CompressedRowSparseMatrix::LOWER_TRIANGULAR);
|
||||
return ((ordering_type_ == OrderingType::NATURAL)
|
||||
? CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR
|
||||
: CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
|
||||
}
|
||||
|
||||
LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs,
|
||||
double* solution,
|
||||
string* message) {
|
||||
std::string* message) {
|
||||
// Error checking
|
||||
if (factor_ == nullptr) {
|
||||
*message = "Solve called without a call to Factorize first.";
|
||||
return LINEAR_SOLVER_FATAL_ERROR;
|
||||
return LinearSolverTerminationType::FATAL_ERROR;
|
||||
}
|
||||
|
||||
const int num_cols = factor_->n;
|
||||
@@ -417,15 +454,14 @@ LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs,
|
||||
ss_.Solve(factor_, &cholmod_rhs, message);
|
||||
|
||||
if (cholmod_dense_solution == nullptr) {
|
||||
return LINEAR_SOLVER_FAILURE;
|
||||
return LinearSolverTerminationType::FAILURE;
|
||||
}
|
||||
|
||||
memcpy(solution, cholmod_dense_solution->x, num_cols * sizeof(*solution));
|
||||
ss_.Free(cholmod_dense_solution);
|
||||
return LINEAR_SOLVER_SUCCESS;
|
||||
return LinearSolverTerminationType::SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_NO_SUITESPARSE
|
||||
|
||||
157
extern/ceres/internal/ceres/suitesparse.h
vendored
157
extern/ceres/internal/ceres/suitesparse.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,37 +44,14 @@
|
||||
#include <vector>
|
||||
|
||||
#include "SuiteSparseQR.hpp"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
#include "cholmod.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
// Before SuiteSparse version 4.2.0, cholmod_camd was only enabled
|
||||
// if SuiteSparse was compiled with Metis support. This makes
|
||||
// calling and linking into cholmod_camd problematic even though it
|
||||
// has nothing to do with Metis. This has been fixed reliably in
|
||||
// 4.2.0.
|
||||
//
|
||||
// The fix was actually committed in 4.1.0, but there is
|
||||
// some confusion about a silent update to the tar ball, so we are
|
||||
// being conservative and choosing the next minor version where
|
||||
// things are stable.
|
||||
#if (SUITESPARSE_VERSION < 4002)
|
||||
#define CERES_NO_CAMD
|
||||
#endif
|
||||
|
||||
// UF_long is deprecated but SuiteSparse_long is only available in
|
||||
// newer versions of SuiteSparse. So for older versions of
|
||||
// SuiteSparse, we define SuiteSparse_long to be the same as UF_long,
|
||||
// which is what recent versions of SuiteSparse do anyways.
|
||||
#ifndef SuiteSparse_long
|
||||
#define SuiteSparse_long UF_long
|
||||
#endif
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CompressedRowSparseMatrix;
|
||||
class TripletSparseMatrix;
|
||||
@@ -91,7 +68,7 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
|
||||
// Functions for building cholmod_sparse objects from sparse
|
||||
// matrices stored in triplet form. The matrix A is not
|
||||
// modifed. Called owns the result.
|
||||
// modified. Caller owns the result.
|
||||
cholmod_sparse* CreateSparseMatrix(TripletSparseMatrix* A);
|
||||
|
||||
// This function works like CreateSparseMatrix, except that the
|
||||
@@ -142,12 +119,11 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
cholmod_sdmult(A, 0, alpha_, beta_, x, y, &cc_);
|
||||
}
|
||||
|
||||
// Find an ordering of A or AA' (if A is unsymmetric) that minimizes
|
||||
// the fill-in in the Cholesky factorization of the corresponding
|
||||
// matrix. This is done by using the AMD algorithm.
|
||||
//
|
||||
// Using this ordering, the symbolic Cholesky factorization of A (or
|
||||
// AA') is computed and returned.
|
||||
// Compute a symbolic factorization for A or AA' (if A is
|
||||
// unsymmetric). If ordering_type is NATURAL, then no fill reducing
|
||||
// ordering is computed, otherwise depending on the value of
|
||||
// ordering_type AMD or Nested Dissection is used to compute a fill
|
||||
// reducing ordering before the symbolic factorization is computed.
|
||||
//
|
||||
// A is not modified, only the pattern of non-zeros of A is used,
|
||||
// the actual numerical values in A are of no consequence.
|
||||
@@ -155,11 +131,15 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
// message contains an explanation of the failures if any.
|
||||
//
|
||||
// Caller owns the result.
|
||||
cholmod_factor* AnalyzeCholesky(cholmod_sparse* A, std::string* message);
|
||||
cholmod_factor* AnalyzeCholesky(cholmod_sparse* A,
|
||||
OrderingType ordering_type,
|
||||
std::string* message);
|
||||
|
||||
// Block oriented version of AnalyzeCholesky.
|
||||
cholmod_factor* BlockAnalyzeCholesky(cholmod_sparse* A,
|
||||
const std::vector<int>& row_blocks,
|
||||
const std::vector<int>& col_blocks,
|
||||
OrderingType ordering_type,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::string* message);
|
||||
|
||||
// If A is symmetric, then compute the symbolic Cholesky
|
||||
@@ -173,20 +153,11 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
// message contains an explanation of the failures if any.
|
||||
//
|
||||
// Caller owns the result.
|
||||
cholmod_factor* AnalyzeCholeskyWithUserOrdering(
|
||||
cholmod_factor* AnalyzeCholeskyWithGivenOrdering(
|
||||
cholmod_sparse* A,
|
||||
const std::vector<int>& ordering,
|
||||
std::string* message);
|
||||
|
||||
// Perform a symbolic factorization of A without re-ordering A. No
|
||||
// postordering of the elimination tree is performed. This ensures
|
||||
// that the symbolic factor does not introduce an extra permutation
|
||||
// on the matrix. See the documentation for CHOLMOD for more details.
|
||||
//
|
||||
// message contains an explanation of the failures if any.
|
||||
cholmod_factor* AnalyzeCholeskyWithNaturalOrdering(cholmod_sparse* A,
|
||||
std::string* message);
|
||||
|
||||
// Use the symbolic factorization in L, to find the numerical
|
||||
// factorization for the matrix A or AA^T. Return true if
|
||||
// successful, false otherwise. L contains the numeric factorization
|
||||
@@ -206,51 +177,39 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
cholmod_dense* b,
|
||||
std::string* message);
|
||||
|
||||
// Find a fill reducing ordering. ordering is expected to be large
|
||||
// enough to hold the ordering. ordering_type must be AMD or NESDIS.
|
||||
bool Ordering(cholmod_sparse* matrix,
|
||||
OrderingType ordering_type,
|
||||
int* ordering);
|
||||
|
||||
// Find the block oriented fill reducing ordering of a matrix A,
|
||||
// whose row and column blocks are given by row_blocks, and
|
||||
// col_blocks respectively. The matrix may or may not be
|
||||
// symmetric. The entries of col_blocks do not need to sum to the
|
||||
// number of columns in A. If this is the case, only the first
|
||||
// sum(col_blocks) are used to compute the ordering.
|
||||
//
|
||||
// By virtue of the modeling layer in Ceres being block oriented,
|
||||
// all the matrices used by Ceres are also block oriented. When
|
||||
// doing sparse direct factorization of these matrices the
|
||||
// fill-reducing ordering algorithms (in particular AMD) can either
|
||||
// be run on the block or the scalar form of these matrices. The two
|
||||
// SuiteSparse::AnalyzeCholesky methods allows the client to
|
||||
// compute the symbolic factorization of a matrix by either using
|
||||
// AMD on the matrix or a user provided ordering of the rows.
|
||||
//
|
||||
// But since the underlying matrices are block oriented, it is worth
|
||||
// running AMD on just the block structure of these matrices and then
|
||||
// lifting these block orderings to a full scalar ordering. This
|
||||
// preserves the block structure of the permuted matrix, and exposes
|
||||
// more of the super-nodal structure of the matrix to the numerical
|
||||
// factorization routines.
|
||||
//
|
||||
// Find the block oriented AMD ordering of a matrix A, whose row and
|
||||
// column blocks are given by row_blocks, and col_blocks
|
||||
// respectively. The matrix may or may not be symmetric. The entries
|
||||
// of col_blocks do not need to sum to the number of columns in
|
||||
// A. If this is the case, only the first sum(col_blocks) are used
|
||||
// to compute the ordering.
|
||||
bool BlockAMDOrdering(const cholmod_sparse* A,
|
||||
const std::vector<int>& row_blocks,
|
||||
const std::vector<int>& col_blocks,
|
||||
std::vector<int>* ordering);
|
||||
// fill-reducing ordering algorithms can either be run on the block
|
||||
// or the scalar form of these matrices. But since the underlying
|
||||
// matrices are block oriented, it is worth running the fill
|
||||
// reducing ordering on just the block structure of these matrices
|
||||
// and then lifting these block orderings to a full scalar
|
||||
// ordering. This preserves the block structure of the permuted
|
||||
// matrix, and exposes more of the super-nodal structure of the
|
||||
// matrix to the numerical factorization routines.
|
||||
bool BlockOrdering(const cholmod_sparse* A,
|
||||
OrderingType ordering_type,
|
||||
const std::vector<Block>& row_blocks,
|
||||
const std::vector<Block>& col_blocks,
|
||||
std::vector<int>* ordering);
|
||||
|
||||
// Find a fill reducing approximate minimum degree
|
||||
// ordering. ordering is expected to be large enough to hold the
|
||||
// ordering.
|
||||
bool ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, int* ordering);
|
||||
|
||||
// Before SuiteSparse version 4.2.0, cholmod_camd was only enabled
|
||||
// if SuiteSparse was compiled with Metis support. This makes
|
||||
// calling and linking into cholmod_camd problematic even though it
|
||||
// has nothing to do with Metis. This has been fixed reliably in
|
||||
// 4.2.0.
|
||||
//
|
||||
// The fix was actually committed in 4.1.0, but there is
|
||||
// some confusion about a silent update to the tar ball, so we are
|
||||
// being conservative and choosing the next minor version where
|
||||
// things are stable.
|
||||
static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() {
|
||||
return (SUITESPARSE_VERSION > 4001);
|
||||
}
|
||||
// Nested dissection is only available if SuiteSparse is compiled
|
||||
// with Metis support.
|
||||
static bool IsNestedDissectionAvailable();
|
||||
|
||||
// Find a fill reducing approximate minimum degree
|
||||
// ordering. constraints is an array which associates with each
|
||||
@@ -262,9 +221,6 @@ class CERES_NO_EXPORT SuiteSparse {
|
||||
// Calling Ordering with OrderingType::AMD is equivalent to calling
|
||||
// ConstrainedApproximateMinimumDegreeOrdering with a constraint
|
||||
// array that puts all columns in the same elimination group.
|
||||
//
|
||||
// If CERES_NO_CAMD is defined then calling this function will
|
||||
// result in a crash.
|
||||
bool ConstrainedApproximateMinimumDegreeOrdering(cholmod_sparse* matrix,
|
||||
int* constraints,
|
||||
int* ordering);
|
||||
@@ -312,14 +268,13 @@ class CERES_NO_EXPORT SuiteSparseCholesky final : public SparseCholesky {
|
||||
cholmod_factor* factor_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
#else // CERES_NO_SUITESPARSE
|
||||
|
||||
typedef void cholmod_factor;
|
||||
using cholmod_factor = void;
|
||||
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
|
||||
@@ -328,17 +283,9 @@ namespace internal {
|
||||
|
||||
class CERES_NO_EXPORT SuiteSparse {
|
||||
public:
|
||||
// Defining this static function even when SuiteSparse is not
|
||||
// available, allows client code to check for the presence of CAMD
|
||||
// without checking for the absence of the CERES_NO_CAMD symbol.
|
||||
//
|
||||
// This is safer because the symbol maybe missing due to a user
|
||||
// accidentally not including suitesparse.h in their code when
|
||||
// checking for the symbol.
|
||||
static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Nested dissection is only available if SuiteSparse is compiled
|
||||
// with Metis support.
|
||||
static bool IsNestedDissectionAvailable() { return false; }
|
||||
void Free(void* /*arg*/) {}
|
||||
};
|
||||
|
||||
|
||||
17
extern/ceres/internal/ceres/thread_pool.cc
vendored
17
extern/ceres/internal/ceres/thread_pool.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -28,18 +28,14 @@
|
||||
//
|
||||
// Author: vitus@google.com (Michael Vitus)
|
||||
|
||||
// This include must come before any #ifndef check on Ceres compile options.
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
#include "ceres/thread_pool.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "ceres/thread_pool.h"
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
namespace {
|
||||
|
||||
// Constrain the total number of threads to the amount the hardware can support.
|
||||
@@ -105,7 +101,4 @@ void ThreadPool::ThreadMainLoop() {
|
||||
|
||||
void ThreadPool::Stop() { task_queue_.StopWaiters(); }
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_USE_CXX_THREADS
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/thread_pool.h
vendored
8
extern/ceres/internal/ceres/thread_pool.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2018 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,8 +39,7 @@
|
||||
#include "ceres/concurrent_queue.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// A thread-safe thread pool with an unbounded task queue and a resizable number
|
||||
// of workers. The size of the thread pool can be increased but never decreased
|
||||
@@ -115,7 +114,6 @@ class CERES_NO_EXPORT ThreadPool {
|
||||
std::mutex thread_pool_mutex_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_THREAD_POOL_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,44 +30,20 @@
|
||||
|
||||
#include "ceres/thread_token_provider.h"
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
ThreadTokenProvider::ThreadTokenProvider(int num_threads) {
|
||||
(void)num_threads;
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
pool_.Push(i);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int ThreadTokenProvider::Acquire() {
|
||||
#ifdef CERES_USE_OPENMP
|
||||
return omp_get_thread_num();
|
||||
#endif
|
||||
|
||||
#ifdef CERES_NO_THREADS
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
int thread_id;
|
||||
CHECK(pool_.Wait(&thread_id));
|
||||
return thread_id;
|
||||
#endif
|
||||
}
|
||||
|
||||
void ThreadTokenProvider::Release(int thread_id) {
|
||||
(void)thread_id;
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
pool_.Push(thread_id);
|
||||
#endif
|
||||
}
|
||||
void ThreadTokenProvider::Release(int thread_id) { pool_.Push(thread_id); }
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -31,15 +31,11 @@
|
||||
#ifndef CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_
|
||||
#define CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_
|
||||
|
||||
#include "ceres/concurrent_queue.h"
|
||||
#include "ceres/internal/config.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
#include "ceres/concurrent_queue.h"
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Helper for C++ thread number identification that is similar to
|
||||
// omp_get_thread_num() behaviour. This is necessary to support C++
|
||||
@@ -48,12 +44,6 @@ namespace internal {
|
||||
// 0 to num_threads - 1 that can be acquired to identify the thread in a thread
|
||||
// pool.
|
||||
//
|
||||
// If CERES_NO_THREADS is defined, Acquire() always returns 0 and Release()
|
||||
// takes no action.
|
||||
//
|
||||
// If CERES_USE_OPENMP, omp_get_thread_num() is used to Acquire() with no action
|
||||
// in Release()
|
||||
//
|
||||
//
|
||||
// Example usage pseudocode:
|
||||
//
|
||||
@@ -78,20 +68,16 @@ class CERES_NO_EXPORT ThreadTokenProvider {
|
||||
void Release(int thread_id);
|
||||
|
||||
private:
|
||||
#ifdef CERES_USE_CXX_THREADS
|
||||
// This queue initially holds a sequence from 0..num_threads-1. On every
|
||||
// Acquire() call the first number is removed from here. When the token is not
|
||||
// needed anymore it shall be given back with corresponding Release()
|
||||
// call. This concurrent queue is more expensive than TBB's version, so you
|
||||
// should not acquire the thread ID on every for loop iteration.
|
||||
ConcurrentQueue<int> pool_;
|
||||
#endif
|
||||
|
||||
ThreadTokenProvider(ThreadTokenProvider&) = delete;
|
||||
ThreadTokenProvider& operator=(ThreadTokenProvider&) = delete;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,15 +32,16 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/crs_matrix.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/random.h"
|
||||
#include "ceres/types.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
TripletSparseMatrix::TripletSparseMatrix()
|
||||
: num_rows_(0), num_cols_(0), max_num_nonzeros_(0), num_nonzeros_(0) {}
|
||||
@@ -168,13 +169,15 @@ void TripletSparseMatrix::CopyData(const TripletSparseMatrix& orig) {
|
||||
}
|
||||
}
|
||||
|
||||
void TripletSparseMatrix::RightMultiply(const double* x, double* y) const {
|
||||
void TripletSparseMatrix::RightMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
for (int i = 0; i < num_nonzeros_; ++i) {
|
||||
y[rows_[i]] += values_[i] * x[cols_[i]];
|
||||
}
|
||||
}
|
||||
|
||||
void TripletSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
||||
void TripletSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
|
||||
double* y) const {
|
||||
for (int i = 0; i < num_nonzeros_; ++i) {
|
||||
y[cols_[i]] += values_[i] * x[rows_[i]];
|
||||
}
|
||||
@@ -195,6 +198,11 @@ void TripletSparseMatrix::ScaleColumns(const double* scale) {
|
||||
}
|
||||
}
|
||||
|
||||
void TripletSparseMatrix::ToCRSMatrix(CRSMatrix* crs_matrix) const {
|
||||
CompressedRowSparseMatrix::FromTripletSparseMatrix(*this)->ToCRSMatrix(
|
||||
crs_matrix);
|
||||
}
|
||||
|
||||
void TripletSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
|
||||
dense_matrix->resize(num_rows_, num_cols_);
|
||||
dense_matrix->setZero();
|
||||
@@ -276,8 +284,34 @@ void TripletSparseMatrix::ToTextFile(FILE* file) const {
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateFromTextFile(
|
||||
FILE* file) {
|
||||
CHECK(file != nullptr);
|
||||
int num_rows = 0;
|
||||
int num_cols = 0;
|
||||
std::vector<int> rows;
|
||||
std::vector<int> cols;
|
||||
std::vector<double> values;
|
||||
while (true) {
|
||||
int row, col;
|
||||
double value;
|
||||
if (fscanf(file, "%d %d %lf", &row, &col, &value) != 3) {
|
||||
break;
|
||||
}
|
||||
rows.push_back(row);
|
||||
cols.push_back(col);
|
||||
values.push_back(value);
|
||||
num_rows = std::max(num_rows, row + 1);
|
||||
num_cols = std::max(num_cols, col + 1);
|
||||
}
|
||||
VLOG(1) << "Read " << rows.size() << " nonzeros from file.";
|
||||
return std::make_unique<TripletSparseMatrix>(
|
||||
num_rows, num_cols, rows, cols, values);
|
||||
}
|
||||
|
||||
std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
|
||||
const TripletSparseMatrix::RandomMatrixOptions& options) {
|
||||
const TripletSparseMatrix::RandomMatrixOptions& options,
|
||||
std::mt19937& prng) {
|
||||
CHECK_GT(options.num_rows, 0);
|
||||
CHECK_GT(options.num_cols, 0);
|
||||
CHECK_GT(options.density, 0.0);
|
||||
@@ -286,16 +320,18 @@ std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
|
||||
std::vector<int> rows;
|
||||
std::vector<int> cols;
|
||||
std::vector<double> values;
|
||||
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
|
||||
std::normal_distribution<double> standard_normal;
|
||||
while (rows.empty()) {
|
||||
rows.clear();
|
||||
cols.clear();
|
||||
values.clear();
|
||||
for (int r = 0; r < options.num_rows; ++r) {
|
||||
for (int c = 0; c < options.num_cols; ++c) {
|
||||
if (RandDouble() <= options.density) {
|
||||
if (uniform01(prng) <= options.density) {
|
||||
rows.push_back(r);
|
||||
cols.push_back(c);
|
||||
values.push_back(RandNormal());
|
||||
values.push_back(standard_normal(prng));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -305,5 +341,4 @@ std::unique_ptr<TripletSparseMatrix> TripletSparseMatrix::CreateRandomMatrix(
|
||||
options.num_rows, options.num_cols, rows, cols, values);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,16 +32,17 @@
|
||||
#define CERES_INTERNAL_TRIPLET_SPARSE_MATRIX_H_
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/crs_matrix.h"
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/sparse_matrix.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// An implementation of the SparseMatrix interface to store and
|
||||
// manipulate sparse matrices in triplet (i,j,s) form. This object is
|
||||
@@ -65,10 +66,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
|
||||
|
||||
// Implementation of the SparseMatrix interface.
|
||||
void SetZero() final;
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void LeftMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void LeftMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
void SquaredColumnNorm(double* x) const final;
|
||||
void ScaleColumns(const double* scale) final;
|
||||
void ToCRSMatrix(CRSMatrix* matrix) const;
|
||||
void ToDenseMatrix(Matrix* dense_matrix) const final;
|
||||
void ToTextFile(FILE* file) const final;
|
||||
// clang-format off
|
||||
@@ -134,7 +136,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
|
||||
// normally distributed and whose structure is determined by
|
||||
// RandomMatrixOptions.
|
||||
static std::unique_ptr<TripletSparseMatrix> CreateRandomMatrix(
|
||||
const TripletSparseMatrix::RandomMatrixOptions& options);
|
||||
const TripletSparseMatrix::RandomMatrixOptions& options,
|
||||
std::mt19937& prng);
|
||||
|
||||
// Load a triplet sparse matrix from a text file.
|
||||
static std::unique_ptr<TripletSparseMatrix> CreateFromTextFile(FILE* file);
|
||||
|
||||
private:
|
||||
void AllocateMemory();
|
||||
@@ -154,8 +160,7 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix {
|
||||
std::unique_ptr<double[]> values_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,9 +42,11 @@
|
||||
#include "Eigen/Core"
|
||||
#include "ceres/array_utils.h"
|
||||
#include "ceres/coordinate_descent_minimizer.h"
|
||||
#include "ceres/eigen_vector_ops.h"
|
||||
#include "ceres/evaluator.h"
|
||||
#include "ceres/file.h"
|
||||
#include "ceres/line_search.h"
|
||||
#include "ceres/parallel_for.h"
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "ceres/types.h"
|
||||
#include "ceres/wall_time.h"
|
||||
@@ -59,8 +61,7 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
|
||||
double* parameters,
|
||||
@@ -79,6 +80,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
|
||||
? options_.max_consecutive_nonmonotonic_steps
|
||||
: 0);
|
||||
|
||||
bool atleast_one_successful_step = false;
|
||||
while (FinalizeIterationAndCheckIfMinimizerCanContinue()) {
|
||||
iteration_start_time_in_secs_ = WallTimeInSeconds();
|
||||
|
||||
@@ -106,7 +108,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
|
||||
ComputeCandidatePointAndEvaluateCost();
|
||||
DoInnerIterationsIfNeeded();
|
||||
|
||||
if (ParameterToleranceReached()) {
|
||||
if (atleast_one_successful_step && ParameterToleranceReached()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -115,6 +117,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
|
||||
}
|
||||
|
||||
if (IsStepSuccessful()) {
|
||||
atleast_one_successful_step = true;
|
||||
RETURN_IF_ERROR_AND_LOG(HandleSuccessfulStep());
|
||||
} else {
|
||||
// Declare the step unsuccessful and inform the trust region strategy.
|
||||
@@ -137,8 +140,8 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
|
||||
double* parameters,
|
||||
Solver::Summary* solver_summary) {
|
||||
options_ = options;
|
||||
sort(options_.trust_region_minimizer_iterations_to_dump.begin(),
|
||||
options_.trust_region_minimizer_iterations_to_dump.end());
|
||||
std::sort(options_.trust_region_minimizer_iterations_to_dump.begin(),
|
||||
options_.trust_region_minimizer_iterations_to_dump.end());
|
||||
|
||||
parameters_ = parameters;
|
||||
|
||||
@@ -166,7 +169,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
|
||||
num_consecutive_invalid_steps_ = 0;
|
||||
|
||||
x_ = ConstVectorRef(parameters_, num_parameters_);
|
||||
x_norm_ = x_.norm();
|
||||
residuals_.resize(num_residuals_);
|
||||
trust_region_step_.resize(num_effective_parameters_);
|
||||
delta_.resize(num_effective_parameters_);
|
||||
@@ -180,7 +182,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options,
|
||||
// the Jacobian, we will compute and overwrite this vector.
|
||||
jacobian_scaling_ = Vector::Ones(num_effective_parameters_);
|
||||
|
||||
x_norm_ = -1; // Invalid value
|
||||
x_cost_ = std::numeric_limits<double>::max();
|
||||
minimum_cost_ = x_cost_;
|
||||
model_cost_change_ = 0.0;
|
||||
@@ -214,10 +215,11 @@ bool TrustRegionMinimizer::IterationZero() {
|
||||
}
|
||||
|
||||
x_ = candidate_x_;
|
||||
x_norm_ = x_.norm();
|
||||
}
|
||||
|
||||
if (!EvaluateGradientAndJacobian(/*new_evaluation_point=*/true)) {
|
||||
solver_summary_->message =
|
||||
"Initial residual and Jacobian evaluation failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -270,7 +272,8 @@ bool TrustRegionMinimizer::EvaluateGradientAndJacobian(
|
||||
}
|
||||
|
||||
// jacobian = jacobian * diag(J'J) ^{-1}
|
||||
jacobian_->ScaleColumns(jacobian_scaling_.data());
|
||||
jacobian_->ScaleColumns(
|
||||
jacobian_scaling_.data(), options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
// The gradient exists in the local tangent space. To account for
|
||||
@@ -357,13 +360,13 @@ bool TrustRegionMinimizer::FinalizeIterationAndCheckIfMinimizerCanContinue() {
|
||||
// Compute the trust region step using the TrustRegionStrategy chosen
|
||||
// by the user.
|
||||
//
|
||||
// If the strategy returns with LINEAR_SOLVER_FATAL_ERROR, which
|
||||
// If the strategy returns with LinearSolverTerminationType::FATAL_ERROR, which
|
||||
// indicates an unrecoverable error, return false. This is the only
|
||||
// condition that returns false.
|
||||
//
|
||||
// If the strategy returns with LINEAR_SOLVER_FAILURE, which indicates
|
||||
// a numerical failure that could be recovered from by retrying
|
||||
// (e.g. by increasing the strength of the regularization), we set
|
||||
// If the strategy returns with LinearSolverTerminationType::FAILURE, which
|
||||
// indicates a numerical failure that could be recovered from by retrying (e.g.
|
||||
// by increasing the strength of the regularization), we set
|
||||
// iteration_summary_.step_is_valid to false and return true.
|
||||
//
|
||||
// In all other cases, we compute the decrease in the trust region
|
||||
@@ -395,7 +398,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
|
||||
residuals_.data(),
|
||||
trust_region_step_.data());
|
||||
|
||||
if (strategy_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) {
|
||||
if (strategy_summary.termination_type ==
|
||||
LinearSolverTerminationType::FATAL_ERROR) {
|
||||
solver_summary_->message =
|
||||
"Linear solver failed due to unrecoverable "
|
||||
"non-numeric causes. Please see the error log for clues. ";
|
||||
@@ -407,7 +411,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
|
||||
WallTimeInSeconds() - strategy_start_time;
|
||||
iteration_summary_.linear_solver_iterations = strategy_summary.num_iterations;
|
||||
|
||||
if (strategy_summary.termination_type == LINEAR_SOLVER_FAILURE) {
|
||||
if (strategy_summary.termination_type ==
|
||||
LinearSolverTerminationType::FAILURE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -419,10 +424,15 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
|
||||
// = f'f/2 - 1/2 [ f'f + 2f'J * step + step' * J' * J * step]
|
||||
// = -f'J * step - step' * J' * J * step / 2
|
||||
// = -(J * step)'(f + J * step / 2)
|
||||
model_residuals_.setZero();
|
||||
jacobian_->RightMultiply(trust_region_step_.data(), model_residuals_.data());
|
||||
model_cost_change_ =
|
||||
-model_residuals_.dot(residuals_ + model_residuals_ / 2.0);
|
||||
ParallelSetZero(options_.context, options_.num_threads, model_residuals_);
|
||||
jacobian_->RightMultiplyAndAccumulate(trust_region_step_.data(),
|
||||
model_residuals_.data(),
|
||||
options_.context,
|
||||
options_.num_threads);
|
||||
model_cost_change_ = -Dot(model_residuals_,
|
||||
residuals_ + model_residuals_ / 2.0,
|
||||
options_.context,
|
||||
options_.num_threads);
|
||||
|
||||
// TODO(sameeragarwal)
|
||||
//
|
||||
@@ -432,7 +442,10 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() {
|
||||
iteration_summary_.step_is_valid = (model_cost_change_ > 0.0);
|
||||
if (iteration_summary_.step_is_valid) {
|
||||
// Undo the Jacobian column scaling.
|
||||
delta_ = (trust_region_step_.array() * jacobian_scaling_.array()).matrix();
|
||||
ParallelAssign(options_.context,
|
||||
options_.num_threads,
|
||||
delta_,
|
||||
(trust_region_step_.array() * jacobian_scaling_.array()));
|
||||
num_consecutive_invalid_steps_ = 0;
|
||||
}
|
||||
|
||||
@@ -702,10 +715,12 @@ bool TrustRegionMinimizer::MinTrustRegionRadiusReached() {
|
||||
|
||||
// Solver::Options::parameter_tolerance based convergence check.
|
||||
bool TrustRegionMinimizer::ParameterToleranceReached() {
|
||||
const double x_norm = x_.norm();
|
||||
|
||||
// Compute the norm of the step in the ambient space.
|
||||
iteration_summary_.step_norm = (x_ - candidate_x_).norm();
|
||||
const double step_size_tolerance =
|
||||
options_.parameter_tolerance * (x_norm_ + options_.parameter_tolerance);
|
||||
options_.parameter_tolerance * (x_norm + options_.parameter_tolerance);
|
||||
|
||||
if (iteration_summary_.step_norm > step_size_tolerance) {
|
||||
return false;
|
||||
@@ -714,7 +729,7 @@ bool TrustRegionMinimizer::ParameterToleranceReached() {
|
||||
solver_summary_->message = StringPrintf(
|
||||
"Parameter tolerance reached. "
|
||||
"Relative step_norm: %e <= %e.",
|
||||
(iteration_summary_.step_norm / (x_norm_ + options_.parameter_tolerance)),
|
||||
(iteration_summary_.step_norm / (x_norm + options_.parameter_tolerance)),
|
||||
options_.parameter_tolerance);
|
||||
solver_summary_->termination_type = CONVERGENCE;
|
||||
if (is_not_silent_) {
|
||||
@@ -807,7 +822,6 @@ bool TrustRegionMinimizer::IsStepSuccessful() {
|
||||
// evaluator know that the step has been accepted.
|
||||
bool TrustRegionMinimizer::HandleSuccessfulStep() {
|
||||
x_ = candidate_x_;
|
||||
x_norm_ = x_.norm();
|
||||
|
||||
// Since the step was successful, this point has already had the residual
|
||||
// evaluated (but not the jacobian). So indicate that to the evaluator.
|
||||
@@ -821,5 +835,4 @@ bool TrustRegionMinimizer::HandleSuccessfulStep() {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
#include "ceres/types.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Generic trust region minimization algorithm.
|
||||
//
|
||||
@@ -139,8 +138,6 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer {
|
||||
// Scaling vector to scale the columns of the Jacobian.
|
||||
Vector jacobian_scaling_;
|
||||
|
||||
// Euclidean norm of x_.
|
||||
double x_norm_;
|
||||
// Cost at x_.
|
||||
double x_cost_;
|
||||
// Minimum cost encountered up till now.
|
||||
@@ -160,8 +157,7 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer {
|
||||
int num_consecutive_invalid_steps_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,6 +32,7 @@
|
||||
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/callbacks.h"
|
||||
#include "ceres/context_impl.h"
|
||||
@@ -48,10 +49,7 @@
|
||||
#include "ceres/trust_region_strategy.h"
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -59,7 +57,8 @@ std::shared_ptr<ParameterBlockOrdering> CreateDefaultLinearSolverOrdering(
|
||||
const Program& program) {
|
||||
std::shared_ptr<ParameterBlockOrdering> ordering =
|
||||
std::make_shared<ParameterBlockOrdering>();
|
||||
const vector<ParameterBlock*>& parameter_blocks = program.parameter_blocks();
|
||||
const std::vector<ParameterBlock*>& parameter_blocks =
|
||||
program.parameter_blocks();
|
||||
for (auto* parameter_block : parameter_blocks) {
|
||||
ordering->AddElementToGroup(
|
||||
const_cast<double*>(parameter_block->user_state()), 0);
|
||||
@@ -114,6 +113,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
|
||||
return ReorderProgramForSchurTypeLinearSolver(
|
||||
options.linear_solver_type,
|
||||
options.sparse_linear_algebra_library_type,
|
||||
options.linear_solver_ordering_type,
|
||||
pp->problem->parameter_map(),
|
||||
options.linear_solver_ordering.get(),
|
||||
pp->reduced_program.get(),
|
||||
@@ -124,6 +124,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
|
||||
!options.dynamic_sparsity) {
|
||||
return ReorderProgramForSparseCholesky(
|
||||
options.sparse_linear_algebra_library_type,
|
||||
options.linear_solver_ordering_type,
|
||||
*options.linear_solver_ordering,
|
||||
0, /* use all the rows of the jacobian */
|
||||
pp->reduced_program.get(),
|
||||
@@ -139,6 +140,7 @@ bool ReorderProgram(PreprocessedProblem* pp) {
|
||||
|
||||
return ReorderProgramForSparseCholesky(
|
||||
options.sparse_linear_algebra_library_type,
|
||||
options.linear_solver_ordering_type,
|
||||
*options.linear_solver_ordering,
|
||||
pp->linear_solver_options.subset_preconditioner_start_row_block,
|
||||
pp->reduced_program.get(),
|
||||
@@ -197,10 +199,16 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
|
||||
options.max_linear_solver_iterations;
|
||||
pp->linear_solver_options.type = options.linear_solver_type;
|
||||
pp->linear_solver_options.preconditioner_type = options.preconditioner_type;
|
||||
pp->linear_solver_options.use_spse_initialization =
|
||||
options.use_spse_initialization;
|
||||
pp->linear_solver_options.spse_tolerance = options.spse_tolerance;
|
||||
pp->linear_solver_options.max_num_spse_iterations =
|
||||
options.max_num_spse_iterations;
|
||||
pp->linear_solver_options.visibility_clustering_type =
|
||||
options.visibility_clustering_type;
|
||||
pp->linear_solver_options.sparse_linear_algebra_library_type =
|
||||
options.sparse_linear_algebra_library_type;
|
||||
|
||||
pp->linear_solver_options.dense_linear_algebra_library_type =
|
||||
options.dense_linear_algebra_library_type;
|
||||
pp->linear_solver_options.use_explicit_schur_complement =
|
||||
@@ -211,7 +219,6 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
|
||||
pp->linear_solver_options.max_num_refinement_iterations =
|
||||
options.max_num_refinement_iterations;
|
||||
pp->linear_solver_options.num_threads = options.num_threads;
|
||||
pp->linear_solver_options.use_postordering = options.use_postordering;
|
||||
pp->linear_solver_options.context = pp->problem->context();
|
||||
|
||||
if (IsSchurType(pp->linear_solver_options.type)) {
|
||||
@@ -225,26 +232,23 @@ bool SetupLinearSolver(PreprocessedProblem* pp) {
|
||||
if (pp->linear_solver_options.elimination_groups.size() == 1) {
|
||||
pp->linear_solver_options.elimination_groups.push_back(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (options.linear_solver_type == SPARSE_SCHUR) {
|
||||
// When using SPARSE_SCHUR, we ignore the user's postordering
|
||||
// preferences in certain cases.
|
||||
//
|
||||
// 1. SUITE_SPARSE is the sparse linear algebra library requested
|
||||
// but cholmod_camd is not available.
|
||||
// 2. CX_SPARSE is the sparse linear algebra library requested.
|
||||
//
|
||||
// This ensures that the linear solver does not assume that a
|
||||
// fill-reducing pre-ordering has been done.
|
||||
//
|
||||
// TODO(sameeragarwal): Implement the reordering of parameter
|
||||
// blocks for CX_SPARSE.
|
||||
if ((options.sparse_linear_algebra_library_type == SUITE_SPARSE &&
|
||||
!SuiteSparse::
|
||||
IsConstrainedApproximateMinimumDegreeOrderingAvailable()) ||
|
||||
(options.sparse_linear_algebra_library_type == CX_SPARSE)) {
|
||||
pp->linear_solver_options.use_postordering = true;
|
||||
}
|
||||
if (!options.dynamic_sparsity &&
|
||||
AreJacobianColumnsOrdered(options.linear_solver_type,
|
||||
options.preconditioner_type,
|
||||
options.sparse_linear_algebra_library_type,
|
||||
options.linear_solver_ordering_type)) {
|
||||
pp->linear_solver_options.ordering_type = OrderingType::NATURAL;
|
||||
} else {
|
||||
if (options.linear_solver_ordering_type == ceres::AMD) {
|
||||
pp->linear_solver_options.ordering_type = OrderingType::AMD;
|
||||
} else if (options.linear_solver_ordering_type == ceres::NESDIS) {
|
||||
pp->linear_solver_options.ordering_type = OrderingType::NESDIS;
|
||||
} else {
|
||||
LOG(FATAL) << "Congratulations you have found a bug in Ceres Solver."
|
||||
<< " Please report this to the maintainers. : "
|
||||
<< options.linear_solver_ordering_type;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -257,6 +261,8 @@ bool SetupEvaluator(PreprocessedProblem* pp) {
|
||||
const Solver::Options& options = pp->options;
|
||||
pp->evaluator_options = Evaluator::Options();
|
||||
pp->evaluator_options.linear_solver_type = options.linear_solver_type;
|
||||
pp->evaluator_options.sparse_linear_algebra_library_type =
|
||||
options.sparse_linear_algebra_library_type;
|
||||
pp->evaluator_options.num_eliminate_blocks = 0;
|
||||
if (IsSchurType(options.linear_solver_type)) {
|
||||
pp->evaluator_options.num_eliminate_blocks =
|
||||
@@ -330,13 +336,19 @@ bool SetupInnerIterationMinimizer(PreprocessedProblem* pp) {
|
||||
}
|
||||
|
||||
// Configure and create a TrustRegionMinimizer object.
|
||||
void SetupMinimizerOptions(PreprocessedProblem* pp) {
|
||||
bool SetupMinimizerOptions(PreprocessedProblem* pp) {
|
||||
const Solver::Options& options = pp->options;
|
||||
|
||||
SetupCommonMinimizerOptions(pp);
|
||||
pp->minimizer_options.is_constrained =
|
||||
pp->reduced_program->IsBoundsConstrained();
|
||||
pp->minimizer_options.jacobian = pp->evaluator->CreateJacobian();
|
||||
if (pp->minimizer_options.jacobian == nullptr) {
|
||||
pp->error =
|
||||
"Unable to create Jacobian matrix. Likely because it is too large.";
|
||||
return false;
|
||||
}
|
||||
|
||||
pp->minimizer_options.inner_iteration_minimizer =
|
||||
pp->inner_iteration_minimizer;
|
||||
|
||||
@@ -349,9 +361,12 @@ void SetupMinimizerOptions(PreprocessedProblem* pp) {
|
||||
strategy_options.trust_region_strategy_type =
|
||||
options.trust_region_strategy_type;
|
||||
strategy_options.dogleg_type = options.dogleg_type;
|
||||
strategy_options.context = pp->problem->context();
|
||||
strategy_options.num_threads = options.num_threads;
|
||||
pp->minimizer_options.trust_region_strategy =
|
||||
TrustRegionStrategy::Create(strategy_options);
|
||||
CHECK(pp->minimizer_options.trust_region_strategy != nullptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -387,9 +402,7 @@ bool TrustRegionPreprocessor::Preprocess(const Solver::Options& options,
|
||||
return false;
|
||||
}
|
||||
|
||||
SetupMinimizerOptions(pp);
|
||||
return true;
|
||||
return SetupMinimizerOptions(pp);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/preprocessor.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor {
|
||||
public:
|
||||
@@ -45,8 +44,7 @@ class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor {
|
||||
PreprocessedProblem* preprocessed_problem) override;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,8 +35,7 @@
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
TrustRegionStepEvaluator::TrustRegionStepEvaluator(
|
||||
const double initial_cost, const int max_consecutive_nonmonotonic_steps)
|
||||
@@ -111,5 +110,4 @@ void TrustRegionStepEvaluator::StepAccepted(const double cost,
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2016 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -33,8 +33,7 @@
|
||||
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// The job of the TrustRegionStepEvaluator is to evaluate the quality
|
||||
// of a step, i.e., how the cost of a step compares with the reduction
|
||||
@@ -118,7 +117,6 @@ class CERES_NO_EXPORT TrustRegionStepEvaluator {
|
||||
int num_consecutive_nonmonotonic_steps_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_TRUST_REGION_STEP_EVALUATOR_H_
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,8 +37,7 @@
|
||||
#include "ceres/dogleg_strategy.h"
|
||||
#include "ceres/levenberg_marquardt_strategy.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
TrustRegionStrategy::~TrustRegionStrategy() = default;
|
||||
|
||||
@@ -59,5 +58,4 @@ std::unique_ptr<TrustRegionStrategy> TrustRegionStrategy::Create(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,7 @@
|
||||
#include "ceres/internal/export.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class LinearSolver;
|
||||
class SparseMatrix;
|
||||
@@ -74,6 +73,9 @@ class CERES_NO_EXPORT TrustRegionStrategy {
|
||||
|
||||
// Further specify which dogleg method to use
|
||||
DoglegType dogleg_type = TRADITIONAL_DOGLEG;
|
||||
|
||||
ContextImpl* context = nullptr;
|
||||
int num_threads = 1;
|
||||
};
|
||||
|
||||
// Factory.
|
||||
@@ -112,7 +114,8 @@ class CERES_NO_EXPORT TrustRegionStrategy {
|
||||
int num_iterations = -1;
|
||||
|
||||
// Status of the linear solver used to solve the Newton system.
|
||||
LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE;
|
||||
LinearSolverTerminationType termination_type =
|
||||
LinearSolverTerminationType::FAILURE;
|
||||
};
|
||||
|
||||
// Use the current radius to solve for the trust region step.
|
||||
@@ -141,8 +144,7 @@ class CERES_NO_EXPORT TrustRegionStrategy {
|
||||
virtual double Radius() const = 0;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
78
extern/ceres/internal/ceres/types.cc
vendored
78
extern/ceres/internal/ceres/types.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,14 +39,12 @@
|
||||
|
||||
namespace ceres {
|
||||
|
||||
using std::string;
|
||||
|
||||
// clang-format off
|
||||
#define CASESTR(x) case x: return #x
|
||||
#define STRENUM(x) if (value == #x) { *type = x; return true; }
|
||||
// clang-format on
|
||||
|
||||
static void UpperCase(string* input) {
|
||||
static void UpperCase(std::string* input) {
|
||||
std::transform(input->begin(), input->end(), input->begin(), ::toupper);
|
||||
}
|
||||
|
||||
@@ -64,7 +62,7 @@ const char* LinearSolverTypeToString(LinearSolverType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToLinearSolverType(string value, LinearSolverType* type) {
|
||||
bool StringToLinearSolverType(std::string value, LinearSolverType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(DENSE_NORMAL_CHOLESKY);
|
||||
STRENUM(DENSE_QR);
|
||||
@@ -81,6 +79,7 @@ const char* PreconditionerTypeToString(PreconditionerType type) {
|
||||
CASESTR(IDENTITY);
|
||||
CASESTR(JACOBI);
|
||||
CASESTR(SCHUR_JACOBI);
|
||||
CASESTR(SCHUR_POWER_SERIES_EXPANSION);
|
||||
CASESTR(CLUSTER_JACOBI);
|
||||
CASESTR(CLUSTER_TRIDIAGONAL);
|
||||
CASESTR(SUBSET);
|
||||
@@ -89,11 +88,12 @@ const char* PreconditionerTypeToString(PreconditionerType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToPreconditionerType(string value, PreconditionerType* type) {
|
||||
bool StringToPreconditionerType(std::string value, PreconditionerType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(IDENTITY);
|
||||
STRENUM(JACOBI);
|
||||
STRENUM(SCHUR_JACOBI);
|
||||
STRENUM(SCHUR_POWER_SERIES_EXPANSION);
|
||||
STRENUM(CLUSTER_JACOBI);
|
||||
STRENUM(CLUSTER_TRIDIAGONAL);
|
||||
STRENUM(SUBSET);
|
||||
@@ -104,9 +104,9 @@ const char* SparseLinearAlgebraLibraryTypeToString(
|
||||
SparseLinearAlgebraLibraryType type) {
|
||||
switch (type) {
|
||||
CASESTR(SUITE_SPARSE);
|
||||
CASESTR(CX_SPARSE);
|
||||
CASESTR(EIGEN_SPARSE);
|
||||
CASESTR(ACCELERATE_SPARSE);
|
||||
CASESTR(CUDA_SPARSE);
|
||||
CASESTR(NO_SPARSE);
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
@@ -114,16 +114,33 @@ const char* SparseLinearAlgebraLibraryTypeToString(
|
||||
}
|
||||
|
||||
bool StringToSparseLinearAlgebraLibraryType(
|
||||
string value, SparseLinearAlgebraLibraryType* type) {
|
||||
std::string value, SparseLinearAlgebraLibraryType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(SUITE_SPARSE);
|
||||
STRENUM(CX_SPARSE);
|
||||
STRENUM(EIGEN_SPARSE);
|
||||
STRENUM(ACCELERATE_SPARSE);
|
||||
STRENUM(CUDA_SPARSE);
|
||||
STRENUM(NO_SPARSE);
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* LinearSolverOrderingTypeToString(LinearSolverOrderingType type) {
|
||||
switch (type) {
|
||||
CASESTR(AMD);
|
||||
CASESTR(NESDIS);
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToLinearSolverOrderingType(std::string value,
|
||||
LinearSolverOrderingType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(AMD);
|
||||
STRENUM(NESDIS);
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* DenseLinearAlgebraLibraryTypeToString(
|
||||
DenseLinearAlgebraLibraryType type) {
|
||||
switch (type) {
|
||||
@@ -136,7 +153,7 @@ const char* DenseLinearAlgebraLibraryTypeToString(
|
||||
}
|
||||
|
||||
bool StringToDenseLinearAlgebraLibraryType(
|
||||
string value, DenseLinearAlgebraLibraryType* type) {
|
||||
std::string value, DenseLinearAlgebraLibraryType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(EIGEN);
|
||||
STRENUM(LAPACK);
|
||||
@@ -153,7 +170,7 @@ const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToTrustRegionStrategyType(string value,
|
||||
bool StringToTrustRegionStrategyType(std::string value,
|
||||
TrustRegionStrategyType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(LEVENBERG_MARQUARDT);
|
||||
@@ -170,7 +187,7 @@ const char* DoglegTypeToString(DoglegType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToDoglegType(string value, DoglegType* type) {
|
||||
bool StringToDoglegType(std::string value, DoglegType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(TRADITIONAL_DOGLEG);
|
||||
STRENUM(SUBSPACE_DOGLEG);
|
||||
@@ -186,7 +203,7 @@ const char* MinimizerTypeToString(MinimizerType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToMinimizerType(string value, MinimizerType* type) {
|
||||
bool StringToMinimizerType(std::string value, MinimizerType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(TRUST_REGION);
|
||||
STRENUM(LINE_SEARCH);
|
||||
@@ -204,7 +221,7 @@ const char* LineSearchDirectionTypeToString(LineSearchDirectionType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToLineSearchDirectionType(string value,
|
||||
bool StringToLineSearchDirectionType(std::string value,
|
||||
LineSearchDirectionType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(STEEPEST_DESCENT);
|
||||
@@ -223,7 +240,7 @@ const char* LineSearchTypeToString(LineSearchType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToLineSearchType(string value, LineSearchType* type) {
|
||||
bool StringToLineSearchType(std::string value, LineSearchType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(ARMIJO);
|
||||
STRENUM(WOLFE);
|
||||
@@ -241,7 +258,7 @@ const char* LineSearchInterpolationTypeToString(
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToLineSearchInterpolationType(string value,
|
||||
bool StringToLineSearchInterpolationType(std::string value,
|
||||
LineSearchInterpolationType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(BISECTION);
|
||||
@@ -262,7 +279,7 @@ const char* NonlinearConjugateGradientTypeToString(
|
||||
}
|
||||
|
||||
bool StringToNonlinearConjugateGradientType(
|
||||
string value, NonlinearConjugateGradientType* type) {
|
||||
std::string value, NonlinearConjugateGradientType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(FLETCHER_REEVES);
|
||||
STRENUM(POLAK_RIBIERE);
|
||||
@@ -279,7 +296,7 @@ const char* CovarianceAlgorithmTypeToString(CovarianceAlgorithmType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToCovarianceAlgorithmType(string value,
|
||||
bool StringToCovarianceAlgorithmType(std::string value,
|
||||
CovarianceAlgorithmType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(DENSE_SVD);
|
||||
@@ -297,7 +314,8 @@ const char* NumericDiffMethodTypeToString(NumericDiffMethodType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToNumericDiffMethodType(string value, NumericDiffMethodType* type) {
|
||||
bool StringToNumericDiffMethodType(std::string value,
|
||||
NumericDiffMethodType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(CENTRAL);
|
||||
STRENUM(FORWARD);
|
||||
@@ -314,7 +332,7 @@ const char* VisibilityClusteringTypeToString(VisibilityClusteringType type) {
|
||||
}
|
||||
}
|
||||
|
||||
bool StringToVisibilityClusteringType(string value,
|
||||
bool StringToVisibilityClusteringType(std::string value,
|
||||
VisibilityClusteringType* type) {
|
||||
UpperCase(&value);
|
||||
STRENUM(CANONICAL_VIEWS);
|
||||
@@ -387,14 +405,6 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable(
|
||||
#endif
|
||||
}
|
||||
|
||||
if (type == CX_SPARSE) {
|
||||
#ifdef CERES_NO_CXSPARSE
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (type == ACCELERATE_SPARSE) {
|
||||
#ifdef CERES_NO_ACCELERATE_SPARSE
|
||||
return false;
|
||||
@@ -411,6 +421,18 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable(
|
||||
#endif
|
||||
}
|
||||
|
||||
if (type == CUDA_SPARSE) {
|
||||
#ifdef CERES_NO_CUDA
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (type == NO_SPARSE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG(WARNING) << "Unknown sparse linear algebra library " << type;
|
||||
return false;
|
||||
}
|
||||
|
||||
30
extern/ceres/internal/ceres/visibility.cc
vendored
30
extern/ceres/internal/ceres/visibility.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -44,18 +44,11 @@
|
||||
#include "ceres/pair_hash.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::make_pair;
|
||||
using std::max;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
|
||||
const int num_eliminate_blocks,
|
||||
vector<set<int>>* visibility) {
|
||||
std::vector<std::set<int>>* visibility) {
|
||||
CHECK(visibility != nullptr);
|
||||
|
||||
// Clear the visibility vector and resize it to hold a
|
||||
@@ -64,7 +57,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
|
||||
visibility->resize(block_structure.cols.size() - num_eliminate_blocks);
|
||||
|
||||
for (const auto& row : block_structure.rows) {
|
||||
const vector<Cell>& cells = row.cells;
|
||||
const std::vector<Cell>& cells = row.cells;
|
||||
int block_id = cells[0].block_id;
|
||||
// If the first block is not an e_block, then skip this row block.
|
||||
if (block_id >= num_eliminate_blocks) {
|
||||
@@ -81,7 +74,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure,
|
||||
}
|
||||
|
||||
std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
const vector<set<int>>& visibility) {
|
||||
const std::vector<std::set<int>>& visibility) {
|
||||
const time_t start_time = time(nullptr);
|
||||
// Compute the number of e_blocks/point blocks. Since the visibility
|
||||
// set for each e_block/camera contains the set of e_blocks/points
|
||||
@@ -89,7 +82,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
int num_points = 0;
|
||||
for (const auto& visible : visibility) {
|
||||
if (!visible.empty()) {
|
||||
num_points = max(num_points, (*visible.rbegin()) + 1);
|
||||
num_points = std::max(num_points, (*visible.rbegin()) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +91,9 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
// cameras. However, to compute the sparsity structure of the Schur
|
||||
// Complement efficiently, its better to have the point->camera
|
||||
// mapping.
|
||||
vector<set<int>> inverse_visibility(num_points);
|
||||
std::vector<std::set<int>> inverse_visibility(num_points);
|
||||
for (int i = 0; i < visibility.size(); i++) {
|
||||
const set<int>& visibility_set = visibility[i];
|
||||
const std::set<int>& visibility_set = visibility[i];
|
||||
for (int v : visibility_set) {
|
||||
inverse_visibility[v].insert(i);
|
||||
}
|
||||
@@ -108,7 +101,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
|
||||
// Map from camera pairs to number of points visible to both cameras
|
||||
// in the pair.
|
||||
std::unordered_map<pair<int, int>, int, pair_hash> camera_pairs;
|
||||
std::unordered_map<std::pair<int, int>, int, pair_hash> camera_pairs;
|
||||
|
||||
// Count the number of points visible to each camera/f_block pair.
|
||||
for (const auto& inverse_visibility_set : inverse_visibility) {
|
||||
@@ -117,7 +110,7 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
++camera1) {
|
||||
auto camera2 = camera1;
|
||||
for (++camera2; camera2 != inverse_visibility_set.end(); ++camera2) {
|
||||
++(camera_pairs[make_pair(*camera1, *camera2)]);
|
||||
++(camera_pairs[std::make_pair(*camera1, *camera2)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -151,5 +144,4 @@ std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
return graph;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
8
extern/ceres/internal/ceres/visibility.h
vendored
8
extern/ceres/internal/ceres/visibility.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,8 +43,7 @@
|
||||
#include "ceres/internal/disable_warnings.h"
|
||||
#include "ceres/internal/export.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
struct CompressedRowBlockStructure;
|
||||
|
||||
@@ -77,8 +76,7 @@ CERES_NO_EXPORT void ComputeVisibility(
|
||||
CERES_NO_EXPORT std::unique_ptr<WeightedGraph<int>> CreateSchurComplementGraph(
|
||||
const std::vector<std::set<int>>& visibility);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2022 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -35,6 +35,8 @@
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -50,14 +52,7 @@
|
||||
#include "ceres/visibility.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
using std::make_pair;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::swap;
|
||||
using std::vector;
|
||||
namespace ceres::internal {
|
||||
|
||||
// TODO(sameeragarwal): Currently these are magic weights for the
|
||||
// preconditioner construction. Move these higher up into the Options
|
||||
@@ -82,10 +77,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner(
|
||||
CHECK(options_.context != nullptr);
|
||||
|
||||
// Vector of camera block sizes
|
||||
block_size_.resize(num_blocks_);
|
||||
for (int i = 0; i < num_blocks_; ++i) {
|
||||
block_size_[i] = bs.cols[i + options_.elimination_groups[0]].size;
|
||||
}
|
||||
blocks_ = Tail(bs.cols, bs.cols.size() - options_.elimination_groups[0]);
|
||||
|
||||
const time_t start_time = time(nullptr);
|
||||
switch (options_.type) {
|
||||
@@ -107,14 +99,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner(
|
||||
LinearSolver::Options sparse_cholesky_options;
|
||||
sparse_cholesky_options.sparse_linear_algebra_library_type =
|
||||
options_.sparse_linear_algebra_library_type;
|
||||
|
||||
// The preconditioner's sparsity is not available in the
|
||||
// preprocessor, so the columns of the Jacobian have not been
|
||||
// reordered to minimize fill in when computing its sparse Cholesky
|
||||
// factorization. So we must tell the SparseCholesky object to
|
||||
// perform approximate minimum-degree reordering, which is done by
|
||||
// setting use_postordering to true.
|
||||
sparse_cholesky_options.use_postordering = true;
|
||||
sparse_cholesky_options.ordering_type = options_.ordering_type;
|
||||
sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options);
|
||||
|
||||
const time_t init_time = time(nullptr);
|
||||
@@ -132,13 +117,13 @@ VisibilityBasedPreconditioner::~VisibilityBasedPreconditioner() = default;
|
||||
// preconditioner matrix.
|
||||
void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity(
|
||||
const CompressedRowBlockStructure& bs) {
|
||||
vector<set<int>> visibility;
|
||||
std::vector<std::set<int>> visibility;
|
||||
ComputeVisibility(bs, options_.elimination_groups[0], &visibility);
|
||||
CHECK_EQ(num_blocks_, visibility.size());
|
||||
ClusterCameras(visibility);
|
||||
cluster_pairs_.clear();
|
||||
for (int i = 0; i < num_clusters_; ++i) {
|
||||
cluster_pairs_.insert(make_pair(i, i));
|
||||
cluster_pairs_.insert(std::make_pair(i, i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,7 +135,7 @@ void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity(
|
||||
// of edges in this forest are the cluster pairs.
|
||||
void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
|
||||
const CompressedRowBlockStructure& bs) {
|
||||
vector<set<int>> visibility;
|
||||
std::vector<std::set<int>> visibility;
|
||||
ComputeVisibility(bs, options_.elimination_groups[0], &visibility);
|
||||
CHECK_EQ(num_blocks_, visibility.size());
|
||||
ClusterCameras(visibility);
|
||||
@@ -159,7 +144,7 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
|
||||
// edges are the number of 3D points/e_blocks visible in both the
|
||||
// clusters at the ends of the edge. Return an approximate degree-2
|
||||
// maximum spanning forest of this graph.
|
||||
vector<set<int>> cluster_visibility;
|
||||
std::vector<std::set<int>> cluster_visibility;
|
||||
ComputeClusterVisibility(visibility, &cluster_visibility);
|
||||
auto cluster_graph = CreateClusterGraph(cluster_visibility);
|
||||
CHECK(cluster_graph != nullptr);
|
||||
@@ -172,8 +157,8 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity(
|
||||
void VisibilityBasedPreconditioner::InitStorage(
|
||||
const CompressedRowBlockStructure& bs) {
|
||||
ComputeBlockPairsInPreconditioner(bs);
|
||||
m_ = std::make_unique<BlockRandomAccessSparseMatrix>(block_size_,
|
||||
block_pairs_);
|
||||
m_ = std::make_unique<BlockRandomAccessSparseMatrix>(
|
||||
blocks_, block_pairs_, options_.context, options_.num_threads);
|
||||
}
|
||||
|
||||
// Call the canonical views algorithm and cluster the cameras based on
|
||||
@@ -183,14 +168,14 @@ void VisibilityBasedPreconditioner::InitStorage(
|
||||
// The cluster_membership_ vector is updated to indicate cluster
|
||||
// memberships for each camera block.
|
||||
void VisibilityBasedPreconditioner::ClusterCameras(
|
||||
const vector<set<int>>& visibility) {
|
||||
const std::vector<std::set<int>>& visibility) {
|
||||
auto schur_complement_graph = CreateSchurComplementGraph(visibility);
|
||||
CHECK(schur_complement_graph != nullptr);
|
||||
|
||||
std::unordered_map<int, int> membership;
|
||||
|
||||
if (options_.visibility_clustering_type == CANONICAL_VIEWS) {
|
||||
vector<int> centers;
|
||||
std::vector<int> centers;
|
||||
CanonicalViewsClusteringOptions clustering_options;
|
||||
clustering_options.size_penalty_weight = kCanonicalViewsSizePenaltyWeight;
|
||||
clustering_options.similarity_penalty_weight =
|
||||
@@ -236,7 +221,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
|
||||
const CompressedRowBlockStructure& bs) {
|
||||
block_pairs_.clear();
|
||||
for (int i = 0; i < num_blocks_; ++i) {
|
||||
block_pairs_.insert(make_pair(i, i));
|
||||
block_pairs_.insert(std::make_pair(i, i));
|
||||
}
|
||||
|
||||
int r = 0;
|
||||
@@ -264,7 +249,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
|
||||
break;
|
||||
}
|
||||
|
||||
set<int> f_blocks;
|
||||
std::set<int> f_blocks;
|
||||
for (; r < num_row_blocks; ++r) {
|
||||
const CompressedRow& row = bs.rows[r];
|
||||
if (row.cells.front().block_id != e_block_id) {
|
||||
@@ -303,7 +288,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner(
|
||||
const int block2 = cell.block_id - num_eliminate_blocks;
|
||||
if (block1 <= block2) {
|
||||
if (IsBlockPairInPreconditioner(block1, block2)) {
|
||||
block_pairs_.insert(make_pair(block1, block2));
|
||||
block_pairs_.insert(std::make_pair(block1, block2));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -354,7 +339,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
// scaling is not needed, which is quite often in our experience.
|
||||
LinearSolverTerminationType status = Factorize();
|
||||
|
||||
if (status == LINEAR_SOLVER_FATAL_ERROR) {
|
||||
if (status == LinearSolverTerminationType::FATAL_ERROR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -363,7 +348,8 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
// belong to the edges of the degree-2 forest. In the CLUSTER_JACOBI
|
||||
// case, the preconditioner is guaranteed to be positive
|
||||
// semidefinite.
|
||||
if (status == LINEAR_SOLVER_FAILURE && options_.type == CLUSTER_TRIDIAGONAL) {
|
||||
if (status == LinearSolverTerminationType::FAILURE &&
|
||||
options_.type == CLUSTER_TRIDIAGONAL) {
|
||||
VLOG(1) << "Unscaled factorization failed. Retrying with off-diagonal "
|
||||
<< "scaling";
|
||||
ScaleOffDiagonalCells();
|
||||
@@ -371,7 +357,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
|
||||
}
|
||||
|
||||
VLOG(2) << "Compute time: " << time(nullptr) - start_time;
|
||||
return (status == LINEAR_SOLVER_SUCCESS);
|
||||
return (status == LinearSolverTerminationType::SUCCESS);
|
||||
}
|
||||
|
||||
// Consider the preconditioner matrix as meta-block matrix, whose
|
||||
@@ -399,35 +385,44 @@ void VisibilityBasedPreconditioner::ScaleOffDiagonalCells() {
|
||||
// dominance. See Lemma 1 in "Visibility Based Preconditioning
|
||||
// For Bundle Adjustment".
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block_size_[block1], block_size_[block2]) *= 0.5;
|
||||
m.block(r, c, blocks_[block1].size, blocks_[block2].size) *= 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the sparse Cholesky factorization of the preconditioner
|
||||
// matrix.
|
||||
LinearSolverTerminationType VisibilityBasedPreconditioner::Factorize() {
|
||||
// Extract the TripletSparseMatrix that is used for actually storing
|
||||
// Extract the BlockSparseMatrix that is used for actually storing
|
||||
// S and convert it into a CompressedRowSparseMatrix.
|
||||
const TripletSparseMatrix* tsm =
|
||||
down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->mutable_matrix();
|
||||
|
||||
std::unique_ptr<CompressedRowSparseMatrix> lhs;
|
||||
const BlockSparseMatrix* bsm =
|
||||
down_cast<BlockRandomAccessSparseMatrix*>(m_.get())->matrix();
|
||||
const CompressedRowSparseMatrix::StorageType storage_type =
|
||||
sparse_cholesky_->StorageType();
|
||||
if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) {
|
||||
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm);
|
||||
lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR);
|
||||
if (storage_type ==
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
|
||||
if (!m_crs_) {
|
||||
m_crs_ = bsm->ToCompressedRowSparseMatrix();
|
||||
m_crs_->set_storage_type(
|
||||
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
|
||||
} else {
|
||||
bsm->UpdateCompressedRowSparseMatrix(m_crs_.get());
|
||||
}
|
||||
} else {
|
||||
lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm);
|
||||
lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR);
|
||||
if (!m_crs_) {
|
||||
m_crs_ = bsm->ToCompressedRowSparseMatrixTranspose();
|
||||
m_crs_->set_storage_type(
|
||||
CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
|
||||
} else {
|
||||
bsm->UpdateCompressedRowSparseMatrixTranspose(m_crs_.get());
|
||||
}
|
||||
}
|
||||
|
||||
std::string message;
|
||||
return sparse_cholesky_->Factorize(lhs.get(), &message);
|
||||
return sparse_cholesky_->Factorize(m_crs_.get(), &message);
|
||||
}
|
||||
|
||||
void VisibilityBasedPreconditioner::RightMultiply(const double* x,
|
||||
double* y) const {
|
||||
void VisibilityBasedPreconditioner::RightMultiplyAndAccumulate(
|
||||
const double* x, double* y) const {
|
||||
CHECK(x != nullptr);
|
||||
CHECK(y != nullptr);
|
||||
CHECK(sparse_cholesky_ != nullptr);
|
||||
@@ -445,9 +440,9 @@ bool VisibilityBasedPreconditioner::IsBlockPairInPreconditioner(
|
||||
int cluster1 = cluster_membership_[block1];
|
||||
int cluster2 = cluster_membership_[block2];
|
||||
if (cluster1 > cluster2) {
|
||||
swap(cluster1, cluster2);
|
||||
std::swap(cluster1, cluster2);
|
||||
}
|
||||
return (cluster_pairs_.count(make_pair(cluster1, cluster2)) > 0);
|
||||
return (cluster_pairs_.count(std::make_pair(cluster1, cluster2)) > 0);
|
||||
}
|
||||
|
||||
bool VisibilityBasedPreconditioner::IsBlockPairOffDiagonal(
|
||||
@@ -459,7 +454,7 @@ bool VisibilityBasedPreconditioner::IsBlockPairOffDiagonal(
|
||||
// each vertex.
|
||||
void VisibilityBasedPreconditioner::ForestToClusterPairs(
|
||||
const WeightedGraph<int>& forest,
|
||||
std::unordered_set<pair<int, int>, pair_hash>* cluster_pairs) const {
|
||||
std::unordered_set<std::pair<int, int>, pair_hash>* cluster_pairs) const {
|
||||
CHECK(cluster_pairs != nullptr);
|
||||
cluster_pairs->clear();
|
||||
const std::unordered_set<int>& vertices = forest.vertices();
|
||||
@@ -468,11 +463,11 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs(
|
||||
// Add all the cluster pairs corresponding to the edges in the
|
||||
// forest.
|
||||
for (const int cluster1 : vertices) {
|
||||
cluster_pairs->insert(make_pair(cluster1, cluster1));
|
||||
cluster_pairs->insert(std::make_pair(cluster1, cluster1));
|
||||
const std::unordered_set<int>& neighbors = forest.Neighbors(cluster1);
|
||||
for (const int cluster2 : neighbors) {
|
||||
if (cluster1 < cluster2) {
|
||||
cluster_pairs->insert(make_pair(cluster1, cluster2));
|
||||
cluster_pairs->insert(std::make_pair(cluster1, cluster2));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -482,8 +477,8 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs(
|
||||
// of all its cameras. In other words, the set of points visible to
|
||||
// any camera in the cluster.
|
||||
void VisibilityBasedPreconditioner::ComputeClusterVisibility(
|
||||
const vector<set<int>>& visibility,
|
||||
vector<set<int>>* cluster_visibility) const {
|
||||
const std::vector<std::set<int>>& visibility,
|
||||
std::vector<std::set<int>>* cluster_visibility) const {
|
||||
CHECK(cluster_visibility != nullptr);
|
||||
cluster_visibility->resize(0);
|
||||
cluster_visibility->resize(num_clusters_);
|
||||
@@ -499,7 +494,7 @@ void VisibilityBasedPreconditioner::ComputeClusterVisibility(
|
||||
// vertices.
|
||||
std::unique_ptr<WeightedGraph<int>>
|
||||
VisibilityBasedPreconditioner::CreateClusterGraph(
|
||||
const vector<set<int>>& cluster_visibility) const {
|
||||
const std::vector<std::set<int>>& cluster_visibility) const {
|
||||
auto cluster_graph = std::make_unique<WeightedGraph<int>>();
|
||||
|
||||
for (int i = 0; i < num_clusters_; ++i) {
|
||||
@@ -507,15 +502,15 @@ VisibilityBasedPreconditioner::CreateClusterGraph(
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_clusters_; ++i) {
|
||||
const set<int>& cluster_i = cluster_visibility[i];
|
||||
const std::set<int>& cluster_i = cluster_visibility[i];
|
||||
for (int j = i + 1; j < num_clusters_; ++j) {
|
||||
vector<int> intersection;
|
||||
const set<int>& cluster_j = cluster_visibility[j];
|
||||
set_intersection(cluster_i.begin(),
|
||||
cluster_i.end(),
|
||||
cluster_j.begin(),
|
||||
cluster_j.end(),
|
||||
back_inserter(intersection));
|
||||
std::vector<int> intersection;
|
||||
const std::set<int>& cluster_j = cluster_visibility[j];
|
||||
std::set_intersection(cluster_i.begin(),
|
||||
cluster_i.end(),
|
||||
cluster_j.begin(),
|
||||
cluster_j.end(),
|
||||
std::back_inserter(intersection));
|
||||
|
||||
if (intersection.size() > 0) {
|
||||
// Clusters interact strongly when they share a large number
|
||||
@@ -540,7 +535,7 @@ VisibilityBasedPreconditioner::CreateClusterGraph(
|
||||
// of integers so that the cluster ids are in [0, num_clusters_).
|
||||
void VisibilityBasedPreconditioner::FlattenMembershipMap(
|
||||
const std::unordered_map<int, int>& membership_map,
|
||||
vector<int>* membership_vector) const {
|
||||
std::vector<int>* membership_vector) const {
|
||||
CHECK(membership_vector != nullptr);
|
||||
membership_vector->resize(0);
|
||||
membership_vector->resize(num_blocks_, -1);
|
||||
@@ -576,5 +571,4 @@ void VisibilityBasedPreconditioner::FlattenMembershipMap(
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2017 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -55,14 +55,14 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/graph.h"
|
||||
#include "ceres/linear_solver.h"
|
||||
#include "ceres/pair_hash.h"
|
||||
#include "ceres/preconditioner.h"
|
||||
#include "ceres/sparse_cholesky.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
class BlockRandomAccessSparseMatrix;
|
||||
class BlockSparseMatrix;
|
||||
@@ -123,7 +123,7 @@ class SchurEliminatorBase;
|
||||
// VisibilityBasedPreconditioner preconditioner(
|
||||
// *A.block_structure(), options);
|
||||
// preconditioner.Update(A, nullptr);
|
||||
// preconditioner.RightMultiply(x, y);
|
||||
// preconditioner.RightMultiplyAndAccumulate(x, y);
|
||||
class CERES_NO_EXPORT VisibilityBasedPreconditioner
|
||||
: public BlockSparseMatrixPreconditioner {
|
||||
public:
|
||||
@@ -141,7 +141,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
|
||||
~VisibilityBasedPreconditioner() override;
|
||||
|
||||
// Preconditioner interface
|
||||
void RightMultiply(const double* x, double* y) const final;
|
||||
void RightMultiplyAndAccumulate(const double* x, double* y) const final;
|
||||
int num_rows() const final;
|
||||
|
||||
friend class VisibilityBasedPreconditionerTest;
|
||||
@@ -177,7 +177,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
|
||||
int num_clusters_;
|
||||
|
||||
// Sizes of the blocks in the schur complement.
|
||||
std::vector<int> block_size_;
|
||||
std::vector<Block> blocks_;
|
||||
|
||||
// Mapping from cameras to clusters.
|
||||
std::vector<int> cluster_membership_;
|
||||
@@ -194,10 +194,10 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner
|
||||
|
||||
// Preconditioner matrix.
|
||||
std::unique_ptr<BlockRandomAccessSparseMatrix> m_;
|
||||
std::unique_ptr<CompressedRowSparseMatrix> m_crs_;
|
||||
std::unique_ptr<SparseCholesky> sparse_cholesky_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#endif // CERES_INTERNAL_VISIBILITY_BASED_PRECONDITIONER_H_
|
||||
|
||||
22
extern/ceres/internal/ceres/wall_time.cc
vendored
22
extern/ceres/internal/ceres/wall_time.cc
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,13 +30,9 @@
|
||||
|
||||
#include "ceres/wall_time.h"
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#else
|
||||
#include <ctime>
|
||||
#endif
|
||||
|
||||
#include "ceres/internal/config.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
@@ -44,13 +40,9 @@
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
double WallTimeInSeconds() {
|
||||
#ifdef CERES_USE_OPENMP
|
||||
return omp_get_wtime();
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER count;
|
||||
LARGE_INTEGER frequency;
|
||||
@@ -63,7 +55,6 @@ double WallTimeInSeconds() {
|
||||
gettimeofday(&time_val, nullptr);
|
||||
return (time_val.tv_sec + time_val.tv_usec * 1e-6);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
EventLogger::EventLogger(const std::string& logger_name) {
|
||||
@@ -74,7 +65,7 @@ EventLogger::EventLogger(const std::string& logger_name) {
|
||||
start_time_ = WallTimeInSeconds();
|
||||
last_event_time_ = start_time_;
|
||||
events_ = StringPrintf(
|
||||
"\n%s\n Delta Cumulative\n",
|
||||
"\n%s\n Delta Cumulative\n",
|
||||
logger_name.c_str());
|
||||
}
|
||||
|
||||
@@ -103,5 +94,4 @@ void EventLogger::AddEvent(const std::string& event_name) {
|
||||
absolute_time_delta);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
14
extern/ceres/internal/ceres/wall_time.h
vendored
14
extern/ceres/internal/ceres/wall_time.h
vendored
@@ -1,5 +1,5 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2015 Google Inc. All rights reserved.
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
// http://ceres-solver.org/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,13 +39,10 @@
|
||||
#include "ceres/stringprintf.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
namespace ceres::internal {
|
||||
|
||||
// Returns time, in seconds, from some arbitrary starting point. If
|
||||
// OpenMP is available then the high precision openmp_get_wtime()
|
||||
// function is used. Otherwise on unixes, gettimeofday is used. The
|
||||
// granularity is in seconds on windows systems.
|
||||
// Returns time, in seconds, from some arbitrary starting point. On unixes,
|
||||
// gettimeofday is used. The granularity is microseconds.
|
||||
CERES_NO_EXPORT double WallTimeInSeconds();
|
||||
|
||||
// Log a series of events, recording for each event the time elapsed
|
||||
@@ -84,8 +81,7 @@ class CERES_NO_EXPORT EventLogger {
|
||||
std::string events_;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
} // namespace ceres::internal
|
||||
|
||||
#include "ceres/internal/reenable_warnings.h"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user