diff --git a/extern/ceres/CMakeLists.txt b/extern/ceres/CMakeLists.txt index c5965ce6d5c..a5b14a634d9 100644 --- a/extern/ceres/CMakeLists.txt +++ b/extern/ceres/CMakeLists.txt @@ -17,11 +17,11 @@ set(INC_SYS set(SRC include/ceres/autodiff_cost_function.h include/ceres/autodiff_first_order_function.h - include/ceres/autodiff_local_parameterization.h include/ceres/autodiff_manifold.h include/ceres/c_api.h include/ceres/ceres.h include/ceres/conditioned_cost_function.h + include/ceres/constants.h include/ceres/context.h include/ceres/cost_function.h include/ceres/cost_function_to_functor.h @@ -41,7 +41,6 @@ set(SRC include/ceres/jet.h include/ceres/jet_fwd.h include/ceres/line_manifold.h - include/ceres/local_parameterization.h include/ceres/loss_function.h include/ceres/manifold.h include/ceres/manifold_test_utils.h @@ -66,6 +65,7 @@ set(SRC include/ceres/internal/autodiff.h include/ceres/internal/disable_warnings.h include/ceres/internal/eigen.h + include/ceres/internal/euler_angles.h include/ceres/internal/fixed_array.h include/ceres/internal/householder_vector.h include/ceres/internal/integer_sequence_algorithm.h @@ -107,7 +107,6 @@ set(SRC internal/ceres/canonical_views_clustering.cc internal/ceres/canonical_views_clustering.h internal/ceres/casts.h - internal/ceres/cgnr_linear_operator.h internal/ceres/cgnr_solver.cc internal/ceres/cgnr_solver.h internal/ceres/compressed_col_sparse_matrix_utils.cc @@ -118,7 +117,6 @@ set(SRC internal/ceres/compressed_row_sparse_matrix.h internal/ceres/concurrent_queue.h internal/ceres/conditioned_cost_function.cc - internal/ceres/conjugate_gradients_solver.cc internal/ceres/conjugate_gradients_solver.h internal/ceres/context.cc internal/ceres/context_impl.cc @@ -131,9 +129,23 @@ set(SRC internal/ceres/covariance.cc internal/ceres/covariance_impl.cc internal/ceres/covariance_impl.h + internal/ceres/cuda_block_sparse_crs_view.cc + internal/ceres/cuda_block_sparse_crs_view.h + internal/ceres/cuda_block_structure.cc + 
internal/ceres/cuda_block_structure.h internal/ceres/cuda_buffer.h - internal/ceres/cxsparse.cc - internal/ceres/cxsparse.h + # internal/ceres/cuda_kernels_bsm_to_crs.cu.cc + # internal/ceres/cuda_kernels_bsm_to_crs.h + internal/ceres/cuda_kernels_utils.h + # internal/ceres/cuda_kernels_vector_ops.cu.cc + internal/ceres/cuda_kernels_vector_ops.h + internal/ceres/cuda_partitioned_block_sparse_crs_view.cc + internal/ceres/cuda_partitioned_block_sparse_crs_view.h + internal/ceres/cuda_sparse_matrix.cc + internal/ceres/cuda_sparse_matrix.h + internal/ceres/cuda_streamed_buffer.h + internal/ceres/cuda_vector.cc + internal/ceres/cuda_vector.h internal/ceres/dense_cholesky.cc internal/ceres/dense_cholesky.h internal/ceres/dense_jacobian_writer.h @@ -156,21 +168,25 @@ set(SRC internal/ceres/dynamic_compressed_row_sparse_matrix.h internal/ceres/dynamic_sparse_normal_cholesky_solver.cc internal/ceres/dynamic_sparse_normal_cholesky_solver.h + internal/ceres/eigen_vector_ops.h internal/ceres/eigensparse.cc internal/ceres/eigensparse.h internal/ceres/evaluation_callback.cc internal/ceres/evaluator.cc internal/ceres/evaluator.h internal/ceres/execution_summary.h + internal/ceres/fake_bundle_adjustment_jacobian.cc + internal/ceres/fake_bundle_adjustment_jacobian.h internal/ceres/file.cc internal/ceres/file.h internal/ceres/first_order_function.cc - internal/ceres/float_cxsparse.cc - internal/ceres/float_cxsparse.h internal/ceres/float_suitesparse.cc internal/ceres/float_suitesparse.h internal/ceres/function_sample.cc internal/ceres/function_sample.h + internal/ceres/generate_bundle_adjustment_tests.py + internal/ceres/generate_template_specializations.py + internal/ceres/generated internal/ceres/gradient_checker.cc internal/ceres/gradient_checking_cost_function.cc internal/ceres/gradient_checking_cost_function.h @@ -207,31 +223,34 @@ set(SRC internal/ceres/linear_operator.h internal/ceres/linear_solver.cc internal/ceres/linear_solver.h - internal/ceres/local_parameterization.cc 
internal/ceres/loss_function.cc internal/ceres/low_rank_inverse_hessian.cc internal/ceres/low_rank_inverse_hessian.h internal/ceres/manifold.cc - internal/ceres/manifold_adapter.h internal/ceres/map_util.h internal/ceres/minimizer.cc internal/ceres/minimizer.h internal/ceres/normal_prior.cc internal/ceres/pair_hash.h internal/ceres/parallel_for.h - internal/ceres/parallel_for_cxx.cc - internal/ceres/parallel_for_nothreads.cc - internal/ceres/parallel_for_openmp.cc + internal/ceres/parallel_invoke.cc + internal/ceres/parallel_invoke.h internal/ceres/parallel_utils.cc internal/ceres/parallel_utils.h + internal/ceres/parallel_vector_ops.cc + internal/ceres/parallel_vector_ops.h internal/ceres/parameter_block.h internal/ceres/parameter_block_ordering.cc internal/ceres/parameter_block_ordering.h + internal/ceres/partition_range_for_parallel_for.h internal/ceres/partitioned_matrix_view.cc internal/ceres/partitioned_matrix_view.h internal/ceres/partitioned_matrix_view_impl.h + internal/ceres/partitioned_matrix_view_template.py internal/ceres/polynomial.cc internal/ceres/polynomial.h + internal/ceres/power_series_expansion_preconditioner.cc + internal/ceres/power_series_expansion_preconditioner.h internal/ceres/preconditioner.cc internal/ceres/preconditioner.h internal/ceres/preprocessor.cc @@ -242,7 +261,6 @@ set(SRC internal/ceres/program.cc internal/ceres/program.h internal/ceres/program_evaluator.h - internal/ceres/random.h internal/ceres/reorder_program.cc internal/ceres/reorder_program.h internal/ceres/residual_block.cc @@ -254,6 +272,7 @@ set(SRC internal/ceres/schur_eliminator.cc internal/ceres/schur_eliminator.h internal/ceres/schur_eliminator_impl.h + internal/ceres/schur_eliminator_template.py internal/ceres/schur_jacobi_preconditioner.cc internal/ceres/schur_jacobi_preconditioner.h internal/ceres/schur_templates.cc diff --git a/extern/ceres/LICENSE b/extern/ceres/LICENSE index cf69df2e02f..b5d967ca7c9 100644 --- a/extern/ceres/LICENSE +++ b/extern/ceres/LICENSE 
@@ -1,5 +1,5 @@ Ceres Solver - A fast non-linear least squares minimizer -Copyright 2015 Google Inc. All rights reserved. +Copyright 2023 Google Inc. All rights reserved. http://ceres-solver.org/ Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/README.blender b/extern/ceres/README.blender index e90f783ccad..4b868268084 100644 --- a/extern/ceres/README.blender +++ b/extern/ceres/README.blender @@ -1,6 +1,6 @@ Project: Ceres Solver URL: http://ceres-solver.org/ License: SPDX:BSD-3-Clause -Upstream version 2.1.0 -Copyright: Copyright 2015 Google Inc. All rights reserved. +Upstream version 2.2.0 +Copyright: Copyright 2023 Google Inc. All rights reserved. Local modifications: None diff --git a/extern/ceres/config/ceres/internal/config.h b/extern/ceres/config/ceres/internal/config.h index 2566945e084..fafc01f6ec6 100644 --- a/extern/ceres/config/ceres/internal/config.h +++ b/extern/ceres/config/ceres/internal/config.h @@ -50,9 +50,6 @@ // If defined, Ceres was compiled without SuiteSparse. #define CERES_NO_SUITESPARSE -// If defined, Ceres was compiled without CXSparse. -#define CERES_NO_CXSPARSE - // If defined, Ceres was compiled without CUDA. #define CERES_NO_CUDA @@ -61,7 +58,6 @@ #if defined(CERES_NO_SUITESPARSE) && \ defined(CERES_NO_ACCELERATE_SPARSE) && \ - defined(CERES_NO_CXSPARSE) && \ !defined(CERES_USE_EIGEN_SPARSE) // NOLINT // If defined Ceres was compiled without any sparse linear algebra support. #define CERES_NO_SPARSE @@ -74,12 +70,11 @@ // routines. // #define CERES_NO_CUSTOM_BLAS -// If defined, Ceres was compiled without multithreading support. -// #define CERES_NO_THREADS -// If defined Ceres was compiled with OpenMP multithreading. -// #define CERES_USE_OPENMP -// If defined Ceres was compiled with modern C++ multithreading. -#define CERES_USE_CXX_THREADS +// If defined, Ceres was compiled with a version of SuiteSparse/CHOLMOD without +// the Partition module (requires METIS). 
+#define CERES_NO_CHOLMOD_PARTITION +// If defined Ceres was compiled without support for METIS via Eigen. +#define CERES_NO_EIGEN_METIS // If defined, Ceres was compiled with a version MSVC >= 2005 which // deprecated the standard POSIX names for bessel functions, replacing them @@ -88,22 +83,6 @@ #define CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS #endif -#if defined(CERES_USE_OPENMP) -#if defined(CERES_USE_CXX_THREADS) || defined(CERES_NO_THREADS) -#error CERES_USE_OPENMP is mutually exclusive to CERES_USE_CXX_THREADS and CERES_NO_THREADS -#endif -#elif defined(CERES_USE_CXX_THREADS) -#if defined(CERES_USE_OPENMP) || defined(CERES_NO_THREADS) -#error CERES_USE_CXX_THREADS is mutually exclusive to CERES_USE_OPENMP, CERES_USE_CXX_THREADS and CERES_NO_THREADS -#endif -#elif defined(CERES_NO_THREADS) -#if defined(CERES_USE_OPENMP) || defined(CERES_USE_CXX_THREADS) -#error CERES_NO_THREADS is mutually exclusive to CERES_USE_OPENMP and CERES_USE_CXX_THREADS -#endif -#else -# error One of CERES_USE_OPENMP, CERES_USE_CXX_THREADS or CERES_NO_THREADS must be defined. -#endif - // CERES_NO_SPARSE should be automatically defined by config.h if Ceres was // compiled without any sparse back-end. Verify that it has not subsequently // been inconsistently redefined. @@ -111,9 +90,6 @@ #if !defined(CERES_NO_SUITESPARSE) #error CERES_NO_SPARSE requires CERES_NO_SUITESPARSE. 
#endif -#if !defined(CERES_NO_CXSPARSE) -#error CERES_NO_SPARSE requires CERES_NO_CXSPARSE -#endif #if !defined(CERES_NO_ACCELERATE_SPARSE) #error CERES_NO_SPARSE requires CERES_NO_ACCELERATE_SPARSE #endif diff --git a/extern/ceres/config/ceres/internal/export.h b/extern/ceres/config/ceres/internal/export.h index c85bc5ca65d..4b66d075b4d 100644 --- a/extern/ceres/config/ceres/internal/export.h +++ b/extern/ceres/config/ceres/internal/export.h @@ -33,6 +33,7 @@ # define CERES_DEPRECATED_NO_EXPORT CERES_NO_EXPORT CERES_DEPRECATED #endif +/* NOLINTNEXTLINE(readability-avoid-unconditional-preprocessor-if) */ #if 0 /* DEFINE_NO_DEPRECATED */ # ifndef CERES_NO_DEPRECATED # define CERES_NO_DEPRECATED diff --git a/extern/ceres/include/ceres/autodiff_cost_function.h b/extern/ceres/include/ceres/autodiff_cost_function.h index cd256432a98..7e2fa711ad1 100644 --- a/extern/ceres/include/ceres/autodiff_cost_function.h +++ b/extern/ceres/include/ceres/autodiff_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/autodiff_first_order_function.h b/extern/ceres/include/ceres/autodiff_first_order_function.h index 7c13f4239a6..de7e8f12443 100644 --- a/extern/ceres/include/ceres/autodiff_first_order_function.h +++ b/extern/ceres/include/ceres/autodiff_first_order_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/autodiff_local_parameterization.h b/extern/ceres/include/ceres/autodiff_local_parameterization.h deleted file mode 100644 index 5f9b04d0670..00000000000 --- a/extern/ceres/include/ceres/autodiff_local_parameterization.h +++ /dev/null @@ -1,158 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: sergey.vfx@gmail.com (Sergey Sharybin) -// mierle@gmail.com (Keir Mierle) -// sameeragarwal@google.com (Sameer Agarwal) - -#ifndef CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ -#define CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ - -#include - -#include "ceres/internal/autodiff.h" -#include "ceres/local_parameterization.h" - -namespace ceres { - -// WARNING: LocalParameterizations are deprecated, so is -// AutoDiffLocalParameterization. They will be removed from Ceres Solver in -// version 2.2.0. Please use Manifolds and AutoDiffManifold instead. - -// Create local parameterization with Jacobians computed via automatic -// differentiation. For more information on local parameterizations, -// see include/ceres/local_parameterization.h -// -// To get an auto differentiated local parameterization, you must define -// a class with a templated operator() (a functor) that computes -// -// x_plus_delta = Plus(x, delta); -// -// the template parameter T. The autodiff framework substitutes appropriate -// "Jet" objects for T in order to compute the derivative when necessary, but -// this is hidden, and you should write the function as if T were a scalar type -// (e.g. a double-precision floating point number). -// -// The function must write the computed value in the last argument (the only -// non-const one) and return true to indicate success. 
-// -// For example, Quaternions have a three dimensional local -// parameterization. It's plus operation can be implemented as (taken -// from internal/ceres/auto_diff_local_parameterization_test.cc) -// -// struct QuaternionPlus { -// template -// bool operator()(const T* x, const T* delta, T* x_plus_delta) const { -// const T squared_norm_delta = -// delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]; -// -// T q_delta[4]; -// if (squared_norm_delta > T(0.0)) { -// T norm_delta = sqrt(squared_norm_delta); -// const T sin_delta_by_delta = sin(norm_delta) / norm_delta; -// q_delta[0] = cos(norm_delta); -// q_delta[1] = sin_delta_by_delta * delta[0]; -// q_delta[2] = sin_delta_by_delta * delta[1]; -// q_delta[3] = sin_delta_by_delta * delta[2]; -// } else { -// // We do not just use q_delta = [1,0,0,0] here because that is a -// // constant and when used for automatic differentiation will -// // lead to a zero derivative. Instead we take a first order -// // approximation and evaluate it at zero. -// q_delta[0] = T(1.0); -// q_delta[1] = delta[0]; -// q_delta[2] = delta[1]; -// q_delta[3] = delta[2]; -// } -// -// QuaternionProduct(q_delta, x, x_plus_delta); -// return true; -// } -// }; -// -// Then given this struct, the auto differentiated local -// parameterization can now be constructed as -// -// LocalParameterization* local_parameterization = -// new AutoDiffLocalParameterization; -// | | -// Global Size ---------------+ | -// Local Size -------------------+ -// -// WARNING: Since the functor will get instantiated with different types for -// T, you must to convert from other numeric types to T before mixing -// computations with other variables of type T. In the example above, this is -// seen where instead of using k_ directly, k_ is wrapped with T(k_). 
- -template -class CERES_DEPRECATED_WITH_MSG("Use AutoDiffManifold instead.") - AutoDiffLocalParameterization : public LocalParameterization { - public: - AutoDiffLocalParameterization() : functor_(new Functor()) {} - - // Takes ownership of functor. - explicit AutoDiffLocalParameterization(Functor* functor) - : functor_(functor) {} - - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override { - return (*functor_)(x, delta, x_plus_delta); - } - - bool ComputeJacobian(const double* x, double* jacobian) const override { - double zero_delta[kLocalSize]; - for (int i = 0; i < kLocalSize; ++i) { - zero_delta[i] = 0.0; - } - - double x_plus_delta[kGlobalSize]; - for (int i = 0; i < kGlobalSize; ++i) { - x_plus_delta[i] = 0.0; - } - - const double* parameter_ptrs[2] = {x, zero_delta}; - double* jacobian_ptrs[2] = {nullptr, jacobian}; - return internal::AutoDifferentiate< - kGlobalSize, - internal::StaticParameterDims>( - *functor_, parameter_ptrs, kGlobalSize, x_plus_delta, jacobian_ptrs); - } - - int GlobalSize() const override { return kGlobalSize; } - int LocalSize() const override { return kLocalSize; } - - const Functor& functor() const { return *functor_; } - - private: - std::unique_ptr functor_; -}; - -} // namespace ceres - -#endif // CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ diff --git a/extern/ceres/include/ceres/autodiff_manifold.h b/extern/ceres/include/ceres/autodiff_manifold.h index 3063e19e802..09b0aa2b471 100644 --- a/extern/ceres/include/ceres/autodiff_manifold.h +++ b/extern/ceres/include/ceres/autodiff_manifold.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/c_api.h b/extern/ceres/include/ceres/c_api.h index 1be8ca2e077..30bcaaf835e 100644 --- a/extern/ceres/include/ceres/c_api.h +++ b/extern/ceres/include/ceres/c_api.h @@ -1,5 +1,5 @@ /* Ceres Solver - A fast non-linear least squares minimizer - * Copyright 2019 Google Inc. All rights reserved. + * Copyright 2023 Google Inc. All rights reserved. * http://ceres-solver.org/ * * Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/ceres.h b/extern/ceres/include/ceres/ceres.h index c32477d4254..51f9d89631c 100644 --- a/extern/ceres/include/ceres/ceres.h +++ b/extern/ceres/include/ceres/ceres.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,11 +34,12 @@ #ifndef CERES_PUBLIC_CERES_H_ #define CERES_PUBLIC_CERES_H_ +// IWYU pragma: begin_exports #include "ceres/autodiff_cost_function.h" #include "ceres/autodiff_first_order_function.h" -#include "ceres/autodiff_local_parameterization.h" #include "ceres/autodiff_manifold.h" #include "ceres/conditioned_cost_function.h" +#include "ceres/constants.h" #include "ceres/context.h" #include "ceres/cost_function.h" #include "ceres/cost_function_to_functor.h" @@ -56,7 +57,6 @@ #include "ceres/iteration_callback.h" #include "ceres/jet.h" #include "ceres/line_manifold.h" -#include "ceres/local_parameterization.h" #include "ceres/loss_function.h" #include "ceres/manifold.h" #include "ceres/numeric_diff_cost_function.h" @@ -70,5 +70,6 @@ #include "ceres/sphere_manifold.h" #include "ceres/types.h" #include "ceres/version.h" +// IWYU pragma: end_exports #endif // CERES_PUBLIC_CERES_H_ diff --git 
a/extern/ceres/include/ceres/conditioned_cost_function.h b/extern/ceres/include/ceres/conditioned_cost_function.h index e4c3decbfd5..1edc006a937 100644 --- a/extern/ceres/include/ceres/conditioned_cost_function.h +++ b/extern/ceres/include/ceres/conditioned_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/float_cxsparse.cc b/extern/ceres/include/ceres/constants.h similarity index 75% rename from extern/ceres/internal/ceres/float_cxsparse.cc rename to extern/ceres/include/ceres/constants.h index a6d5e811efd..584b6697b92 100644 --- a/extern/ceres/internal/ceres/float_cxsparse.cc +++ b/extern/ceres/include/ceres/constants.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -26,24 +26,17 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // -// Author: sameeragarwal@google.com (Sameer Agarwal) +// Author: hellston20a@gmail.com (H S Helson Go) -#include "ceres/float_cxsparse.h" +#ifndef CERES_PUBLIC_CONSTANTS_H_ +#define CERES_PUBLIC_CONSTANTS_H_ -#include +// TODO(HSHelson): This header should no longer be necessary once C++20's +// (e.g. 
std::numbers::pi_v) becomes usable +namespace ceres::constants { +template +inline constexpr T pi_v(3.141592653589793238462643383279502884); +inline constexpr double pi = pi_v; +} // namespace ceres::constants -#if !defined(CERES_NO_CXSPARSE) - -namespace ceres { -namespace internal { - -std::unique_ptr FloatCXSparseCholesky::Create( - OrderingType ordering_type) { - LOG(FATAL) << "FloatCXSparseCholesky is not available."; - return {}; -} - -} // namespace internal -} // namespace ceres - -#endif // !defined(CERES_NO_CXSPARSE) +#endif // CERES_PUBLIC_CONSTANTS_H_ diff --git a/extern/ceres/include/ceres/context.h b/extern/ceres/include/ceres/context.h index 6c6e8f4c953..fe187266f9c 100644 --- a/extern/ceres/include/ceres/context.h +++ b/extern/ceres/include/ceres/context.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/cost_function.h b/extern/ceres/include/ceres/cost_function.h index fef972b75af..79d49128712 100644 --- a/extern/ceres/include/ceres/cost_function.h +++ b/extern/ceres/include/ceres/cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/cost_function_to_functor.h b/extern/ceres/include/ceres/cost_function_to_functor.h index 08a8050c5f8..e9592ed5754 100644 --- a/extern/ceres/include/ceres/cost_function_to_functor.h +++ b/extern/ceres/include/ceres/cost_function_to_functor.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -120,7 +120,7 @@ class CostFunctionToFunctor { if (parameter_block_sizes.size() == num_parameter_blocks) { for (int block = 0; block < num_parameter_blocks; ++block) { CHECK_EQ(ParameterDims::GetDim(block), parameter_block_sizes[block]) - << "Parameter block size missmatch. The specified static parameter " + << "Parameter block size mismatch. The specified static parameter " "block dimension does not match the one from the cost function."; } } diff --git a/extern/ceres/include/ceres/covariance.h b/extern/ceres/include/ceres/covariance.h index 60bcc80b80f..d477f317025 100644 --- a/extern/ceres/include/ceres/covariance.h +++ b/extern/ceres/include/ceres/covariance.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -146,7 +146,7 @@ class CovarianceImpl; // a. The rank deficiency arises from overparameterization. e.g., a // four dimensional quaternion used to parameterize SO(3), which is // a three dimensional manifold. In cases like this, the user should -// use an appropriate LocalParameterization/Manifold. Not only will this lead +// use an appropriate Manifold. Not only will this lead // to better numerical behaviour of the Solver, it will also expose // the rank deficiency to the Covariance object so that it can // handle it correctly. @@ -246,6 +246,20 @@ class CERES_EXPORT Covariance { // used. CovarianceAlgorithmType algorithm_type = SPARSE_QR; + // During QR factorization, if a column with Euclidean norm less + // than column_pivot_threshold is encountered it is treated as + // zero. 
+ // + // If column_pivot_threshold < 0, then an automatic default value + // of 20*(m+n)*eps*sqrt(max(diag(J’*J))) is used. Here m and n are + // the number of rows and columns of the Jacobian (J) + // respectively. + // + // This is an advanced option meant for users who know enough + // about their Jacobian matrices that they can determine a value + // better than the default. + double column_pivot_threshold = -1; + // If the Jacobian matrix is near singular, then inverting J'J // will result in unreliable results, e.g, if // @@ -266,7 +280,7 @@ class CERES_EXPORT Covariance { // // min_sigma / max_sigma < sqrt(min_reciprocal_condition_number) // - // where min_sigma and max_sigma are the minimum and maxiumum + // where min_sigma and max_sigma are the minimum and maximum // singular values of J respectively. // // 2. SPARSE_QR @@ -394,11 +408,9 @@ class CERES_EXPORT Covariance { const double* parameter_block2, double* covariance_block) const; - // Return the block of the cross-covariance matrix corresponding to - // parameter_block1 and parameter_block2. - // Returns cross-covariance in the tangent space if a local - // parameterization is associated with either parameter block; - // else returns cross-covariance in the ambient space. + // Returns the block of the cross-covariance in the tangent space if a + // manifold is associated with either parameter block; else returns + // cross-covariance in the ambient space. // // Compute must be called before the first call to // GetCovarianceBlock and the pair functor_; Ownership ownership_; }; +// Deduction guide that allows the user to avoid explicitly specifying the +// template parameter of DynamicAutoDiffCostFunction. 
The class can instead be +// instantiated as follows: +// +// new DynamicAutoDiffCostFunction{new MyCostFunctor{}}; +// +template +DynamicAutoDiffCostFunction(CostFunctor* functor, Ownership ownership) + -> DynamicAutoDiffCostFunction; + } // namespace ceres #endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ diff --git a/extern/ceres/include/ceres/dynamic_cost_function.h b/extern/ceres/include/ceres/dynamic_cost_function.h index c84a366dafb..02ce1e9f2a4 100644 --- a/extern/ceres/include/ceres/dynamic_cost_function.h +++ b/extern/ceres/include/ceres/dynamic_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/dynamic_cost_function_to_functor.h b/extern/ceres/include/ceres/dynamic_cost_function_to_functor.h index 5b5feaaf58e..cd124a2e2bb 100644 --- a/extern/ceres/include/ceres/dynamic_cost_function_to_functor.h +++ b/extern/ceres/include/ceres/dynamic_cost_function_to_functor.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/dynamic_numeric_diff_cost_function.h b/extern/ceres/include/ceres/dynamic_numeric_diff_cost_function.h index e1892e8ba4a..d9cd945b573 100644 --- a/extern/ceres/include/ceres/dynamic_numeric_diff_cost_function.h +++ b/extern/ceres/include/ceres/dynamic_numeric_diff_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -76,7 +76,7 @@ namespace ceres { // cost_function.AddParameterBlock(5); // cost_function.AddParameterBlock(10); // cost_function.SetNumResiduals(21); -template +template class DynamicNumericDiffCostFunction final : public DynamicCostFunction { public: explicit DynamicNumericDiffCostFunction( @@ -134,7 +134,7 @@ class DynamicNumericDiffCostFunction final : public DynamicCostFunction { for (size_t block = 0; block < block_sizes.size(); ++block) { if (jacobians[block] != nullptr && !NumericDiff* local_parameterizations, - const NumericDiffOptions& options); - // This will not take ownership of the cost function or manifolds. // // function: The cost function to probe. @@ -102,7 +79,6 @@ class CERES_EXPORT GradientChecker { GradientChecker(const CostFunction* function, const std::vector* manifolds, const NumericDiffOptions& options); - ~GradientChecker(); // Contains results from a call to Probe for later inspection. struct CERES_EXPORT ProbeResults { @@ -166,17 +142,6 @@ class CERES_EXPORT GradientChecker { GradientChecker(const GradientChecker&) = delete; void operator=(const GradientChecker&) = delete; - // This bool is used to determine whether the constructor with the - // LocalParameterizations is called or the one with Manifolds is called. If - // the former, then the vector of manifolds is a vector of ManifoldAdapter - // objects which we own and should be deleted. If the latter then they are - // real Manifold objects owned by the caller and will not be deleted. - // - // This bool is only needed during the LocalParameterization to Manifold - // transition, once this transition is complete the LocalParameterization - // based constructor and this bool will be removed. 
- const bool delete_manifolds_ = false; - std::vector manifolds_; const CostFunction* function_; std::unique_ptr finite_diff_cost_function_; diff --git a/extern/ceres/include/ceres/gradient_problem.h b/extern/ceres/include/ceres/gradient_problem.h index b6a8b867421..96d6493d514 100644 --- a/extern/ceres/include/ceres/gradient_problem.h +++ b/extern/ceres/include/ceres/gradient_problem.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,7 +36,6 @@ #include "ceres/first_order_function.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -#include "ceres/local_parameterization.h" #include "ceres/manifold.h" namespace ceres { @@ -90,47 +89,19 @@ class FirstOrderFunction; // }; // // ceres::GradientProblem problem(new Rosenbrock()); -// -// NOTE: We are currently in the process of transitioning from -// LocalParameterization to Manifolds in the Ceres API. During this period, -// GradientProblem will support using both Manifold and LocalParameterization -// objects interchangably. For methods in the API affected by this change, see -// their documentation below. class CERES_EXPORT GradientProblem { public: // Takes ownership of the function. explicit GradientProblem(FirstOrderFunction* function); - // Takes ownership of the function and the parameterization. - // - // NOTE: This constructor is deprecated and will be removed in the next public - // release of Ceres Solver. Please move to using the Manifold based - // constructor. - CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Please use the constructor that " - "uses Manifold instead.") - GradientProblem(FirstOrderFunction* function, - LocalParameterization* parameterization); - // Takes ownership of the function and the manifold. 
GradientProblem(FirstOrderFunction* function, Manifold* manifold); int NumParameters() const; // Dimension of the manifold (and its tangent space). - // - // During the transition from LocalParameterization to Manifold, this method - // reports the LocalSize of the LocalParameterization or the TangentSize of - // the Manifold object associated with this problem. int NumTangentParameters() const; - // Dimension of the manifold (and its tangent space). - // - // NOTE: This method is deprecated and will be removed in the next public - // release of Ceres Solver. Please move to using NumTangentParameters() - // instead. - int NumLocalParameters() const { return NumTangentParameters(); } - // This call is not thread safe. bool Evaluate(const double* parameters, double* cost, double* gradient) const; bool Plus(const double* x, const double* delta, double* x_plus_delta) const; @@ -138,42 +109,11 @@ class CERES_EXPORT GradientProblem { const FirstOrderFunction* function() const { return function_.get(); } FirstOrderFunction* mutable_function() { return function_.get(); } - // NOTE: During the transition from LocalParameterization to Manifold we need - // to support both The LocalParameterization and Manifold based constructors. - // - // When the user uses the LocalParameterization, internally the solver will - // wrap it in a ManifoldAdapter object and return it when manifold or - // mutable_manifold are called. - // - // As a result this method will return a non-nullptr result if a Manifold or a - // LocalParameterization was used when constructing the GradientProblem. const Manifold* manifold() const { return manifold_.get(); } Manifold* mutable_manifold() { return manifold_.get(); } - // If the problem is constructed without a LocalParameterization or with a - // Manifold this method will return a nullptr. - // - // NOTE: This method is deprecated and will be removed in the next public - // release of Ceres Solver. 
- CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.") - const LocalParameterization* parameterization() const { - return parameterization_.get(); - } - - // If the problem is constructed without a LocalParameterization or with a - // Manifold this method will return a nullptr. - // - // NOTE: This method is deprecated and will be removed in the next public - // release of Ceres Solver. - CERES_DEPRECATED_WITH_MSG("Use Manifolds instead.") - LocalParameterization* mutable_parameterization() { - return parameterization_.get(); - } - private: std::unique_ptr function_; - CERES_DEPRECATED_WITH_MSG("") - std::unique_ptr parameterization_; std::unique_ptr manifold_; std::unique_ptr scratch_; }; diff --git a/extern/ceres/include/ceres/gradient_problem_solver.h b/extern/ceres/include/ceres/gradient_problem_solver.h index b6290c80c28..f4c392fd9f6 100644 --- a/extern/ceres/include/ceres/gradient_problem_solver.h +++ b/extern/ceres/include/ceres/gradient_problem_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -305,10 +305,6 @@ class CERES_EXPORT GradientProblemSolver { // Number of parameters in the problem. int num_parameters = -1; - // Dimension of the tangent space of the problem. - CERES_DEPRECATED_WITH_MSG("Use num_tangent_parameters.") - int num_local_parameters = -1; - // Dimension of the tangent space of the problem. int num_tangent_parameters = -1; diff --git a/extern/ceres/include/ceres/internal/array_selector.h b/extern/ceres/include/ceres/internal/array_selector.h index b4db012f00b..94801468c90 100644 --- a/extern/ceres/include/ceres/internal/array_selector.h +++ b/extern/ceres/include/ceres/internal/array_selector.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2020 Google Inc. 
All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/internal/fixed_array.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // StaticFixedArray selects the best array implementation based on template // arguments. If the size is not known at compile-time, pass @@ -91,7 +90,6 @@ struct ArraySelector } }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_ARRAY_SELECTOR_H_ diff --git a/extern/ceres/include/ceres/internal/autodiff.h b/extern/ceres/include/ceres/internal/autodiff.h index c796618cd2d..8b02a2bb42f 100644 --- a/extern/ceres/include/ceres/internal/autodiff.h +++ b/extern/ceres/include/ceres/internal/autodiff.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -164,8 +164,7 @@ #define CERES_AUTODIFF_MAX_RESIDUALS_ON_STACK 20 #endif -namespace ceres { -namespace internal { +namespace ceres::internal { // Extends src by a 1st order perturbation for every dimension and puts it in // dst. The size of src is N. 
Since this is also used for perturbations in @@ -359,7 +358,6 @@ inline bool AutoDifferentiate(const Functor& functor, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_AUTODIFF_H_ diff --git a/extern/ceres/include/ceres/internal/disable_warnings.h b/extern/ceres/include/ceres/internal/disable_warnings.h index d7766a0a08f..b6e38aa3390 100644 --- a/extern/ceres/include/ceres/internal/disable_warnings.h +++ b/extern/ceres/include/ceres/internal/disable_warnings.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/internal/eigen.h b/extern/ceres/include/ceres/internal/eigen.h index 111cc7a07bb..fee6b523364 100644 --- a/extern/ceres/include/ceres/internal/eigen.h +++ b/extern/ceres/include/ceres/internal/eigen.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/internal/euler_angles.h b/extern/ceres/include/ceres/internal/euler_angles.h new file mode 100644 index 00000000000..38f2702b6b4 --- /dev/null +++ b/extern/ceres/include/ceres/internal/euler_angles.h @@ -0,0 +1,199 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. 
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#ifndef CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_ +#define CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_ + +#include + +namespace ceres { +namespace internal { + +// The EulerSystem struct represents an Euler Angle Convention in compile time. 
+// It acts like a trait structure and is also used as a tag for dispatching +// Euler angle conversion function templates +// +// Internally, it implements the convention laid out in "Euler angle +// conversion", Ken Shoemake, Graphics Gems IV, where a choice of axis for the +// first rotation (out of 3) and 3 binary choices compactly specify all 24 +// rotation conventions +// +// - InnerAxis: Axis for the first rotation. This is specified by struct tags +// axis::X, axis::Y, and axis::Z +// +// - Parity: Defines the parity of the axis permutation. The axis sequence has +// Even parity if the second axis of rotation is 'greater-than' the first axis +// of rotation according to the order X {}; +struct Y : std::integral_constant {}; +struct Z : std::integral_constant {}; +} // namespace axis + +struct Even; +struct Odd; + +struct ProperEuler; +struct TaitBryan; + +struct Extrinsic; +struct Intrinsic; + +template +struct EulerSystem { + static constexpr bool kIsParityOdd = std::is_same_v; + static constexpr bool kIsProperEuler = + std::is_same_v; + static constexpr bool kIsIntrinsic = + std::is_same_v; + + static constexpr int kAxes[3] = { + InnerAxisType::value, + (InnerAxisType::value + 1 + static_cast(kIsParityOdd)) % 3, + (InnerAxisType::value + 2 - static_cast(kIsParityOdd)) % 3}; +}; + +} // namespace internal + +// Define human readable aliases to the type of the tags +using ExtrinsicXYZ = internal::EulerSystem; +using ExtrinsicXYX = internal::EulerSystem; +using ExtrinsicXZY = internal::EulerSystem; +using ExtrinsicXZX = internal::EulerSystem; +using ExtrinsicYZX = internal::EulerSystem; +using ExtrinsicYZY = internal::EulerSystem; +using ExtrinsicYXZ = internal::EulerSystem; +using ExtrinsicYXY = internal::EulerSystem; +using ExtrinsicZXY = internal::EulerSystem; +using ExtrinsicZXZ = internal::EulerSystem; +using ExtrinsicZYX = internal::EulerSystem; +using ExtrinsicZYZ = internal::EulerSystem; +/* Rotating axes */ +using IntrinsicZYX = 
internal::EulerSystem; +using IntrinsicXYX = internal::EulerSystem; +using IntrinsicYZX = internal::EulerSystem; +using IntrinsicXZX = internal::EulerSystem; +using IntrinsicXZY = internal::EulerSystem; +using IntrinsicYZY = internal::EulerSystem; +using IntrinsicZXY = internal::EulerSystem; +using IntrinsicYXY = internal::EulerSystem; +using IntrinsicYXZ = internal::EulerSystem; +using IntrinsicZXZ = internal::EulerSystem; +using IntrinsicXYZ = internal::EulerSystem; +using IntrinsicZYZ = internal::EulerSystem; + +} // namespace ceres + +#endif // CERES_PUBLIC_INTERNAL_EULER_ANGLES_H_ diff --git a/extern/ceres/include/ceres/internal/fixed_array.h b/extern/ceres/include/ceres/internal/fixed_array.h index dcbddcd3a1d..0e35f632497 100644 --- a/extern/ceres/include/ceres/internal/fixed_array.h +++ b/extern/ceres/include/ceres/internal/fixed_array.h @@ -41,8 +41,7 @@ #include "ceres/internal/memory.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { constexpr static auto kFixedArrayUseDefault = static_cast(-1); @@ -372,8 +371,8 @@ class FixedArray { return std::addressof(ptr->array); } - static_assert(sizeof(StorageElement) == sizeof(value_type), ""); - static_assert(alignof(StorageElement) == alignof(value_type), ""); + static_assert(sizeof(StorageElement) == sizeof(value_type)); + static_assert(alignof(StorageElement) == alignof(value_type)); class NonEmptyInlinedStorage { public: @@ -461,7 +460,6 @@ template constexpr typename FixedArray::size_type FixedArray::inline_elements; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_FIXED_ARRAY_H_ diff --git a/extern/ceres/include/ceres/internal/householder_vector.h b/extern/ceres/include/ceres/internal/householder_vector.h index 7700208be22..dd8361c335b 100644 --- a/extern/ceres/include/ceres/internal/householder_vector.h +++ b/extern/ceres/include/ceres/internal/householder_vector.h @@ -1,5 +1,5 @@ // Ceres Solver 
- A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://code.google.com/p/ceres-solver/ // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,7 @@ #include "Eigen/Core" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Algorithm 5.1.1 from 'Matrix Computations' by Golub et al. (Johns Hopkins // Studies in Mathematical Sciences) but using the nth element of the input @@ -90,7 +89,6 @@ typename Derived::PlainObject ApplyHouseholderVector( return (y - v * (beta * (v.transpose() * y))); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_HOUSEHOLDER_VECTOR_H_ diff --git a/extern/ceres/include/ceres/internal/integer_sequence_algorithm.h b/extern/ceres/include/ceres/internal/integer_sequence_algorithm.h index 777c119a77f..0c27d727199 100644 --- a/extern/ceres/include/ceres/internal/integer_sequence_algorithm.h +++ b/extern/ceres/include/ceres/internal/integer_sequence_algorithm.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,70 +40,7 @@ #include "ceres/jet_fwd.h" -namespace ceres { -namespace internal { - -// Implementation of calculating the sum of an integer sequence. -// Recursively instantiate SumImpl and calculate the sum of the N first -// numbers. This reduces the number of instantiations and speeds up -// compilation. 
-// -// Examples: -// 1) integer_sequence: -// Value = 5 -// -// 2) integer_sequence: -// Value = 4 + 2 + SumImpl>::Value -// Value = 4 + 2 + 0 -// -// 3) integer_sequence: -// Value = 2 + 1 + SumImpl>::Value -// Value = 2 + 1 + 4 -template -struct SumImpl; - -// Strip of and sum the first number. -template -struct SumImpl> { - static constexpr T Value = - N + SumImpl>::Value; -}; - -// Strip of and sum the first two numbers. -template -struct SumImpl> { - static constexpr T Value = - N1 + N2 + SumImpl>::Value; -}; - -// Strip of and sum the first four numbers. -template -struct SumImpl> { - static constexpr T Value = - N1 + N2 + N3 + N4 + SumImpl>::Value; -}; - -// Only one number is left. 'Value' is just that number ('recursion' ends). -template -struct SumImpl> { - static constexpr T Value = N; -}; - -// No number is left. 'Value' is the identity element (for sum this is zero). -template -struct SumImpl> { - static constexpr T Value = T(0); -}; - -// Calculate the sum of an integer sequence. The resulting sum will be stored in -// 'Value'. -template -class Sum { - using T = typename Seq::value_type; - - public: - static constexpr T Value = SumImpl::Value; -}; +namespace ceres::internal { // Implementation of calculating an exclusive scan (exclusive prefix sum) of an // integer sequence. Exclusive means that the i-th input element is not included @@ -232,40 +169,11 @@ struct RemoveValue template using RemoveValue_t = typename RemoveValue::type; -// Determines whether the values of an integer sequence are all the same. +// Returns true if all elements of Values are equal to HeadValue. // -// The integer sequence must contain at least one value. The predicate is -// undefined for empty sequences. The evaluation result of the predicate for a -// sequence containing only one value is defined to be true. -template -struct AreAllEqual; - -// The predicate result for a sequence containing one element is defined to be -// true. 
-template -struct AreAllEqual> : std::true_type {}; - -// Recursion end. -template -struct AreAllEqual> - : std::integral_constant {}; - -// Recursion for sequences containing at least two elements. -template -// clang-format off -struct AreAllEqual > - : std::integral_constant -< - bool, - AreAllEqual >::value && - AreAllEqual >::value -> -// clang-format on -{}; - -// Convenience variable template for AreAllEqual. -template -constexpr bool AreAllEqual_v = AreAllEqual::value; +// Returns true if Values is empty. +template +inline constexpr bool AreAllEqual_v = ((HeadValue == Values) && ...); // Predicate determining whether an integer sequence is either empty or all // values are equal. @@ -279,13 +187,13 @@ struct IsEmptyOrAreAllEqual> : std::true_type {}; // General case for sequences containing at least one value. template struct IsEmptyOrAreAllEqual> - : AreAllEqual> {}; + : std::integral_constant> {}; // Convenience variable template for IsEmptyOrAreAllEqual. template -constexpr bool IsEmptyOrAreAllEqual_v = IsEmptyOrAreAllEqual::value; +inline constexpr bool IsEmptyOrAreAllEqual_v = + IsEmptyOrAreAllEqual::value; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_INTEGER_SEQUENCE_ALGORITHM_H_ diff --git a/extern/ceres/include/ceres/internal/jet_traits.h b/extern/ceres/include/ceres/internal/jet_traits.h index 2a38c05b7da..f504a610052 100644 --- a/extern/ceres/include/ceres/internal/jet_traits.h +++ b/extern/ceres/include/ceres/internal/jet_traits.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,17 +42,6 @@ namespace ceres { namespace internal { -// Predicate that determines whether T is a Jet. 
-template -struct IsJet : std::false_type {}; - -template -struct IsJet> : std::true_type {}; - -// Convenience variable template for IsJet. -template -constexpr bool IsJet_v = IsJet::value; - // Predicate that determines whether any of the Types is a Jet. template struct AreAnyJet : std::false_type {}; @@ -65,7 +54,7 @@ struct AreAnyJet, Types...> : std::true_type {}; // Convenience variable template for AreAnyJet. template -constexpr bool AreAnyJet_v = AreAnyJet::value; +inline constexpr bool AreAnyJet_v = AreAnyJet::value; // Extracts the underlying floating-point from a type T. template @@ -84,27 +73,8 @@ using UnderlyingScalar_t = typename UnderlyingScalar::type; // // Specifically, the predicate applies std::is_same recursively to pairs of // Types in the pack. -// -// The predicate is defined only for template packs containing at least two -// types. -template -// clang-format off -struct AreAllSame : std::integral_constant -< - bool, - AreAllSame::value && - AreAllSame::value -> -// clang-format on -{}; - -// AreAllSame pairwise test. -template -struct AreAllSame : std::is_same {}; - -// Convenience variable template for AreAllSame. -template -constexpr bool AreAllSame_v = AreAllSame::value; +template +inline constexpr bool AreAllSame_v = (std::is_same::value && ...); // Determines the rank of a type. This allows to ensure that types passed as // arguments are compatible to each other. The rank of Jet is determined by the @@ -124,7 +94,7 @@ struct Rank> : std::integral_constant {}; // Convenience variable template for Rank. template -constexpr int Rank_v = Rank::value; +inline constexpr int Rank_v = Rank::value; // Constructs an integer sequence of ranks for each of the Types in the pack. template @@ -186,7 +156,8 @@ struct CompatibleJetOperands<> : std::false_type {}; // This trait is a candidate for a concept definition once C++20 features can // be used. 
template -constexpr bool CompatibleJetOperands_v = CompatibleJetOperands::value; +inline constexpr bool CompatibleJetOperands_v = + CompatibleJetOperands::value; // Type trait ensuring at least one of the types is a Jet, // the underlying scalar types are compatible among each other and Jet @@ -216,7 +187,8 @@ struct PromotableJetOperands : std::integral_constant // This trait is a candidate for a concept definition once C++20 features can // be used. template -constexpr bool PromotableJetOperands_v = PromotableJetOperands::value; +inline constexpr bool PromotableJetOperands_v = + PromotableJetOperands::value; } // namespace ceres diff --git a/extern/ceres/include/ceres/internal/line_parameterization.h b/extern/ceres/include/ceres/internal/line_parameterization.h index eda390148df..f50603d5e1f 100644 --- a/extern/ceres/include/ceres/internal/line_parameterization.h +++ b/extern/ceres/include/ceres/internal/line_parameterization.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2020 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/internal/memory.h b/extern/ceres/include/ceres/internal/memory.h index 45c5b67c353..e54cf2be5fd 100644 --- a/extern/ceres/include/ceres/internal/memory.h +++ b/extern/ceres/include/ceres/internal/memory.h @@ -40,8 +40,7 @@ } while (false) #endif // CERES_HAVE_EXCEPTIONS -namespace ceres { -namespace internal { +namespace ceres::internal { template void ConstructRange(Allocator& alloc, @@ -84,7 +83,6 @@ void CopyRange(Allocator& alloc, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_MEMORY_H_ diff --git a/extern/ceres/include/ceres/internal/numeric_diff.h b/extern/ceres/include/ceres/internal/numeric_diff.h index 351845c05fb..ba28bec1680 100644 --- a/extern/ceres/include/ceres/internal/numeric_diff.h +++ b/extern/ceres/include/ceres/internal/numeric_diff.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // This is split from the main class because C++ doesn't allow partial template // specializations for member functions. The alternative is to repeat the main @@ -502,7 +501,6 @@ struct EvaluateJacobianForParameterBlocks) { - return true; -} - -template -constexpr bool IsValidParameterDimensionSequence( - std::integer_sequence) { - return (N <= 0) ? false - : IsValidParameterDimensionSequence( - std::integer_sequence()); -} +namespace ceres::internal { // Helper class that represents the parameter dimensions. The parameter // dimensions are either dynamic or the sizes are known at compile time. 
It is @@ -70,8 +55,7 @@ class ParameterDims { // The parameter dimensions are only valid if all parameter block dimensions // are greater than zero. - static constexpr bool kIsValid = - IsValidParameterDimensionSequence(Parameters()); + static constexpr bool kIsValid = ((Ns > 0) && ...); static_assert(kIsValid, "Invalid parameter block dimension detected. Each parameter " "block dimension must be bigger than zero."); @@ -81,8 +65,7 @@ class ParameterDims { static_assert(kIsDynamic || kNumParameterBlocks > 0, "At least one parameter block must be specified."); - static constexpr int kNumParameters = - Sum>::Value; + static constexpr int kNumParameters = (Ns + ... + 0); static constexpr int GetDim(int dim) { return params_[dim]; } @@ -118,7 +101,6 @@ template using StaticParameterDims = ParameterDims; using DynamicParameterDims = ParameterDims; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_PARAMETER_DIMS_H_ diff --git a/extern/ceres/include/ceres/internal/port.h b/extern/ceres/include/ceres/internal/port.h index 4275b0e15c3..b8cb0ffcffd 100644 --- a/extern/ceres/include/ceres/internal/port.h +++ b/extern/ceres/include/ceres/internal/port.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,14 +47,6 @@ #define CERES_GET_FLAG(X) X #endif -// Indicates whether C++17 is currently active -#ifndef CERES_HAS_CPP17 -#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) -#define CERES_HAS_CPP17 -#endif // __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= - // 201703L) -#endif // !defined(CERES_HAS_CPP17) - // Indicates whether C++20 is currently active #ifndef CERES_HAS_CPP20 #if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) @@ -85,4 +77,15 @@ // #define CERES_PREVENT_MACRO_SUBSTITUTION // Yes, it's empty +// CERES_DISABLE_DEPRECATED_WARNING and CERES_RESTORE_DEPRECATED_WARNING allow +// to temporarily disable deprecation warnings +#if defined(_MSC_VER) +#define CERES_DISABLE_DEPRECATED_WARNING \ + _Pragma("warning(push)") _Pragma("warning(disable : 4996)") +#define CERES_RESTORE_DEPRECATED_WARNING _Pragma("warning(pop)") +#else // defined(_MSC_VER) +#define CERES_DISABLE_DEPRECATED_WARNING +#define CERES_RESTORE_DEPRECATED_WARNING +#endif // defined(_MSC_VER) + #endif // CERES_PUBLIC_INTERNAL_PORT_H_ diff --git a/extern/ceres/include/ceres/internal/reenable_warnings.h b/extern/ceres/include/ceres/internal/reenable_warnings.h index 2c5db061fd7..a183c2531c6 100644 --- a/extern/ceres/include/ceres/internal/reenable_warnings.h +++ b/extern/ceres/include/ceres/internal/reenable_warnings.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/internal/sphere_manifold_functions.h b/extern/ceres/include/ceres/internal/sphere_manifold_functions.h index 5be3321a579..479344261e7 100644 --- a/extern/ceres/include/ceres/internal/sphere_manifold_functions.h +++ b/extern/ceres/include/ceres/internal/sphere_manifold_functions.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,6 +32,7 @@ #ifndef CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_ #define CERES_PUBLIC_INTERNAL_SPHERE_MANIFOLD_HELPERS_H_ +#include "ceres/constants.h" #include "ceres/internal/householder_vector.h" // This module contains functions to compute the SphereManifold plus and minus @@ -58,26 +59,23 @@ // used in order to allow also Eigen::Ref and Eigen block expressions to // be passed to the function. -namespace ceres { -namespace internal { +namespace ceres::internal { template inline void ComputeSphereManifoldPlus(const VT& v, double beta, const XT& x, const DeltaT& delta, - double norm_delta, + const double norm_delta, XPlusDeltaT* x_plus_delta) { constexpr int AmbientDim = VT::RowsAtCompileTime; // Map the delta from the minimum representation to the over parameterized // homogeneous vector. See B.2 p.25 equation (106) - (107) for more details. - const double norm_delta_div_2 = 0.5 * norm_delta; - const double sin_delta_by_delta = - std::sin(norm_delta_div_2) / norm_delta_div_2; + const double sin_delta_by_delta = std::sin(norm_delta) / norm_delta; Eigen::Matrix y(v.size()); - y << 0.5 * sin_delta_by_delta * delta, std::cos(norm_delta_div_2); + y << sin_delta_by_delta * delta, std::cos(norm_delta); // Apply the delta update to remain on the sphere. 
*x_plus_delta = x.norm() * ApplyHouseholderVector(y, v, beta); @@ -99,11 +97,11 @@ inline void ComputeSphereManifoldPlusJacobian(const VT& x, // have trouble deducing the type of v automatically. ComputeHouseholderVector(x, &v, &beta); - // The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the + // The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the // Householder matrix (H = I - beta * v * v'). for (int i = 0; i < tangent_size; ++i) { - (*jacobian).col(i) = -0.5 * beta * v(i) * v; - (*jacobian)(i, i) += 0.5; + (*jacobian).col(i) = -beta * v(i) * v; + (*jacobian)(i, i) += 1.0; } (*jacobian) *= x.norm(); } @@ -116,18 +114,19 @@ inline void ComputeSphereManifoldMinus( AmbientSpaceDim == Eigen::Dynamic ? Eigen::Dynamic : AmbientSpaceDim - 1; using AmbientVector = Eigen::Matrix; - const int tanget_size = v.size() - 1; + const int tangent_size = v.size() - 1; const AmbientVector hy = ApplyHouseholderVector(y, v, beta) / x.norm(); // Calculate y - x. See B.2 p.25 equation (108). - double y_last = hy[tanget_size]; - double hy_norm = hy.template head(tanget_size).norm(); + const double y_last = hy[tangent_size]; + const double hy_norm = hy.template head(tangent_size).norm(); if (hy_norm == 0.0) { y_minus_x->setZero(); + y_minus_x->data()[tangent_size - 1] = y_last >= 0 ? 0.0 : constants::pi; } else { - *y_minus_x = 2.0 * std::atan2(hy_norm, y_last) / hy_norm * - hy.template head(tanget_size); + *y_minus_x = std::atan2(hy_norm, y_last) / hy_norm * + hy.template head(tangent_size); } } @@ -147,16 +146,18 @@ inline void ComputeSphereManifoldMinusJacobian(const VT& x, // have trouble deducing the type of v automatically. ComputeHouseholderVector(x, &v, &beta); - // The Jacobian is equal to J = 2.0 * H.leftCols(size_ - 1) where H is the + // The Jacobian is equal to J = H.leftCols(size_ - 1) where H is the // Householder matrix (H = I - beta * v * v'). 
for (int i = 0; i < tangent_size; ++i) { - (*jacobian).row(i) = -2.0 * beta * v(i) * v; - (*jacobian)(i, i) += 2.0; + // NOTE: The transpose is used for correctness (the product is expected to + // be a row vector), although here there seems to be no difference between + // transposing or not for Eigen (possibly a compile-time auto fix). + (*jacobian).row(i) = -beta * v(i) * v.transpose(); + (*jacobian)(i, i) += 1.0; } (*jacobian) /= x.norm(); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif diff --git a/extern/ceres/include/ceres/internal/variadic_evaluate.h b/extern/ceres/include/ceres/internal/variadic_evaluate.h index b8408237cc3..61af6b2e918 100644 --- a/extern/ceres/include/ceres/internal/variadic_evaluate.h +++ b/extern/ceres/include/ceres/internal/variadic_evaluate.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/cost_function.h" #include "ceres/internal/parameter_dims.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // For fixed size cost functors template @@ -50,7 +49,7 @@ inline bool VariadicEvaluateImpl(const Functor& functor, T* output, std::false_type /*is_dynamic*/, std::integer_sequence) { - static_assert(sizeof...(Indices), + static_assert(sizeof...(Indices) > 0, "Invalid number of parameter blocks. 
At least one parameter " "block must be specified."); return functor(input[Indices]..., output); @@ -107,7 +106,29 @@ inline bool VariadicEvaluate(const Functor& functor, return VariadicEvaluateImpl(functor, input, output, &functor); } -} // namespace internal -} // namespace ceres +// When differentiating dynamically sized CostFunctions, VariadicEvaluate +// expects a functor with the signature: +// +// bool operator()(double const* const* parameters, double* cost) const +// +// However for NumericDiffFirstOrderFunction, the functor has the signature +// +// bool operator()(double const* parameters, double* cost) const +// +// This thin wrapper adapts the latter to the former. +template +class FirstOrderFunctorAdapter { + public: + explicit FirstOrderFunctorAdapter(const Functor& functor) + : functor_(functor) {} + bool operator()(double const* const* parameters, double* cost) const { + return functor_(*parameters, cost); + } + + private: + const Functor& functor_; +}; + +} // namespace ceres::internal #endif // CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_ diff --git a/extern/ceres/include/ceres/iteration_callback.h b/extern/ceres/include/ceres/iteration_callback.h index 3d7e8e94f30..955e2addb7b 100644 --- a/extern/ceres/include/ceres/iteration_callback.h +++ b/extern/ceres/include/ceres/iteration_callback.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/jet.h b/extern/ceres/include/ceres/jet.h index fba1e2ab6e0..f279ba3d803 100644 --- a/extern/ceres/include/ceres/jet.h +++ b/extern/ceres/include/ceres/jet.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -724,7 +724,6 @@ inline Jet hypot(const Jet& x, const Jet& y) { return Jet(tmp, x.a / tmp * x.v + y.a / tmp * y.v); } -#ifdef CERES_HAS_CPP17 // Like sqrt(x^2 + y^2 + z^2), // but acts to prevent underflow/overflow for small/large x/y/z. // Note that the function is non-smooth at x=y=z=0, @@ -744,7 +743,6 @@ inline Jet hypot(const Jet& x, const T tmp = hypot(x.a, y.a, z.a); return Jet(tmp, x.a / tmp * x.v + y.a / tmp * y.v + z.a / tmp * z.v); } -#endif // defined(CERES_HAS_CPP17) // Like x * y + z but rounded only once. template @@ -757,28 +755,76 @@ inline Jet fma(const Jet& x, return Jet(fma(x.a, y.a, z.a), y.a * x.v + x.a * y.v + z.v); } -// Returns the larger of the two arguments. NaNs are treated as missing data. +// Return value of fmax() and fmin() on equality +// --------------------------------------------- +// +// There is arguably no good answer to what fmax() & fmin() should return on +// equality, which for Jets by definition ONLY compares the scalar parts. We +// choose what we think is the least worst option (averaging as Jets) which +// minimises undesirable/unexpected behaviour as used, and also supports client +// code written against Ceres versions prior to type promotion being supported +// in Jet comparisons (< v2.1). +// +// The std::max() convention of returning the first argument on equality is +// problematic, as it means that the derivative component may or may not be +// preserved (when comparing a Jet with a scalar) depending upon the ordering. +// +// Always returning the Jet in {Jet, scalar} cases on equality is problematic +// as it is inconsistent with the behaviour that would be obtained if the scalar +// was first cast to Jet and the {Jet, Jet} case was used. 
Prior to type +// promotion (Ceres v2.1) client code would typically cast constants to Jets +// e.g: fmax(x, T(2.0)) which means the {Jet, Jet} case predominates, and we +// still want the result to be order independent. +// +// Our intuition is that preserving a non-zero derivative is best, even if +// its value does not match either of the inputs. Averaging achieves this +// whilst ensuring argument ordering independence. This is also the approach +// used by the Jax library, and TensorFlow's reduce_max(). + +// Returns the larger of the two arguments, with Jet averaging on equality. +// NaNs are treated as missing data. // // NOTE: This function is NOT subject to any of the error conditions specified -// in `math_errhandling`. +// in `math_errhandling`. template >* = nullptr> -inline decltype(auto) fmax(const Lhs& f, const Rhs& g) { +inline decltype(auto) fmax(const Lhs& x, const Rhs& y) { using J = std::common_type_t; - return (isnan(g) || isgreater(f, g)) ? J{f} : J{g}; + // As x == y may set FP exceptions in the presence of NaNs when used with + // non-default compiler options so we avoid its use here. + if (isnan(x) || isnan(y) || islessgreater(x, y)) { + return isnan(x) || isless(x, y) ? J{y} : J{x}; + } + // x == y (scalar parts) return the average of their Jet representations. +#if defined(CERES_HAS_CPP20) + return midpoint(J{x}, J{y}); +#else + return (J{x} + J{y}) * typename J::Scalar(0.5); +#endif // defined(CERES_HAS_CPP20) } -// Returns the smaller of the two arguments. NaNs are treated as missing data. +// Returns the smaller of the two arguments, with Jet averaging on equality. +// NaNs are treated as missing data. // // NOTE: This function is NOT subject to any of the error conditions specified -// in `math_errhandling`. +// in `math_errhandling`. 
template >* = nullptr> -inline decltype(auto) fmin(const Lhs& f, const Rhs& g) { +inline decltype(auto) fmin(const Lhs& x, const Rhs& y) { using J = std::common_type_t; - return (isnan(f) || isless(g, f)) ? J{g} : J{f}; + // As x == y may set FP exceptions in the presence of NaNs when used with + // non-default compiler options so we avoid its use here. + if (isnan(x) || isnan(y) || islessgreater(x, y)) { + return isnan(x) || isgreater(x, y) ? J{y} : J{x}; + } + // x == y (scalar parts) return the average of their Jet representations. +#if defined(CERES_HAS_CPP20) + return midpoint(J{x}, J{y}); +#else + return (J{x} + J{y}) * typename J::Scalar(0.5); +#endif // defined(CERES_HAS_CPP20) } // Returns the positive difference (f - g) of two arguments and zero if f <= g. @@ -804,7 +850,7 @@ template inline Jet erf(const Jet& x) { // We evaluate the constant as follows: // 2 / sqrt(pi) = 1 / sqrt(atan(1.)) - // On POSIX sytems it is defined as M_2_SQRTPI, but this is not + // On POSIX systems it is defined as M_2_SQRTPI, but this is not // portable and the type may not be T. The above expression // evaluates to full precision with IEEE arithmetic and, since it's // constant, the compiler can generate exactly the same code. gcc @@ -828,25 +874,19 @@ inline Jet erfc(const Jet& x) { // function errors in client code (the specific warning is suppressed when // Ceres itself is built). 
inline double BesselJ0(double x) { -#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS) - return _j0(x); -#else + CERES_DISABLE_DEPRECATED_WARNING return j0(x); -#endif + CERES_RESTORE_DEPRECATED_WARNING } inline double BesselJ1(double x) { -#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS) - return _j1(x); -#else + CERES_DISABLE_DEPRECATED_WARNING return j1(x); -#endif + CERES_RESTORE_DEPRECATED_WARNING } inline double BesselJn(int n, double x) { -#if defined(CERES_MSVC_USE_UNDERSCORE_PREFIXED_BESSEL_FUNCTIONS) - return _jn(n, x); -#else + CERES_DISABLE_DEPRECATED_WARNING return jn(n, x); -#endif + CERES_RESTORE_DEPRECATED_WARNING } // For the formulae of the derivatives of the Bessel functions see the book: @@ -1264,8 +1304,13 @@ struct numeric_limits> { static constexpr bool is_bounded = std::numeric_limits::is_bounded; static constexpr bool is_modulo = std::numeric_limits::is_modulo; + // has_denorm (and has_denorm_loss, not defined for Jet) has been deprecated + // in C++23. However, without an intent to remove the declaration. Disable + // deprecation warnings temporarily just for the corresponding symbols. + CERES_DISABLE_DEPRECATED_WARNING static constexpr std::float_denorm_style has_denorm = std::numeric_limits::has_denorm; + CERES_RESTORE_DEPRECATED_WARNING static constexpr std::float_round_style round_style = std::numeric_limits::round_style; @@ -1335,6 +1380,7 @@ struct NumTraits> { } static inline int digits10() { return NumTraits::digits10(); } + static inline int max_digits10() { return NumTraits::max_digits10(); } enum { IsComplex = 0, diff --git a/extern/ceres/include/ceres/jet_fwd.h b/extern/ceres/include/ceres/jet_fwd.h index fbb6286958c..b5216da723d 100644 --- a/extern/ceres/include/ceres/jet_fwd.h +++ b/extern/ceres/include/ceres/jet_fwd.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/line_manifold.h b/extern/ceres/include/ceres/line_manifold.h index f8f1b235220..dad9737cf69 100644 --- a/extern/ceres/include/ceres/line_manifold.h +++ b/extern/ceres/include/ceres/line_manifold.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -156,7 +156,7 @@ bool LineManifold::Plus(const double* x_ptr, // // The direction update function Plus_d is the same as as the SphereManifold: // - // d* = H_{v(d)} [0.5 sinc(0.5 |delta_d|) delta_d, cos(0.5 |delta_d|)]^T + // d* = H_{v(d)} [sinc(|delta_d|) delta_d, cos(|delta_d|)]^T // // where H is the householder matrix // H_{v} = I - (2 / |v|^2) v v^T @@ -165,7 +165,7 @@ bool LineManifold::Plus(const double* x_ptr, // // The origin point update function Plus_o is defined as // - // o* = o + H_{v(d)} [0.5 delta_o, 0]^T. + // o* = o + H_{v(d)} [delta_o, 0]^T. Eigen::Map o(x_ptr, size_); Eigen::Map d(x_ptr + size_, size_); @@ -208,11 +208,8 @@ bool LineManifold::Plus(const double* x_ptr, // perpendicular to the line direction. This is achieved by using the // householder matrix of the direction and allow only movements // perpendicular to e_n. - // - // The factor of 0.5 is used to be consistent with the line direction - // update. 
AmbientVector y(size_); - y << 0.5 * delta_o, 0; + y << delta_o, 0; o_plus_delta += internal::ApplyHouseholderVector(y, v, beta); return true; @@ -266,7 +263,7 @@ bool LineManifold::Minus(const double* y_ptr, AmbientVector delta_o = y_o - x_o; const AmbientVector h_delta_o = - 2.0 * internal::ApplyHouseholderVector(delta_o, v, beta); + internal::ApplyHouseholderVector(delta_o, v, beta); y_minus_x_o = h_delta_o.template head(size_ - 1); return true; diff --git a/extern/ceres/include/ceres/local_parameterization.h b/extern/ceres/include/ceres/local_parameterization.h deleted file mode 100644 index 5815dd17d15..00000000000 --- a/extern/ceres/include/ceres/local_parameterization.h +++ /dev/null @@ -1,371 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: keir@google.com (Keir Mierle) -// sameeragarwal@google.com (Sameer Agarwal) - -#ifndef CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_ -#define CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_ - -#include -#include -#include - -#include "ceres/internal/disable_warnings.h" -#include "ceres/internal/export.h" -#include "ceres/internal/port.h" - -namespace ceres { - -// WARNING: LocalParameterizations are deprecated. They will be removed from -// Ceres Solver in version 2.2.0. Please use Manifolds instead. - -// Purpose: Sometimes parameter blocks x can overparameterize a problem -// -// min f(x) -// x -// -// In that case it is desirable to choose a parameterization for the -// block itself to remove the null directions of the cost. More -// generally, if x lies on a manifold of a smaller dimension than the -// ambient space that it is embedded in, then it is numerically and -// computationally more effective to optimize it using a -// parameterization that lives in the tangent space of that manifold -// at each point. -// -// For example, a sphere in three dimensions is a 2 dimensional -// manifold, embedded in a three dimensional space. At each point on -// the sphere, the plane tangent to it defines a two dimensional -// tangent space. For a cost function defined on this sphere, given a -// point x, moving in the direction normal to the sphere at that point -// is not useful. 
Thus a better way to do a local optimization is to -// optimize over two dimensional vector delta in the tangent space at -// that point and then "move" to the point x + delta, where the move -// operation involves projecting back onto the sphere. Doing so -// removes a redundant dimension from the optimization, making it -// numerically more robust and efficient. -// -// More generally we can define a function -// -// x_plus_delta = Plus(x, delta), -// -// where x_plus_delta has the same size as x, and delta is of size -// less than or equal to x. The function Plus, generalizes the -// definition of vector addition. Thus it satisfies the identify -// -// Plus(x, 0) = x, for all x. -// -// A trivial version of Plus is when delta is of the same size as x -// and -// -// Plus(x, delta) = x + delta -// -// A more interesting case if x is two dimensional vector, and the -// user wishes to hold the first coordinate constant. Then, delta is a -// scalar and Plus is defined as -// -// Plus(x, delta) = x + [0] * delta -// [1] -// -// An example that occurs commonly in Structure from Motion problems -// is when camera rotations are parameterized using Quaternion. There, -// it is useful to only make updates orthogonal to that 4-vector -// defining the quaternion. One way to do this is to let delta be a 3 -// dimensional vector and define Plus to be -// -// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x -// -// The multiplication between the two 4-vectors on the RHS is the -// standard quaternion product. -// -// Given f and a point x, optimizing f can now be restated as -// -// min f(Plus(x, delta)) -// delta -// -// Given a solution delta to this problem, the optimal value is then -// given by -// -// x* = Plus(x, delta) -// -// The class LocalParameterization defines the function Plus and its -// Jacobian which is needed to compute the Jacobian of f w.r.t delta. 
-class CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations will be removed from the Ceres Solver API in " - "version 2.2.0. Use Manifolds instead.") - CERES_EXPORT LocalParameterization { - public: - virtual ~LocalParameterization(); - - // Generalization of the addition operation, - // - // x_plus_delta = Plus(x, delta) - // - // with the condition that Plus(x, 0) = x. - // - virtual bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const = 0; - - // The jacobian of Plus(x, delta) w.r.t delta at delta = 0. - // - // jacobian is a row-major GlobalSize() x LocalSize() matrix. - virtual bool ComputeJacobian(const double* x, double* jacobian) const = 0; - - // local_matrix = global_matrix * jacobian - // - // global_matrix is a num_rows x GlobalSize row major matrix. - // local_matrix is a num_rows x LocalSize row major matrix. - // jacobian(x) is the matrix returned by ComputeJacobian at x. - // - // This is only used by GradientProblem. For most normal uses, it is - // okay to use the default implementation. - virtual bool MultiplyByJacobian(const double* x, - const int num_rows, - const double* global_matrix, - double* local_matrix) const; - - // Size of x. - virtual int GlobalSize() const = 0; - - // Size of delta. 
- virtual int LocalSize() const = 0; -}; - -// Some basic parameterizations - -// Identity Parameterization: Plus(x, delta) = x + delta -class CERES_DEPRECATED_WITH_MSG("Use EuclideanManifold instead.") - CERES_EXPORT IdentityParameterization : public LocalParameterization { - public: - explicit IdentityParameterization(int size); - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - bool MultiplyByJacobian(const double* x, - const int num_cols, - const double* global_matrix, - double* local_matrix) const override; - int GlobalSize() const override { return size_; } - int LocalSize() const override { return size_; } - - private: - const int size_; -}; - -// Hold a subset of the parameters inside a parameter block constant. -class CERES_DEPRECATED_WITH_MSG("Use SubsetManifold instead.") - CERES_EXPORT SubsetParameterization : public LocalParameterization { - public: - explicit SubsetParameterization(int size, - const std::vector& constant_parameters); - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - bool MultiplyByJacobian(const double* x, - const int num_cols, - const double* global_matrix, - double* local_matrix) const override; - int GlobalSize() const override { - return static_cast(constancy_mask_.size()); - } - int LocalSize() const override { return local_size_; } - - private: - const int local_size_; - std::vector constancy_mask_; -}; - -// Plus(x, delta) = [cos(|delta|), sin(|delta|) delta / |delta|] * x -// with * being the quaternion multiplication operator. Here we assume -// that the first element of the quaternion vector is the real (cos -// theta) part. 
-class CERES_DEPRECATED_WITH_MSG("Use QuaternionManifold instead.") - CERES_EXPORT QuaternionParameterization : public LocalParameterization { - public: - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - int GlobalSize() const override { return 4; } - int LocalSize() const override { return 3; } -}; - -// Implements the quaternion local parameterization for Eigen's representation -// of the quaternion. Eigen uses a different internal memory layout for the -// elements of the quaternion than what is commonly used. Specifically, Eigen -// stores the elements in memory as [x, y, z, w] where the real part is last -// whereas it is typically stored first. Note, when creating an Eigen quaternion -// through the constructor the elements are accepted in w, x, y, z order. Since -// Ceres operates on parameter blocks which are raw double pointers this -// difference is important and requires a different parameterization. -// -// Plus(x, delta) = [sin(|delta|) delta / |delta|, cos(|delta|)] * x -// with * being the quaternion multiplication operator. -class CERES_DEPRECATED_WITH_MSG("Use EigenQuaternionManifold instead.") - CERES_EXPORT EigenQuaternionParameterization - : public ceres::LocalParameterization { - public: - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - int GlobalSize() const override { return 4; } - int LocalSize() const override { return 3; } -}; - -// This provides a parameterization for homogeneous vectors which are commonly -// used in Structure from Motion problems. One example where they are used is -// in representing points whose triangulation is ill-conditioned. Here it is -// advantageous to use an over-parameterization since homogeneous vectors can -// represent points at infinity. 
-// -// The plus operator is defined as -// Plus(x, delta) = -// [sin(0.5 * |delta|) * delta / |delta|, cos(0.5 * |delta|)] * x -// -// with * defined as an operator which applies the update orthogonal to x to -// remain on the sphere. We assume that the last element of x is the scalar -// component. The size of the homogeneous vector is required to be greater than -// 1. -class CERES_DEPRECATED_WITH_MSG("Use SphereManifold instead.") CERES_EXPORT - HomogeneousVectorParameterization : public LocalParameterization { - public: - explicit HomogeneousVectorParameterization(int size); - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - int GlobalSize() const override { return size_; } - int LocalSize() const override { return size_ - 1; } - - private: - const int size_; -}; - -// This provides a parameterization for lines, where the line is -// over-parameterized by an origin point and a direction vector. So the -// parameter vector size needs to be two times the ambient space dimension, -// where the first half is interpreted as the origin point and the second half -// as the direction. -// -// The plus operator for the line direction is the same as for the -// HomogeneousVectorParameterization. The update of the origin point is -// perpendicular to the line direction before the update. -// -// This local parameterization is a special case of the affine Grassmannian -// manifold (see https://en.wikipedia.org/wiki/Affine_Grassmannian_(manifold)) -// for the case Graff_1(R^n). 
-template -class CERES_DEPRECATED_WITH_MSG("Use LineManifold instead.") - LineParameterization : public LocalParameterization { - public: - static_assert(AmbientSpaceDimension >= 2, - "The ambient space must be at least 2"); - - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - int GlobalSize() const override { return 2 * AmbientSpaceDimension; } - int LocalSize() const override { return 2 * (AmbientSpaceDimension - 1); } -}; - -// Construct a local parameterization by taking the Cartesian product -// of a number of other local parameterizations. This is useful, when -// a parameter block is the cartesian product of two or more -// manifolds. For example the parameters of a camera consist of a -// rotation and a translation, i.e., SO(3) x R^3. -// -// Example usage: -// -// ProductParameterization product_param(new QuaterionionParameterization(), -// new IdentityParameterization(3)); -// -// is the local parameterization for a rigid transformation, where the -// rotation is represented using a quaternion. -// -class CERES_DEPRECATED_WITH_MSG("Use ProductManifold instead.") - CERES_EXPORT ProductParameterization : public LocalParameterization { - public: - ProductParameterization(const ProductParameterization&) = delete; - ProductParameterization& operator=(const ProductParameterization&) = delete; - // - // NOTE: The constructor takes ownership of the input local - // parameterizations. - // - template - explicit ProductParameterization(LocalParams*... local_params) - : local_params_(sizeof...(LocalParams)) { - constexpr int kNumLocalParams = sizeof...(LocalParams); - static_assert(kNumLocalParams >= 2, - "At least two local parameterizations must be specified."); - - using LocalParameterizationPtr = std::unique_ptr; - - // Wrap all raw pointers into std::unique_ptr for exception safety. 
- std::array local_params_array{ - LocalParameterizationPtr(local_params)...}; - - // Initialize internal state. - for (int i = 0; i < kNumLocalParams; ++i) { - LocalParameterizationPtr& param = local_params_[i]; - param = std::move(local_params_array[i]); - - buffer_size_ = - std::max(buffer_size_, param->LocalSize() * param->GlobalSize()); - global_size_ += param->GlobalSize(); - local_size_ += param->LocalSize(); - } - } - - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override; - bool ComputeJacobian(const double* x, double* jacobian) const override; - int GlobalSize() const override { return global_size_; } - int LocalSize() const override { return local_size_; } - - private: - std::vector> local_params_; - int local_size_{0}; - int global_size_{0}; - int buffer_size_{0}; -}; - -} // namespace ceres - -// clang-format off -#include "ceres/internal/reenable_warnings.h" -// clang-format on - -#include "ceres/internal/line_parameterization.h" - -#endif // CERES_PUBLIC_LOCAL_PARAMETERIZATION_H_ diff --git a/extern/ceres/include/ceres/loss_function.h b/extern/ceres/include/ceres/loss_function.h index 8a5a37ff665..b8582f85b67 100644 --- a/extern/ceres/include/ceres/loss_function.h +++ b/extern/ceres/include/ceres/loss_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/manifold.h b/extern/ceres/include/ceres/manifold.h index 4d6e9fa0f59..9bd6459eb9a 100644 --- a/extern/ceres/include/ceres/manifold.h +++ b/extern/ceres/include/ceres/manifold.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/manifold_test_utils.h b/extern/ceres/include/ceres/manifold_test_utils.h index 3f9fb21e8f3..3e614570a53 100644 --- a/extern/ceres/include/ceres/manifold_test_utils.h +++ b/extern/ceres/include/ceres/manifold_test_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,24 +42,54 @@ namespace ceres { -// Matchers and macros for help with testing Manifold objects. +// Matchers and macros to simplify testing of custom Manifold objects using the +// gtest testing framework. // // Testing a Manifold has two parts. // -// 1. Checking that Manifold::Plus is correctly defined. This requires per -// manifold tests. +// 1. Checking that Manifold::Plus() and Manifold::Minus() are correctly +// defined. This requires per manifold tests. // // 2. The other methods of the manifold have mathematical properties that make -// it compatible with Plus, as described in: +// them compatible with Plus() and Minus(), as described in [1]. // -// "Integrating Generic Sensor Fusion Algorithms with Sound State -// Representations through Encapsulation of Manifolds" -// By C. Hertzberg, R. Wagner, U. Frese and L. Schroder -// https://arxiv.org/pdf/1107.1119.pdf +// To verify these general requirements for a custom Manifold, use the +// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD() macro from within a gtest test. Note +// that additional domain-specific tests may also be prudent, e.g to verify the +// behaviour of a Quaternion Manifold about pi. 
// -// These tests are implemented using generic matchers defined below which can -// all be called by the macro EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, -// delta, y, tolerance). See manifold_test.cc for example usage. +// [1] "Integrating Generic Sensor Fusion Algorithms with Sound State +// Representations through Encapsulation of Manifolds", C. Hertzberg, +// R. Wagner, U. Frese and L. Schroder, https://arxiv.org/pdf/1107.1119.pdf + +// Verifies the general requirements for a custom Manifold are satisfied to +// within the specified (numerical) tolerance. +// +// Example usage for a custom Manifold: ExampleManifold: +// +// TEST(ExampleManifold, ManifoldInvariantsHold) { +// constexpr double kTolerance = 1.0e-9; +// ExampleManifold manifold; +// ceres::Vector x = ceres::Vector::Zero(manifold.AmbientSize()); +// ceres::Vector y = ceres::Vector::Zero(manifold.AmbientSize()); +// ceres::Vector delta = ceres::Vector::Zero(manifold.TangentSize()); +// EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, kTolerance); +// } +#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \ + ::ceres::Vector zero_tangent = \ + ::ceres::Vector::Zero(manifold.TangentSize()); \ + EXPECT_THAT(manifold, ::ceres::XPlusZeroIsXAt(x, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::XMinusXIsZeroAt(x, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::MinusPlusIsIdentityAt(x, delta, tolerance)); \ + EXPECT_THAT(manifold, \ + ::ceres::MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, x, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::PlusMinusIsIdentityAt(x, y, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::HasCorrectPlusJacobianAt(x, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::HasCorrectMinusJacobianAt(x, tolerance)); \ + EXPECT_THAT(manifold, ::ceres::MinusPlusJacobianIsIdentityAt(x, tolerance)); \ + EXPECT_THAT(manifold, \ + ::ceres::HasCorrectRightMultiplyByPlusJacobianAt(x, 
tolerance)); // Checks that the invariant Plus(x, 0) == x holds. MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") { @@ -69,7 +99,7 @@ MATCHER_P2(XPlusZeroIsXAt, x, tolerance, "") { Vector actual = Vector::Zero(ambient_size); Vector zero = Vector::Zero(tangent_size); EXPECT_TRUE(arg.Plus(x.data(), zero.data(), actual.data())); - const double n = (actual - x).norm(); + const double n = (actual - Vector{x}).norm(); const double d = x.norm(); const double diffnorm = (d == 0.0) ? n : (n / d); if (diffnorm > tolerance) { @@ -159,7 +189,7 @@ MATCHER_P3(MinusPlusIsIdentityAt, x, delta, tolerance, "") { Vector actual = Vector::Zero(tangent_size); EXPECT_TRUE(arg.Minus(x_plus_delta.data(), x.data(), actual.data())); - const double n = (actual - delta).norm(); + const double n = (actual - Vector{delta}).norm(); const double d = delta.norm(); const double diffnorm = (d == 0.0) ? n : (n / d); if (diffnorm > tolerance) { @@ -184,7 +214,7 @@ MATCHER_P3(PlusMinusIsIdentityAt, x, y, tolerance, "") { Vector actual = Vector::Zero(ambient_size); EXPECT_TRUE(arg.Plus(x.data(), y_minus_x.data(), actual.data())); - const double n = (actual - y).norm(); + const double n = (actual - Vector{y}).norm(); const double d = y.norm(); const double diffnorm = (d == 0.0) ? 
n : (n / d); if (diffnorm > tolerance) { @@ -312,17 +342,4 @@ MATCHER_P2(HasCorrectRightMultiplyByPlusJacobianAt, x, tolerance, "") { return true; } -#define EXPECT_THAT_MANIFOLD_INVARIANTS_HOLD(manifold, x, delta, y, tolerance) \ - Vector zero_tangent = Vector::Zero(manifold.TangentSize()); \ - EXPECT_THAT(manifold, XPlusZeroIsXAt(x, tolerance)); \ - EXPECT_THAT(manifold, XMinusXIsZeroAt(x, tolerance)); \ - EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, delta, tolerance)); \ - EXPECT_THAT(manifold, MinusPlusIsIdentityAt(x, zero_tangent, tolerance)); \ - EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, x, tolerance)); \ - EXPECT_THAT(manifold, PlusMinusIsIdentityAt(x, y, tolerance)); \ - EXPECT_THAT(manifold, HasCorrectPlusJacobianAt(x, tolerance)); \ - EXPECT_THAT(manifold, HasCorrectMinusJacobianAt(x, tolerance)); \ - EXPECT_THAT(manifold, MinusPlusJacobianIsIdentityAt(x, tolerance)); \ - EXPECT_THAT(manifold, HasCorrectRightMultiplyByPlusJacobianAt(x, tolerance)); - } // namespace ceres diff --git a/extern/ceres/include/ceres/normal_prior.h b/extern/ceres/include/ceres/normal_prior.h index c5c7f3e623e..5a26e015fe6 100644 --- a/extern/ceres/include/ceres/normal_prior.h +++ b/extern/ceres/include/ceres/normal_prior.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -61,7 +61,7 @@ class CERES_EXPORT NormalPrior final : public CostFunction { public: // Check that the number of rows in the vector b are the same as the // number of columns in the matrix A, crash otherwise. 
- NormalPrior(const Matrix& A, const Vector& b); + NormalPrior(const Matrix& A, Vector b); bool Evaluate(double const* const* parameters, double* residuals, double** jacobians) const override; diff --git a/extern/ceres/include/ceres/numeric_diff_cost_function.h b/extern/ceres/include/ceres/numeric_diff_cost_function.h index 6ec53175030..00a7d53e311 100644 --- a/extern/ceres/include/ceres/numeric_diff_cost_function.h +++ b/extern/ceres/include/ceres/numeric_diff_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -176,7 +176,7 @@ namespace ceres { template // Parameters dimensions for each block. class NumericDiffCostFunction final @@ -236,7 +236,7 @@ class NumericDiffCostFunction final } internal::EvaluateJacobianForParameterBlocks:: - template Apply( + template Apply( functor_.get(), residuals, options_, diff --git a/extern/ceres/include/ceres/numeric_diff_first_order_function.h b/extern/ceres/include/ceres/numeric_diff_first_order_function.h index f5bb005be58..ccd420cfbc6 100644 --- a/extern/ceres/include/ceres/numeric_diff_first_order_function.h +++ b/extern/ceres/include/ceres/numeric_diff_first_order_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,6 +42,7 @@ #include "ceres/internal/variadic_evaluate.h" #include "ceres/numeric_diff_options.h" #include "ceres/types.h" +#include "glog/logging.h" namespace ceres { @@ -99,19 +100,55 @@ namespace ceres { // "QuadraticCostFunctor", "CENTRAL, 4", describe the finite // differencing scheme as "central differencing" and the functor as // computing its cost from a 4 dimensional input. +// +// If the size of the parameter vector is not known at compile time, then an +// alternate construction syntax can be used: +// +// FirstOrderFunction* function +// = new NumericDiffFirstOrderFunction( +// new QuadraticCostFunctor(1.0), 4); +// +// Note that instead of passing 4 as a template argument, it is now passed as +// the second argument to the constructor. template + NumericDiffMethodType kMethod, + int kNumParameters = DYNAMIC> class NumericDiffFirstOrderFunction final : public FirstOrderFunction { public: + // Constructor for the case where the parameter size is known at compile time. explicit NumericDiffFirstOrderFunction( FirstOrderFunctor* functor, Ownership ownership = TAKE_OWNERSHIP, const NumericDiffOptions& options = NumericDiffOptions()) - : functor_(functor), ownership_(ownership), options_(options) { + : functor_(functor), + num_parameters_(kNumParameters), + ownership_(ownership), + options_(options) { + static_assert(kNumParameters != DYNAMIC, + "Number of parameters must be static when defined via the " + "template parameter. Use the other constructor for " + "dynamically sized functions."); static_assert(kNumParameters > 0, "kNumParameters must be positive"); } + // Constructor for the case where the parameter size is specified at run time. 
+ explicit NumericDiffFirstOrderFunction( + FirstOrderFunctor* functor, + int num_parameters, + Ownership ownership = TAKE_OWNERSHIP, + const NumericDiffOptions& options = NumericDiffOptions()) + : functor_(functor), + num_parameters_(num_parameters), + ownership_(ownership), + options_(options) { + static_assert( + kNumParameters == DYNAMIC, + "Template parameter must be DYNAMIC when using this constructor. If " + "you want to provide the number of parameters statically use the other " + "constructor."); + CHECK_GT(num_parameters, 0); + } + ~NumericDiffFirstOrderFunction() override { if (ownership_ != TAKE_OWNERSHIP) { functor_.release(); @@ -121,12 +158,8 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction { bool Evaluate(const double* const parameters, double* cost, double* gradient) const override { - using ParameterDims = internal::StaticParameterDims; - constexpr int kNumResiduals = 1; - // Get the function value (cost) at the the point to evaluate. - if (!internal::VariadicEvaluate( - *functor_, &parameters, cost)) { + if (!(*functor_)(parameters, cost)) { return false; } @@ -135,27 +168,47 @@ class NumericDiffFirstOrderFunction final : public FirstOrderFunction { } // Create a copy of the parameters which will get mutated.
- internal::FixedArray parameters_copy(kNumParameters); - std::copy_n(parameters, kNumParameters, parameters_copy.data()); + internal::FixedArray parameters_copy(num_parameters_); + std::copy_n(parameters, num_parameters_, parameters_copy.data()); double* parameters_ptr = parameters_copy.data(); - internal::EvaluateJacobianForParameterBlocks< - ParameterDims>::template Apply(functor_.get(), - cost, - options_, - kNumResiduals, - &parameters_ptr, - &gradient); - return true; + constexpr int kNumResiduals = 1; + if constexpr (kNumParameters == DYNAMIC) { + internal::FirstOrderFunctorAdapter fofa(*functor_); + return internal::NumericDiff< + internal::FirstOrderFunctorAdapter, + kMethod, + kNumResiduals, + internal::DynamicParameterDims, + 0, + DYNAMIC>::EvaluateJacobianForParameterBlock(&fofa, + cost, + options_, + kNumResiduals, + 0, + num_parameters_, + &parameters_ptr, + gradient); + } else { + return internal::EvaluateJacobianForParameterBlocks< + internal::StaticParameterDims>:: + template Apply(functor_.get(), + cost, + options_, + kNumResiduals, + &parameters_ptr, + &gradient); + } } - int NumParameters() const override { return kNumParameters; } + int NumParameters() const override { return num_parameters_; } const FirstOrderFunctor& functor() const { return *functor_; } private: std::unique_ptr functor_; - Ownership ownership_; - NumericDiffOptions options_; + const int num_parameters_; + const Ownership ownership_; + const NumericDiffOptions options_; }; } // namespace ceres diff --git a/extern/ceres/include/ceres/numeric_diff_options.h b/extern/ceres/include/ceres/numeric_diff_options.h index b025b51d938..eefb7ad1685 100644 --- a/extern/ceres/include/ceres/numeric_diff_options.h +++ b/extern/ceres/include/ceres/numeric_diff_options.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/ordered_groups.h b/extern/ceres/include/ceres/ordered_groups.h index c1531cce65f..d15d22d47be 100644 --- a/extern/ceres/include/ceres/ordered_groups.h +++ b/extern/ceres/include/ceres/ordered_groups.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/problem.h b/extern/ceres/include/ceres/problem.h index 819fa454b21..4c6fd1beed9 100644 --- a/extern/ceres/include/ceres/problem.h +++ b/extern/ceres/include/ceres/problem.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2021 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -53,7 +53,6 @@ namespace ceres { class CostFunction; class EvaluationCallback; class LossFunction; -class LocalParameterization; class Manifold; class Solver; struct CRSMatrix; @@ -118,29 +117,17 @@ using ResidualBlockId = internal::ResidualBlock*; // problem.AddResidualBlock(new MyBinaryCostFunction(...), nullptr, x2, x3); // // Please see cost_function.h for details of the CostFunction object. -// -// NOTE: We are currently in the process of transitioning from -// LocalParameterization to Manifolds in the Ceres API. During this period, -// Problem will support using both Manifold and LocalParameterization objects -// interchangably. In particular, adding a LocalParameterization to a parameter -// block is the same as adding a Manifold to that parameter block. For methods -// in the API affected by this change, see their documentation below. 
class CERES_EXPORT Problem { public: struct CERES_EXPORT Options { // These flags control whether the Problem object owns the CostFunctions, - // LossFunctions, LocalParameterizations, and Manifolds passed into the - // Problem. + // LossFunctions, and Manifolds passed into the Problem. // // If set to TAKE_OWNERSHIP, then the problem object will delete the // corresponding object on destruction. The destructor is careful to delete // the pointers only once, since sharing objects is allowed. Ownership cost_function_ownership = TAKE_OWNERSHIP; Ownership loss_function_ownership = TAKE_OWNERSHIP; - CERES_DEPRECATED_WITH_MSG( - "Local Parameterizations are deprecated. Use Manifold and " - "manifold_ownership instead.") - Ownership local_parameterization_ownership = TAKE_OWNERSHIP; Ownership manifold_ownership = TAKE_OWNERSHIP; // If true, trades memory for faster RemoveResidualBlock() and @@ -271,66 +258,23 @@ class CERES_EXPORT Problem { // pointer but a different size will result in a crash. void AddParameterBlock(double* values, int size); - // Add a parameter block with appropriate size and parameterization to the - // problem. It is okay for local_parameterization to be nullptr. - // - // Repeated calls with the same arguments are ignored. Repeated calls - // with the same double pointer but a different size results in a crash - // (unless Solver::Options::diable_all_safety_checks is set to true). - // - // Repeated calls with the same double pointer and size but different - // LocalParameterization is equivalent to calling - // SetParameterization(local_parameterization), i.e., any previously - // associated LocalParameterization or Manifold object will be replaced with - // the local_parameterization. - // - // NOTE: - // ---- - // - // This method is deprecated and will be removed in the next public - // release of Ceres Solver. Please move to using the Manifold based version of - // AddParameterBlock. 
- // - // During the transition from LocalParameterization to Manifold, internally - // the LocalParameterization is treated as a Manifold by wrapping it using a - // ManifoldAdapter object. So HasManifold() will return true, GetManifold() - // will return the wrapped object and ParameterBlockTangentSize() will return - // the LocalSize of the LocalParameterization. - CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Use the version with Manifolds " - "instead.") - void AddParameterBlock(double* values, - int size, - LocalParameterization* local_parameterization); - // Add a parameter block with appropriate size and Manifold to the // problem. It is okay for manifold to be nullptr. // // Repeated calls with the same arguments are ignored. Repeated calls // with the same double pointer but a different size results in a crash - // (unless Solver::Options::diable_all_safety_checks is set to true). + // (unless Solver::Options::disable_all_safety_checks is set to true). // // Repeated calls with the same double pointer and size but different Manifold // is equivalent to calling SetManifold(manifold), i.e., any previously - // associated LocalParameterization or Manifold object will be replaced with - // the manifold. - // - // Note: - // ---- - // - // During the transition from LocalParameterization to Manifold, calling - // AddParameterBlock with a Manifold when a LocalParameterization is already - // associated with the parameter block is okay. It is equivalent to calling - // SetManifold(manifold), i.e., any previously associated - // LocalParameterization or Manifold object will be replaced with the - // manifold. + // associated Manifold object will be replaced with the manifold. void AddParameterBlock(double* values, int size, Manifold* manifold); - // Remove a parameter block from the problem. 
The LocalParameterization or - // Manifold of the parameter block, if it exists, will persist until the - // deletion of the problem (similar to cost/loss functions in residual block - // removal). Any residual blocks that depend on the parameter are also - // removed, as described above in RemoveResidualBlock(). + // Remove a parameter block from the problem. The Manifold of the parameter + // block, if it exists, will persist until the deletion of the problem + // (similar to cost/loss functions in residual block removal). Any residual + // blocks that depend on the parameter are also removed, as described above + // in RemoveResidualBlock(). // // If Problem::Options::enable_fast_removal is true, then the removal is fast // (almost constant time). Otherwise, removing a parameter block will incur a @@ -361,76 +305,15 @@ class CERES_EXPORT Problem { // Returns true if a parameter block is set constant, and false otherwise. A // parameter block may be set constant in two ways: either by calling - // SetParameterBlockConstant or by associating a LocalParameterization or - // Manifold with a zero dimensional tangent space with it. + // SetParameterBlockConstant or by associating a Manifold with a zero + // dimensional tangent space with it. bool IsParameterBlockConstant(const double* values) const; - // Set the LocalParameterization for the parameter block. Calling - // SetParameterization with nullptr will clear any previously set - // LocalParameterization or Manifold for the parameter block. - // - // Repeated calls will cause any previously associated LocalParameterization - // or Manifold object to be replaced with the local_parameterization. - // - // The local_parameterization is owned by the Problem by default (See - // Problem::Options to override this behaviour). - // - // It is acceptable to set the same LocalParameterization for multiple - // parameter blocks; the destructor is careful to delete - // LocalParamaterizations only once. 
- // - // NOTE: - // ---- - // - // This method is deprecated and will be removed in the next public - // release of Ceres Solver. Please move to using the SetManifold instead. - // - // During the transition from LocalParameterization to Manifold, internally - // the LocalParameterization is treated as a Manifold by wrapping it using a - // ManifoldAdapter object. So HasManifold() will return true, GetManifold() - // will return the wrapped object and ParameterBlockTangentSize will return - // the same value of ParameterBlockLocalSize. - CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Use SetManifold instead.") - void SetParameterization(double* values, - LocalParameterization* local_parameterization); - - // Get the LocalParameterization object associated with this parameter block. - // If there is no LocalParameterization associated then nullptr is returned. - // - // NOTE: This method is deprecated and will be removed in the next public - // release of Ceres Solver. Use GetManifold instead. - // - // Note also that if a LocalParameterization is associated with a parameter - // block, HasManifold will return true and GetManifold will return the - // LocalParameterization wrapped in a ManifoldAdapter. - // - // The converse is NOT true, i.e., if a Manifold is associated with a - // parameter block, HasParameterization will return false and - // GetParameterization will return a nullptr. - CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Use GetManifold " - "instead.") - const LocalParameterization* GetParameterization(const double* values) const; - - // Returns true if a LocalParameterization is associated with this parameter - // block, false otherwise. - // - // NOTE: This method is deprecated and will be removed in the next public - // release of Ceres Solver. Use HasManifold instead. - // - // Note also that if a Manifold is associated with the parameter block, this - // method will return false. 
- CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Use HasManifold instead.") - bool HasParameterization(const double* values) const; - // Set the Manifold for the parameter block. Calling SetManifold with nullptr - // will clear any previously set LocalParameterization or Manifold for the - // parameter block. + // will clear any previously set Manifold for the parameter block. // - // Repeated calls will result in any previously associated - // LocalParameterization or Manifold object to be replaced with the manifold. + // Repeated calls will result in any previously associated Manifold object to + // be replaced with the manifold. // // The manifold is owned by the Problem by default (See Problem::Options to // override this behaviour). @@ -440,18 +323,11 @@ class CERES_EXPORT Problem { // Get the Manifold object associated with this parameter block. // - // If there is no Manifold Or LocalParameterization object associated then - // nullptr is returned. - // - // NOTE: During the transition from LocalParameterization to Manifold, - // internally the LocalParameterization is treated as a Manifold by wrapping - // it using a ManifoldAdapter object. So calling GetManifold on a parameter - // block with a LocalParameterization associated with it will return the - // LocalParameterization wrapped in a ManifoldAdapter + // If there is no Manifold object associated then nullptr is returned. const Manifold* GetManifold(const double* values) const; - // Returns true if a Manifold or a LocalParameterization is associated with - // this parameter block, false otherwise. + // Returns true if a Manifold is associated with this parameter block, false + // otherwise. bool HasManifold(const double* values) const; // Set the lower/upper bound for the parameter at position "index". @@ -484,19 +360,9 @@ class CERES_EXPORT Problem { // The size of the parameter block. 
int ParameterBlockSize(const double* values) const; - // The dimension of the tangent space of the LocalParameterization or Manifold - // for the parameter block. If there is no LocalParameterization or Manifold - // associated with this parameter block, then ParameterBlockLocalSize = - // ParameterBlockSize. - CERES_DEPRECATED_WITH_MSG( - "LocalParameterizations are deprecated. Use ParameterBlockTangentSize " - "instead.") - int ParameterBlockLocalSize(const double* values) const; - - // The dimenion of the tangent space of the LocalParameterization or Manifold - // for the parameter block. If there is no LocalParameterization or Manifold - // associated with this parameter block, then ParameterBlockTangentSize = - // ParameterBlockSize. + // The dimension of the tangent space of the Manifold for the parameter block. + // If there is no Manifold associated with this parameter block, then + // ParameterBlockTangentSize = ParameterBlockSize. int ParameterBlockTangentSize(const double* values) const; // Is the given parameter block present in this problem or not? @@ -596,11 +462,11 @@ class CERES_EXPORT Problem { // // is the way to do so. // - // Note 2: If no LocalParameterizations or Manifolds are used, then the size - // of the gradient vector (and the number of columns in the jacobian) is the - // sum of the sizes of all the parameter blocks. If a parameter block has a - // LocalParameterization or Manifold, then it contributes "TangentSize" - // entries to the gradient vector (and the number of columns in the jacobian). + // Note 2: If no Manifolds are used, then the size of the gradient vector (and + // the number of columns in the jacobian) is the sum of the sizes of all the + // parameter blocks. If a parameter block has a Manifold, then it contributes + // "TangentSize" entries to the gradient vector (and the number of columns in + // the jacobian). 
// // Note 3: This function cannot be called while the problem is being solved, // for example it cannot be called from an IterationCallback at the end of an @@ -631,11 +497,10 @@ class CERES_EXPORT Problem { // returns false, the caller should expect the output memory locations to have // been modified. // - // The returned cost and jacobians have had robustification and - // LocalParameterization/Manifold applied already; for example, the jacobian - // for a 4-dimensional quaternion parameter using the - // "QuaternionParameterization" is num_residuals by 3 instead of num_residuals - // by 4. + // The returned cost and jacobians have had robustification and Manifold + // applied already; for example, the jacobian for a 4-dimensional quaternion + // parameter using the "QuaternionParameterization" is num_residuals by 3 + // instead of num_residuals by 4. // // apply_loss_function as the name implies allows the user to switch the // application of the loss function on and off. @@ -672,9 +537,13 @@ class CERES_EXPORT Problem { double* residuals, double** jacobians) const; + // Returns reference to the options with which the Problem was constructed. + const Options& options() const; + + // Returns pointer to Problem implementation + internal::ProblemImpl* mutable_impl(); + private: - friend class Solver; - friend class Covariance; std::unique_ptr impl_; }; diff --git a/extern/ceres/include/ceres/product_manifold.h b/extern/ceres/include/ceres/product_manifold.h index 33f046da24e..ed2d1f43740 100644 --- a/extern/ceres/include/ceres/product_manifold.h +++ b/extern/ceres/include/ceres/product_manifold.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -257,28 +257,21 @@ class ProductManifold final : public Manifold { template static std::array ExclusiveScan(const std::array& values) { std::array result; + // TODO Replace with std::exclusive_scan once all platforms have full C++17 + // STL support. T init = 0; - - // TODO Replace by std::exclusive_scan once C++17 is available for (std::size_t i = 0; i != N; ++i) { result[i] = init; init += values[i]; } - return result; } - // TODO Replace by std::void_t once C++17 is available - template - struct Void { - using type = void; - }; - template struct IsDereferenceable : std::false_type {}; template - struct IsDereferenceable())>::type> + struct IsDereferenceable())>> : std::true_type {}; template ProductManifold(Manifold0&&, Manifold1&&, Manifolds&&...) -> ProductManifold; -#endif } // namespace ceres diff --git a/extern/ceres/include/ceres/rotation.h b/extern/ceres/include/ceres/rotation.h index 51079901aaf..0cccfa7f3bb 100644 --- a/extern/ceres/include/ceres/rotation.h +++ b/extern/ceres/include/ceres/rotation.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,9 @@ #include #include -#include +#include "ceres/constants.h" +#include "ceres/internal/euler_angles.h" #include "glog/logging.h" namespace ceres { @@ -60,7 +61,7 @@ namespace ceres { // // the expression M(i, j) is equivalent to // -// arrary[i * row_stride + j * col_stride] +// array[i * row_stride + j * col_stride] // // Conversion functions to and from rotation matrices accept // MatrixAdapters to permit using row-major and column-major layouts, @@ -136,6 +137,71 @@ template void EulerAnglesToRotationMatrix( const T* euler, const MatrixAdapter& R); +// Convert a generic Euler Angle sequence (in radians) to a 3x3 rotation matrix. +// +// Euler Angles define a sequence of 3 rotations about a sequence of axes, +// typically taken to be the X, Y, or Z axes. The last axis may be the same as +// the first axis (e.g. ZYZ) per Euler's original definition of his angles +// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in +// the nautical and aerospace fields (Tait-Bryan angles). The three rotations +// may be in a global frame of reference (Extrinsic) or in a body fixed frame of +// reference (Intrinsic) that moves with the rotating object. +// +// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from +// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the +// first rotation and 3 binary choices: +// 1. Parity of the axis permutation. 
The axis sequence has Even parity if the +// second axis of rotation is 'greater-than' the first axis of rotation +// according to the order X +inline void EulerAnglesToRotation(const T* euler, T* R); + +template +void EulerAnglesToRotation(const T* euler, + const MatrixAdapter& R); + +// Convert a 3x3 rotation matrix to a generic Euler Angle sequence (in radians) +// +// Euler Angles define a sequence of 3 rotations about a sequence of axes, +// typically taken to be the X, Y, or Z axes. The last axis may be the same as +// the first axis (e.g. ZYZ) per Euler's original definition of his angles +// (proper Euler angles) or not (e.g. ZYX / yaw-pitch-roll), per common usage in +// the nautical and aerospace fields (Tait-Bryan angles). The three rotations +// may be in a global frame of reference (Extrinsic) or in a body fixed frame of +// reference (Intrinsic) that moves with the rotating object. +// +// Internally, Euler Axis sequences are classified by Ken Shoemake's scheme from +// "Euler angle conversion", Graphics Gems IV, where a choice of axis for the +// first rotation and 3 binary choices: +// 1. Oddness of the axis permutation, that defines whether the second axis is +// 'greater-than' the first axis according to the order X>Y>Z>X) +// 2. Proper Euler Angles v.s. Tait-Bryan Angles +// 3. Extrinsic Rotations v.s. Intrinsic Rotations +// compactly represent all 24 possible Euler Angle Conventions +// +// One template parameter: EulerSystem must be explicitly given. This parameter +// is a tag named by 'Extrinsic' or 'Intrinsic' followed by three characters in +// the set '[XYZ]', specifying the axis sequence, e.g. ceres::ExtrinsicYZY +// (robotic arms), ceres::IntrinsicZYX (for aerospace), etc. 
+// +// The order of elements in the output array 'euler' follows the axis sequence +template +inline void RotationMatrixToEulerAngles(const T* R, T* euler); + +template +void RotationMatrixToEulerAngles( + const MatrixAdapter& R, T* euler); + // Convert a 4-vector to a 3x3 scaled rotation matrix. // // The choice of rotation is such that the quaternion [1 0 0 0] goes to an @@ -247,14 +313,15 @@ MatrixAdapter RowMajorAdapter3x3(T* pointer) { template inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) { + using std::fpclassify; + using std::hypot; const T& a0 = angle_axis[0]; const T& a1 = angle_axis[1]; const T& a2 = angle_axis[2]; - const T theta_squared = a0 * a0 + a1 * a1 + a2 * a2; + const T theta = hypot(a0, a1, a2); // For points not at the origin, the full conversion is numerically stable. - if (theta_squared > T(0.0)) { - const T theta = sqrt(theta_squared); + if (fpclassify(theta) != FP_ZERO) { const T half_theta = theta * T(0.5); const T k = sin(half_theta) / theta; quaternion[0] = cos(half_theta); @@ -276,15 +343,16 @@ inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) { template inline void QuaternionToAngleAxis(const T* quaternion, T* angle_axis) { + using std::fpclassify; + using std::hypot; const T& q1 = quaternion[1]; const T& q2 = quaternion[2]; const T& q3 = quaternion[3]; - const T sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3; + const T sin_theta = hypot(q1, q2, q3); // For quaternions representing non-zero rotation, the conversion // is numerically stable. 
- if (sin_squared_theta > T(0.0)) { - const T sin_theta = sqrt(sin_squared_theta); + if (fpclassify(sin_theta) != FP_ZERO) { const T& cos_theta = quaternion[0]; // If cos_theta is negative, theta is greater than pi/2, which @@ -385,13 +453,14 @@ inline void AngleAxisToRotationMatrix(const T* angle_axis, T* R) { template void AngleAxisToRotationMatrix( const T* angle_axis, const MatrixAdapter& R) { + using std::fpclassify; + using std::hypot; static const T kOne = T(1.0); - const T theta2 = DotProduct(angle_axis, angle_axis); - if (theta2 > T(std::numeric_limits::epsilon())) { + const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]); + if (fpclassify(theta) != FP_ZERO) { // We want to be careful to only evaluate the square root if the // norm of the angle_axis vector is greater than zero. Otherwise // we get a division by zero. - const T theta = sqrt(theta2); const T wx = angle_axis[0] / theta; const T wy = angle_axis[1] / theta; const T wz = angle_axis[2] / theta; @@ -411,7 +480,7 @@ void AngleAxisToRotationMatrix( R(2, 2) = costheta + wz*wz*(kOne - costheta); // clang-format on } else { - // Near zero, we switch to using the first order Taylor expansion. + // At zero, we switch to using the first order Taylor expansion. 
R(0, 0) = kOne; R(1, 0) = angle_axis[2]; R(2, 0) = -angle_axis[1]; @@ -424,6 +493,141 @@ void AngleAxisToRotationMatrix( } } +template +inline void EulerAnglesToRotation(const T* euler, T* R) { + EulerAnglesToRotation(euler, RowMajorAdapter3x3(R)); +} + +template +void EulerAnglesToRotation(const T* euler, + const MatrixAdapter& R) { + using std::cos; + using std::sin; + + const auto [i, j, k] = EulerSystem::kAxes; + + T ea[3]; + ea[1] = euler[1]; + if constexpr (EulerSystem::kIsIntrinsic) { + ea[0] = euler[2]; + ea[2] = euler[0]; + } else { + ea[0] = euler[0]; + ea[2] = euler[2]; + } + if constexpr (EulerSystem::kIsParityOdd) { + ea[0] = -ea[0]; + ea[1] = -ea[1]; + ea[2] = -ea[2]; + } + + const T ci = cos(ea[0]); + const T cj = cos(ea[1]); + const T ch = cos(ea[2]); + const T si = sin(ea[0]); + const T sj = sin(ea[1]); + const T sh = sin(ea[2]); + const T cc = ci * ch; + const T cs = ci * sh; + const T sc = si * ch; + const T ss = si * sh; + if constexpr (EulerSystem::kIsProperEuler) { + R(i, i) = cj; + R(i, j) = sj * si; + R(i, k) = sj * ci; + R(j, i) = sj * sh; + R(j, j) = -cj * ss + cc; + R(j, k) = -cj * cs - sc; + R(k, i) = -sj * ch; + R(k, j) = cj * sc + cs; + R(k, k) = cj * cc - ss; + } else { + R(i, i) = cj * ch; + R(i, j) = sj * sc - cs; + R(i, k) = sj * cc + ss; + R(j, i) = cj * sh; + R(j, j) = sj * ss + cc; + R(j, k) = sj * cs - sc; + R(k, i) = -sj; + R(k, j) = cj * si; + R(k, k) = cj * ci; + } +} + +template +inline void RotationMatrixToEulerAngles(const T* R, T* euler) { + RotationMatrixToEulerAngles(RowMajorAdapter3x3(R), euler); +} + +template +void RotationMatrixToEulerAngles( + const MatrixAdapter& R, T* euler) { + using std::atan2; + using std::fpclassify; + using std::hypot; + + const auto [i, j, k] = EulerSystem::kAxes; + + T ea[3]; + if constexpr (EulerSystem::kIsProperEuler) { + const T sy = hypot(R(i, j), R(i, k)); + if (fpclassify(sy) != FP_ZERO) { + ea[0] = atan2(R(i, j), R(i, k)); + ea[1] = atan2(sy, R(i, i)); + ea[2] = atan2(R(j, i), 
-R(k, i)); + } else { + ea[0] = atan2(-R(j, k), R(j, j)); + ea[1] = atan2(sy, R(i, i)); + ea[2] = T(0.0); + } + } else { + const T cy = hypot(R(i, i), R(j, i)); + if (fpclassify(cy) != FP_ZERO) { + ea[0] = atan2(R(k, j), R(k, k)); + ea[1] = atan2(-R(k, i), cy); + ea[2] = atan2(R(j, i), R(i, i)); + } else { + ea[0] = atan2(-R(j, k), R(j, j)); + ea[1] = atan2(-R(k, i), cy); + ea[2] = T(0.0); + } + } + if constexpr (EulerSystem::kIsParityOdd) { + ea[0] = -ea[0]; + ea[1] = -ea[1]; + ea[2] = -ea[2]; + } + euler[1] = ea[1]; + if constexpr (EulerSystem::kIsIntrinsic) { + euler[0] = ea[2]; + euler[2] = ea[0]; + } else { + euler[0] = ea[0]; + euler[2] = ea[2]; + } + + // Proper euler angles are defined for angles in + // [-pi, pi) x [0, pi] x [-pi, pi) + // which is enforced here + if constexpr (EulerSystem::kIsProperEuler) { + const T kPi(constants::pi); + const T kTwoPi(2.0 * kPi); + if (euler[1] < T(0.0) || ea[1] > kPi) { + euler[0] += kPi; + euler[1] = -euler[1]; + euler[2] -= kPi; + } + + for (int i = 0; i < 3; ++i) { + if (euler[i] < -kPi) { + euler[i] += kTwoPi; + } else if (euler[i] > kPi) { + euler[i] -= kTwoPi; + } + } + } +} + template inline void EulerAnglesToRotationMatrix(const T* euler, const int row_stride_parameter, @@ -589,9 +793,12 @@ inline void AngleAxisRotatePoint(const T angle_axis[3], const T pt[3], T result[3]) { DCHECK_NE(pt, result) << "Inplace rotation is not supported."; + using std::fpclassify; + using std::hypot; - const T theta2 = DotProduct(angle_axis, angle_axis); - if (theta2 > T(std::numeric_limits::epsilon())) { + const T theta = hypot(angle_axis[0], angle_axis[1], angle_axis[2]); + + if (fpclassify(theta) != FP_ZERO) { // Away from zero, use the rodriguez formula // // result = pt costheta + @@ -602,7 +809,6 @@ inline void AngleAxisRotatePoint(const T angle_axis[3], // norm of the angle_axis vector is greater than zero. Otherwise // we get a division by zero.
// - const T theta = sqrt(theta2); const T costheta = cos(theta); const T sintheta = sin(theta); const T theta_inverse = T(1.0) / theta; @@ -623,7 +829,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3], result[1] = pt[1] * costheta + w_cross_pt[1] * sintheta + w[1] * tmp; result[2] = pt[2] * costheta + w_cross_pt[2] * sintheta + w[2] * tmp; } else { - // Near zero, the first order Taylor approximation of the rotation + // At zero, the first order Taylor approximation of the rotation // matrix R corresponding to a vector w and angle theta is // // R = I + hat(w) * sin(theta) @@ -635,7 +841,7 @@ inline void AngleAxisRotatePoint(const T angle_axis[3], // and actually performing multiplication with the point pt, gives us // R * pt = pt + angle_axis x pt. // - // Switching to the Taylor expansion near zero provides meaningful + // Switching to the Taylor expansion at zero provides meaningful // derivatives when evaluated using Jets. // // Explicitly inlined evaluation of the cross product for diff --git a/extern/ceres/include/ceres/sized_cost_function.h b/extern/ceres/include/ceres/sized_cost_function.h index d76b5c26b4c..d594cfe7a5a 100644 --- a/extern/ceres/include/ceres/sized_cost_function.h +++ b/extern/ceres/include/ceres/sized_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/solver.h b/extern/ceres/include/ceres/solver.h index 026fc1c0830..68438a10b33 100644 --- a/extern/ceres/include/ceres/solver.h +++ b/extern/ceres/include/ceres/solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -64,8 +64,6 @@ class CERES_EXPORT Solver { // with a message describing the problem. bool IsValid(std::string* error) const; - // Minimizer options ---------------------------------------- - // Ceres supports the two major families of optimization strategies - // Trust Region and Line Search. // @@ -378,88 +376,144 @@ class CERES_EXPORT Solver { DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN; // Ceres supports using multiple sparse linear algebra libraries for sparse - // matrix ordering and factorizations. Currently, SUITE_SPARSE and CX_SPARSE - // are the valid choices, depending on whether they are linked into Ceres at - // build time. + // matrix ordering and factorizations. SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type = #if !defined(CERES_NO_SUITESPARSE) SUITE_SPARSE; -#elif defined(CERES_USE_EIGEN_SPARSE) - EIGEN_SPARSE; -#elif !defined(CERES_NO_CXSPARSE) - CX_SPARSE; #elif !defined(CERES_NO_ACCELERATE_SPARSE) ACCELERATE_SPARSE; +#elif defined(CERES_USE_EIGEN_SPARSE) + EIGEN_SPARSE; #else NO_SPARSE; #endif // The order in which variables are eliminated in a linear solver - // can have a significant of impact on the efficiency and accuracy - // of the method. e.g., when doing sparse Cholesky factorization, + // can have a significant impact on the efficiency and accuracy of + // the method. e.g., when doing sparse Cholesky factorization, // there are matrices for which a good ordering will give a // Cholesky factor with O(n) storage, where as a bad ordering will // result in an completely dense factor. // - // Ceres allows the user to provide varying amounts of hints to - // the solver about the variable elimination ordering to use. 
This - // can range from no hints, where the solver is free to decide the - // best possible ordering based on the user's choices like the - // linear solver being used, to an exact order in which the - // variables should be eliminated, and a variety of possibilities - // in between. + // Sparse direct solvers like SPARSE_NORMAL_CHOLESKY and + // SPARSE_SCHUR use a fill reducing ordering of the columns and + // rows of the matrix being factorized before computing the + // numeric factorization. // - // Instances of the ParameterBlockOrdering class are used to - // communicate this information to Ceres. + // This enum controls the type of algorithm used to compute + // this fill reducing ordering. There is no single algorithm + // that works on all matrices, so determining which algorithm + // works better is a matter of empirical experimentation. // - // Formally an ordering is an ordered partitioning of the - // parameter blocks, i.e, each parameter block belongs to exactly - // one group, and each group has a unique non-negative integer - // associated with it, that determines its order in the set of - // groups. + // The exact behaviour of this setting is affected by the value of + // linear_solver_ordering as described below. + LinearSolverOrderingType linear_solver_ordering_type = AMD; + + // Besides specifying the fill reducing ordering via + // linear_solver_ordering_type, Ceres allows the user to provide varying + // amounts of hints to the linear solver about the variable elimination + // ordering to use. This can range from no hints, where the solver is free + // to decide the best possible ordering based on the user's choices like the + // linear solver being used, to an exact order in which the variables should + // be eliminated, and a variety of possibilities in between. 
// - // Given such an ordering, Ceres ensures that the parameter blocks in - // the lowest numbered group are eliminated first, and then the - // parameter blocks in the next lowest numbered group and so on. Within - // each group, Ceres is free to order the parameter blocks as it - // chooses. + // Instances of the ParameterBlockOrdering class are used to communicate + // this information to Ceres. // - // If nullptr, then all parameter blocks are assumed to be in the - // same group and the solver is free to decide the best - // ordering. + // Formally an ordering is an ordered partitioning of the parameter blocks, + // i.e, each parameter block belongs to exactly one group, and each group + // has a unique non-negative integer associated with it, that determines its + // order in the set of groups. // // e.g. Consider the linear system // // x + y = 3 // 2x + 3y = 7 // - // There are two ways in which it can be solved. First eliminating x - // from the two equations, solving for y and then back substituting - // for x, or first eliminating y, solving for x and back substituting - // for y. The user can construct three orderings here. + // There are two ways in which it can be solved. First eliminating x from + // the two equations, solving for y and then back substituting for x, or + // first eliminating y, solving for x and back substituting for y. The user + // can construct three orderings here. // // {0: x}, {1: y} - eliminate x first. // {0: y}, {1: x} - eliminate y first. // {0: x, y} - Solver gets to decide the elimination order. // - // Thus, to have Ceres determine the ordering automatically using - // heuristics, put all the variables in group 0 and to control the - // ordering for every variable, create groups 0..N-1, one per - // variable, in the desired order. + // Thus, to have Ceres determine the ordering automatically, put all the + // variables in group 0 and to control the ordering for every variable + // create groups 0 ... 
N-1, one per variable, in the desired + // order. + // + // linear_solver_ordering == nullptr and an ordering where all the parameter + // blocks are in one elimination group mean the same thing - the solver is + // free to choose what it thinks is the best elimination ordering. Therefore + // in the following we will only consider the case where + // linear_solver_ordering is nullptr. + // + // The exact interpretation of this information depends on the values of + // linear_solver_ordering_type and linear_solver_type/preconditioner_type + // and sparse_linear_algebra_library_type. // // Bundle Adjustment - // ----------------- + // ================= // - // A particular case of interest is bundle adjustment, where the user - // has two options. The default is to not specify an ordering at all, - // the solver will see that the user wants to use a Schur type solver - // and figure out the right elimination ordering. + // If the user is using one of the Schur solvers (DENSE_SCHUR, + // SPARSE_SCHUR, ITERATIVE_SCHUR) and chooses to specify an + // ordering, it must have one important property. The lowest + // numbered elimination group must form an independent set in the + // graph corresponding to the Hessian, or in other words, no two + // parameter blocks in the first elimination group should + // co-occur in the same residual block. For the best performance, + // this elimination group should be as large as possible. For + // standard bundle adjustment problems, this corresponds to the + // first elimination group containing all the 3d points, and the + // second containing all the camera parameter blocks. // - // But if the user already knows what parameter blocks are points and - // what are cameras, they can save preprocessing time by partitioning - // the parameter blocks into two groups, one for the points and one - // for the cameras, where the group containing the points has an id - // smaller than the group containing cameras.
+ // If the user leaves the choice to Ceres, then the solver uses an + // approximate maximum independent set algorithm to identify the first + // elimination group. + // + // sparse_linear_algebra_library_type = SUITE_SPARSE + // ================================================= + // + // linear_solver_ordering_type = AMD + // --------------------------------- + // + // A Constrained Approximate Minimum Degree (CAMD) ordering is used where the + // parameter blocks in the lowest numbered group are eliminated first, and + // then the parameter blocks in the next lowest numbered group and so + // on. Within each group, CAMD is free to order the parameter blocks as it + // chooses. + // + // linear_solver_ordering_type = NESDIS + // ------------------------------------- + // + // a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or + // linear_solver_type = CGNR and preconditioner_type = SUBSET + // + // The value of linear_solver_ordering is ignored and a Nested Dissection + // algorithm is used to compute a fill reducing ordering. + // + // b. linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR + // + // ONLY the lowest group are used to compute the Schur complement, and + // Nested Dissection is used to compute a fill reducing ordering for the + // Schur Complement (or its preconditioner). + // + // sparse_linear_algebra_library_type = EIGEN_SPARSE or ACCELERATE_SPARSE + // ====================================================================== + // + // a. linear_solver_type = SPARSE_NORMAL_CHOLESKY or + // linear_solver_type = CGNR and preconditioner_type = SUBSET + // + // then the value of linear_solver_ordering is ignored and AMD or NESDIS is + // used to compute a fill reducing ordering as requested by the user. + // + // b.
linear_solver_type = SPARSE_SCHUR/DENSE_SCHUR/ITERATIVE_SCHUR + // + // ONLY the lowest group are used to compute the Schur complement, and AMD + // or NESDIS is used to compute a fill reducing ordering for the Schur + // Complement (or its preconditioner). std::shared_ptr linear_solver_ordering; // Use an explicitly computed Schur complement matrix with @@ -500,12 +554,6 @@ class CERES_EXPORT Solver { // Jacobian matrix and generally speaking, there is no performance // penalty for doing so. - // In some rare cases, it is worth using a more complicated - // reordering algorithm which has slightly better runtime - // performance at the expense of an extra copy of the Jacobian - // matrix. Setting use_postordering to true enables this tradeoff. - bool use_postordering = false; - // Some non-linear least squares problems are symbolically dense but // numerically sparse. i.e. at any given state only a small number // of jacobian entries are non-zero, but the position and number of @@ -521,11 +569,6 @@ class CERES_EXPORT Solver { // This settings only affects the SPARSE_NORMAL_CHOLESKY solver. bool dynamic_sparsity = false; - // TODO(sameeragarwal): Further expand the documentation for the - // following two options. - - // NOTE1: EXPERIMENTAL FEATURE, UNDER DEVELOPMENT, USE AT YOUR OWN RISK. - // // If use_mixed_precision_solves is true, the Gauss-Newton matrix // is computed in double precision, but its factorization is // computed in single precision. This can result in significant @@ -536,16 +579,57 @@ class CERES_EXPORT Solver { // If use_mixed_precision_solves is true, we recommend setting // max_num_refinement_iterations to 2-3. // - // NOTE2: The following two options are currently only applicable - // if sparse_linear_algebra_library_type is EIGEN_SPARSE or - // ACCELERATE_SPARSE, and linear_solver_type is SPARSE_NORMAL_CHOLESKY - // or SPARSE_SCHUR. 
+ // This option is available when the linear solver uses sparse or dense + // cholesky factorization, except when sparse_linear_algebra_library_type = + // SUITE_SPARSE. bool use_mixed_precision_solves = false; // Number steps of the iterative refinement process to run when // computing the Gauss-Newton step. int max_num_refinement_iterations = 0; + // Minimum number of iterations for which the linear solver should + // run, even if the convergence criterion is satisfied. + int min_linear_solver_iterations = 0; + + // Maximum number of iterations for which the linear solver should + // run. If the solver does not converge in less than + // max_linear_solver_iterations, then it returns MAX_ITERATIONS, + // as its termination type. + int max_linear_solver_iterations = 500; + + // Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION. + // Each iteration corresponds to one more term in the power series expansion + // of the inverse of the Schur complement. This value controls the maximum + // number of iterations whether it is used as a preconditioner or just to + // initialize the solution for ITERATIVE_SCHUR. + int max_num_spse_iterations = 5; + + // Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for + // ITERATIVE_SCHUR. This option can be set true regardless of what + // preconditioner is being used. + bool use_spse_initialization = false; + + // When use_spse_initialization is true, this parameter along with + // max_num_spse_iterations controls the number of + // SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It + // is not used to control the preconditioner. + double spse_tolerance = 0.1; + + // Forcing sequence parameter. The truncated Newton solver uses + // this number to control the relative accuracy with which the + // Newton step is computed.
+ // + // This constant is passed to ConjugateGradientsSolver which uses + // it to terminate the iterations when + // + // (Q_i - Q_{i-1})/Q_i < eta/i + double eta = 1e-1; + + // Normalize the jacobian using Jacobi scaling before calling + // the linear least squares solver. + bool jacobi_scaling = true; + // Some non-linear least squares problems have additional // structure in the way the parameter blocks interact that it is // beneficial to modify the way the trust region step is computed. @@ -629,32 +713,6 @@ class CERES_EXPORT Solver { // iterations is disabled. double inner_iteration_tolerance = 1e-3; - // Minimum number of iterations for which the linear solver should - // run, even if the convergence criterion is satisfied. - int min_linear_solver_iterations = 0; - - // Maximum number of iterations for which the linear solver should - // run. If the solver does not converge in less than - // max_linear_solver_iterations, then it returns MAX_ITERATIONS, - // as its termination type. - int max_linear_solver_iterations = 500; - - // Forcing sequence parameter. The truncated Newton solver uses - // this number to control the relative accuracy with which the - // Newton step is computed. - // - // This constant is passed to ConjugateGradientsSolver which uses - // it to terminate the iterations when - // - // (Q_i - Q_{i-1})/Q_i < eta/i - double eta = 1e-1; - - // Normalize the jacobian using Jacobi scaling before calling - // the linear least squares solver. - bool jacobi_scaling = true; - - // Logging options --------------------------------------------------------- - LoggingType logging_type = PER_MINIMIZER_ITERATION; // By default the Minimizer progress is logged to VLOG(1), which @@ -791,10 +849,9 @@ class CERES_EXPORT Solver { // IterationSummary for each minimizer iteration in order. std::vector iterations; - // Number of minimizer iterations in which the step was - // accepted. 
Unless use_non_monotonic_steps is true this is also - // the number of steps in which the objective function value/cost - // went down. + // Number of minimizer iterations in which the step was accepted. Unless + // use_nonmonotonic_steps is true this is also the number of steps in which + // the objective function value/cost went down. int num_successful_steps = -1; // Number of minimizer iterations in which the step was rejected @@ -884,7 +941,7 @@ class CERES_EXPORT Solver { // Dimension of the tangent space of the problem (or the number of // columns in the Jacobian for the problem). This is different // from num_parameters if a parameter block is associated with a - // LocalParameterization/Manifold. + // Manifold. int num_effective_parameters = -1; // Number of residual blocks in the problem. @@ -905,7 +962,7 @@ class CERES_EXPORT Solver { // number of columns in the Jacobian for the reduced // problem). This is different from num_parameters_reduced if a // parameter block in the reduced problem is associated with a - // LocalParameterization/Manifold. + // Manifold. int num_effective_parameters_reduced = -1; // Number of residual blocks in the reduced problem. @@ -922,8 +979,7 @@ class CERES_EXPORT Solver { int num_threads_given = -1; // Number of threads actually used by the solver for Jacobian and - // residual evaluation. This number is not equal to - // num_threads_given if OpenMP is not available. + // residual evaluation. int num_threads_used = -1; // Type of the linear solver requested by the user. @@ -946,6 +1002,10 @@ class CERES_EXPORT Solver { SPARSE_NORMAL_CHOLESKY; #endif + bool mixed_precision_solves_used = false; + + LinearSolverOrderingType linear_solver_ordering_type = AMD; + // Size of the elimination groups given by the user as hints to // the linear solver. 
std::vector linear_solver_ordering_given; @@ -1005,7 +1065,7 @@ class CERES_EXPORT Solver { PreconditionerType preconditioner_type_used = IDENTITY; // Type of clustering algorithm used for visibility based - // preconditioning. Only meaningful when the preconditioner_type + // preconditioning. Only meaningful when the preconditioner_type_used // is CLUSTER_JACOBI or CLUSTER_TRIDIAGONAL. VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS; diff --git a/extern/ceres/include/ceres/sphere_manifold.h b/extern/ceres/include/ceres/sphere_manifold.h index 5d71cbbca9a..1c7458b0330 100644 --- a/extern/ceres/include/ceres/sphere_manifold.h +++ b/extern/ceres/include/ceres/sphere_manifold.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -114,12 +114,17 @@ class SphereManifold final : public Manifold { static constexpr int TangentSpaceDimension = AmbientSpaceDimension > 0 ? AmbientSpaceDimension - 1 : Eigen::Dynamic; + // NOTE: Eigen does not allow to have a RowMajor column vector. + // In that case, change the storage order + static constexpr int SafeRowMajor = + TangentSpaceDimension == 1 ? 
Eigen::ColMajor : Eigen::RowMajor; + using AmbientVector = Eigen::Matrix; using TangentVector = Eigen::Matrix; using MatrixPlusJacobian = Eigen::Matrix; + SafeRowMajor>; using MatrixMinusJacobian = Eigen::Matrix residuals_, f_x_new_; Eigen::Matrix jacobian_; Eigen::Matrix jtj_, jtj_regularized_; @@ -385,7 +384,6 @@ class TinySolver { x_new_.resize(num_parameters); g_.resize(num_parameters); jacobi_scaling_.resize(num_parameters); - lm_diagonal_.resize(num_parameters); lm_step_.resize(num_parameters); residuals_.resize(num_residuals); f_x_new_.resize(num_residuals); diff --git a/extern/ceres/include/ceres/tiny_solver_autodiff_function.h b/extern/ceres/include/ceres/tiny_solver_autodiff_function.h index 3e3675ff070..1b9bd9633ef 100644 --- a/extern/ceres/include/ceres/tiny_solver_autodiff_function.h +++ b/extern/ceres/include/ceres/tiny_solver_autodiff_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -171,7 +171,7 @@ class TinySolverAutoDiffFunction { const CostFunctor& cost_functor_; // The number of residuals at runtime. - // This will be overriden if NUM_RESIDUALS == Eigen::Dynamic. + // This will be overridden if NUM_RESIDUALS == Eigen::Dynamic. int num_residuals_ = kNumResiduals; // To evaluate the cost function with jets, temporary storage is needed. These diff --git a/extern/ceres/include/ceres/tiny_solver_cost_function_adapter.h b/extern/ceres/include/ceres/tiny_solver_cost_function_adapter.h index cc5ca16af5d..166f03f1de3 100644 --- a/extern/ceres/include/ceres/tiny_solver_cost_function_adapter.h +++ b/extern/ceres/include/ceres/tiny_solver_cost_function_adapter.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/include/ceres/types.h b/extern/ceres/include/ceres/types.h index e5224238129..6e19c51dd63 100644 --- a/extern/ceres/include/ceres/types.h +++ b/extern/ceres/include/ceres/types.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -67,8 +67,7 @@ enum LinearSolverType { // Eigen. DENSE_QR, - // Solve the normal equations using a sparse cholesky solver; requires - // SuiteSparse or CXSparse. + // Solve the normal equations using a sparse cholesky solver; SPARSE_NORMAL_CHOLESKY, // Specialized solvers, specific to problems with a generalized @@ -98,7 +97,7 @@ enum PreconditionerType { // Block diagonal of the Gauss-Newton Hessian. JACOBI, - // Note: The following three preconditioners can only be used with + // Note: The following four preconditioners can only be used with // the ITERATIVE_SCHUR solver. They are well suited for Structure // from Motion problems. @@ -106,6 +105,10 @@ enum PreconditionerType { // only be used with the ITERATIVE_SCHUR solver. SCHUR_JACOBI, + // Use power series expansion to approximate the inversion of Schur complement + // as a preconditioner. + SCHUR_POWER_SERIES_EXPANSION, + // Visibility clustering based preconditioners. // // The following two preconditioners use the visibility structure of @@ -134,7 +137,7 @@ enum PreconditionerType { // well the matrix Q approximates J'J, or how well the chosen // residual blocks approximate the non-linear least squares // problem. - SUBSET, + SUBSET }; enum VisibilityClusteringType { @@ -165,11 +168,6 @@ enum SparseLinearAlgebraLibraryType { // minimum degree ordering. 
SUITE_SPARSE, - // A lightweight replacement for SuiteSparse, which does not require - // a LAPACK/BLAS implementation. Consequently, its performance is - // also a bit lower than SuiteSparse. - CX_SPARSE, - // Eigen's sparse linear algebra routines. In particular Ceres uses // the Simplicial LDLT routines. EIGEN_SPARSE, @@ -177,12 +175,39 @@ enum SparseLinearAlgebraLibraryType { // Apple's Accelerate framework sparse linear algebra routines. ACCELERATE_SPARSE, + // Nvidia's cuSPARSE library. + CUDA_SPARSE, + // No sparse linear solver should be used. This does not necessarily // imply that Ceres was built without any sparse library, although that // is the likely use case, merely that one should not be used. NO_SPARSE }; +// The order in which variables are eliminated in a linear solver +// can have a significant of impact on the efficiency and accuracy +// of the method. e.g., when doing sparse Cholesky factorization, +// there are matrices for which a good ordering will give a +// Cholesky factor with O(n) storage, where as a bad ordering will +// result in an completely dense factor. +// +// So sparse direct solvers like SPARSE_NORMAL_CHOLESKY and +// SPARSE_SCHUR and preconditioners like SUBSET, CLUSTER_JACOBI & +// CLUSTER_TRIDIAGONAL use a fill reducing ordering of the columns and +// rows of the matrix being factorized before actually the numeric +// factorization. +// +// This enum controls the class of algorithm used to compute this +// fill reducing ordering. There is no single algorithm that works +// on all matrices, so determining which algorithm works better is a +// matter of empirical experimentation. +enum LinearSolverOrderingType { + // Approximate Minimum Degree. + AMD, + // Nested Dissection. 
+ NESDIS +}; + enum DenseLinearAlgebraLibraryType { EIGEN, LAPACK, @@ -467,6 +492,11 @@ CERES_EXPORT const char* SparseLinearAlgebraLibraryTypeToString( CERES_EXPORT bool StringToSparseLinearAlgebraLibraryType( std::string value, SparseLinearAlgebraLibraryType* type); +CERES_EXPORT const char* LinearSolverOrderingTypeToString( + LinearSolverOrderingType type); +CERES_EXPORT bool StringToLinearSolverOrderingType( + std::string value, LinearSolverOrderingType* type); + CERES_EXPORT const char* DenseLinearAlgebraLibraryTypeToString( DenseLinearAlgebraLibraryType type); CERES_EXPORT bool StringToDenseLinearAlgebraLibraryType( diff --git a/extern/ceres/include/ceres/version.h b/extern/ceres/include/ceres/version.h index e0d61972896..2d5d0003720 100644 --- a/extern/ceres/include/ceres/version.h +++ b/extern/ceres/include/ceres/version.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2021 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #define CERES_PUBLIC_VERSION_H_ #define CERES_VERSION_MAJOR 2 -#define CERES_VERSION_MINOR 1 +#define CERES_VERSION_MINOR 2 #define CERES_VERSION_REVISION 0 // Classic CPP stringifcation; the extra level of indirection allows the diff --git a/extern/ceres/internal/ceres/accelerate_sparse.cc b/extern/ceres/internal/ceres/accelerate_sparse.cc index 74adfaf9afc..0baadc08c39 100644 --- a/extern/ceres/internal/ceres/accelerate_sparse.cc +++ b/extern/ceres/internal/ceres/accelerate_sparse.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -61,7 +61,7 @@ const char* SparseStatusToString(SparseStatus_t status) { CASESTR(SparseParameterError); CASESTR(SparseStatusReleased); default: - return "UKNOWN"; + return "UNKNOWN"; } } } // namespace. @@ -114,12 +114,12 @@ AccelerateSparse::CreateSparseMatrixTransposeView( // Accelerate's columnStarts is a long*, not an int*. These types might be // different (e.g. ARM on iOS) so always make a copy. column_starts_.resize(A->num_rows() + 1); // +1 for final column length. - std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]); + std::copy_n(A->rows(), column_starts_.size(), column_starts_.data()); ASSparseMatrix At; At.structure.rowCount = A->num_cols(); At.structure.columnCount = A->num_rows(); - At.structure.columnStarts = &column_starts_[0]; + At.structure.columnStarts = column_starts_.data(); At.structure.rowIndices = A->mutable_cols(); At.structure.attributes.transpose = false; At.structure.attributes.triangle = SparseUpperTriangle; @@ -127,8 +127,8 @@ AccelerateSparse::CreateSparseMatrixTransposeView( At.structure.attributes._reserved = 0; At.structure.attributes._allocatedBySparse = 0; At.structure.blockSize = 1; - if (std::is_same::value) { - At.data = reinterpret_cast(A->mutable_values()); + if constexpr (std::is_same_v) { + At.data = A->mutable_values(); } else { values_ = ConstVectorRef(A->values(), A->num_nonzeros()).template cast(); @@ -139,8 +139,23 @@ AccelerateSparse::CreateSparseMatrixTransposeView( template typename AccelerateSparse::SymbolicFactorization -AccelerateSparse::AnalyzeCholesky(ASSparseMatrix* A) { - return SparseFactor(SparseFactorizationCholesky, A->structure); +AccelerateSparse::AnalyzeCholesky(OrderingType ordering_type, + ASSparseMatrix* A) { + SparseSymbolicFactorOptions sfoption; + sfoption.control = SparseDefaultControl; + sfoption.orderMethod = SparseOrderDefault; + sfoption.order = nullptr; + 
sfoption.ignoreRowsAndColumns = nullptr; + sfoption.malloc = malloc; + sfoption.free = free; + sfoption.reportError = nullptr; + + if (ordering_type == OrderingType::AMD) { + sfoption.orderMethod = SparseOrderAMD; + } else if (ordering_type == OrderingType::NESDIS) { + sfoption.orderMethod = SparseOrderMetis; + } + return SparseFactor(SparseFactorizationCholesky, A->structure, sfoption); } template @@ -190,7 +205,7 @@ AppleAccelerateCholesky::~AppleAccelerateCholesky() { template CompressedRowSparseMatrix::StorageType AppleAccelerateCholesky::StorageType() const { - return CompressedRowSparseMatrix::LOWER_TRIANGULAR; + return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR; } template @@ -199,7 +214,7 @@ LinearSolverTerminationType AppleAccelerateCholesky::Factorize( CHECK_EQ(lhs->storage_type(), StorageType()); if (lhs == nullptr) { *message = "Failure: Input lhs is nullptr."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } typename SparseTypesTrait::SparseMatrix as_lhs = as_.CreateSparseMatrixTransposeView(lhs); @@ -207,13 +222,14 @@ LinearSolverTerminationType AppleAccelerateCholesky::Factorize( if (!symbolic_factor_) { symbolic_factor_ = std::make_unique< typename SparseTypesTrait::SymbolicFactorization>( - as_.AnalyzeCholesky(&as_lhs)); + as_.AnalyzeCholesky(ordering_type_, &as_lhs)); + if (symbolic_factor_->status != SparseStatusOK) { *message = StringPrintf( "Apple Accelerate Failure : Symbolic factorisation failed: %s", SparseStatusToString(symbolic_factor_->status)); FreeSymbolicFactorization(); - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } } @@ -230,10 +246,10 @@ LinearSolverTerminationType AppleAccelerateCholesky::Factorize( "Apple Accelerate Failure : Numeric factorisation failed: %s", SparseStatusToString(numeric_factor_->status)); FreeNumericFactorization(); - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } - return 
LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } template @@ -246,8 +262,8 @@ LinearSolverTerminationType AppleAccelerateCholesky::Solve( typename SparseTypesTrait::DenseVector as_rhs_and_solution; as_rhs_and_solution.count = num_cols; - if (std::is_same::value) { - as_rhs_and_solution.data = reinterpret_cast(solution); + if constexpr (std::is_same_v) { + as_rhs_and_solution.data = solution; std::copy_n(rhs, num_cols, solution); } else { scalar_rhs_and_solution_ = @@ -259,7 +275,7 @@ LinearSolverTerminationType AppleAccelerateCholesky::Solve( VectorRef(solution, num_cols) = scalar_rhs_and_solution_.template cast(); } - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } template diff --git a/extern/ceres/internal/ceres/accelerate_sparse.h b/extern/ceres/internal/ceres/accelerate_sparse.h index 29d78e8c261..ef819b8aa21 100644 --- a/extern/ceres/internal/ceres/accelerate_sparse.h +++ b/extern/ceres/internal/ceres/accelerate_sparse.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -55,18 +55,18 @@ struct SparseTypesTrait {}; template <> struct SparseTypesTrait { - typedef DenseVector_Double DenseVector; - typedef SparseMatrix_Double SparseMatrix; - typedef SparseOpaqueSymbolicFactorization SymbolicFactorization; - typedef SparseOpaqueFactorization_Double NumericFactorization; + using DenseVector = DenseVector_Double; + using SparseMatrix = SparseMatrix_Double; + using SymbolicFactorization = SparseOpaqueSymbolicFactorization; + using NumericFactorization = SparseOpaqueFactorization_Double; }; template <> struct SparseTypesTrait { - typedef DenseVector_Float DenseVector; - typedef SparseMatrix_Float SparseMatrix; - typedef SparseOpaqueSymbolicFactorization SymbolicFactorization; - typedef SparseOpaqueFactorization_Float NumericFactorization; + using DenseVector = DenseVector_Float; + using SparseMatrix = SparseMatrix_Float; + using SymbolicFactorization = SparseOpaqueSymbolicFactorization; + using NumericFactorization = SparseOpaqueFactorization_Float; }; template @@ -91,7 +91,8 @@ class AccelerateSparse { // objects internally). ASSparseMatrix CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A); // Computes a symbolic factorisation of A that can be used in Solve(). - SymbolicFactorization AnalyzeCholesky(ASSparseMatrix* A); + SymbolicFactorization AnalyzeCholesky(OrderingType ordering_type, + ASSparseMatrix* A); // Compute the numeric Cholesky factorization of A, given its // symbolic factorization. NumericFactorization Cholesky(ASSparseMatrix* A, diff --git a/extern/ceres/internal/ceres/array_utils.cc b/extern/ceres/internal/ceres/array_utils.cc index 113d41c927e..a962f7f8740 100644 --- a/extern/ceres/internal/ceres/array_utils.cc +++ b/extern/ceres/internal/ceres/array_utils.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,14 +38,12 @@ #include "ceres/stringprintf.h" #include "ceres/types.h" -namespace ceres { -namespace internal { -using std::string; +namespace ceres::internal { -bool IsArrayValid(const int size, const double* x) { +bool IsArrayValid(const int64_t size, const double* x) { if (x != nullptr) { - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) { return false; } @@ -54,12 +52,12 @@ bool IsArrayValid(const int size, const double* x) { return true; } -int FindInvalidValue(const int size, const double* x) { +int64_t FindInvalidValue(const int64_t size, const double* x) { if (x == nullptr) { return size; } - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { if (!std::isfinite(x[i]) || (x[i] == kImpossibleValue)) { return i; } @@ -68,16 +66,18 @@ int FindInvalidValue(const int size, const double* x) { return size; } -void InvalidateArray(const int size, double* x) { +void InvalidateArray(const int64_t size, double* x) { if (x != nullptr) { - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { x[i] = kImpossibleValue; } } } -void AppendArrayToString(const int size, const double* x, string* result) { - for (int i = 0; i < size; ++i) { +void AppendArrayToString(const int64_t size, + const double* x, + std::string* result) { + for (int64_t i = 0; i < size; ++i) { if (x == nullptr) { StringAppendF(result, "Not Computed "); } else { @@ -90,18 +90,17 @@ void AppendArrayToString(const int size, const double* x, string* result) { } } -void MapValuesToContiguousRange(const int size, int* array) { +void MapValuesToContiguousRange(const int64_t size, int* array) { std::vector unique_values(array, array + size); std::sort(unique_values.begin(), unique_values.end()); 
unique_values.erase(std::unique(unique_values.begin(), unique_values.end()), unique_values.end()); - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { array[i] = std::lower_bound(unique_values.begin(), unique_values.end(), array[i]) - unique_values.begin(); } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/array_utils.h b/extern/ceres/internal/ceres/array_utils.h index d2fc7914e1b..bd51aa5d007 100644 --- a/extern/ceres/internal/ceres/array_utils.h +++ b/extern/ceres/internal/ceres/array_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,30 +43,30 @@ #ifndef CERES_INTERNAL_ARRAY_UTILS_H_ #define CERES_INTERNAL_ARRAY_UTILS_H_ +#include #include #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Fill the array x with an impossible value that the user code is // never expected to compute. -CERES_NO_EXPORT void InvalidateArray(int size, double* x); +CERES_NO_EXPORT void InvalidateArray(const int64_t size, double* x); // Check if all the entries of the array x are valid, i.e. all the // values in the array should be finite and none of them should be // equal to the "impossible" value used by InvalidateArray. -CERES_NO_EXPORT bool IsArrayValid(int size, const double* x); +CERES_NO_EXPORT bool IsArrayValid(const int64_t size, const double* x); // If the array contains an invalid value, return the index for it, // otherwise return size. 
-CERES_NO_EXPORT int FindInvalidValue(const int size, const double* x); +CERES_NO_EXPORT int64_t FindInvalidValue(const int64_t size, const double* x); // Utility routine to print an array of doubles to a string. If the // array pointer is nullptr, it is treated as an array of zeros. -CERES_NO_EXPORT void AppendArrayToString(const int size, +CERES_NO_EXPORT void AppendArrayToString(const int64_t size, const double* x, std::string* result); @@ -83,10 +83,9 @@ CERES_NO_EXPORT void AppendArrayToString(const int size, // gets mapped to // // [1 0 2 3 0 1 3] -CERES_NO_EXPORT void MapValuesToContiguousRange(int size, int* array); +CERES_NO_EXPORT void MapValuesToContiguousRange(const int64_t size, int* array); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_evaluate_preparer.cc b/extern/ceres/internal/ceres/block_evaluate_preparer.cc index 56c97b60cc4..c8b81777b97 100644 --- a/extern/ceres/internal/ceres/block_evaluate_preparer.cc +++ b/extern/ceres/internal/ceres/block_evaluate_preparer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/residual_block.h" #include "ceres/sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { void BlockEvaluatePreparer::Init(int const* const* jacobian_layout, int max_derivatives_per_residual_block) { @@ -78,5 +77,4 @@ void BlockEvaluatePreparer::Prepare(const ResidualBlock* residual_block, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_evaluate_preparer.h b/extern/ceres/internal/ceres/block_evaluate_preparer.h index d72e41ba3e4..8febfacfc86 100644 --- a/extern/ceres/internal/ceres/block_evaluate_preparer.h +++ b/extern/ceres/internal/ceres/block_evaluate_preparer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/internal/export.h" #include "ceres/scratch_evaluate_preparer.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class ResidualBlock; class SparseMatrix; @@ -72,7 +71,6 @@ class CERES_NO_EXPORT BlockEvaluatePreparer { ScratchEvaluatePreparer scratch_evaluate_preparer_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_BLOCK_EVALUATE_PREPARER_H_ diff --git a/extern/ceres/internal/ceres/block_jacobi_preconditioner.cc b/extern/ceres/internal/ceres/block_jacobi_preconditioner.cc index 6e979dea93b..8f8893f5cd7 100644 --- a/extern/ceres/internal/ceres/block_jacobi_preconditioner.cc +++ b/extern/ceres/internal/ceres/block_jacobi_preconditioner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,71 +30,197 @@ #include "ceres/block_jacobi_preconditioner.h" +#include +#include +#include +#include + +#include "Eigen/Dense" #include "ceres/block_random_access_diagonal_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/casts.h" #include "ceres/internal/eigen.h" +#include "ceres/parallel_for.h" +#include "ceres/small_blas.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -BlockJacobiPreconditioner::BlockJacobiPreconditioner( - const BlockSparseMatrix& A) { - const CompressedRowBlockStructure* bs = A.block_structure(); - std::vector blocks(bs->cols.size()); - for (int i = 0; i < blocks.size(); ++i) { - blocks[i] = bs->cols[i].size; - } - - m_ = std::make_unique(blocks); +BlockSparseJacobiPreconditioner::BlockSparseJacobiPreconditioner( + Preconditioner::Options options, const BlockSparseMatrix& A) + : options_(std::move(options)) { + m_ = std::make_unique( + A.block_structure()->cols, options_.context, options_.num_threads); } -BlockJacobiPreconditioner::~BlockJacobiPreconditioner() = default; +BlockSparseJacobiPreconditioner::~BlockSparseJacobiPreconditioner() = default; -bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, - const double* D) { +bool BlockSparseJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, + const double* D) { const CompressedRowBlockStructure* bs = A.block_structure(); const double* values = A.values(); m_->SetZero(); - for (int i = 0; i < bs->rows.size(); ++i) { - const int row_block_size = bs->rows[i].block.size; - const std::vector& cells = bs->rows[i].cells; - for (const auto& cell : cells) { - const int block_id = cell.block_id; - const int col_block_size = bs->cols[block_id].size; - int r, c, row_stride, col_stride; - CellInfo* cell_info = - m_->GetCell(block_id, block_id, &r, 
&c, &row_stride, &col_stride); - MatrixRef m(cell_info->values, row_stride, col_stride); - ConstMatrixRef b(values + cell.position, row_block_size, col_block_size); - m.block(r, c, col_block_size, col_block_size) += b.transpose() * b; - } - } + ParallelFor(options_.context, + 0, + bs->rows.size(), + options_.num_threads, + [this, bs, values](int i) { + const int row_block_size = bs->rows[i].block.size; + const std::vector& cells = bs->rows[i].cells; + for (const auto& cell : cells) { + const int block_id = cell.block_id; + const int col_block_size = bs->cols[block_id].size; + int r, c, row_stride, col_stride; + CellInfo* cell_info = m_->GetCell( + block_id, block_id, &r, &c, &row_stride, &col_stride); + MatrixRef m(cell_info->values, row_stride, col_stride); + ConstMatrixRef b( + values + cell.position, row_block_size, col_block_size); + auto lock = + MakeConditionalLock(options_.num_threads, cell_info->m); + // clang-format off + MatrixTransposeMatrixMultiply( + values + cell.position, row_block_size,col_block_size, + values + cell.position, row_block_size,col_block_size, + cell_info->values,r, c,row_stride,col_stride); + // clang-format on + } + }); if (D != nullptr) { // Add the diagonal. 
- int position = 0; - for (int i = 0; i < bs->cols.size(); ++i) { - const int block_size = bs->cols[i].size; - int r, c, row_stride, col_stride; - CellInfo* cell_info = m_->GetCell(i, i, &r, &c, &row_stride, &col_stride); - MatrixRef m(cell_info->values, row_stride, col_stride); - m.block(r, c, block_size, block_size).diagonal() += - ConstVectorRef(D + position, block_size).array().square().matrix(); - position += block_size; - } + ParallelFor(options_.context, + 0, + bs->cols.size(), + options_.num_threads, + [this, bs, D](int i) { + const int block_size = bs->cols[i].size; + int r, c, row_stride, col_stride; + CellInfo* cell_info = + m_->GetCell(i, i, &r, &c, &row_stride, &col_stride); + MatrixRef m(cell_info->values, row_stride, col_stride); + m.block(r, c, block_size, block_size).diagonal() += + ConstVectorRef(D + bs->cols[i].position, block_size) + .array() + .square() + .matrix(); + }); } m_->Invert(); return true; } -void BlockJacobiPreconditioner::RightMultiply(const double* x, - double* y) const { - m_->RightMultiply(x, y); +BlockCRSJacobiPreconditioner::BlockCRSJacobiPreconditioner( + Preconditioner::Options options, const CompressedRowSparseMatrix& A) + : options_(std::move(options)), locks_(A.col_blocks().size()) { + auto& col_blocks = A.col_blocks(); + + // Compute the number of non-zeros in the preconditioner. This is needed so + // that we can construct the CompressedRowSparseMatrix. + const int m_nnz = SumSquaredSizes(col_blocks); + m_ = std::make_unique( + A.num_cols(), A.num_cols(), m_nnz); + + const int num_col_blocks = col_blocks.size(); + + // Populate the sparsity structure of the preconditioner matrix. + int* m_cols = m_->mutable_cols(); + int* m_rows = m_->mutable_rows(); + m_rows[0] = 0; + for (int i = 0, idx = 0; i < num_col_blocks; ++i) { + // For each column block populate a diagonal block in the preconditioner. 
+ // Not that the because of the way the CompressedRowSparseMatrix format + // works, the entire diagonal block is laid out contiguously in memory as a + // row-major matrix. We will use this when updating the block. + auto& block = col_blocks[i]; + for (int j = 0; j < block.size; ++j) { + for (int k = 0; k < block.size; ++k, ++idx) { + m_cols[idx] = block.position + k; + } + m_rows[block.position + j + 1] = idx; + } + } + + // In reality we only need num_col_blocks locks, however that would require + // that in UpdateImpl we are able to look up the column block from the it + // first column. To save ourselves this map we will instead spend a few extra + // lock objects. + std::vector locks(A.num_cols()); + locks_.swap(locks); + CHECK_EQ(m_rows[A.num_cols()], m_nnz); } -} // namespace internal -} // namespace ceres +BlockCRSJacobiPreconditioner::~BlockCRSJacobiPreconditioner() = default; + +bool BlockCRSJacobiPreconditioner::UpdateImpl( + const CompressedRowSparseMatrix& A, const double* D) { + const auto& col_blocks = A.col_blocks(); + const auto& row_blocks = A.row_blocks(); + const int num_col_blocks = col_blocks.size(); + const int num_row_blocks = row_blocks.size(); + + const int* a_rows = A.rows(); + const int* a_cols = A.cols(); + const double* a_values = A.values(); + double* m_values = m_->mutable_values(); + const int* m_rows = m_->rows(); + + m_->SetZero(); + + ParallelFor( + options_.context, + 0, + num_row_blocks, + options_.num_threads, + [this, row_blocks, a_rows, a_cols, a_values, m_values, m_rows](int i) { + const int row = row_blocks[i].position; + const int row_block_size = row_blocks[i].size; + const int row_nnz = a_rows[row + 1] - a_rows[row]; + ConstMatrixRef row_block( + a_values + a_rows[row], row_block_size, row_nnz); + int c = 0; + while (c < row_nnz) { + const int idx = a_rows[row] + c; + const int col = a_cols[idx]; + const int col_block_size = m_rows[col + 1] - m_rows[col]; + + // We make use of the fact that the entire diagonal block 
is + // stored contiguously in memory as a row-major matrix. + MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size); + // We do not have a row_stride version of + // MatrixTransposeMatrixMultiply, otherwise we could use it + // here to further speed up the following expression. + auto b = row_block.middleCols(c, col_block_size); + auto lock = MakeConditionalLock(options_.num_threads, locks_[col]); + m.noalias() += b.transpose() * b; + c += col_block_size; + } + }); + + ParallelFor( + options_.context, + 0, + num_col_blocks, + options_.num_threads, + [col_blocks, m_rows, m_values, D](int i) { + const int col = col_blocks[i].position; + const int col_block_size = col_blocks[i].size; + MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size); + + if (D != nullptr) { + m.diagonal() += + ConstVectorRef(D + col, col_block_size).array().square().matrix(); + } + + // TODO(sameeragarwal): Deal with Cholesky inversion failure here and + // elsewhere. + m = m.llt().solve(Matrix::Identity(col_block_size, col_block_size)); + }); + + return true; +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_jacobi_preconditioner.h b/extern/ceres/internal/ceres/block_jacobi_preconditioner.h index e0a512a1469..d17580243c0 100644 --- a/extern/ceres/internal/ceres/block_jacobi_preconditioner.h +++ b/extern/ceres/internal/ceres/block_jacobi_preconditioner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,34 +38,30 @@ #include "ceres/internal/export.h" #include "ceres/preconditioner.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; -struct CompressedRowBlockStructure; +class CompressedRowSparseMatrix; // A block Jacobi preconditioner. 
This is intended for use with -// conjugate gradients, or other iterative symmetric solvers. To use -// the preconditioner, create one by passing a BlockSparseMatrix "A" -// to the constructor. This fixes the sparsity pattern to the pattern -// of the matrix A^TA. +// conjugate gradients, or other iterative symmetric solvers. + +// This version of the preconditioner is for use with BlockSparseMatrix +// Jacobians. // -// Before each use of the preconditioner in a solve with conjugate gradients, -// update the matrix by running Update(A, D). The values of the matrix A are -// inspected to construct the preconditioner. The vector D is applied as the -// D^TD diagonal term. -class CERES_NO_EXPORT BlockJacobiPreconditioner +// TODO(https://github.com/ceres-solver/ceres-solver/issues/936): +// BlockSparseJacobiPreconditioner::RightMultiply will benefit from +// multithreading +class CERES_NO_EXPORT BlockSparseJacobiPreconditioner : public BlockSparseMatrixPreconditioner { public: // A must remain valid while the BlockJacobiPreconditioner is. 
- explicit BlockJacobiPreconditioner(const BlockSparseMatrix& A); - BlockJacobiPreconditioner(const BlockJacobiPreconditioner&) = delete; - void operator=(const BlockJacobiPreconditioner&) = delete; - - ~BlockJacobiPreconditioner() override; - - // Preconditioner interface - void RightMultiply(const double* x, double* y) const final; + explicit BlockSparseJacobiPreconditioner(Preconditioner::Options, + const BlockSparseMatrix& A); + ~BlockSparseJacobiPreconditioner() override; + void RightMultiplyAndAccumulate(const double* x, double* y) const final { + return m_->RightMultiplyAndAccumulate(x, y); + } int num_rows() const final { return m_->num_rows(); } int num_cols() const final { return m_->num_rows(); } const BlockRandomAccessDiagonalMatrix& matrix() const { return *m_; } @@ -73,11 +69,35 @@ class CERES_NO_EXPORT BlockJacobiPreconditioner private: bool UpdateImpl(const BlockSparseMatrix& A, const double* D) final; + Preconditioner::Options options_; std::unique_ptr m_; }; -} // namespace internal -} // namespace ceres +// This version of the preconditioner is for use with CompressedRowSparseMatrix +// Jacobians. +class CERES_NO_EXPORT BlockCRSJacobiPreconditioner + : public CompressedRowSparseMatrixPreconditioner { + public: + // A must remain valid while the BlockJacobiPreconditioner is. 
+ explicit BlockCRSJacobiPreconditioner(Preconditioner::Options options, + const CompressedRowSparseMatrix& A); + ~BlockCRSJacobiPreconditioner() override; + void RightMultiplyAndAccumulate(const double* x, double* y) const final { + m_->RightMultiplyAndAccumulate(x, y); + } + int num_rows() const final { return m_->num_rows(); } + int num_cols() const final { return m_->num_rows(); } + const CompressedRowSparseMatrix& matrix() const { return *m_; } + + private: + bool UpdateImpl(const CompressedRowSparseMatrix& A, const double* D) final; + + Preconditioner::Options options_; + std::vector locks_; + std::unique_ptr m_; +}; + +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_jacobian_writer.cc b/extern/ceres/internal/ceres/block_jacobian_writer.cc index a70660f860a..8b77f1d82fd 100644 --- a/extern/ceres/internal/ceres/block_jacobian_writer.cc +++ b/extern/ceres/internal/ceres/block_jacobian_writer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,6 +32,7 @@ #include #include +#include #include "ceres/block_evaluate_preparer.h" #include "ceres/block_sparse_matrix.h" @@ -41,10 +42,7 @@ #include "ceres/program.h" #include "ceres/residual_block.h" -namespace ceres { -namespace internal { - -using std::vector; +namespace ceres::internal { namespace { @@ -56,19 +54,27 @@ namespace { // the first num_eliminate_blocks parameter blocks as indicated by the parameter // block ordering. The remaining parameter blocks are the F blocks. // +// In order to simplify handling block-sparse to CRS conversion, cells within +// the row-block of non-partitioned matrix are stored in memory sequentially in +// the order of increasing column-block id. 
In case of partitioned matrices, +// cells corresponding to F sub-matrix are stored sequentially in the order of +// increasing column-block id (with cells corresponding to E sub-matrix stored +// separately). +// // TODO(keir): Consider if we should use a boolean for each parameter block // instead of num_eliminate_blocks. -void BuildJacobianLayout(const Program& program, +bool BuildJacobianLayout(const Program& program, int num_eliminate_blocks, - vector* jacobian_layout, - vector* jacobian_layout_storage) { - const vector& residual_blocks = program.residual_blocks(); + std::vector* jacobian_layout, + std::vector* jacobian_layout_storage) { + const std::vector& residual_blocks = + program.residual_blocks(); // Iterate over all the active residual blocks and determine how many E blocks // are there. This will determine where the F blocks start in the jacobian // matrix. Also compute the number of jacobian blocks. - int f_block_pos = 0; - int num_jacobian_blocks = 0; + unsigned int f_block_pos = 0; + unsigned int num_jacobian_blocks = 0; for (auto* residual_block : residual_blocks) { const int num_residuals = residual_block->NumResiduals(); const int num_parameter_blocks = residual_block->NumParameterBlocks(); @@ -84,6 +90,11 @@ void BuildJacobianLayout(const Program& program, } } } + if (num_jacobian_blocks > std::numeric_limits::max()) { + LOG(ERROR) << "Overlow error. 
Too many blocks in the jacobian matrix : " + << num_jacobian_blocks; + return false; + } } // We now know that the E blocks are laid out starting at zero, and the F @@ -95,65 +106,103 @@ void BuildJacobianLayout(const Program& program, jacobian_layout_storage->resize(num_jacobian_blocks); int e_block_pos = 0; - int* jacobian_pos = &(*jacobian_layout_storage)[0]; + int* jacobian_pos = jacobian_layout_storage->data(); + std::vector> active_parameter_blocks; for (int i = 0; i < residual_blocks.size(); ++i) { const ResidualBlock* residual_block = residual_blocks[i]; const int num_residuals = residual_block->NumResiduals(); const int num_parameter_blocks = residual_block->NumParameterBlocks(); (*jacobian_layout)[i] = jacobian_pos; + // Cells from F sub-matrix are to be stored sequentially with increasing + // column block id. For each non-constant parameter block, a pair of indices + // (index in the list of active parameter blocks and index in the list of + // all parameter blocks) is computed, and index pairs are sorted by the + // index of corresponding column block id. + active_parameter_blocks.clear(); + active_parameter_blocks.reserve(num_parameter_blocks); for (int j = 0; j < num_parameter_blocks; ++j) { ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; - const int parameter_block_index = parameter_block->index(); if (parameter_block->IsConstant()) { continue; } + const int k = active_parameter_blocks.size(); + active_parameter_blocks.emplace_back(k, j); + } + std::sort(active_parameter_blocks.begin(), + active_parameter_blocks.end(), + [&residual_block](const std::pair& a, + const std::pair& b) { + return residual_block->parameter_blocks()[a.second]->index() < + residual_block->parameter_blocks()[b.second]->index(); + }); + // Cell positions for each active parameter block are filled in the order of + // active parameter block indices sorted by column block index. 
This + // guarantees that cells are laid out sequentially with increasing column + // block indices. + for (const auto& indices : active_parameter_blocks) { + const auto [k, j] = indices; + ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; + const int parameter_block_index = parameter_block->index(); const int jacobian_block_size = num_residuals * parameter_block->TangentSize(); if (parameter_block_index < num_eliminate_blocks) { - *jacobian_pos = e_block_pos; + jacobian_pos[k] = e_block_pos; e_block_pos += jacobian_block_size; } else { - *jacobian_pos = f_block_pos; + jacobian_pos[k] = static_cast(f_block_pos); f_block_pos += jacobian_block_size; + if (f_block_pos > std::numeric_limits::max()) { + LOG(ERROR) + << "Overlow error. Too many entries in the Jacobian matrix."; + return false; + } } - jacobian_pos++; } + jacobian_pos += active_parameter_blocks.size(); } + return true; } } // namespace BlockJacobianWriter::BlockJacobianWriter(const Evaluator::Options& options, Program* program) - : program_(program) { + : options_(options), program_(program) { CHECK_GE(options.num_eliminate_blocks, 0) << "num_eliminate_blocks must be greater than 0."; - BuildJacobianLayout(*program, - options.num_eliminate_blocks, - &jacobian_layout_, - &jacobian_layout_storage_); + jacobian_layout_is_valid_ = BuildJacobianLayout(*program, + options.num_eliminate_blocks, + &jacobian_layout_, + &jacobian_layout_storage_); } // Create evaluate prepareres that point directly into the final jacobian. This // makes the final Write() a nop. 
std::unique_ptr -BlockJacobianWriter::CreateEvaluatePreparers(int num_threads) { - int max_derivatives_per_residual_block = +BlockJacobianWriter::CreateEvaluatePreparers(unsigned num_threads) { + const int max_derivatives_per_residual_block = program_->MaxDerivativesPerResidualBlock(); auto preparers = std::make_unique(num_threads); - for (int i = 0; i < num_threads; i++) { - preparers[i].Init(&jacobian_layout_[0], max_derivatives_per_residual_block); + for (unsigned i = 0; i < num_threads; i++) { + preparers[i].Init(jacobian_layout_.data(), + max_derivatives_per_residual_block); } return preparers; } std::unique_ptr BlockJacobianWriter::CreateJacobian() const { + if (!jacobian_layout_is_valid_) { + LOG(ERROR) << "Unable to create Jacobian matrix. Too many entries in the " + "Jacobian matrix."; + return nullptr; + } + auto* bs = new CompressedRowBlockStructure; - const vector& parameter_blocks = + const std::vector& parameter_blocks = program_->parameter_blocks(); // Construct the column blocks. @@ -167,7 +216,8 @@ std::unique_ptr BlockJacobianWriter::CreateJacobian() const { } // Construct the cells in each row. 
- const vector& residual_blocks = program_->residual_blocks(); + const std::vector& residual_blocks = + program_->residual_blocks(); int row_block_position = 0; bs->rows.resize(residual_blocks.size()); for (int i = 0; i < residual_blocks.size(); ++i) { @@ -206,8 +256,8 @@ std::unique_ptr BlockJacobianWriter::CreateJacobian() const { std::sort(row->cells.begin(), row->cells.end(), CellLessThan); } - return std::make_unique(bs); + return std::make_unique( + bs, options_.sparse_linear_algebra_library_type == CUDA_SPARSE); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_jacobian_writer.h b/extern/ceres/internal/ceres/block_jacobian_writer.h index b2d0aaa3b73..31e35acdf4e 100644 --- a/extern/ceres/internal/ceres/block_jacobian_writer.h +++ b/extern/ceres/internal/ceres/block_jacobian_writer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,16 +44,26 @@ #include "ceres/evaluator.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockEvaluatePreparer; class Program; class SparseMatrix; -// TODO(sameeragarwal): This class needs documemtation. +// TODO(sameeragarwal): This class needs documentation. class CERES_NO_EXPORT BlockJacobianWriter { public: + // Pre-computes positions of cells in block-sparse jacobian. + // Two possible memory layouts are implemented: + // - Non-partitioned case + // - Partitioned case (for Schur type linear solver) + // + // In non-partitioned case, cells are stored sequentially in the + // lexicographic order of (row block id, column block id). 
+ // + // In the case of partitioned matrix, cells of each sub-matrix (E and F) are + // stored sequentially in the lexicographic order of (row block id, column + // block id) and cells from E sub-matrix precede cells from F sub-matrix. BlockJacobianWriter(const Evaluator::Options& options, Program* program); // JacobianWriter interface. @@ -61,7 +71,7 @@ class CERES_NO_EXPORT BlockJacobianWriter { // Create evaluate prepareres that point directly into the final jacobian. // This makes the final Write() a nop. std::unique_ptr CreateEvaluatePreparers( - int num_threads); + unsigned num_threads); std::unique_ptr CreateJacobian() const; @@ -75,12 +85,13 @@ class CERES_NO_EXPORT BlockJacobianWriter { } private: + Evaluator::Options options_; Program* program_; // Stores the position of each residual / parameter jacobian. // // The block sparse matrix that this writer writes to is stored as a set of - // contiguos dense blocks, one after each other; see BlockSparseMatrix. The + // contiguous dense blocks, one after each other; see BlockSparseMatrix. The // "double* values_" member of the block sparse matrix contains all of these // blocks. Given a pointer to the first element of a block and the size of // that block, it's possible to write to it. @@ -122,9 +133,14 @@ class CERES_NO_EXPORT BlockJacobianWriter { // The pointers in jacobian_layout_ point directly into this vector. std::vector jacobian_layout_storage_; + + // The constructor computes the layout of the Jacobian, and this bool keeps + // track of whether the computation of the layout completed successfully or + // not, if it is false, then jacobian_layout and jacobian_layout_storage are + // both in an invalid state. 
+ bool jacobian_layout_is_valid_ = false; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_BLOCK_JACOBIAN_WRITER_H_ diff --git a/extern/ceres/internal/ceres/block_random_access_dense_matrix.cc b/extern/ceres/internal/ceres/block_random_access_dense_matrix.cc index ed172de1d82..b8be51be064 100644 --- a/extern/ceres/internal/ceres/block_random_access_dense_matrix.cc +++ b/extern/ceres/internal/ceres/block_random_access_dense_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,26 +30,21 @@ #include "ceres/block_random_access_dense_matrix.h" +#include #include #include "ceres/internal/eigen.h" +#include "ceres/parallel_vector_ops.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix( - const std::vector& blocks) { - const int num_blocks = blocks.size(); - block_layout_.resize(num_blocks, 0); - num_rows_ = 0; - for (int i = 0; i < num_blocks; ++i) { - block_layout_[i] = num_rows_; - num_rows_ += blocks[i]; - } - + std::vector blocks, ContextImpl* context, int num_threads) + : blocks_(std::move(blocks)), context_(context), num_threads_(num_threads) { + const int num_blocks = blocks_.size(); + num_rows_ = NumScalarEntries(blocks_); values_ = std::make_unique(num_rows_ * num_rows_); - cell_infos_ = std::make_unique(num_blocks * num_blocks); for (int i = 0; i < num_blocks * num_blocks; ++i) { cell_infos_[i].values = values_.get(); @@ -58,30 +53,23 @@ BlockRandomAccessDenseMatrix::BlockRandomAccessDenseMatrix( SetZero(); } -// Assume that the user does not hold any locks on any cell blocks -// when they are calling SetZero. 
-BlockRandomAccessDenseMatrix::~BlockRandomAccessDenseMatrix() = default; - CellInfo* BlockRandomAccessDenseMatrix::GetCell(const int row_block_id, const int col_block_id, int* row, int* col, int* row_stride, int* col_stride) { - *row = block_layout_[row_block_id]; - *col = block_layout_[col_block_id]; + *row = blocks_[row_block_id].position; + *col = blocks_[col_block_id].position; *row_stride = num_rows_; *col_stride = num_rows_; - return &cell_infos_[row_block_id * block_layout_.size() + col_block_id]; + return &cell_infos_[row_block_id * blocks_.size() + col_block_id]; } // Assume that the user does not hold any locks on any cell blocks // when they are calling SetZero. void BlockRandomAccessDenseMatrix::SetZero() { - if (num_rows_) { - VectorRef(values_.get(), num_rows_ * num_rows_).setZero(); - } + ParallelSetZero(context_, num_threads_, values_.get(), num_rows_ * num_rows_); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_random_access_dense_matrix.h b/extern/ceres/internal/ceres/block_random_access_dense_matrix.h index 171a6d694b5..9468249924e 100644 --- a/extern/ceres/internal/ceres/block_random_access_dense_matrix.h +++ b/extern/ceres/internal/ceres/block_random_access_dense_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,11 +35,12 @@ #include #include "ceres/block_random_access_matrix.h" +#include "ceres/block_structure.h" +#include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A square block random accessible matrix with the same row and // column block structure. 
All cells are stored in the same single @@ -56,13 +57,11 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix public: // blocks is a vector of block sizes. The resulting matrix has // blocks.size() * blocks.size() cells. - explicit BlockRandomAccessDenseMatrix(const std::vector& blocks); - BlockRandomAccessDenseMatrix(const BlockRandomAccessDenseMatrix&) = delete; - void operator=(const BlockRandomAccessDenseMatrix&) = delete; + explicit BlockRandomAccessDenseMatrix(std::vector blocks, + ContextImpl* context, + int num_threads); - // The destructor is not thread safe. It assumes that no one is - // modifying any cells when the matrix is being destroyed. - ~BlockRandomAccessDenseMatrix() override; + ~BlockRandomAccessDenseMatrix() override = default; // BlockRandomAccessMatrix interface. CellInfo* GetCell(int row_block_id, @@ -72,8 +71,6 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix int* row_stride, int* col_stride) final; - // This is not a thread safe method, it assumes that no cell is - // locked. 
void SetZero() final; // Since the matrix is square with the same row and column block @@ -86,14 +83,15 @@ class CERES_NO_EXPORT BlockRandomAccessDenseMatrix double* mutable_values() { return values_.get(); } private: - int num_rows_; - std::vector block_layout_; + std::vector blocks_; + ContextImpl* context_ = nullptr; + int num_threads_ = -1; + int num_rows_ = -1; std::unique_ptr values_; std::unique_ptr cell_infos_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.cc b/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.cc index f55f3b30c61..357fc31d81e 100644 --- a/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.cc +++ b/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,61 +37,26 @@ #include #include "Eigen/Dense" +#include "ceres/compressed_row_sparse_matrix.h" #include "ceres/internal/export.h" +#include "ceres/parallel_for.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/stl_util.h" -#include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::vector; - -// TODO(sameeragarwal): Drop the dependence on TripletSparseMatrix. +namespace ceres::internal { BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix( - const vector& blocks) - : blocks_(blocks) { - // Build the row/column layout vector and count the number of scalar - // rows/columns. 
- int num_cols = 0; - int num_nonzeros = 0; - vector block_positions; - for (int block_size : blocks_) { - block_positions.push_back(num_cols); - num_cols += block_size; - num_nonzeros += block_size * block_size; + const std::vector& blocks, ContextImpl* context, int num_threads) + : context_(context), num_threads_(num_threads) { + m_ = CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(nullptr, blocks); + double* values = m_->mutable_values(); + layout_.reserve(blocks.size()); + for (auto& block : blocks) { + layout_.emplace_back(std::make_unique(values)); + values += block.size * block.size; } - - VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] " - << num_nonzeros; - - tsm_ = - std::make_unique(num_cols, num_cols, num_nonzeros); - tsm_->set_num_nonzeros(num_nonzeros); - int* rows = tsm_->mutable_rows(); - int* cols = tsm_->mutable_cols(); - double* values = tsm_->mutable_values(); - - int pos = 0; - for (int i = 0; i < blocks_.size(); ++i) { - const int block_size = blocks_[i]; - layout_.push_back(new CellInfo(values + pos)); - const int block_begin = block_positions[i]; - for (int r = 0; r < block_size; ++r) { - for (int c = 0; c < block_size; ++c, ++pos) { - rows[pos] = block_begin + r; - cols[pos] = block_begin + c; - } - } - } -} - -// Assume that the user does not hold any locks on any cell blocks -// when they are calling SetZero. -BlockRandomAccessDiagonalMatrix::~BlockRandomAccessDiagonalMatrix() { - STLDeleteContainerPointers(layout_.begin(), layout_.end()); } CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id, @@ -103,47 +68,51 @@ CellInfo* BlockRandomAccessDiagonalMatrix::GetCell(int row_block_id, if (row_block_id != col_block_id) { return nullptr; } - const int stride = blocks_[row_block_id]; + + auto& blocks = m_->row_blocks(); + const int stride = blocks[row_block_id].size; // Each cell is stored contiguously as its own little dense matrix. 
*row = 0; *col = 0; *row_stride = stride; *col_stride = stride; - return layout_[row_block_id]; + return layout_[row_block_id].get(); } // Assume that the user does not hold any locks on any cell blocks // when they are calling SetZero. void BlockRandomAccessDiagonalMatrix::SetZero() { - if (tsm_->num_nonzeros()) { - VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero(); - } + ParallelSetZero( + context_, num_threads_, m_->mutable_values(), m_->num_nonzeros()); } void BlockRandomAccessDiagonalMatrix::Invert() { - double* values = tsm_->mutable_values(); - for (int block_size : blocks_) { - MatrixRef block(values, block_size, block_size); - block = block.selfadjointView().llt().solve( - Matrix::Identity(block_size, block_size)); - values += block_size * block_size; - } + auto& blocks = m_->row_blocks(); + const int num_blocks = blocks.size(); + ParallelFor(context_, 0, num_blocks, num_threads_, [this, blocks](int i) { + auto* cell_info = layout_[i].get(); + auto& block = blocks[i]; + MatrixRef b(cell_info->values, block.size, block.size); + b = b.selfadjointView().llt().solve( + Matrix::Identity(block.size, block.size)); + }); } -void BlockRandomAccessDiagonalMatrix::RightMultiply(const double* x, - double* y) const { +void BlockRandomAccessDiagonalMatrix::RightMultiplyAndAccumulate( + const double* x, double* y) const { CHECK(x != nullptr); CHECK(y != nullptr); - const double* values = tsm_->values(); - for (int block_size : blocks_) { - ConstMatrixRef block(values, block_size, block_size); - VectorRef(y, block_size).noalias() += block * ConstVectorRef(x, block_size); - x += block_size; - y += block_size; - values += block_size * block_size; - } + auto& blocks = m_->row_blocks(); + const int num_blocks = blocks.size(); + ParallelFor( + context_, 0, num_blocks, num_threads_, [this, blocks, x, y](int i) { + auto* cell_info = layout_[i].get(); + auto& block = blocks[i]; + ConstMatrixRef b(cell_info->values, block.size, block.size); + VectorRef(y + 
block.position, block.size).noalias() += + b * ConstVectorRef(x + block.position, block.size); + }); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.h b/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.h index 3d36c378320..e5f50b0e09e 100644 --- a/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.h +++ b/extern/ceres/internal/ceres/block_random_access_diagonal_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,33 +32,30 @@ #define CERES_INTERNAL_BLOCK_RANDOM_ACCESS_DIAGONAL_MATRIX_H_ #include -#include #include #include #include "ceres/block_random_access_matrix.h" +#include "ceres/block_structure.h" +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -#include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -// A thread safe block diagonal matrix implementation of -// BlockRandomAccessMatrix. +// A BlockRandomAccessMatrix which only stores the block diagonal. +// BlockRandomAccessSparseMatrix can also be used to do this, but this class is +// more efficient in time and in space. class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix : public BlockRandomAccessMatrix { public: // blocks is an array of block sizes. - explicit BlockRandomAccessDiagonalMatrix(const std::vector& blocks); - BlockRandomAccessDiagonalMatrix(const BlockRandomAccessDiagonalMatrix&) = - delete; - void operator=(const BlockRandomAccessDiagonalMatrix&) = delete; - - // The destructor is not thread safe. 
It assumes that no one is - // modifying any cells when the matrix is being destroyed. - ~BlockRandomAccessDiagonalMatrix() override; + BlockRandomAccessDiagonalMatrix(const std::vector& blocks, + ContextImpl* context, + int num_threads); + ~BlockRandomAccessDiagonalMatrix() override = default; // BlockRandomAccessMatrix Interface. CellInfo* GetCell(int row_block_id, @@ -68,36 +65,30 @@ class CERES_NO_EXPORT BlockRandomAccessDiagonalMatrix int* row_stride, int* col_stride) final; - // This is not a thread safe method, it assumes that no cell is - // locked. + // m = 0 void SetZero() final; - // Invert the matrix assuming that each block is positive definite. + // m = m^{-1} void Invert(); - // y += S * x - void RightMultiply(const double* x, double* y) const; + // y += m * x + void RightMultiplyAndAccumulate(const double* x, double* y) const; // Since the matrix is square, num_rows() == num_cols(). - int num_rows() const final { return tsm_->num_rows(); } - int num_cols() const final { return tsm_->num_cols(); } + int num_rows() const final { return m_->num_rows(); } + int num_cols() const final { return m_->num_cols(); } - const TripletSparseMatrix* matrix() const { return tsm_.get(); } - TripletSparseMatrix* mutable_matrix() { return tsm_.get(); } + const CompressedRowSparseMatrix* matrix() const { return m_.get(); } + CompressedRowSparseMatrix* mutable_matrix() { return m_.get(); } private: - // row/column block sizes. - const std::vector blocks_; - std::vector layout_; - - // The underlying matrix object which actually stores the cells. 
- std::unique_ptr tsm_; - - friend class BlockRandomAccessDiagonalMatrixTest; + ContextImpl* context_ = nullptr; + const int num_threads_ = 1; + std::unique_ptr m_; + std::vector> layout_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_random_access_matrix.cc b/extern/ceres/internal/ceres/block_random_access_matrix.cc index 8e70c049796..cb3d9dcc8bf 100644 --- a/extern/ceres/internal/ceres/block_random_access_matrix.cc +++ b/extern/ceres/internal/ceres/block_random_access_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,10 +30,8 @@ #include "ceres/block_random_access_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { BlockRandomAccessMatrix::~BlockRandomAccessMatrix() = default; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_random_access_matrix.h b/extern/ceres/internal/ceres/block_random_access_matrix.h index 48759b79a18..66390d71ef8 100644 --- a/extern/ceres/internal/ceres/block_random_access_matrix.h +++ b/extern/ceres/internal/ceres/block_random_access_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A matrix implementing the BlockRandomAccessMatrix interface is a // matrix whose rows and columns are divided into blocks. 
For example @@ -123,7 +122,6 @@ class CERES_NO_EXPORT BlockRandomAccessMatrix { virtual int num_cols() const = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_BLOCK_RANDOM_ACCESS_MATRIX_H_ diff --git a/extern/ceres/internal/ceres/block_random_access_sparse_matrix.cc b/extern/ceres/internal/ceres/block_random_access_sparse_matrix.cc index a026daa5dac..271544efa05 100644 --- a/extern/ceres/internal/ceres/block_random_access_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/block_random_access_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,87 +37,63 @@ #include #include "ceres/internal/export.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::make_pair; -using std::pair; -using std::set; -using std::vector; +namespace ceres::internal { BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix( - const vector& blocks, const set>& block_pairs) - : kMaxRowBlocks(10 * 1000 * 1000), blocks_(blocks) { - CHECK_LT(blocks.size(), kMaxRowBlocks); + const std::vector& blocks, + const std::set>& block_pairs, + ContextImpl* context, + int num_threads) + : blocks_(blocks), context_(context), num_threads_(num_threads) { + CHECK_LE(blocks.size(), std::numeric_limits::max()); - // Build the row/column layout vector and count the number of scalar - // rows/columns. 
- int num_cols = 0; - block_positions_.reserve(blocks_.size()); - for (int block_size : blocks_) { - block_positions_.push_back(num_cols); - num_cols += block_size; + const int num_cols = NumScalarEntries(blocks); + const int num_blocks = blocks.size(); + + std::vector num_cells_at_row(num_blocks); + for (auto& p : block_pairs) { + ++num_cells_at_row[p.first]; } - - // Count the number of scalar non-zero entries and build the layout - // object for looking into the values array of the - // TripletSparseMatrix. + auto block_structure_ = new CompressedRowBlockStructure; + block_structure_->cols = blocks; + block_structure_->rows.resize(num_blocks); + auto p = block_pairs.begin(); int num_nonzeros = 0; - for (const auto& block_pair : block_pairs) { - const int row_block_size = blocks_[block_pair.first]; - const int col_block_size = blocks_[block_pair.second]; - num_nonzeros += row_block_size * col_block_size; - } - - VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] " - << num_nonzeros; - - tsm_ = - std::make_unique(num_cols, num_cols, num_nonzeros); - tsm_->set_num_nonzeros(num_nonzeros); - int* rows = tsm_->mutable_rows(); - int* cols = tsm_->mutable_cols(); - double* values = tsm_->mutable_values(); - - int pos = 0; - for (const auto& block_pair : block_pairs) { - const int row_block_size = blocks_[block_pair.first]; - const int col_block_size = blocks_[block_pair.second]; - cell_values_.emplace_back(block_pair, values + pos); - layout_[IntPairToLong(block_pair.first, block_pair.second)] = - new CellInfo(values + pos); - pos += row_block_size * col_block_size; - } - - // Fill the sparsity pattern of the underlying matrix. 
- for (const auto& block_pair : block_pairs) { - const int row_block_id = block_pair.first; - const int col_block_id = block_pair.second; - const int row_block_size = blocks_[row_block_id]; - const int col_block_size = blocks_[col_block_id]; - int pos = - layout_[IntPairToLong(row_block_id, col_block_id)]->values - values; - for (int r = 0; r < row_block_size; ++r) { - for (int c = 0; c < col_block_size; ++c, ++pos) { - rows[pos] = block_positions_[row_block_id] + r; - cols[pos] = block_positions_[col_block_id] + c; - values[pos] = 1.0; - DCHECK_LT(rows[pos], tsm_->num_rows()); - DCHECK_LT(cols[pos], tsm_->num_rows()); - } + // Pairs of block indices are sorted lexicographically, thus pairs + // corresponding to a single row-block are stored in segments of index pairs + // with constant row-block index and increasing column-block index. + // CompressedRowBlockStructure is created by traversing block_pairs set. + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + auto& row = block_structure_->rows[row_block_id]; + row.block = blocks[row_block_id]; + row.cells.reserve(num_cells_at_row[row_block_id]); + const int row_block_size = blocks[row_block_id].size; + // Process all index pairs corresponding to the current row block. Because + // index pairs are sorted lexicographically, cells are being appended to the + // current row-block till the first change in row-block index + for (; p != block_pairs.end() && row_block_id == p->first; ++p) { + const int col_block_id = p->second; + row.cells.emplace_back(col_block_id, num_nonzeros); + num_nonzeros += row_block_size * blocks[col_block_id].size; } } -} - -// Assume that the user does not hold any locks on any cell blocks -// when they are calling SetZero. 
-BlockRandomAccessSparseMatrix::~BlockRandomAccessSparseMatrix() { - for (const auto& entry : layout_) { - delete entry.second; + bsm_ = std::make_unique(block_structure_); + VLOG(1) << "Matrix Size [" << num_cols << "," << num_cols << "] " + << num_nonzeros; + double* values = bsm_->mutable_values(); + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + const auto& cells = block_structure_->rows[row_block_id].cells; + for (auto& c : cells) { + const int col_block_id = c.block_id; + double* const data = values + c.position; + layout_[IntPairToInt64(row_block_id, col_block_id)] = + std::make_unique(data); + } } } @@ -127,8 +103,7 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id, int* col, int* row_stride, int* col_stride) { - const LayoutType::iterator it = - layout_.find(IntPairToLong(row_block_id, col_block_id)); + const auto it = layout_.find(IntPairToInt64(row_block_id, col_block_id)); if (it == layout_.end()) { return nullptr; } @@ -136,44 +111,49 @@ CellInfo* BlockRandomAccessSparseMatrix::GetCell(int row_block_id, // Each cell is stored contiguously as its own little dense matrix. *row = 0; *col = 0; - *row_stride = blocks_[row_block_id]; - *col_stride = blocks_[col_block_id]; - return it->second; + *row_stride = blocks_[row_block_id].size; + *col_stride = blocks_[col_block_id].size; + return it->second.get(); } // Assume that the user does not hold any locks on any cell blocks // when they are calling SetZero. 
void BlockRandomAccessSparseMatrix::SetZero() { - if (tsm_->num_nonzeros()) { - VectorRef(tsm_->mutable_values(), tsm_->num_nonzeros()).setZero(); - } + bsm_->SetZero(context_, num_threads_); } -void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x, - double* y) const { - for (const auto& cell_position_and_data : cell_values_) { - const int row = cell_position_and_data.first.first; - const int row_block_size = blocks_[row]; - const int row_block_pos = block_positions_[row]; +void BlockRandomAccessSparseMatrix::SymmetricRightMultiplyAndAccumulate( + const double* x, double* y) const { + const auto bs = bsm_->block_structure(); + const auto values = bsm_->values(); + const int num_blocks = blocks_.size(); - const int col = cell_position_and_data.first.second; - const int col_block_size = blocks_[col]; - const int col_block_pos = block_positions_[col]; + for (int row_block_id = 0; row_block_id < num_blocks; ++row_block_id) { + const auto& row_block = bs->rows[row_block_id]; + const int row_block_size = row_block.block.size; + const int row_block_pos = row_block.block.position; - MatrixVectorMultiply( - cell_position_and_data.second, - row_block_size, - col_block_size, - x + col_block_pos, - y + row_block_pos); + for (auto& c : row_block.cells) { + const int col_block_id = c.block_id; + const int col_block_size = blocks_[col_block_id].size; + const int col_block_pos = blocks_[col_block_id].position; - // Since the matrix is symmetric, but only the upper triangular - // part is stored, if the block being accessed is not a diagonal - // block, then use the same block to do the corresponding lower - // triangular multiply also. 
- if (row != col) { + MatrixVectorMultiply( + values + c.position, + row_block_size, + col_block_size, + x + col_block_pos, + y + row_block_pos); + if (col_block_id == row_block_id) { + continue; + } + + // Since the matrix is symmetric, but only the upper triangular + // part is stored, if the block being accessed is not a diagonal + // block, then use the same block to do the corresponding lower + // triangular multiply also MatrixTransposeVectorMultiply( - cell_position_and_data.second, + values + c.position, row_block_size, col_block_size, x + row_block_pos, @@ -182,5 +162,4 @@ void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_random_access_sparse_matrix.h b/extern/ceres/internal/ceres/block_random_access_sparse_matrix.h index b31a2ade843..c509a019896 100644 --- a/extern/ceres/internal/ceres/block_random_access_sparse_matrix.h +++ b/extern/ceres/internal/ceres/block_random_access_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,17 +39,18 @@ #include #include "ceres/block_random_access_matrix.h" +#include "ceres/block_sparse_matrix.h" +#include "ceres/block_structure.h" +#include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" #include "ceres/small_blas.h" -#include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A thread safe square block sparse implementation of -// BlockRandomAccessMatrix. Internally a TripletSparseMatrix is used +// BlockRandomAccessMatrix. 
Internally a BlockSparseMatrix is used // for doing the actual storage. This class augments this matrix with // an unordered_map that allows random read/write access. class CERES_NO_EXPORT BlockRandomAccessSparseMatrix @@ -59,14 +60,14 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix // pairs to identify the non-zero cells // of this matrix. BlockRandomAccessSparseMatrix( - const std::vector& blocks, - const std::set>& block_pairs); - BlockRandomAccessSparseMatrix(const BlockRandomAccessSparseMatrix&) = delete; - void operator=(const BlockRandomAccessSparseMatrix&) = delete; + const std::vector& blocks, + const std::set>& block_pairs, + ContextImpl* context, + int num_threads); // The destructor is not thread safe. It assumes that no one is // modifying any cells when the matrix is being destroyed. - ~BlockRandomAccessSparseMatrix() override; + ~BlockRandomAccessSparseMatrix() override = default; // BlockRandomAccessMatrix Interface. CellInfo* GetCell(int row_block_id, @@ -80,53 +81,49 @@ class CERES_NO_EXPORT BlockRandomAccessSparseMatrix // locked. void SetZero() final; - // Assume that the matrix is symmetric and only one half of the - // matrix is stored. + // Assume that the matrix is symmetric and only one half of the matrix is + // stored. // // y += S * x - void SymmetricRightMultiply(const double* x, double* y) const; + void SymmetricRightMultiplyAndAccumulate(const double* x, double* y) const; // Since the matrix is square, num_rows() == num_cols(). - int num_rows() const final { return tsm_->num_rows(); } - int num_cols() const final { return tsm_->num_cols(); } + int num_rows() const final { return bsm_->num_rows(); } + int num_cols() const final { return bsm_->num_cols(); } // Access to the underlying matrix object. 
- const TripletSparseMatrix* matrix() const { return tsm_.get(); } - TripletSparseMatrix* mutable_matrix() { return tsm_.get(); } + const BlockSparseMatrix* matrix() const { return bsm_.get(); } + BlockSparseMatrix* mutable_matrix() { return bsm_.get(); } private: - int64_t IntPairToLong(int row, int col) const { - return row * kMaxRowBlocks + col; + int64_t IntPairToInt64(int row, int col) const { + return row * kRowShift + col; } - void LongToIntPair(int64_t index, int* row, int* col) const { - *row = index / kMaxRowBlocks; - *col = index % kMaxRowBlocks; + void Int64ToIntPair(int64_t index, int* row, int* col) const { + *row = index / kRowShift; + *col = index % kRowShift; } - const int64_t kMaxRowBlocks; + constexpr static int64_t kRowShift{1ll << 32}; // row/column block sizes. - const std::vector blocks_; - std::vector block_positions_; + const std::vector blocks_; + ContextImpl* context_ = nullptr; + const int num_threads_ = 1; // A mapping from to the position in // the values array of tsm_ where the block is stored. - using LayoutType = std::unordered_map; + using LayoutType = std::unordered_map>; LayoutType layout_; - // In order traversal of contents of the matrix. This allows us to - // implement a matrix-vector which is 20% faster than using the - // iterator in the Layout object instead. - std::vector, double*>> cell_values_; // The underlying matrix object which actually stores the cells. 
- std::unique_ptr tsm_; + std::unique_ptr bsm_; friend class BlockRandomAccessSparseMatrixTest; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_sparse_matrix.cc b/extern/ceres/internal/ceres/block_sparse_matrix.cc index 31ea39daeea..909389ce58b 100644 --- a/extern/ceres/internal/ceres/block_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/block_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,23 +33,151 @@ #include #include #include +#include +#include #include #include "ceres/block_structure.h" +#include "ceres/crs_matrix.h" #include "ceres/internal/eigen.h" -#include "ceres/random.h" +#include "ceres/parallel_for.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/small_blas.h" #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +#ifndef CERES_NO_CUDA +#include "cuda_runtime.h" +#endif -using std::vector; +namespace ceres::internal { + +namespace { +void ComputeCumulativeNumberOfNonZeros(std::vector& rows) { + if (rows.empty()) { + return; + } + rows[0].cumulative_nnz = rows[0].nnz; + for (int c = 1; c < rows.size(); ++c) { + const int curr_nnz = rows[c].nnz; + rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz; + } +} + +template +std::unique_ptr +CreateStructureOfCompressedRowSparseMatrix( + const double* values, + int num_rows, + int num_cols, + int num_nonzeros, + const CompressedRowBlockStructure* block_structure) { + auto crs_matrix = std::make_unique( + num_rows, num_cols, num_nonzeros); + auto crs_cols = crs_matrix->mutable_cols(); + auto crs_rows = crs_matrix->mutable_rows(); + int value_offset = 0; + 
const int num_row_blocks = block_structure->rows.size(); + const auto& cols = block_structure->cols; + *crs_rows++ = 0; + for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { + const auto& row_block = block_structure->rows[row_block_id]; + // Empty row block: only requires setting row offsets + if (row_block.cells.empty()) { + std::fill(crs_rows, crs_rows + row_block.block.size, value_offset); + crs_rows += row_block.block.size; + continue; + } + + int row_nnz = 0; + if constexpr (transpose) { + // Transposed block structure comes with nnz in row-block filled-in + row_nnz = row_block.nnz / row_block.block.size; + } else { + // Nnz field of non-transposed block structure is not filled and it can + // have non-sequential structure (consider the case of jacobian for + // Schur-complement solver: E and F blocks are stored separately). + for (auto& c : row_block.cells) { + row_nnz += cols[c.block_id].size; + } + } + + // Row-wise setup of matrix structure + for (int row = 0; row < row_block.block.size; ++row) { + value_offset += row_nnz; + *crs_rows++ = value_offset; + for (auto& c : row_block.cells) { + const int col_block_size = cols[c.block_id].size; + const int col_position = cols[c.block_id].position; + std::iota(crs_cols, crs_cols + col_block_size, col_position); + crs_cols += col_block_size; + } + } + } + return crs_matrix; +} + +template +void UpdateCompressedRowSparseMatrixImpl( + CompressedRowSparseMatrix* crs_matrix, + const double* values, + const CompressedRowBlockStructure* block_structure) { + auto crs_values = crs_matrix->mutable_values(); + auto crs_rows = crs_matrix->mutable_rows(); + const int num_row_blocks = block_structure->rows.size(); + const auto& cols = block_structure->cols; + for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) { + const auto& row_block = block_structure->rows[row_block_id]; + const int row_block_size = row_block.block.size; + const int row_nnz = crs_rows[1] - crs_rows[0]; + 
crs_rows += row_block_size; + + if (row_nnz == 0) { + continue; + } + + MatrixRef crs_row_block(crs_values, row_block_size, row_nnz); + int col_offset = 0; + for (auto& c : row_block.cells) { + const int col_block_size = cols[c.block_id].size; + auto crs_cell = + crs_row_block.block(0, col_offset, row_block_size, col_block_size); + if constexpr (transpose) { + // Transposed matrix is filled using transposed block-strucutre + ConstMatrixRef cell( + values + c.position, col_block_size, row_block_size); + crs_cell = cell.transpose(); + } else { + ConstMatrixRef cell( + values + c.position, row_block_size, col_block_size); + crs_cell = cell; + } + col_offset += col_block_size; + } + crs_values += row_nnz * row_block_size; + } +} + +void SetBlockStructureOfCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix, + CompressedRowBlockStructure* block_structure) { + const int num_row_blocks = block_structure->rows.size(); + auto& row_blocks = *crs_matrix->mutable_row_blocks(); + row_blocks.resize(num_row_blocks); + for (int i = 0; i < num_row_blocks; ++i) { + row_blocks[i] = block_structure->rows[i].block; + } + + auto& col_blocks = *crs_matrix->mutable_col_blocks(); + col_blocks = block_structure->cols; +} + +} // namespace BlockSparseMatrix::BlockSparseMatrix( - CompressedRowBlockStructure* block_structure) - : num_rows_(0), + CompressedRowBlockStructure* block_structure, bool use_page_locked_memory) + : use_page_locked_memory_(use_page_locked_memory), + num_rows_(0), num_cols_(0), num_nonzeros_(0), block_structure_(block_structure) { @@ -66,7 +194,7 @@ BlockSparseMatrix::BlockSparseMatrix( int row_block_size = block_structure_->rows[i].block.size; num_rows_ += row_block_size; - const vector& cells = block_structure_->rows[i].cells; + const std::vector& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = block_structure_->cols[col_block_id].size; @@ -79,51 +207,138 @@ 
BlockSparseMatrix::BlockSparseMatrix( CHECK_GE(num_nonzeros_, 0); VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double) << " bytes."; // NOLINT - values_ = std::make_unique(num_nonzeros_); + + values_ = AllocateValues(num_nonzeros_); max_num_nonzeros_ = num_nonzeros_; CHECK(values_ != nullptr); + AddTransposeBlockStructure(); } -void BlockSparseMatrix::SetZero() { - std::fill(values_.get(), values_.get() + num_nonzeros_, 0.0); -} +BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); } -void BlockSparseMatrix::RightMultiply(const double* x, double* y) const { - CHECK(x != nullptr); - CHECK(y != nullptr); - - for (int i = 0; i < block_structure_->rows.size(); ++i) { - int row_block_pos = block_structure_->rows[i].block.position; - int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; - for (const auto& cell : cells) { - int col_block_id = cell.block_id; - int col_block_size = block_structure_->cols[col_block_id].size; - int col_block_pos = block_structure_->cols[col_block_id].position; - MatrixVectorMultiply( - values_.get() + cell.position, - row_block_size, - col_block_size, - x + col_block_pos, - y + row_block_pos); - } +void BlockSparseMatrix::AddTransposeBlockStructure() { + if (transpose_block_structure_ == nullptr) { + transpose_block_structure_ = CreateTranspose(*block_structure_); } } -void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const { +void BlockSparseMatrix::SetZero() { + std::fill(values_, values_ + num_nonzeros_, 0.0); +} + +void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) { + ParallelSetZero(context, num_threads, values_, num_nonzeros_); +} + +void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x, + double* y) const { + RightMultiplyAndAccumulate(x, y, nullptr, 1); +} + +void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const { 
CHECK(x != nullptr); CHECK(y != nullptr); + const auto values = values_; + const auto block_structure = block_structure_.get(); + const auto num_row_blocks = block_structure->rows.size(); + + ParallelFor(context, + 0, + num_row_blocks, + num_threads, + [values, block_structure, x, y](int row_block_id) { + const int row_block_pos = + block_structure->rows[row_block_id].block.position; + const int row_block_size = + block_structure->rows[row_block_id].block.size; + const auto& cells = block_structure->rows[row_block_id].cells; + for (const auto& cell : cells) { + const int col_block_id = cell.block_id; + const int col_block_size = + block_structure->cols[col_block_id].size; + const int col_block_pos = + block_structure->cols[col_block_id].position; + MatrixVectorMultiply( + values + cell.position, + row_block_size, + col_block_size, + x + col_block_pos, + y + row_block_pos); + } + }); +} + +// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method +// might benefit from caching column-block partition +void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const { + // While utilizing transposed structure allows to perform parallel + // left-multiplication by dense vector, it makes access patterns to matrix + // elements scattered. 
Thus, multiplication using transposed structure + // is only useful for parallel execution + CHECK(x != nullptr); + CHECK(y != nullptr); + if (transpose_block_structure_ == nullptr || num_threads == 1) { + LeftMultiplyAndAccumulate(x, y); + return; + } + + auto transpose_bs = transpose_block_structure_.get(); + const auto values = values_; + const int num_col_blocks = transpose_bs->rows.size(); + if (!num_col_blocks) { + return; + } + + // Use non-zero count as iteration cost for guided parallel-for loop + ParallelFor( + context, + 0, + num_col_blocks, + num_threads, + [values, transpose_bs, x, y](int row_block_id) { + int row_block_pos = transpose_bs->rows[row_block_id].block.position; + int row_block_size = transpose_bs->rows[row_block_id].block.size; + auto& cells = transpose_bs->rows[row_block_id].cells; + + for (auto& cell : cells) { + const int col_block_id = cell.block_id; + const int col_block_size = transpose_bs->cols[col_block_id].size; + const int col_block_pos = transpose_bs->cols[col_block_id].position; + MatrixTransposeVectorMultiply( + values + cell.position, + col_block_size, + row_block_size, + x + col_block_pos, + y + row_block_pos); + } + }, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); +} + +void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x, + double* y) const { + CHECK(x != nullptr); + CHECK(y != nullptr); + // Single-threaded left products are always computed using a non-transpose + // block structure, because it has linear acess pattern to matrix elements for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + const auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = block_structure_->cols[col_block_id].size; int 
col_block_pos = block_structure_->cols[col_block_id].position; MatrixTransposeVectorMultiply( - values_.get() + cell.position, + values_ + cell.position, row_block_size, col_block_size, x + row_block_pos, @@ -137,35 +352,144 @@ void BlockSparseMatrix::SquaredColumnNorm(double* x) const { VectorRef(x, num_cols_).setZero(); for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; const MatrixRef m( - values_.get() + cell.position, row_block_size, col_block_size); + values_ + cell.position, row_block_size, col_block_size); VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm(); } } } +// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method +// might benefit from caching column-block partition +void BlockSparseMatrix::SquaredColumnNorm(double* x, + ContextImpl* context, + int num_threads) const { + if (transpose_block_structure_ == nullptr || num_threads == 1) { + SquaredColumnNorm(x); + return; + } + + CHECK(x != nullptr); + ParallelSetZero(context, num_threads, x, num_cols_); + + auto transpose_bs = transpose_block_structure_.get(); + const auto values = values_; + const int num_col_blocks = transpose_bs->rows.size(); + ParallelFor( + context, + 0, + num_col_blocks, + num_threads, + [values, transpose_bs, x](int row_block_id) { + const auto& row = transpose_bs->rows[row_block_id]; + + for (auto& cell : row.cells) { + const auto& col = transpose_bs->cols[cell.block_id]; + const MatrixRef m(values + cell.position, col.size, row.block.size); + VectorRef(x + row.block.position, row.block.size) += + m.colwise().squaredNorm(); + } + }, + transpose_bs->rows.data(), + [](const 
CompressedRow& row) { return row.cumulative_nnz; }); +} + void BlockSparseMatrix::ScaleColumns(const double* scale) { CHECK(scale != nullptr); for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; - MatrixRef m( - values_.get() + cell.position, row_block_size, col_block_size); + MatrixRef m(values_ + cell.position, row_block_size, col_block_size); m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal(); } } } +// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method +// might benefit from caching column-block partition +void BlockSparseMatrix::ScaleColumns(const double* scale, + ContextImpl* context, + int num_threads) { + if (transpose_block_structure_ == nullptr || num_threads == 1) { + ScaleColumns(scale); + return; + } + + CHECK(scale != nullptr); + auto transpose_bs = transpose_block_structure_.get(); + auto values = values_; + const int num_col_blocks = transpose_bs->rows.size(); + ParallelFor( + context, + 0, + num_col_blocks, + num_threads, + [values, transpose_bs, scale](int row_block_id) { + const auto& row = transpose_bs->rows[row_block_id]; + + for (auto& cell : row.cells) { + const auto& col = transpose_bs->cols[cell.block_id]; + MatrixRef m(values + cell.position, col.size, row.block.size); + m *= ConstVectorRef(scale + row.block.position, row.block.size) + .asDiagonal(); + } + }, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); +} +std::unique_ptr +BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const { + auto bs = transpose_block_structure_.get(); + auto crs_matrix = 
CreateStructureOfCompressedRowSparseMatrix( + values(), num_cols_, num_rows_, num_nonzeros_, bs); + + SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs); + + UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get()); + return crs_matrix; +} + +std::unique_ptr +BlockSparseMatrix::ToCompressedRowSparseMatrix() const { + auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix( + values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get()); + + SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), + block_structure_.get()); + + UpdateCompressedRowSparseMatrix(crs_matrix.get()); + return crs_matrix; +} + +void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose( + CompressedRowSparseMatrix* crs_matrix) const { + CHECK(crs_matrix != nullptr); + CHECK_EQ(crs_matrix->num_rows(), num_cols_); + CHECK_EQ(crs_matrix->num_cols(), num_rows_); + CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); + UpdateCompressedRowSparseMatrixImpl( + crs_matrix, values(), transpose_block_structure_.get()); +} +void BlockSparseMatrix::UpdateCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix) const { + CHECK(crs_matrix != nullptr); + CHECK_EQ(crs_matrix->num_rows(), num_rows_); + CHECK_EQ(crs_matrix->num_cols(), num_cols_); + CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_); + UpdateCompressedRowSparseMatrixImpl( + crs_matrix, values(), block_structure_.get()); +} + void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const { CHECK(dense_matrix != nullptr); @@ -176,14 +500,14 @@ void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const { for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = 
block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; int jac_pos = cell.position; m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) += - MatrixRef(values_.get() + jac_pos, row_block_size, col_block_size); + MatrixRef(values_ + jac_pos, row_block_size, col_block_size); } } } @@ -199,7 +523,7 @@ void BlockSparseMatrix::ToTripletSparseMatrix( for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + const auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { int col_block_id = cell.block_id; int col_block_size = block_structure_->cols[col_block_id].size; @@ -223,12 +547,19 @@ const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const { return block_structure_.get(); } +// Return a pointer to the block structure of matrix transpose. We continue to +// hold ownership of the object though. 
+const CompressedRowBlockStructure* +BlockSparseMatrix::transpose_block_structure() const { + return transpose_block_structure_.get(); +} + void BlockSparseMatrix::ToTextFile(FILE* file) const { CHECK(file != nullptr); for (int i = 0; i < block_structure_->rows.size(); ++i) { const int row_block_pos = block_structure_->rows[i].block.position; const int row_block_size = block_structure_->rows[i].block.size; - const vector& cells = block_structure_->rows[i].cells; + const auto& cells = block_structure_->rows[i].cells; for (const auto& cell : cells) { const int col_block_id = cell.block_id; const int col_block_size = block_structure_->cols[col_block_id].size; @@ -293,34 +624,51 @@ void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) { for (int i = 0; i < m_bs->rows.size(); ++i) { const CompressedRow& m_row = m_bs->rows[i]; - CompressedRow& row = block_structure_->rows[old_num_row_blocks + i]; + const int row_block_id = old_num_row_blocks + i; + CompressedRow& row = block_structure_->rows[row_block_id]; row.block.size = m_row.block.size; row.block.position = num_rows_; num_rows_ += m_row.block.size; row.cells.resize(m_row.cells.size()); + if (transpose_block_structure_) { + transpose_block_structure_->cols.emplace_back(row.block); + } for (int c = 0; c < m_row.cells.size(); ++c) { const int block_id = m_row.cells[c].block_id; row.cells[c].block_id = block_id; row.cells[c].position = num_nonzeros_; - num_nonzeros_ += m_row.block.size * m_bs->cols[block_id].size; + + const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size; + if (transpose_block_structure_) { + transpose_block_structure_->rows[block_id].cells.emplace_back( + row_block_id, num_nonzeros_); + transpose_block_structure_->rows[block_id].nnz += cell_nnz; + } + + num_nonzeros_ += cell_nnz; } } if (num_nonzeros_ > max_num_nonzeros_) { - std::unique_ptr new_values = - std::make_unique(num_nonzeros_); - std::copy_n(values_.get(), old_num_nonzeros, new_values.get()); - values_ = 
std::move(new_values); + double* old_values = values_; + values_ = AllocateValues(num_nonzeros_); + std::copy_n(old_values, old_num_nonzeros, values_); max_num_nonzeros_ = num_nonzeros_; + FreeValues(old_values); } - std::copy(m.values(), - m.values() + m.num_nonzeros(), - values_.get() + old_num_nonzeros); + std::copy( + m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros); + + if (transpose_block_structure_ == nullptr) { + return; + } + ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows); } void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) { const int num_row_blocks = block_structure_->rows.size(); + const int new_num_row_blocks = num_row_blocks - delta_row_blocks; int delta_num_nonzeros = 0; int delta_num_rows = 0; const std::vector& column_blocks = block_structure_->cols; @@ -330,15 +678,40 @@ void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) { for (int c = 0; c < row.cells.size(); ++c) { const Cell& cell = row.cells[c]; delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size; + + if (transpose_block_structure_) { + auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells; + while (!col_cells.empty() && + col_cells.back().block_id >= new_num_row_blocks) { + const int del_block_id = col_cells.back().block_id; + const int del_block_rows = + block_structure_->rows[del_block_id].block.size; + const int del_block_cols = column_blocks[cell.block_id].size; + const int del_cell_nnz = del_block_rows * del_block_cols; + transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz; + col_cells.pop_back(); + } + } } } num_nonzeros_ -= delta_num_nonzeros; num_rows_ -= delta_num_rows; - block_structure_->rows.resize(num_row_blocks - delta_row_blocks); + block_structure_->rows.resize(new_num_row_blocks); + + if (transpose_block_structure_ == nullptr) { + return; + } + for (int i = 0; i < delta_row_blocks; ++i) { + transpose_block_structure_->cols.pop_back(); + } + + 
ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows); } std::unique_ptr BlockSparseMatrix::CreateRandomMatrix( - const BlockSparseMatrix::RandomMatrixOptions& options) { + const BlockSparseMatrix::RandomMatrixOptions& options, + std::mt19937& prng, + bool use_page_locked_memory) { CHECK_GT(options.num_row_blocks, 0); CHECK_GT(options.min_row_block_size, 0); CHECK_GT(options.max_row_block_size, 0); @@ -346,7 +719,11 @@ std::unique_ptr BlockSparseMatrix::CreateRandomMatrix( CHECK_GT(options.block_density, 0.0); CHECK_LE(options.block_density, 1.0); - auto* bs = new CompressedRowBlockStructure(); + std::uniform_int_distribution col_distribution( + options.min_col_block_size, options.max_col_block_size); + std::uniform_int_distribution row_distribution( + options.min_row_block_size, options.max_row_block_size); + auto bs = std::make_unique(); if (options.col_blocks.empty()) { CHECK_GT(options.num_col_blocks, 0); CHECK_GT(options.min_col_block_size, 0); @@ -356,10 +733,7 @@ std::unique_ptr BlockSparseMatrix::CreateRandomMatrix( // Generate the col block structure. 
int col_block_position = 0; for (int i = 0; i < options.num_col_blocks; ++i) { - // Generate a random integer in [min_col_block_size, max_col_block_size] - const int delta_block_size = - Uniform(options.max_col_block_size - options.min_col_block_size); - const int col_block_size = options.min_col_block_size + delta_block_size; + const int col_block_size = col_distribution(prng); bs->cols.emplace_back(col_block_size, col_block_position); col_block_position += col_block_size; } @@ -368,22 +742,21 @@ std::unique_ptr BlockSparseMatrix::CreateRandomMatrix( } bool matrix_has_blocks = false; + std::uniform_real_distribution uniform01(0.0, 1.0); while (!matrix_has_blocks) { VLOG(1) << "Clearing"; bs->rows.clear(); int row_block_position = 0; int value_position = 0; for (int r = 0; r < options.num_row_blocks; ++r) { - const int delta_block_size = - Uniform(options.max_row_block_size - options.min_row_block_size); - const int row_block_size = options.min_row_block_size + delta_block_size; + const int row_block_size = row_distribution(prng); bs->rows.emplace_back(); CompressedRow& row = bs->rows.back(); row.block.size = row_block_size; row.block.position = row_block_position; row_block_position += row_block_size; for (int c = 0; c < bs->cols.size(); ++c) { - if (RandDouble() > options.block_density) continue; + if (uniform01(prng) > options.block_density) continue; row.cells.emplace_back(); Cell& cell = row.cells.back(); @@ -395,14 +768,76 @@ std::unique_ptr BlockSparseMatrix::CreateRandomMatrix( } } - auto matrix = std::make_unique(bs); + auto matrix = + std::make_unique(bs.release(), use_page_locked_memory); double* values = matrix->mutable_values(); - for (int i = 0; i < matrix->num_nonzeros(); ++i) { - values[i] = RandNormal(); - } + std::normal_distribution standard_normal_distribution; + std::generate_n( + values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] { + return standard_normal_distribution(prng); + }); return matrix; } -} // namespace internal 
-} // namespace ceres +std::unique_ptr CreateTranspose( + const CompressedRowBlockStructure& bs) { + auto transpose = std::make_unique(); + + transpose->rows.resize(bs.cols.size()); + for (int i = 0; i < bs.cols.size(); ++i) { + transpose->rows[i].block = bs.cols[i]; + transpose->rows[i].nnz = 0; + } + + transpose->cols.resize(bs.rows.size()); + for (int i = 0; i < bs.rows.size(); ++i) { + auto& row = bs.rows[i]; + transpose->cols[i] = row.block; + + const int nrows = row.block.size; + for (auto& cell : row.cells) { + transpose->rows[cell.block_id].cells.emplace_back(i, cell.position); + const int ncols = transpose->rows[cell.block_id].block.size; + transpose->rows[cell.block_id].nnz += nrows * ncols; + } + } + ComputeCumulativeNumberOfNonZeros(transpose->rows); + return transpose; +} + +double* BlockSparseMatrix::AllocateValues(int size) { + if (!use_page_locked_memory_) { + return new double[size]; + } + +#ifndef CERES_NO_CUDA + + double* values = nullptr; + CHECK_EQ(cudaSuccess, + cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault)); + return values; +#else + LOG(FATAL) << "Page locked memory requested when CUDA is not available. " + << "This is a Ceres bug; please contact the developers!"; + return nullptr; +#endif +}; + +void BlockSparseMatrix::FreeValues(double*& values) { + if (!use_page_locked_memory_) { + delete[] values; + values = nullptr; + return; + } + +#ifndef CERES_NO_CUDA + CHECK_EQ(cudaSuccess, cudaFreeHost(values)); + values = nullptr; +#else + LOG(FATAL) << "Page locked memory requested when CUDA is not available. 
" + << "This is a Ceres bug; please contact the developers!"; +#endif +}; + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_sparse_matrix.h b/extern/ceres/internal/ceres/block_sparse_matrix.h index 75b0deb59e6..2e454885ac3 100644 --- a/extern/ceres/internal/ceres/block_sparse_matrix.h +++ b/extern/ceres/internal/ceres/block_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,15 +35,17 @@ #define CERES_INTERNAL_BLOCK_SPARSE_MATRIX_H_ #include +#include #include "ceres/block_structure.h" +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" #include "ceres/sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class TripletSparseMatrix; @@ -63,31 +65,64 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix { // // TODO(sameeragarwal): Add a function which will validate legal // CompressedRowBlockStructure objects. - explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure); + explicit BlockSparseMatrix(CompressedRowBlockStructure* block_structure, + bool use_page_locked_memory = false); + ~BlockSparseMatrix(); - BlockSparseMatrix(); BlockSparseMatrix(const BlockSparseMatrix&) = delete; void operator=(const BlockSparseMatrix&) = delete; // Implementation of SparseMatrix interface. 
- void SetZero() final; - void RightMultiply(const double* x, double* y) const final; - void LeftMultiply(const double* x, double* y) const final; + void SetZero() override final; + void SetZero(ContextImpl* context, int num_threads) override final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const final; + void LeftMultiplyAndAccumulate(const double* x, double* y) const final; + void LeftMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const final; void SquaredColumnNorm(double* x) const final; + void SquaredColumnNorm(double* x, + ContextImpl* context, + int num_threads) const final; void ScaleColumns(const double* scale) final; + void ScaleColumns(const double* scale, + ContextImpl* context, + int num_threads) final; + + // Convert to CompressedRowSparseMatrix + std::unique_ptr ToCompressedRowSparseMatrix() + const; + // Create CompressedRowSparseMatrix corresponding to transposed matrix + std::unique_ptr + ToCompressedRowSparseMatrixTranspose() const; + // Copy values to CompressedRowSparseMatrix that has compatible structure + void UpdateCompressedRowSparseMatrix( + CompressedRowSparseMatrix* crs_matrix) const; + // Copy values to CompressedRowSparseMatrix that has structure of transposed + // matrix + void UpdateCompressedRowSparseMatrixTranspose( + CompressedRowSparseMatrix* crs_matrix) const; void ToDenseMatrix(Matrix* dense_matrix) const final; void ToTextFile(FILE* file) const final; + void AddTransposeBlockStructure(); + // clang-format off int num_rows() const final { return num_rows_; } int num_cols() const final { return num_cols_; } int num_nonzeros() const final { return num_nonzeros_; } - const double* values() const final { return values_.get(); } - double* mutable_values() final { return values_.get(); } + const double* values() const final { return values_; } + 
double* mutable_values() final { return values_; } // clang-format on void ToTripletSparseMatrix(TripletSparseMatrix* matrix) const; const CompressedRowBlockStructure* block_structure() const; + const CompressedRowBlockStructure* transpose_block_structure() const; // Append the contents of m to the bottom of this matrix. m must // have the same column blocks structure as this matrix. @@ -122,15 +157,22 @@ class CERES_NO_EXPORT BlockSparseMatrix final : public SparseMatrix { // distributed and whose structure is determined by // RandomMatrixOptions. static std::unique_ptr CreateRandomMatrix( - const RandomMatrixOptions& options); + const RandomMatrixOptions& options, + std::mt19937& prng, + bool use_page_locked_memory = false); private: + double* AllocateValues(int size); + void FreeValues(double*& values); + + const bool use_page_locked_memory_; int num_rows_; int num_cols_; int num_nonzeros_; int max_num_nonzeros_; - std::unique_ptr values_; + double* values_; std::unique_ptr block_structure_; + std::unique_ptr transpose_block_structure_; }; // A number of algorithms like the SchurEliminator do not need @@ -158,8 +200,10 @@ class CERES_NO_EXPORT BlockSparseMatrixData { const double* values_; }; -} // namespace internal -} // namespace ceres +std::unique_ptr CreateTranspose( + const CompressedRowBlockStructure& bs); + +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/block_structure.cc b/extern/ceres/internal/ceres/block_structure.cc index 39ba0826dab..70f68b28bc1 100644 --- a/extern/ceres/internal/ceres/block_structure.cc +++ b/extern/ceres/internal/ceres/block_structure.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,8 +30,11 @@ #include "ceres/block_structure.h" -namespace ceres { -namespace internal { +#include + +#include "glog/logging.h" + +namespace ceres::internal { bool CellLessThan(const Cell& lhs, const Cell& rhs) { if (lhs.block_id == rhs.block_id) { @@ -40,5 +43,28 @@ bool CellLessThan(const Cell& lhs, const Cell& rhs) { return (lhs.block_id < rhs.block_id); } -} // namespace internal -} // namespace ceres +std::vector Tail(const std::vector& blocks, int n) { + CHECK_LE(n, blocks.size()); + std::vector tail; + const int num_blocks = blocks.size(); + const int start = num_blocks - n; + + int position = 0; + tail.reserve(n); + for (int i = start; i < num_blocks; ++i) { + tail.emplace_back(blocks[i].size, position); + position += blocks[i].size; + } + + return tail; +} + +int SumSquaredSizes(const std::vector& blocks) { + int sum = 0; + for (const auto& b : blocks) { + sum += b.size * b.size; + } + return sum; +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/block_structure.h b/extern/ceres/internal/ceres/block_structure.h index fe7574c6817..9500fbb13ab 100644 --- a/extern/ceres/internal/ceres/block_structure.h +++ b/extern/ceres/internal/ceres/block_structure.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,6 +43,9 @@ #include "ceres/internal/export.h" +// This file is being included into source files that are compiled with nvcc. 
+// nvcc shipped with ubuntu 20.04 does not support some features of c++17, +// including nested namespace definitions namespace ceres { namespace internal { @@ -50,15 +53,19 @@ using BlockSize = int32_t; struct CERES_NO_EXPORT Block { Block() = default; - Block(int size_, int position_) : size(size_), position(position_) {} + Block(int size_, int position_) noexcept : size(size_), position(position_) {} BlockSize size{-1}; int position{-1}; // Position along the row/column. }; +inline bool operator==(const Block& left, const Block& right) noexcept { + return (left.size == right.size) && (left.position == right.position); +} + struct CERES_NO_EXPORT Cell { Cell() = default; - Cell(int block_id_, int position_) + Cell(int block_id_, int position_) noexcept : block_id(block_id_), position(position_) {} // Column or row block id as the case maybe. @@ -75,14 +82,95 @@ struct CERES_NO_EXPORT CompressedList { // Construct a CompressedList with the cells containing num_cells // entries. - explicit CompressedList(int num_cells) : cells(num_cells) {} + explicit CompressedList(int num_cells) noexcept : cells(num_cells) {} Block block; std::vector cells; + // Number of non-zeros in cells of this row block + int nnz{-1}; + // Number of non-zeros in cells of this and every preceding row block in + // block-sparse matrix + int cumulative_nnz{-1}; }; using CompressedRow = CompressedList; using CompressedColumn = CompressedList; +// CompressedRowBlockStructure specifies the storage structure of a row block +// sparse matrix. +// +// Consider the following matrix A: +// A = [A_11 A_12 ... +// A_21 A_22 ... +// ... +// A_m1 A_m2 ... ] +// +// A row block sparse matrix is a matrix where the following properties hold: +// 1. The number of rows in every block A_ij and A_ik are the same. +// 2. The number of columns in every block A_ij and A_kj are the same. +// 3. The number of rows in A_ij and A_kj may be different (i != k). +// 4. 
The number of columns in A_ij and A_ik may be different (j != k). +// 5. Any block A_ij may be all 0s, in which case the block is not stored. +// +// The structure of the matrix is stored as follows: +// +// The `rows' array contains the following information for each row block: +// - rows[i].block.size: The number of rows in each block A_ij in the row block. +// - rows[i].block.position: The starting row in the full matrix A of the +// row block i. +// - rows[i].cells[j].block_id: The index into the `cols' array corresponding to +// the non-zero blocks A_ij. +// - rows[i].cells[j].position: The index in the `values' array for the contents +// of block A_ij. +// +// The `cols' array contains the following information for each column block: +// - cols[.].size: The number of columns spanned by the block. +// - cols[.].position: The starting column in the full matrix A of the block. +// +// +// Example of a row block sparse matrix: +// block_id: | 0 |1|2 |3 | +// rows[0]: [ 1 2 0 3 4 0 ] +// [ 5 6 0 7 8 0 ] +// rows[1]: [ 0 0 9 0 0 0 ] +// +// This matrix is stored as follows: +// +// There are four column blocks: +// cols[0].size = 2 +// cols[0].position = 0 +// cols[1].size = 1 +// cols[1].position = 2 +// cols[2].size = 2 +// cols[2].position = 3 +// cols[3].size = 1 +// cols[3].position = 5 + +// The first row block spans two rows, starting at row 0: +// rows[0].block.size = 2 // This row block spans two rows. +// rows[0].block.position = 0 // It starts at row 0. +// rows[0] has two cells, at column blocks 0 and 2: +// rows[0].cells[0].block_id = 0 // This cell is in column block 0. +// rows[0].cells[0].position = 0 // See below for an explanation of this. +// rows[0].cells[1].block_id = 2 // This cell is in column block 2. +// rows[0].cells[1].position = 4 // See below for an explanation of this. +// +// The second row block spans one row, starting at row 2: +// rows[1].block.size = 1 // This row block spans one row. +// rows[1].block.position = 2 // It starts at row 2. 
+// rows[1] has one cell at column block 1: +// rows[1].cells[0].block_id = 1 // This cell is in column block 1. +// rows[1].cells[0].position = 8 // See below for an explanation of this. +// +// The values in each block are stored contiguously in row major order. +// However, there is no unique way to order the blocks -- it is usually +// optimized to promote cache coherent access, e.g. ordering it so that +// Jacobian blocks of parameters of the same type are stored nearby. +// This is one possible way to store the values of the blocks in a values array: +// values = { 1, 2, 5, 6, 3, 4, 7, 8, 9 } +// | | | | // The three blocks. +// ^ rows[0].cells[0].position = 0 +// ^ rows[0].cells[1].position = 4 +// ^ rows[1].cells[0].position = 8 struct CERES_NO_EXPORT CompressedRowBlockStructure { std::vector cols; std::vector rows; @@ -93,6 +181,18 @@ struct CERES_NO_EXPORT CompressedColumnBlockStructure { std::vector cols; }; +inline int NumScalarEntries(const std::vector& blocks) { + if (blocks.empty()) { + return 0; + } + + auto& block = blocks.back(); + return block.position + block.size; +} + +std::vector Tail(const std::vector& blocks, int n); +int SumSquaredSizes(const std::vector& blocks); + } // namespace internal } // namespace ceres diff --git a/extern/ceres/internal/ceres/c_api.cc b/extern/ceres/internal/ceres/c_api.cc index 8ea344dd54a..56e13248710 100644 --- a/extern/ceres/internal/ceres/c_api.cc +++ b/extern/ceres/internal/ceres/c_api.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/callbacks.cc b/extern/ceres/internal/ceres/callbacks.cc index 7a4381c293f..e6e064438ee 100644 --- a/extern/ceres/internal/ceres/callbacks.cc +++ b/extern/ceres/internal/ceres/callbacks.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,15 +32,13 @@ #include #include // NO LINT +#include #include "ceres/program.h" #include "ceres/stringprintf.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::string; +namespace ceres::internal { StateUpdatingCallback::StateUpdatingCallback(Program* program, double* parameters) @@ -49,7 +47,7 @@ StateUpdatingCallback::StateUpdatingCallback(Program* program, StateUpdatingCallback::~StateUpdatingCallback() = default; CallbackReturnType StateUpdatingCallback::operator()( - const IterationSummary& summary) { + const IterationSummary& /*summary*/) { program_->StateVectorToParameterBlocks(parameters_); program_->CopyParameterBlockStateToUserState(); return SOLVER_CONTINUE; @@ -83,7 +81,7 @@ LoggingCallback::~LoggingCallback() = default; CallbackReturnType LoggingCallback::operator()( const IterationSummary& summary) { - string output; + std::string output; if (minimizer_type == LINE_SEARCH) { output = StringPrintf( "% 4d: f:% 8e d:% 3.2e g:% 3.2e h:% 3.2e s:% 3.2e e:% 3d it:% 3.2e " @@ -127,5 +125,4 @@ CallbackReturnType LoggingCallback::operator()( return SOLVER_CONTINUE; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/callbacks.h b/extern/ceres/internal/ceres/callbacks.h index 3b1d10cfa7f..d3a76574b91 100644 --- a/extern/ceres/internal/ceres/callbacks.h +++ 
b/extern/ceres/internal/ceres/callbacks.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,7 @@ #include "ceres/internal/export.h" #include "ceres/iteration_callback.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; @@ -84,7 +83,6 @@ class CERES_NO_EXPORT LoggingCallback final : public IterationCallback { const bool log_to_stdout_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_CALLBACKS_H_ diff --git a/extern/ceres/internal/ceres/canonical_views_clustering.cc b/extern/ceres/internal/ceres/canonical_views_clustering.cc index 01b8ad38ae0..d74e570cef7 100644 --- a/extern/ceres/internal/ceres/canonical_views_clustering.cc +++ b/extern/ceres/internal/ceres/canonical_views_clustering.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,16 +33,14 @@ #include #include +#include #include "ceres/graph.h" #include "ceres/internal/export.h" #include "ceres/map_util.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::vector; +namespace ceres::internal { using IntMap = std::unordered_map; using IntSet = std::unordered_set; @@ -59,15 +57,15 @@ class CERES_NO_EXPORT CanonicalViewsClustering { // are assigned to a cluster with id = kInvalidClusterId. 
void ComputeClustering(const CanonicalViewsClusteringOptions& options, const WeightedGraph& graph, - vector* centers, + std::vector* centers, IntMap* membership); private: void FindValidViews(IntSet* valid_views) const; - double ComputeClusteringQualityDifference(const int candidate, - const vector& centers) const; + double ComputeClusteringQualityDifference( + int candidate, const std::vector& centers) const; void UpdateCanonicalViewAssignments(const int canonical_view); - void ComputeClusterMembership(const vector& centers, + void ComputeClusterMembership(const std::vector& centers, IntMap* membership) const; CanonicalViewsClusteringOptions options_; @@ -82,7 +80,7 @@ class CERES_NO_EXPORT CanonicalViewsClustering { void ComputeCanonicalViewsClustering( const CanonicalViewsClusteringOptions& options, const WeightedGraph& graph, - vector* centers, + std::vector* centers, IntMap* membership) { time_t start_time = time(nullptr); CanonicalViewsClustering cv; @@ -95,7 +93,7 @@ void ComputeCanonicalViewsClustering( void CanonicalViewsClustering::ComputeClustering( const CanonicalViewsClusteringOptions& options, const WeightedGraph& graph, - vector* centers, + std::vector* centers, IntMap* membership) { options_ = options; CHECK(centers != nullptr); @@ -151,7 +149,7 @@ void CanonicalViewsClustering::FindValidViews(IntSet* valid_views) const { // Computes the difference in the quality score if 'candidate' were // added to the set of canonical views. double CanonicalViewsClustering::ComputeClusteringQualityDifference( - const int candidate, const vector& centers) const { + const int candidate, const std::vector& centers) const { // View score. double difference = options_.view_score_weight * graph_->VertexWeight(candidate); @@ -198,7 +196,7 @@ void CanonicalViewsClustering::UpdateCanonicalViewAssignments( // Assign a cluster id to each view. 
void CanonicalViewsClustering::ComputeClusterMembership( - const vector& centers, IntMap* membership) const { + const std::vector& centers, IntMap* membership) const { CHECK(membership != nullptr); membership->clear(); @@ -222,5 +220,4 @@ void CanonicalViewsClustering::ComputeClusterMembership( } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/canonical_views_clustering.h b/extern/ceres/internal/ceres/canonical_views_clustering.h index 00a6a739d29..eb05a910b5b 100644 --- a/extern/ceres/internal/ceres/canonical_views_clustering.h +++ b/extern/ceres/internal/ceres/canonical_views_clustering.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -48,8 +48,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct CanonicalViewsClusteringOptions; @@ -120,8 +119,7 @@ struct CERES_NO_EXPORT CanonicalViewsClusteringOptions { double view_score_weight = 0.0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/casts.h b/extern/ceres/internal/ceres/casts.h index 04d8ba4fe33..af944520618 100644 --- a/extern/ceres/internal/ceres/casts.h +++ b/extern/ceres/internal/ceres/casts.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/cgnr_linear_operator.h b/extern/ceres/internal/ceres/cgnr_linear_operator.h deleted file mode 100644 index d708efca24c..00000000000 --- a/extern/ceres/internal/ceres/cgnr_linear_operator.h +++ /dev/null @@ -1,123 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: keir@google.com (Keir Mierle) - -#ifndef CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_ -#define CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_ - -#include -#include - -#include "ceres/internal/disable_warnings.h" -#include "ceres/internal/eigen.h" -#include "ceres/internal/export.h" -#include "ceres/linear_operator.h" - -namespace ceres { -namespace internal { - -class SparseMatrix; - -// A linear operator which takes a matrix A and a diagonal vector D and -// performs products of the form -// -// (A^T A + D^T D)x -// -// This is used to implement iterative general sparse linear solving with -// conjugate gradients, where A is the Jacobian and D is a regularizing -// parameter. A brief proof that D^T D is the correct regularizer: -// -// Given a regularized least squares problem: -// -// min ||Ax - b||^2 + ||Dx||^2 -// x -// -// First expand into matrix notation: -// -// (Ax - b)^T (Ax - b) + xD^TDx -// -// Then multiply out to get: -// -// = xA^TAx - 2b^T Ax + b^Tb + xD^TDx -// -// Take the derivative: -// -// 0 = 2A^TAx - 2A^T b + 2 D^TDx -// 0 = A^TAx - A^T b + D^TDx -// 0 = (A^TA + D^TD)x - A^T b -// -// Thus, the symmetric system we need to solve for CGNR is -// -// Sx = z -// -// with S = A^TA + D^TD -// and z = A^T b -// -// Note: This class is not thread safe, since it uses some temporary storage. 
-class CERES_NO_EXPORT CgnrLinearOperator final : public LinearOperator { - public: - CgnrLinearOperator(const LinearOperator& A, const double* D) - : A_(A), D_(D), z_(new double[A.num_rows()]) {} - - void RightMultiply(const double* x, double* y) const final { - std::fill(z_.get(), z_.get() + A_.num_rows(), 0.0); - - // z = Ax - A_.RightMultiply(x, z_.get()); - - // y = y + Atz - A_.LeftMultiply(z_.get(), y); - - // y = y + DtDx - if (D_ != nullptr) { - int n = A_.num_cols(); - VectorRef(y, n).array() += - ConstVectorRef(D_, n).array().square() * ConstVectorRef(x, n).array(); - } - } - - void LeftMultiply(const double* x, double* y) const final { - RightMultiply(x, y); - } - - int num_rows() const final { return A_.num_cols(); } - int num_cols() const final { return A_.num_cols(); } - - private: - const LinearOperator& A_; - const double* D_; - std::unique_ptr z_; -}; - -} // namespace internal -} // namespace ceres - -#include "ceres/internal/reenable_warnings.h" - -#endif // CERES_INTERNAL_CGNR_LINEAR_OPERATOR_H_ diff --git a/extern/ceres/internal/ceres/cgnr_solver.cc b/extern/ceres/internal/ceres/cgnr_solver.cc index cca72bca988..da63484a734 100644 --- a/extern/ceres/internal/ceres/cgnr_solver.cc +++ b/extern/ceres/internal/ceres/cgnr_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,16 +34,92 @@ #include #include "ceres/block_jacobi_preconditioner.h" -#include "ceres/cgnr_linear_operator.h" #include "ceres/conjugate_gradients_solver.h" +#include "ceres/cuda_sparse_matrix.h" +#include "ceres/cuda_vector.h" #include "ceres/internal/eigen.h" #include "ceres/linear_solver.h" #include "ceres/subset_preconditioner.h" #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { + +// A linear operator which takes a matrix A and a diagonal vector D and +// performs products of the form +// +// (A^T A + D^T D)x +// +// This is used to implement iterative general sparse linear solving with +// conjugate gradients, where A is the Jacobian and D is a regularizing +// parameter. A brief proof that D^T D is the correct regularizer: +// +// Given a regularized least squares problem: +// +// min ||Ax - b||^2 + ||Dx||^2 +// x +// +// First expand into matrix notation: +// +// (Ax - b)^T (Ax - b) + xD^TDx +// +// Then multiply out to get: +// +// = xA^TAx - 2b^T Ax + b^Tb + xD^TDx +// +// Take the derivative: +// +// 0 = 2A^TAx - 2A^T b + 2 D^TDx +// 0 = A^TAx - A^T b + D^TDx +// 0 = (A^TA + D^TD)x - A^T b +// +// Thus, the symmetric system we need to solve for CGNR is +// +// Sx = z +// +// with S = A^TA + D^TD +// and z = A^T b +// +// Note: This class is not thread safe, since it uses some temporary storage. 
+class CERES_NO_EXPORT CgnrLinearOperator final + : public ConjugateGradientsLinearOperator { + public: + CgnrLinearOperator(const LinearOperator& A, + const double* D, + ContextImpl* context, + int num_threads) + : A_(A), + D_(D), + z_(Vector::Zero(A.num_rows())), + context_(context), + num_threads_(num_threads) {} + + void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { + // z = Ax + // y = y + Atz + z_.setZero(); + A_.RightMultiplyAndAccumulate(x, z_, context_, num_threads_); + A_.LeftMultiplyAndAccumulate(z_, y, context_, num_threads_); + + // y = y + DtDx + if (D_ != nullptr) { + int n = A_.num_cols(); + ParallelAssign( + context_, + num_threads_, + y, + y.array() + ConstVectorRef(D_, n).array().square() * x.array()); + } + } + + private: + const LinearOperator& A_; + const double* D_; + Vector z_; + + ContextImpl* context_; + int num_threads_; +}; CgnrSolver::CgnrSolver(LinearSolver::Options options) : options_(std::move(options)) { @@ -57,7 +133,14 @@ CgnrSolver::CgnrSolver(LinearSolver::Options options) } } -CgnrSolver::~CgnrSolver() = default; +CgnrSolver::~CgnrSolver() { + for (int i = 0; i < 4; ++i) { + if (scratch_[i]) { + delete scratch_[i]; + scratch_[i] = nullptr; + } + } +} LinearSolver::Summary CgnrSolver::SolveImpl( BlockSparseMatrix* A, @@ -65,48 +148,244 @@ LinearSolver::Summary CgnrSolver::SolveImpl( const LinearSolver::PerSolveOptions& per_solve_options, double* x) { EventLogger event_logger("CgnrSolver::Solve"); - - // Form z = Atb. 
- Vector z(A->num_cols()); - z.setZero(); - A->LeftMultiply(b, z.data()); - if (!preconditioner_) { + Preconditioner::Options preconditioner_options; + preconditioner_options.type = options_.preconditioner_type; + preconditioner_options.subset_preconditioner_start_row_block = + options_.subset_preconditioner_start_row_block; + preconditioner_options.sparse_linear_algebra_library_type = + options_.sparse_linear_algebra_library_type; + preconditioner_options.ordering_type = options_.ordering_type; + preconditioner_options.num_threads = options_.num_threads; + preconditioner_options.context = options_.context; + if (options_.preconditioner_type == JACOBI) { - preconditioner_ = std::make_unique(*A); + preconditioner_ = std::make_unique( + preconditioner_options, *A); } else if (options_.preconditioner_type == SUBSET) { - Preconditioner::Options preconditioner_options; - preconditioner_options.type = SUBSET; - preconditioner_options.subset_preconditioner_start_row_block = - options_.subset_preconditioner_start_row_block; - preconditioner_options.sparse_linear_algebra_library_type = - options_.sparse_linear_algebra_library_type; - preconditioner_options.use_postordering = options_.use_postordering; - preconditioner_options.num_threads = options_.num_threads; - preconditioner_options.context = options_.context; preconditioner_ = std::make_unique(preconditioner_options, *A); + } else { + preconditioner_ = std::make_unique(A->num_cols()); } } + preconditioner_->Update(*A, per_solve_options.D); - if (preconditioner_) { - preconditioner_->Update(*A, per_solve_options.D); + ConjugateGradientsSolverOptions cg_options; + cg_options.min_num_iterations = options_.min_num_iterations; + cg_options.max_num_iterations = options_.max_num_iterations; + cg_options.residual_reset_period = options_.residual_reset_period; + cg_options.q_tolerance = per_solve_options.q_tolerance; + cg_options.r_tolerance = per_solve_options.r_tolerance; + cg_options.context = options_.context; + 
cg_options.num_threads = options_.num_threads; + + // lhs = AtA + DtD + CgnrLinearOperator lhs( + *A, per_solve_options.D, options_.context, options_.num_threads); + // rhs = Atb. + Vector rhs(A->num_cols()); + rhs.setZero(); + A->LeftMultiplyAndAccumulate( + b, rhs.data(), options_.context, options_.num_threads); + + cg_solution_ = Vector::Zero(A->num_cols()); + for (int i = 0; i < 4; ++i) { + if (scratch_[i] == nullptr) { + scratch_[i] = new Vector(A->num_cols()); + } } - - LinearSolver::PerSolveOptions cg_per_solve_options = per_solve_options; - cg_per_solve_options.preconditioner = preconditioner_.get(); - - // Solve (AtA + DtD)x = z (= Atb). - VectorRef(x, A->num_cols()).setZero(); - CgnrLinearOperator lhs(*A, per_solve_options.D); event_logger.AddEvent("Setup"); - ConjugateGradientsSolver conjugate_gradient_solver(options_); - LinearSolver::Summary summary = - conjugate_gradient_solver.Solve(&lhs, z.data(), cg_per_solve_options, x); + LinearOperatorAdapter preconditioner(*preconditioner_); + auto summary = ConjugateGradientsSolver( + cg_options, lhs, rhs, preconditioner, scratch_, cg_solution_); + VectorRef(x, A->num_cols()) = cg_solution_; event_logger.AddEvent("Solve"); return summary; } -} // namespace internal -} // namespace ceres +#ifndef CERES_NO_CUDA + +// A linear operator which takes a matrix A and a diagonal vector D and +// performs products of the form +// +// (A^T A + D^T D)x +// +// This is used to implement iterative general sparse linear solving with +// conjugate gradients, where A is the Jacobian and D is a regularizing +// parameter. A brief proof is included in cgnr_linear_operator.h. 
+class CERES_NO_EXPORT CudaCgnrLinearOperator final + : public ConjugateGradientsLinearOperator { + public: + CudaCgnrLinearOperator(CudaSparseMatrix& A, + const CudaVector& D, + CudaVector* z) + : A_(A), D_(D), z_(z) {} + + void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { + // z = Ax + z_->SetZero(); + A_.RightMultiplyAndAccumulate(x, z_); + + // y = y + Atz + // = y + AtAx + A_.LeftMultiplyAndAccumulate(*z_, &y); + + // y = y + DtDx + y.DtDxpy(D_, x); + } + + private: + CudaSparseMatrix& A_; + const CudaVector& D_; + CudaVector* z_ = nullptr; +}; + +class CERES_NO_EXPORT CudaIdentityPreconditioner final + : public CudaPreconditioner { + public: + void Update(const CompressedRowSparseMatrix& A, const double* D) final {} + void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { + y.Axpby(1.0, x, 1.0); + } +}; + +// This class wraps the existing CPU Jacobi preconditioner, caches the structure +// of the block diagonal, and for each CGNR solve updates the values on the CPU +// and then copies them over to the GPU. 
+class CERES_NO_EXPORT CudaJacobiPreconditioner final + : public CudaPreconditioner { + public: + explicit CudaJacobiPreconditioner(Preconditioner::Options options, + const CompressedRowSparseMatrix& A) + : options_(std::move(options)), + cpu_preconditioner_(options_, A), + m_(options_.context, cpu_preconditioner_.matrix()) {} + ~CudaJacobiPreconditioner() = default; + + void Update(const CompressedRowSparseMatrix& A, const double* D) final { + cpu_preconditioner_.Update(A, D); + m_.CopyValuesFromCpu(cpu_preconditioner_.matrix()); + } + + void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector& y) final { + m_.RightMultiplyAndAccumulate(x, &y); + } + + private: + Preconditioner::Options options_; + BlockCRSJacobiPreconditioner cpu_preconditioner_; + CudaSparseMatrix m_; +}; + +CudaCgnrSolver::CudaCgnrSolver(LinearSolver::Options options) + : options_(std::move(options)) {} + +CudaCgnrSolver::~CudaCgnrSolver() { + for (int i = 0; i < 4; ++i) { + if (scratch_[i]) { + delete scratch_[i]; + scratch_[i] = nullptr; + } + } +} + +std::unique_ptr CudaCgnrSolver::Create( + LinearSolver::Options options, std::string* error) { + CHECK(error != nullptr); + if (options.preconditioner_type != IDENTITY && + options.preconditioner_type != JACOBI) { + *error = + "CudaCgnrSolver does not support preconditioner type " + + std::string(PreconditionerTypeToString(options.preconditioner_type)) + + ". "; + return nullptr; + } + CHECK(options.context->IsCudaInitialized()) + << "CudaCgnrSolver requires CUDA initialization."; + auto solver = std::make_unique(options); + return solver; +} + +void CudaCgnrSolver::CpuToGpuTransfer(const CompressedRowSparseMatrix& A, + const double* b, + const double* D) { + if (A_ == nullptr) { + // Assume structure is not cached, do an initialization and structural copy. 
+ A_ = std::make_unique(options_.context, A); + b_ = std::make_unique(options_.context, A.num_rows()); + x_ = std::make_unique(options_.context, A.num_cols()); + Atb_ = std::make_unique(options_.context, A.num_cols()); + Ax_ = std::make_unique(options_.context, A.num_rows()); + D_ = std::make_unique(options_.context, A.num_cols()); + + Preconditioner::Options preconditioner_options; + preconditioner_options.type = options_.preconditioner_type; + preconditioner_options.subset_preconditioner_start_row_block = + options_.subset_preconditioner_start_row_block; + preconditioner_options.sparse_linear_algebra_library_type = + options_.sparse_linear_algebra_library_type; + preconditioner_options.ordering_type = options_.ordering_type; + preconditioner_options.num_threads = options_.num_threads; + preconditioner_options.context = options_.context; + + if (options_.preconditioner_type == JACOBI) { + preconditioner_ = + std::make_unique(preconditioner_options, A); + } else { + preconditioner_ = std::make_unique(); + } + for (int i = 0; i < 4; ++i) { + scratch_[i] = new CudaVector(options_.context, A.num_cols()); + } + } else { + // Assume structure is cached, do a value copy. + A_->CopyValuesFromCpu(A); + } + b_->CopyFromCpu(ConstVectorRef(b, A.num_rows())); + D_->CopyFromCpu(ConstVectorRef(D, A.num_cols())); +} + +LinearSolver::Summary CudaCgnrSolver::SolveImpl( + CompressedRowSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { + EventLogger event_logger("CudaCgnrSolver::Solve"); + LinearSolver::Summary summary; + summary.num_iterations = 0; + summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; + + CpuToGpuTransfer(*A, b, per_solve_options.D); + event_logger.AddEvent("CPU to GPU Transfer"); + preconditioner_->Update(*A, per_solve_options.D); + event_logger.AddEvent("Preconditioner Update"); + + // Form z = Atb. 
+ Atb_->SetZero(); + A_->LeftMultiplyAndAccumulate(*b_, Atb_.get()); + + // Solve (AtA + DtD)x = z (= Atb). + x_->SetZero(); + CudaCgnrLinearOperator lhs(*A_, *D_, Ax_.get()); + + event_logger.AddEvent("Setup"); + + ConjugateGradientsSolverOptions cg_options; + cg_options.min_num_iterations = options_.min_num_iterations; + cg_options.max_num_iterations = options_.max_num_iterations; + cg_options.residual_reset_period = options_.residual_reset_period; + cg_options.q_tolerance = per_solve_options.q_tolerance; + cg_options.r_tolerance = per_solve_options.r_tolerance; + + summary = ConjugateGradientsSolver( + cg_options, lhs, *Atb_, *preconditioner_, scratch_, *x_); + x_->CopyTo(x); + event_logger.AddEvent("Solve"); + return summary; +} + +#endif // CERES_NO_CUDA + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/cgnr_solver.h b/extern/ceres/internal/ceres/cgnr_solver.h index 25e62e9abd9..c63453821bc 100644 --- a/extern/ceres/internal/ceres/cgnr_solver.h +++ b/extern/ceres/internal/ceres/cgnr_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,11 +33,13 @@ #include +#include "ceres/conjugate_gradients_solver.h" +#include "ceres/cuda_sparse_matrix.h" +#include "ceres/cuda_vector.h" #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Preconditioner; @@ -65,9 +67,50 @@ class CERES_NO_EXPORT CgnrSolver final : public BlockSparseMatrixSolver { private: const LinearSolver::Options options_; std::unique_ptr preconditioner_; + Vector cg_solution_; + Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr}; }; -} // namespace internal -} // namespace ceres +#ifndef CERES_NO_CUDA +class CudaPreconditioner : public ConjugateGradientsLinearOperator { + public: + virtual void Update(const CompressedRowSparseMatrix& A, const double* D) = 0; + virtual ~CudaPreconditioner() = default; +}; + +// A Cuda-accelerated version of CgnrSolver. +// This solver assumes that the sparsity structure of A remains constant for its +// lifetime. 
+class CERES_NO_EXPORT CudaCgnrSolver final + : public CompressedRowSparseMatrixSolver { + public: + explicit CudaCgnrSolver(LinearSolver::Options options); + static std::unique_ptr Create(LinearSolver::Options options, + std::string* error); + ~CudaCgnrSolver() override; + + Summary SolveImpl(CompressedRowSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) final; + + private: + void CpuToGpuTransfer(const CompressedRowSparseMatrix& A, + const double* b, + const double* D); + + LinearSolver::Options options_; + std::unique_ptr A_; + std::unique_ptr b_; + std::unique_ptr x_; + std::unique_ptr Atb_; + std::unique_ptr Ax_; + std::unique_ptr D_; + std::unique_ptr preconditioner_; + CudaVector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr}; +}; +#endif // CERES_NO_CUDA + +} // namespace ceres::internal #endif // CERES_INTERNAL_CGNR_SOLVER_H_ diff --git a/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.cc b/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.cc index 94e7e9aa446..5a25e31812e 100644 --- a/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.cc +++ b/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,30 +36,21 @@ #include "ceres/internal/export.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -using std::vector; - -void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows, - const int* scalar_cols, - const vector& row_blocks, - const vector& col_blocks, - vector* block_rows, - vector* block_cols) { +void CompressedColumnScalarMatrixToBlockMatrix( + const int* scalar_rows, + const int* scalar_cols, + const std::vector& row_blocks, + const std::vector& col_blocks, + std::vector* block_rows, + std::vector* block_cols) { CHECK(block_rows != nullptr); CHECK(block_cols != nullptr); block_rows->clear(); block_cols->clear(); - const int num_row_blocks = row_blocks.size(); const int num_col_blocks = col_blocks.size(); - vector row_block_starts(num_row_blocks); - for (int i = 0, cursor = 0; i < num_row_blocks; ++i) { - row_block_starts[i] = cursor; - cursor += row_blocks[i]; - } - // This loop extracts the block sparsity of the scalar sparse matrix // It does so by iterating over the columns, but only considering // the columns corresponding to the first element of each column @@ -71,52 +62,46 @@ void CompressedColumnScalarMatrixToBlockMatrix(const int* scalar_rows, for (int col_block = 0; col_block < num_col_blocks; ++col_block) { int column_size = 0; for (int idx = scalar_cols[c]; idx < scalar_cols[c + 1]; ++idx) { - vector::const_iterator it = std::lower_bound( - row_block_starts.begin(), row_block_starts.end(), scalar_rows[idx]); - // Since we are using lower_bound, it will return the row id - // where the row block starts. For everything but the first row - // of the block, where these values will be the same, we can - // skip, as we only need the first row to detect the presence of - // the block. 
+ auto it = std::lower_bound(row_blocks.begin(), + row_blocks.end(), + scalar_rows[idx], + [](const Block& block, double value) { + return block.position < value; + }); + // Since we are using lower_bound, it will return the row id where the row + // block starts. For everything but the first row of the block, where + // these values will be the same, we can skip, as we only need the first + // row to detect the presence of the block. // - // For rows all but the first row in the last row block, - // lower_bound will return row_block_starts.end(), but those can - // be skipped like the rows in other row blocks too. - if (it == row_block_starts.end() || *it != scalar_rows[idx]) { + // For rows all but the first row in the last row block, lower_bound will + // return row_blocks_.end(), but those can be skipped like the rows in + // other row blocks too. + if (it == row_blocks.end() || it->position != scalar_rows[idx]) { continue; } - block_rows->push_back(it - row_block_starts.begin()); + block_rows->push_back(it - row_blocks.begin()); ++column_size; } block_cols->push_back(block_cols->back() + column_size); - c += col_blocks[col_block]; + c += col_blocks[col_block].size; } } -void BlockOrderingToScalarOrdering(const vector& blocks, - const vector& block_ordering, - vector* scalar_ordering) { +void BlockOrderingToScalarOrdering(const std::vector& blocks, + const std::vector& block_ordering, + std::vector* scalar_ordering) { CHECK_EQ(blocks.size(), block_ordering.size()); const int num_blocks = blocks.size(); - - // block_starts = [0, block1, block1 + block2 ..] 
- vector block_starts(num_blocks); - for (int i = 0, cursor = 0; i < num_blocks; ++i) { - block_starts[i] = cursor; - cursor += blocks[i]; - } - - scalar_ordering->resize(block_starts.back() + blocks.back()); + scalar_ordering->resize(NumScalarEntries(blocks)); int cursor = 0; for (int i = 0; i < num_blocks; ++i) { const int block_id = block_ordering[i]; - const int block_size = blocks[block_id]; - int block_position = block_starts[block_id]; + const int block_size = blocks[block_id].size; + int block_position = blocks[block_id].position; for (int j = 0; j < block_size; ++j) { (*scalar_ordering)[cursor++] = block_position++; } } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.h b/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.h index f88a5bd9588..e9e067f3c47 100644 --- a/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.h +++ b/extern/ceres/internal/ceres/compressed_col_sparse_matrix_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,11 +34,11 @@ #include #include +#include "ceres/block_structure.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Extract the block sparsity pattern of the scalar compressed columns // matrix and return it in compressed column form. 
The compressed @@ -53,8 +53,8 @@ namespace internal { CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix( const int* scalar_rows, const int* scalar_cols, - const std::vector& row_blocks, - const std::vector& col_blocks, + const std::vector& row_blocks, + const std::vector& col_blocks, std::vector* block_rows, std::vector* block_cols); @@ -62,7 +62,7 @@ CERES_NO_EXPORT void CompressedColumnScalarMatrixToBlockMatrix( // the corresponding "scalar" ordering, where the scalar ordering of // size sum(blocks). CERES_NO_EXPORT void BlockOrderingToScalarOrdering( - const std::vector& blocks, + const std::vector& blocks, const std::vector& block_ordering, std::vector* scalar_ordering); @@ -141,8 +141,7 @@ void SolveRTRWithSparseRHS(IntegerType num_cols, SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/compressed_row_jacobian_writer.cc b/extern/ceres/internal/ceres/compressed_row_jacobian_writer.cc index 55b30a290f9..007346dba13 100644 --- a/extern/ceres/internal/ceres/compressed_row_jacobian_writer.cc +++ b/extern/ceres/internal/ceres/compressed_row_jacobian_writer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,44 +44,42 @@ #include "ceres/residual_block.h" #include "ceres/scratch_evaluate_preparer.h" -namespace ceres { -namespace internal { - -using std::adjacent_find; -using std::make_pair; -using std::pair; -using std::vector; - +namespace ceres::internal { void CompressedRowJacobianWriter::PopulateJacobianRowAndColumnBlockVectors( const Program* program, CompressedRowSparseMatrix* jacobian) { - const vector& parameter_blocks = program->parameter_blocks(); - vector& col_blocks = *(jacobian->mutable_col_blocks()); + const auto& parameter_blocks = program->parameter_blocks(); + auto& col_blocks = *(jacobian->mutable_col_blocks()); col_blocks.resize(parameter_blocks.size()); + int col_pos = 0; for (int i = 0; i < parameter_blocks.size(); ++i) { - col_blocks[i] = parameter_blocks[i]->TangentSize(); + col_blocks[i].size = parameter_blocks[i]->TangentSize(); + col_blocks[i].position = col_pos; + col_pos += col_blocks[i].size; } - const vector& residual_blocks = program->residual_blocks(); - vector& row_blocks = *(jacobian->mutable_row_blocks()); + const auto& residual_blocks = program->residual_blocks(); + auto& row_blocks = *(jacobian->mutable_row_blocks()); row_blocks.resize(residual_blocks.size()); + int row_pos = 0; for (int i = 0; i < residual_blocks.size(); ++i) { - row_blocks[i] = residual_blocks[i]->NumResiduals(); + row_blocks[i].size = residual_blocks[i]->NumResiduals(); + row_blocks[i].position = row_pos; + row_pos += row_blocks[i].size; } } void CompressedRowJacobianWriter::GetOrderedParameterBlocks( const Program* program, int residual_id, - vector>* evaluated_jacobian_blocks) { - const ResidualBlock* residual_block = program->residual_blocks()[residual_id]; + std::vector>* evaluated_jacobian_blocks) { + auto residual_block = program->residual_blocks()[residual_id]; const int num_parameter_blocks = residual_block->NumParameterBlocks(); for (int j = 0; j < 
num_parameter_blocks; ++j) { - const ParameterBlock* parameter_block = - residual_block->parameter_blocks()[j]; + auto parameter_block = residual_block->parameter_blocks()[j]; if (!parameter_block->IsConstant()) { evaluated_jacobian_blocks->push_back( - make_pair(parameter_block->index(), j)); + std::make_pair(parameter_block->index(), j)); } } std::sort(evaluated_jacobian_blocks->begin(), @@ -90,20 +88,29 @@ void CompressedRowJacobianWriter::GetOrderedParameterBlocks( std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() const { - const vector& residual_blocks = program_->residual_blocks(); + const auto& residual_blocks = program_->residual_blocks(); - int total_num_residuals = program_->NumResiduals(); - int total_num_effective_parameters = program_->NumEffectiveParameters(); + const int total_num_residuals = program_->NumResiduals(); + const int total_num_effective_parameters = program_->NumEffectiveParameters(); // Count the number of jacobian nonzeros. - int num_jacobian_nonzeros = 0; + // + // We used an unsigned int here, so that we can compare it INT_MAX without + // triggering overflow behaviour. + unsigned int num_jacobian_nonzeros = total_num_effective_parameters; for (auto* residual_block : residual_blocks) { const int num_residuals = residual_block->NumResiduals(); const int num_parameter_blocks = residual_block->NumParameterBlocks(); for (int j = 0; j < num_parameter_blocks; ++j) { - ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; + auto parameter_block = residual_block->parameter_blocks()[j]; if (!parameter_block->IsConstant()) { num_jacobian_nonzeros += num_residuals * parameter_block->TangentSize(); + if (num_jacobian_nonzeros > std::numeric_limits::max()) { + LOG(ERROR) << "Unable to create Jacobian matrix: Too many entries in " + "the Jacobian matrix. 
num_jacobian_nonzeros = " + << num_jacobian_nonzeros; + return nullptr; + } } } } @@ -112,14 +119,14 @@ std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() // Allocate more space than needed to store the jacobian so that when the LM // algorithm adds the diagonal, no reallocation is necessary. This reduces // peak memory usage significantly. - std::unique_ptr jacobian = - std::make_unique( - total_num_residuals, - total_num_effective_parameters, - num_jacobian_nonzeros + total_num_effective_parameters); + auto jacobian = std::make_unique( + total_num_residuals, + total_num_effective_parameters, + static_cast(num_jacobian_nonzeros)); - // At this stage, the CompressedRowSparseMatrix is an invalid state. But this - // seems to be the only way to construct it without doing a memory copy. + // At this stage, the CompressedRowSparseMatrix is an invalid state. But + // this seems to be the only way to construct it without doing a memory + // copy. int* rows = jacobian->mutable_rows(); int* cols = jacobian->mutable_cols(); @@ -131,9 +138,9 @@ std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() // Count the number of derivatives for a row of this residual block and // build a list of active parameter block indices. int num_derivatives = 0; - vector parameter_indices; + std::vector parameter_indices; for (int j = 0; j < num_parameter_blocks; ++j) { - ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; + auto parameter_block = residual_block->parameter_blocks()[j]; if (!parameter_block->IsConstant()) { parameter_indices.push_back(parameter_block->index()); num_derivatives += parameter_block->TangentSize(); @@ -141,12 +148,12 @@ std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() } // Sort the parameters by their position in the state vector. 
- sort(parameter_indices.begin(), parameter_indices.end()); + std::sort(parameter_indices.begin(), parameter_indices.end()); if (adjacent_find(parameter_indices.begin(), parameter_indices.end()) != parameter_indices.end()) { std::string parameter_block_description; for (int j = 0; j < num_parameter_blocks; ++j) { - ParameterBlock* parameter_block = residual_block->parameter_blocks()[j]; + auto parameter_block = residual_block->parameter_blocks()[j]; parameter_block_description += parameter_block->ToString() + "\n"; } LOG(FATAL) << "Ceres internal error: " @@ -168,15 +175,13 @@ std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() // values are updated. int col_pos = 0; for (int parameter_index : parameter_indices) { - ParameterBlock* parameter_block = - program_->parameter_blocks()[parameter_index]; + auto parameter_block = program_->parameter_blocks()[parameter_index]; const int parameter_block_size = parameter_block->TangentSize(); for (int r = 0; r < num_residuals; ++r) { // This is the position in the values array of the jacobian where this // row of the jacobian block should go. 
const int column_block_begin = rows[row_pos + r] + col_pos; - for (int c = 0; c < parameter_block_size; ++c) { cols[column_block_begin + c] = parameter_block->delta_offset() + c; } @@ -185,7 +190,8 @@ std::unique_ptr CompressedRowJacobianWriter::CreateJacobian() } row_pos += num_residuals; } - CHECK_EQ(num_jacobian_nonzeros, rows[total_num_residuals]); + CHECK_EQ(num_jacobian_nonzeros - total_num_effective_parameters, + rows[total_num_residuals]); PopulateJacobianRowAndColumnBlockVectors(program_, jacobian.get()); @@ -201,11 +207,10 @@ void CompressedRowJacobianWriter::Write(int residual_id, double* jacobian_values = jacobian->mutable_values(); const int* jacobian_rows = jacobian->rows(); - const ResidualBlock* residual_block = - program_->residual_blocks()[residual_id]; + auto residual_block = program_->residual_blocks()[residual_id]; const int num_residuals = residual_block->NumResiduals(); - vector> evaluated_jacobian_blocks; + std::vector> evaluated_jacobian_blocks; GetOrderedParameterBlocks(program_, residual_id, &evaluated_jacobian_blocks); // Where in the current row does the jacobian for a parameter block begin. @@ -214,7 +219,7 @@ void CompressedRowJacobianWriter::Write(int residual_id, // Iterate over the jacobian blocks in increasing order of their // positions in the reduced parameter vector. 
for (auto& evaluated_jacobian_block : evaluated_jacobian_blocks) { - const ParameterBlock* parameter_block = + auto parameter_block = program_->parameter_blocks()[evaluated_jacobian_block.first]; const int argument = evaluated_jacobian_block.second; const int parameter_block_size = parameter_block->TangentSize(); @@ -238,5 +243,4 @@ void CompressedRowJacobianWriter::Write(int residual_id, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/compressed_row_jacobian_writer.h b/extern/ceres/internal/ceres/compressed_row_jacobian_writer.h index 7badab71b04..6fc40e9464f 100644 --- a/extern/ceres/internal/ceres/compressed_row_jacobian_writer.h +++ b/extern/ceres/internal/ceres/compressed_row_jacobian_writer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -41,8 +41,7 @@ #include "ceres/internal/export.h" #include "ceres/scratch_evaluate_preparer.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CompressedRowSparseMatrix; class Program; @@ -107,7 +106,6 @@ class CERES_NO_EXPORT CompressedRowJacobianWriter { Program* program_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_COMPRESSED_ROW_JACOBIAN_WRITER_H_ diff --git a/extern/ceres/internal/ceres/compressed_row_sparse_matrix.cc b/extern/ceres/internal/ceres/compressed_row_sparse_matrix.cc index db103d9c0fa..21697f828a8 100644 --- a/extern/ceres/internal/ceres/compressed_row_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/compressed_row_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,25 +31,24 @@ #include "ceres/compressed_row_sparse_matrix.h" #include +#include #include #include +#include #include +#include "ceres/context_impl.h" #include "ceres/crs_matrix.h" #include "ceres/internal/export.h" -#include "ceres/random.h" +#include "ceres/parallel_for.h" #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::vector; - +namespace ceres::internal { namespace { // Helper functor used by the constructor for reordering the contents -// of a TripletSparseMatrix. This comparator assumes thay there are no +// of a TripletSparseMatrix. This comparator assumes that there are no // duplicates in the pair of arrays rows and cols, i.e., there is no // indices i and j (not equal to each other) s.t. // @@ -119,10 +118,12 @@ void TransposeForCompressedRowSparseStructure(const int num_rows, transpose_rows[0] = 0; } +template void AddRandomBlock(const int num_rows, const int num_cols, const int row_block_begin, const int col_block_begin, + RandomNormalFunctor&& randn, std::vector* rows, std::vector* cols, std::vector* values) { @@ -130,19 +131,21 @@ void AddRandomBlock(const int num_rows, for (int c = 0; c < num_cols; ++c) { rows->push_back(row_block_begin + r); cols->push_back(col_block_begin + c); - values->push_back(RandNormal()); + values->push_back(randn()); } } } +template void AddSymmetricRandomBlock(const int num_rows, const int row_block_begin, + RandomNormalFunctor&& randn, std::vector* rows, std::vector* cols, std::vector* values) { for (int r = 0; r < num_rows; ++r) { for (int c = r; c < num_rows; ++c) { - const double v = RandNormal(); + const double v = randn(); rows->push_back(row_block_begin + r); cols->push_back(row_block_begin + c); values->push_back(v); @@ -163,7 +166,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(int num_rows, int max_num_nonzeros) { num_rows_ 
= num_rows; num_cols_ = num_cols; - storage_type_ = UNSYMMETRIC; + storage_type_ = StorageType::UNSYMMETRIC; rows_.resize(num_rows + 1, 0); cols_.resize(max_num_nonzeros, 0); values_.resize(max_num_nonzeros, 0.0); @@ -202,7 +205,7 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix( } // index is the list of indices into the TripletSparseMatrix input. - vector index(input.num_nonzeros(), 0); + std::vector index(input.num_nonzeros(), 0); for (int i = 0; i < input.num_nonzeros(); ++i) { index[i] = i; } @@ -217,9 +220,8 @@ CompressedRowSparseMatrix::FromTripletSparseMatrix( input.num_nonzeros() * sizeof(int) + // NOLINT input.num_nonzeros() * sizeof(double)); // NOLINT - std::unique_ptr output = - std::make_unique( - num_rows, num_cols, input.num_nonzeros()); + auto output = std::make_unique( + num_rows, num_cols, input.num_nonzeros()); if (num_rows == 0) { // No data to copy. @@ -255,7 +257,7 @@ CompressedRowSparseMatrix::CompressedRowSparseMatrix(const double* diagonal, num_rows_ = num_rows; num_cols_ = num_rows; - storage_type_ = UNSYMMETRIC; + storage_type_ = StorageType::UNSYMMETRIC; rows_.resize(num_rows + 1); cols_.resize(num_rows); values_.resize(num_rows); @@ -276,22 +278,37 @@ void CompressedRowSparseMatrix::SetZero() { std::fill(values_.begin(), values_.end(), 0); } -// TODO(sameeragarwal): Make RightMultiply and LeftMultiply -// block-aware for higher performance. -void CompressedRowSparseMatrix::RightMultiply(const double* x, - double* y) const { +// TODO(sameeragarwal): Make RightMultiplyAndAccumulate and +// LeftMultiplyAndAccumulate block-aware for higher performance. 
+void CompressedRowSparseMatrix::RightMultiplyAndAccumulate( + const double* x, double* y, ContextImpl* context, int num_threads) const { + if (storage_type_ != StorageType::UNSYMMETRIC) { + RightMultiplyAndAccumulate(x, y); + return; + } + + auto values = values_.data(); + auto rows = rows_.data(); + auto cols = cols_.data(); + + ParallelFor( + context, 0, num_rows_, num_threads, [values, rows, cols, x, y](int row) { + for (int idx = rows[row]; idx < rows[row + 1]; ++idx) { + const int c = cols[idx]; + const double v = values[idx]; + y[row] += v * x[c]; + } + }); +} + +void CompressedRowSparseMatrix::RightMultiplyAndAccumulate(const double* x, + double* y) const { CHECK(x != nullptr); CHECK(y != nullptr); - if (storage_type_ == UNSYMMETRIC) { - for (int r = 0; r < num_rows_; ++r) { - for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) { - const int c = cols_[idx]; - const double v = values_[idx]; - y[r] += v * x[c]; - } - } - } else if (storage_type_ == UPPER_TRIANGULAR) { + if (storage_type_ == StorageType::UNSYMMETRIC) { + RightMultiplyAndAccumulate(x, y, nullptr, 1); + } else if (storage_type_ == StorageType::UPPER_TRIANGULAR) { // Because of their block structure, we will have entries that lie // above (below) the diagonal for lower (upper) triangular matrices, // so the loops below need to account for this. 
@@ -317,7 +334,7 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x, } } } - } else if (storage_type_ == LOWER_TRIANGULAR) { + } else if (storage_type_ == StorageType::LOWER_TRIANGULAR) { for (int r = 0; r < num_rows_; ++r) { int idx = rows_[r]; const int idx_end = rows_[r + 1]; @@ -340,19 +357,21 @@ void CompressedRowSparseMatrix::RightMultiply(const double* x, } } -void CompressedRowSparseMatrix::LeftMultiply(const double* x, double* y) const { +void CompressedRowSparseMatrix::LeftMultiplyAndAccumulate(const double* x, + double* y) const { CHECK(x != nullptr); CHECK(y != nullptr); - if (storage_type_ == UNSYMMETRIC) { + if (storage_type_ == StorageType::UNSYMMETRIC) { for (int r = 0; r < num_rows_; ++r) { for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) { y[cols_[idx]] += values_[idx] * x[r]; } } } else { - // Since the matrix is symmetric, LeftMultiply = RightMultiply. - RightMultiply(x, y); + // Since the matrix is symmetric, LeftMultiplyAndAccumulate = + // RightMultiplyAndAccumulate. + RightMultiplyAndAccumulate(x, y); } } @@ -360,11 +379,11 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const { CHECK(x != nullptr); std::fill(x, x + num_cols_, 0.0); - if (storage_type_ == UNSYMMETRIC) { + if (storage_type_ == StorageType::UNSYMMETRIC) { for (int idx = 0; idx < rows_[num_rows_]; ++idx) { x[cols_[idx]] += values_[idx] * values_[idx]; } - } else if (storage_type_ == UPPER_TRIANGULAR) { + } else if (storage_type_ == StorageType::UPPER_TRIANGULAR) { // Because of their block structure, we will have entries that lie // above (below) the diagonal for lower (upper) triangular // matrices, so the loops below need to account for this. 
@@ -390,7 +409,7 @@ void CompressedRowSparseMatrix::SquaredColumnNorm(double* x) const { } } } - } else if (storage_type_ == LOWER_TRIANGULAR) { + } else if (storage_type_ == StorageType::LOWER_TRIANGULAR) { for (int r = 0; r < num_rows_; ++r) { int idx = rows_[r]; const int idx_end = rows_[r + 1]; @@ -435,7 +454,7 @@ void CompressedRowSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const { void CompressedRowSparseMatrix::DeleteRows(int delta_rows) { CHECK_GE(delta_rows, 0); CHECK_LE(delta_rows, num_rows_); - CHECK_EQ(storage_type_, UNSYMMETRIC); + CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC); num_rows_ -= delta_rows; rows_.resize(num_rows_ + 1); @@ -451,7 +470,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) { int num_row_blocks = 0; int num_rows = 0; while (num_row_blocks < row_blocks_.size() && num_rows < num_rows_) { - num_rows += row_blocks_[num_row_blocks]; + num_rows += row_blocks_[num_row_blocks].size; ++num_row_blocks; } @@ -459,7 +478,7 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) { } void CompressedRowSparseMatrix::AppendRows(const CompressedRowSparseMatrix& m) { - CHECK_EQ(storage_type_, UNSYMMETRIC); + CHECK_EQ(storage_type_, StorageType::UNSYMMETRIC); CHECK_EQ(m.num_cols(), num_cols_); CHECK((row_blocks_.empty() && m.row_blocks().empty()) || @@ -539,17 +558,15 @@ void CompressedRowSparseMatrix::SetMaxNumNonZeros(int num_nonzeros) { std::unique_ptr CompressedRowSparseMatrix::CreateBlockDiagonalMatrix( - const double* diagonal, const vector& blocks) { - int num_rows = 0; + const double* diagonal, const std::vector& blocks) { + const int num_rows = NumScalarEntries(blocks); int num_nonzeros = 0; - for (int block_size : blocks) { - num_rows += block_size; - num_nonzeros += block_size * block_size; + for (auto& block : blocks) { + num_nonzeros += block.size * block.size; } - std::unique_ptr matrix = - std::make_unique( - num_rows, num_rows, num_nonzeros); + auto matrix = std::make_unique( + num_rows, num_rows, 
num_nonzeros); int* rows = matrix->mutable_rows(); int* cols = matrix->mutable_cols(); @@ -558,15 +575,17 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix( int idx_cursor = 0; int col_cursor = 0; - for (int block_size : blocks) { - for (int r = 0; r < block_size; ++r) { + for (auto& block : blocks) { + for (int r = 0; r < block.size; ++r) { *(rows++) = idx_cursor; - values[idx_cursor + r] = diagonal[col_cursor + r]; - for (int c = 0; c < block_size; ++c, ++idx_cursor) { + if (diagonal != nullptr) { + values[idx_cursor + r] = diagonal[col_cursor + r]; + } + for (int c = 0; c < block.size; ++c, ++idx_cursor) { *(cols++) = col_cursor + c; } } - col_cursor += block_size; + col_cursor += block.size; } *rows = idx_cursor; @@ -580,19 +599,18 @@ CompressedRowSparseMatrix::CreateBlockDiagonalMatrix( std::unique_ptr CompressedRowSparseMatrix::Transpose() const { - std::unique_ptr transpose = - std::make_unique( - num_cols_, num_rows_, num_nonzeros()); + auto transpose = std::make_unique( + num_cols_, num_rows_, num_nonzeros()); switch (storage_type_) { - case UNSYMMETRIC: - transpose->set_storage_type(UNSYMMETRIC); + case StorageType::UNSYMMETRIC: + transpose->set_storage_type(StorageType::UNSYMMETRIC); break; - case LOWER_TRIANGULAR: - transpose->set_storage_type(UPPER_TRIANGULAR); + case StorageType::LOWER_TRIANGULAR: + transpose->set_storage_type(StorageType::UPPER_TRIANGULAR); break; - case UPPER_TRIANGULAR: - transpose->set_storage_type(LOWER_TRIANGULAR); + case StorageType::UPPER_TRIANGULAR: + transpose->set_storage_type(StorageType::LOWER_TRIANGULAR); break; default: LOG(FATAL) << "Unknown storage type: " << storage_type_; @@ -621,13 +639,14 @@ CompressedRowSparseMatrix::Transpose() const { std::unique_ptr CompressedRowSparseMatrix::CreateRandomMatrix( - CompressedRowSparseMatrix::RandomMatrixOptions options) { + CompressedRowSparseMatrix::RandomMatrixOptions options, + std::mt19937& prng) { CHECK_GT(options.num_row_blocks, 0); 
CHECK_GT(options.min_row_block_size, 0); CHECK_GT(options.max_row_block_size, 0); CHECK_LE(options.min_row_block_size, options.max_row_block_size); - if (options.storage_type == UNSYMMETRIC) { + if (options.storage_type == StorageType::UNSYMMETRIC) { CHECK_GT(options.num_col_blocks, 0); CHECK_GT(options.min_col_block_size, 0); CHECK_GT(options.max_col_block_size, 0); @@ -642,33 +661,42 @@ CompressedRowSparseMatrix::CreateRandomMatrix( CHECK_GT(options.block_density, 0.0); CHECK_LE(options.block_density, 1.0); - vector row_blocks; - vector col_blocks; + std::vector row_blocks; + row_blocks.reserve(options.num_row_blocks); + std::vector col_blocks; + col_blocks.reserve(options.num_col_blocks); + + std::uniform_int_distribution col_distribution( + options.min_col_block_size, options.max_col_block_size); + std::uniform_int_distribution row_distribution( + options.min_row_block_size, options.max_row_block_size); + std::uniform_real_distribution uniform01(0.0, 1.0); + std::normal_distribution standard_normal_distribution; // Generate the row block structure. + int row_pos = 0; for (int i = 0; i < options.num_row_blocks; ++i) { // Generate a random integer in [min_row_block_size, max_row_block_size] - const int delta_block_size = - Uniform(options.max_row_block_size - options.min_row_block_size); - row_blocks.push_back(options.min_row_block_size + delta_block_size); + row_blocks.emplace_back(row_distribution(prng), row_pos); + row_pos += row_blocks.back().size; } - if (options.storage_type == UNSYMMETRIC) { + if (options.storage_type == StorageType::UNSYMMETRIC) { // Generate the col block structure. 
+ int col_pos = 0; for (int i = 0; i < options.num_col_blocks; ++i) { // Generate a random integer in [min_col_block_size, max_col_block_size] - const int delta_block_size = - Uniform(options.max_col_block_size - options.min_col_block_size); - col_blocks.push_back(options.min_col_block_size + delta_block_size); + col_blocks.emplace_back(col_distribution(prng), col_pos); + col_pos += col_blocks.back().size; } } else { // Symmetric matrices (LOWER_TRIANGULAR or UPPER_TRIANGULAR); col_blocks = row_blocks; } - vector tsm_rows; - vector tsm_cols; - vector tsm_values; + std::vector tsm_rows; + std::vector tsm_cols; + std::vector tsm_values; // For ease of construction, we are going to generate the // CompressedRowSparseMatrix by generating it as a @@ -687,51 +715,55 @@ CompressedRowSparseMatrix::CreateRandomMatrix( for (int r = 0; r < options.num_row_blocks; ++r) { int col_block_begin = 0; for (int c = 0; c < options.num_col_blocks; ++c) { - if (((options.storage_type == UPPER_TRIANGULAR) && (r > c)) || - ((options.storage_type == LOWER_TRIANGULAR) && (r < c))) { - col_block_begin += col_blocks[c]; + if (((options.storage_type == StorageType::UPPER_TRIANGULAR) && + (r > c)) || + ((options.storage_type == StorageType::LOWER_TRIANGULAR) && + (r < c))) { + col_block_begin += col_blocks[c].size; continue; } // Randomly determine if this block is present or not. - if (RandDouble() <= options.block_density) { + if (uniform01(prng) <= options.block_density) { + auto randn = [&standard_normal_distribution, &prng] { + return standard_normal_distribution(prng); + }; // If the matrix is symmetric, then we take care to generate // symmetric diagonal blocks. 
- if (options.storage_type == UNSYMMETRIC || r != c) { - AddRandomBlock(row_blocks[r], - col_blocks[c], + if (options.storage_type == StorageType::UNSYMMETRIC || r != c) { + AddRandomBlock(row_blocks[r].size, + col_blocks[c].size, row_block_begin, col_block_begin, + randn, &tsm_rows, &tsm_cols, &tsm_values); } else { - AddSymmetricRandomBlock(row_blocks[r], + AddSymmetricRandomBlock(row_blocks[r].size, row_block_begin, + randn, &tsm_rows, &tsm_cols, &tsm_values); } } - col_block_begin += col_blocks[c]; + col_block_begin += col_blocks[c].size; } - row_block_begin += row_blocks[r]; + row_block_begin += row_blocks[r].size; } } - const int num_rows = std::accumulate(row_blocks.begin(), row_blocks.end(), 0); - const int num_cols = std::accumulate(col_blocks.begin(), col_blocks.end(), 0); + const int num_rows = NumScalarEntries(row_blocks); + const int num_cols = NumScalarEntries(col_blocks); const bool kDoNotTranspose = false; - std::unique_ptr matrix = - CompressedRowSparseMatrix::FromTripletSparseMatrix( - TripletSparseMatrix( - num_rows, num_cols, tsm_rows, tsm_cols, tsm_values), - kDoNotTranspose); + auto matrix = CompressedRowSparseMatrix::FromTripletSparseMatrix( + TripletSparseMatrix(num_rows, num_cols, tsm_rows, tsm_cols, tsm_values), + kDoNotTranspose); (*matrix->mutable_row_blocks()) = row_blocks; (*matrix->mutable_col_blocks()) = col_blocks; matrix->set_storage_type(options.storage_type); return matrix; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/compressed_row_sparse_matrix.h b/extern/ceres/internal/ceres/compressed_row_sparse_matrix.h index 3d7d385b185..36c8895a34f 100644 --- a/extern/ceres/internal/ceres/compressed_row_sparse_matrix.h +++ b/extern/ceres/internal/ceres/compressed_row_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,10 @@ #define CERES_INTERNAL_COMPRESSED_ROW_SPARSE_MATRIX_H_ #include +#include #include +#include "ceres/block_structure.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" #include "ceres/sparse_matrix.h" @@ -46,11 +48,12 @@ struct CRSMatrix; namespace internal { +class ContextImpl; class TripletSparseMatrix; class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { public: - enum StorageType { + enum class StorageType { UNSYMMETRIC, // Matrix is assumed to be symmetric but only the lower triangular // part of the matrix is stored. @@ -100,8 +103,12 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { // SparseMatrix interface. ~CompressedRowSparseMatrix() override; void SetZero() final; - void RightMultiply(const double* x, double* y) const final; - void LeftMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const final; + void LeftMultiplyAndAccumulate(const double* x, double* y) const final; void SquaredColumnNorm(double* x) const final; void ScaleColumns(const double* scale) final; void ToDenseMatrix(Matrix* dense_matrix) const final; @@ -109,8 +116,8 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { int num_rows() const final { return num_rows_; } int num_cols() const final { return num_cols_; } int num_nonzeros() const final { return rows_[num_rows_]; } - const double* values() const final { return &values_[0]; } - double* mutable_values() final { return &values_[0]; } + const double* values() const final { return values_.data(); } + double* mutable_values() final { return values_.data(); } // Delete the bottom delta_rows. 
// num_rows -= delta_rows @@ -132,28 +139,28 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { void set_num_cols(const int num_cols) { num_cols_ = num_cols; } // Low level access methods that expose the structure of the matrix. - const int* cols() const { return &cols_[0]; } - int* mutable_cols() { return &cols_[0]; } + const int* cols() const { return cols_.data(); } + int* mutable_cols() { return cols_.data(); } - const int* rows() const { return &rows_[0]; } - int* mutable_rows() { return &rows_[0]; } + const int* rows() const { return rows_.data(); } + int* mutable_rows() { return rows_.data(); } StorageType storage_type() const { return storage_type_; } void set_storage_type(const StorageType storage_type) { storage_type_ = storage_type; } - const std::vector& row_blocks() const { return row_blocks_; } - std::vector* mutable_row_blocks() { return &row_blocks_; } + const std::vector& row_blocks() const { return row_blocks_; } + std::vector* mutable_row_blocks() { return &row_blocks_; } - const std::vector& col_blocks() const { return col_blocks_; } - std::vector* mutable_col_blocks() { return &col_blocks_; } + const std::vector& col_blocks() const { return col_blocks_; } + std::vector* mutable_col_blocks() { return &col_blocks_; } // Create a block diagonal CompressedRowSparseMatrix with the given // block structure. The individual blocks are assumed to be laid out // contiguously in the diagonal array, one block at a time. static std::unique_ptr CreateBlockDiagonalMatrix( - const double* diagonal, const std::vector& blocks); + const double* diagonal, const std::vector& blocks); // Options struct to control the generation of random block sparse // matrices in compressed row sparse format. @@ -165,7 +172,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { // given bounds. 
// // Then we walk the block structure of the resulting matrix, and with - // probability block_density detemine whether they are structurally + // probability block_density determine whether they are structurally // zero or not. If the answer is no, then we generate entries for the // block which are distributed normally. struct RandomMatrixOptions { @@ -176,7 +183,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { // (lower triangular) part. In this case, num_col_blocks, // min_col_block_size and max_col_block_size will be ignored and // assumed to be equal to the corresponding row settings. - StorageType storage_type = UNSYMMETRIC; + StorageType storage_type = StorageType::UNSYMMETRIC; int num_row_blocks = 0; int min_row_block_size = 0; @@ -195,7 +202,7 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { // normally distributed and whose structure is determined by // RandomMatrixOptions. static std::unique_ptr CreateRandomMatrix( - RandomMatrixOptions options); + RandomMatrixOptions options, std::mt19937& prng); private: static std::unique_ptr FromTripletSparseMatrix( @@ -209,14 +216,31 @@ class CERES_NO_EXPORT CompressedRowSparseMatrix : public SparseMatrix { StorageType storage_type_; // If the matrix has an underlying block structure, then it can also - // carry with it row and column block sizes. This is auxilliary and + // carry with it row and column block sizes. This is auxiliary and // optional information for use by algorithms operating on the // matrix. The class itself does not make use of this information in // any way. 
- std::vector row_blocks_; - std::vector col_blocks_; + std::vector row_blocks_; + std::vector col_blocks_; }; +inline std::ostream& operator<<(std::ostream& s, + CompressedRowSparseMatrix::StorageType type) { + switch (type) { + case CompressedRowSparseMatrix::StorageType::UNSYMMETRIC: + s << "UNSYMMETRIC"; + break; + case CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR: + s << "UPPER_TRIANGULAR"; + break; + case CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR: + s << "LOWER_TRIANGULAR"; + break; + default: + s << "UNKNOWN CompressedRowSparseMatrix::StorageType"; + } + return s; +} } // namespace internal } // namespace ceres diff --git a/extern/ceres/internal/ceres/concurrent_queue.h b/extern/ceres/internal/ceres/concurrent_queue.h index 1e74153566a..5f490ab5128 100644 --- a/extern/ceres/internal/ceres/concurrent_queue.h +++ b/extern/ceres/internal/ceres/concurrent_queue.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A thread-safe multi-producer, multi-consumer queue for queueing items that // are typically handled asynchronously by multiple threads. 
The ConcurrentQueue @@ -152,7 +151,6 @@ class ConcurrentQueue { bool wait_{true}; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_CONCURRENT_QUEUE_H_ diff --git a/extern/ceres/internal/ceres/conditioned_cost_function.cc b/extern/ceres/internal/ceres/conditioned_cost_function.cc index a9013a23d0a..5c826a980c0 100644 --- a/extern/ceres/internal/ceres/conditioned_cost_function.cc +++ b/extern/ceres/internal/ceres/conditioned_cost_function.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/conjugate_gradients_solver.cc b/extern/ceres/internal/ceres/conjugate_gradients_solver.cc deleted file mode 100644 index 62ae9201cb5..00000000000 --- a/extern/ceres/internal/ceres/conjugate_gradients_solver.cc +++ /dev/null @@ -1,253 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: sameeragarwal@google.com (Sameer Agarwal) -// -// A preconditioned conjugate gradients solver -// (ConjugateGradientsSolver) for positive semidefinite linear -// systems. -// -// We have also augmented the termination criterion used by this -// solver to support not just residual based termination but also -// termination based on decrease in the value of the quadratic model -// that CG optimizes. 
- -#include "ceres/conjugate_gradients_solver.h" - -#include -#include -#include - -#include "ceres/internal/eigen.h" -#include "ceres/linear_operator.h" -#include "ceres/stringprintf.h" -#include "ceres/types.h" -#include "glog/logging.h" - -namespace ceres { -namespace internal { -namespace { - -bool IsZeroOrInfinity(double x) { return ((x == 0.0) || std::isinf(x)); } - -} // namespace - -ConjugateGradientsSolver::ConjugateGradientsSolver( - LinearSolver::Options options) - : options_(std::move(options)) {} - -LinearSolver::Summary ConjugateGradientsSolver::Solve( - LinearOperator* A, - const double* b, - const LinearSolver::PerSolveOptions& per_solve_options, - double* x) { - CHECK(A != nullptr); - CHECK(x != nullptr); - CHECK(b != nullptr); - CHECK_EQ(A->num_rows(), A->num_cols()); - - LinearSolver::Summary summary; - summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE; - summary.message = "Maximum number of iterations reached."; - summary.num_iterations = 0; - - const int num_cols = A->num_cols(); - VectorRef xref(x, num_cols); - ConstVectorRef bref(b, num_cols); - - const double norm_b = bref.norm(); - if (norm_b == 0.0) { - xref.setZero(); - summary.termination_type = LINEAR_SOLVER_SUCCESS; - summary.message = "Convergence. |b| = 0."; - return summary; - } - - Vector r(num_cols); - Vector p(num_cols); - Vector z(num_cols); - Vector tmp(num_cols); - - const double tol_r = per_solve_options.r_tolerance * norm_b; - - tmp.setZero(); - A->RightMultiply(x, tmp.data()); - r = bref - tmp; - double norm_r = r.norm(); - if (options_.min_num_iterations == 0 && norm_r <= tol_r) { - summary.termination_type = LINEAR_SOLVER_SUCCESS; - summary.message = - StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r); - return summary; - } - - double rho = 1.0; - - // Initial value of the quadratic model Q = x'Ax - 2 * b'x. 
- double Q0 = -1.0 * xref.dot(bref + r); - - for (summary.num_iterations = 1;; ++summary.num_iterations) { - // Apply preconditioner - if (per_solve_options.preconditioner != nullptr) { - z.setZero(); - per_solve_options.preconditioner->RightMultiply(r.data(), z.data()); - } else { - z = r; - } - - double last_rho = rho; - rho = r.dot(z); - if (IsZeroOrInfinity(rho)) { - summary.termination_type = LINEAR_SOLVER_FAILURE; - summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho); - break; - } - - if (summary.num_iterations == 1) { - p = z; - } else { - double beta = rho / last_rho; - if (IsZeroOrInfinity(beta)) { - summary.termination_type = LINEAR_SOLVER_FAILURE; - summary.message = StringPrintf( - "Numerical failure. beta = rho_n / rho_{n-1} = %e, " - "rho_n = %e, rho_{n-1} = %e", - beta, - rho, - last_rho); - break; - } - p = z + beta * p; - } - - Vector& q = z; - q.setZero(); - A->RightMultiply(p.data(), q.data()); - const double pq = p.dot(q); - if ((pq <= 0) || std::isinf(pq)) { - summary.termination_type = LINEAR_SOLVER_NO_CONVERGENCE; - summary.message = StringPrintf( - "Matrix is indefinite, no more progress can be made. " - "p'q = %e. |p| = %e, |q| = %e", - pq, - p.norm(), - q.norm()); - break; - } - - const double alpha = rho / pq; - if (std::isinf(alpha)) { - summary.termination_type = LINEAR_SOLVER_FAILURE; - summary.message = StringPrintf( - "Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.", - alpha, - rho, - pq); - break; - } - - xref = xref + alpha * p; - - // Ideally we would just use the update r = r - alpha*q to keep - // track of the residual vector. However this estimate tends to - // drift over time due to round off errors. Thus every - // residual_reset_period iterations, we calculate the residual as - // r = b - Ax. We do not do this every iteration because this - // requires an additional matrix vector multiply which would - // double the complexity of the CG algorithm. 
- if (summary.num_iterations % options_.residual_reset_period == 0) { - tmp.setZero(); - A->RightMultiply(x, tmp.data()); - r = bref - tmp; - } else { - r = r - alpha * q; - } - - // Quadratic model based termination. - // Q1 = x'Ax - 2 * b' x. - const double Q1 = -1.0 * xref.dot(bref + r); - - // For PSD matrices A, let - // - // Q(x) = x'Ax - 2b'x - // - // be the cost of the quadratic function defined by A and b. Then, - // the solver terminates at iteration i if - // - // i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance. - // - // This termination criterion is more useful when using CG to - // solve the Newton step. This particular convergence test comes - // from Stephen Nash's work on truncated Newton - // methods. References: - // - // 1. Stephen G. Nash & Ariela Sofer, Assessing A Search - // Direction Within A Truncated Newton Method, Operation - // Research Letters 9(1990) 219-221. - // - // 2. Stephen G. Nash, A Survey of Truncated Newton Methods, - // Journal of Computational and Applied Mathematics, - // 124(1-2), 45-59, 2000. - // - const double zeta = summary.num_iterations * (Q1 - Q0) / Q1; - if (zeta < per_solve_options.q_tolerance && - summary.num_iterations >= options_.min_num_iterations) { - summary.termination_type = LINEAR_SOLVER_SUCCESS; - summary.message = - StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e", - summary.num_iterations, - zeta, - per_solve_options.q_tolerance, - r.norm()); - break; - } - Q0 = Q1; - - // Residual based termination. - norm_r = r.norm(); - if (norm_r <= tol_r && - summary.num_iterations >= options_.min_num_iterations) { - summary.termination_type = LINEAR_SOLVER_SUCCESS; - summary.message = - StringPrintf("Iteration: %d Convergence. 
|r| = %e <= %e.", - summary.num_iterations, - norm_r, - tol_r); - break; - } - - if (summary.num_iterations >= options_.max_num_iterations) { - break; - } - } - - return summary; -} - -} // namespace internal -} // namespace ceres diff --git a/extern/ceres/internal/ceres/conjugate_gradients_solver.h b/extern/ceres/internal/ceres/conjugate_gradients_solver.h index 99ddb5d485b..84383ea0909 100644 --- a/extern/ceres/internal/ceres/conjugate_gradients_solver.h +++ b/extern/ceres/internal/ceres/conjugate_gradients_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,42 +34,277 @@ #ifndef CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_ #define CERES_INTERNAL_CONJUGATE_GRADIENTS_SOLVER_H_ +#include +#include +#include + +#include "ceres/eigen_vector_ops.h" #include "ceres/internal/disable_warnings.h" +#include "ceres/internal/eigen.h" #include "ceres/internal/export.h" +#include "ceres/linear_operator.h" #include "ceres/linear_solver.h" +#include "ceres/stringprintf.h" +#include "ceres/types.h" +#include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -class LinearOperator; - -// This class implements the now classical Conjugate Gradients -// algorithm of Hestenes & Stiefel for solving postive semidefinite -// linear sytems. Optionally it can use a preconditioner also to -// reduce the condition number of the linear system and improve the -// convergence rate. Modern references for Conjugate Gradients are the -// books by Yousef Saad and Trefethen & Bau. This implementation of CG -// has been augmented with additional termination tests that are -// needed for forcing early termination when used as part of an -// inexact Newton solver. 
-// -// For more details see the documentation for -// LinearSolver::PerSolveOptions::r_tolerance and -// LinearSolver::PerSolveOptions::q_tolerance in linear_solver.h. -class CERES_NO_EXPORT ConjugateGradientsSolver final : public LinearSolver { +// Interface for the linear operator used by ConjugateGradientsSolver. +template +class ConjugateGradientsLinearOperator { public: - explicit ConjugateGradientsSolver(LinearSolver::Options options); - Summary Solve(LinearOperator* A, - const double* b, - const LinearSolver::PerSolveOptions& per_solve_options, - double* x) final; - - private: - const LinearSolver::Options options_; + ~ConjugateGradientsLinearOperator() = default; + virtual void RightMultiplyAndAccumulate(const DenseVectorType& x, + DenseVectorType& y) = 0; }; -} // namespace internal -} // namespace ceres +// Adapter class that makes LinearOperator appear like an instance of +// ConjugateGradientsLinearOperator. +class LinearOperatorAdapter : public ConjugateGradientsLinearOperator { + public: + LinearOperatorAdapter(LinearOperator& linear_operator) + : linear_operator_(linear_operator) {} + + void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { + linear_operator_.RightMultiplyAndAccumulate(x, y); + } + + private: + LinearOperator& linear_operator_; +}; + +// Options to control the ConjugateGradientsSolver. For detailed documentation +// for each of these options see linear_solver.h +struct ConjugateGradientsSolverOptions { + int min_num_iterations = 1; + int max_num_iterations = 1; + int residual_reset_period = 10; + double r_tolerance = 0.0; + double q_tolerance = 0.0; + ContextImpl* context = nullptr; + int num_threads = 1; +}; + +// This function implements the now classical Conjugate Gradients algorithm of +// Hestenes & Stiefel for solving positive semidefinite linear systems. +// Optionally it can use a preconditioner also to reduce the condition number of +// the linear system and improve the convergence rate. 
Modern references for +// Conjugate Gradients are the books by Yousef Saad and Trefethen & Bau. This +// implementation of CG has been augmented with additional termination tests +// that are needed for forcing early termination when used as part of an inexact +// Newton solver. +// +// This implementation is templated over DenseVectorType and then in turn on +// ConjugateGradientsLinearOperator, which allows us to write an abstract +// implementation of the Conjugate Gradients algorithm without worrying about how +// these objects are implemented or where they are stored. In particular it +// allows us to have a single implementation that works on CPU and GPU based +// matrices and vectors. +// +// scratch must contain pointers to four DenseVector objects of the same size as +// rhs and solution. By asking the user for scratch space, we guarantee that we +// will not perform any allocations inside this function. +template +LinearSolver::Summary ConjugateGradientsSolver( + const ConjugateGradientsSolverOptions options, + ConjugateGradientsLinearOperator& lhs, + const DenseVectorType& rhs, + ConjugateGradientsLinearOperator& preconditioner, + DenseVectorType* scratch[4], + DenseVectorType& solution) { + auto IsZeroOrInfinity = [](double x) { + return ((x == 0.0) || std::isinf(x)); + }; + + DenseVectorType& p = *scratch[0]; + DenseVectorType& r = *scratch[1]; + DenseVectorType& z = *scratch[2]; + DenseVectorType& tmp = *scratch[3]; + + LinearSolver::Summary summary; + summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE; + summary.message = "Maximum number of iterations reached."; + summary.num_iterations = 0; + + const double norm_rhs = Norm(rhs, options.context, options.num_threads); + if (norm_rhs == 0.0) { + SetZero(solution, options.context, options.num_threads); + summary.termination_type = LinearSolverTerminationType::SUCCESS; + summary.message = "Convergence. 
|b| = 0."; + return summary; + } + + const double tol_r = options.r_tolerance * norm_rhs; + + SetZero(tmp, options.context, options.num_threads); + lhs.RightMultiplyAndAccumulate(solution, tmp); + + // r = rhs - tmp + Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads); + + double norm_r = Norm(r, options.context, options.num_threads); + if (options.min_num_iterations == 0 && norm_r <= tol_r) { + summary.termination_type = LinearSolverTerminationType::SUCCESS; + summary.message = + StringPrintf("Convergence. |r| = %e <= %e.", norm_r, tol_r); + return summary; + } + + double rho = 1.0; + + // Initial value of the quadratic model Q = x'Ax - 2 * b'x. + // double Q0 = -1.0 * solution.dot(rhs + r); + Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads); + double Q0 = -Dot(solution, tmp, options.context, options.num_threads); + + for (summary.num_iterations = 1;; ++summary.num_iterations) { + SetZero(z, options.context, options.num_threads); + preconditioner.RightMultiplyAndAccumulate(r, z); + + const double last_rho = rho; + // rho = r.dot(z); + rho = Dot(r, z, options.context, options.num_threads); + if (IsZeroOrInfinity(rho)) { + summary.termination_type = LinearSolverTerminationType::FAILURE; + summary.message = StringPrintf("Numerical failure. rho = r'z = %e.", rho); + break; + } + + if (summary.num_iterations == 1) { + Copy(z, p, options.context, options.num_threads); + } else { + const double beta = rho / last_rho; + if (IsZeroOrInfinity(beta)) { + summary.termination_type = LinearSolverTerminationType::FAILURE; + summary.message = StringPrintf( + "Numerical failure. 
beta = rho_n / rho_{n-1} = %e, " + "rho_n = %e, rho_{n-1} = %e", + beta, + rho, + last_rho); + break; + } + // p = z + beta * p; + Axpby(1.0, z, beta, p, p, options.context, options.num_threads); + } + + DenseVectorType& q = z; + SetZero(q, options.context, options.num_threads); + lhs.RightMultiplyAndAccumulate(p, q); + const double pq = Dot(p, q, options.context, options.num_threads); + if ((pq <= 0) || std::isinf(pq)) { + summary.termination_type = LinearSolverTerminationType::NO_CONVERGENCE; + summary.message = StringPrintf( + "Matrix is indefinite, no more progress can be made. " + "p'q = %e. |p| = %e, |q| = %e", + pq, + Norm(p, options.context, options.num_threads), + Norm(q, options.context, options.num_threads)); + break; + } + + const double alpha = rho / pq; + if (std::isinf(alpha)) { + summary.termination_type = LinearSolverTerminationType::FAILURE; + summary.message = StringPrintf( + "Numerical failure. alpha = rho / pq = %e, rho = %e, pq = %e.", + alpha, + rho, + pq); + break; + } + + // solution = solution + alpha * p; + Axpby(1.0, + solution, + alpha, + p, + solution, + options.context, + options.num_threads); + + // Ideally we would just use the update r = r - alpha*q to keep + // track of the residual vector. However this estimate tends to + // drift over time due to round off errors. Thus every + // residual_reset_period iterations, we calculate the residual as + // r = b - Ax. We do not do this every iteration because this + // requires an additional matrix vector multiply which would + // double the complexity of the CG algorithm. + if (summary.num_iterations % options.residual_reset_period == 0) { + SetZero(tmp, options.context, options.num_threads); + lhs.RightMultiplyAndAccumulate(solution, tmp); + Axpby(1.0, rhs, -1.0, tmp, r, options.context, options.num_threads); + // r = rhs - tmp; + } else { + Axpby(1.0, r, -alpha, q, r, options.context, options.num_threads); + // r = r - alpha * q; + } + + // Quadratic model based termination. 
+ // Q1 = x'Ax - 2 * b' x. + // const double Q1 = -1.0 * solution.dot(rhs + r); + Axpby(1.0, rhs, 1.0, r, tmp, options.context, options.num_threads); + const double Q1 = -Dot(solution, tmp, options.context, options.num_threads); + + // For PSD matrices A, let + // + // Q(x) = x'Ax - 2b'x + // + // be the cost of the quadratic function defined by A and b. Then, + // the solver terminates at iteration i if + // + // i * (Q(x_i) - Q(x_i-1)) / Q(x_i) < q_tolerance. + // + // This termination criterion is more useful when using CG to + // solve the Newton step. This particular convergence test comes + // from Stephen Nash's work on truncated Newton + // methods. References: + // + // 1. Stephen G. Nash & Ariela Sofer, Assessing A Search + // Direction Within A Truncated Newton Method, Operations + // Research Letters 9(1990) 219-221. + // + // 2. Stephen G. Nash, A Survey of Truncated Newton Methods, + // Journal of Computational and Applied Mathematics, + // 124(1-2), 45-59, 2000. + // + const double zeta = summary.num_iterations * (Q1 - Q0) / Q1; + if (zeta < options.q_tolerance && + summary.num_iterations >= options.min_num_iterations) { + summary.termination_type = LinearSolverTerminationType::SUCCESS; + summary.message = + StringPrintf("Iteration: %d Convergence: zeta = %e < %e. |r| = %e", + summary.num_iterations, + zeta, + options.q_tolerance, + Norm(r, options.context, options.num_threads)); + break; + } + Q0 = Q1; + + // Residual based termination. + norm_r = Norm(r, options.context, options.num_threads); + if (norm_r <= tol_r && + summary.num_iterations >= options.min_num_iterations) { + summary.termination_type = LinearSolverTerminationType::SUCCESS; + summary.message = + StringPrintf("Iteration: %d Convergence. 
|r| = %e <= %e.", + summary.num_iterations, + norm_r, + tol_r); + break; + } + + if (summary.num_iterations >= options.max_num_iterations) { + break; + } + } + + return summary; +} + +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/context.cc b/extern/ceres/internal/ceres/context.cc index fde16b84bca..e5d85f68d93 100644 --- a/extern/ceres/internal/ceres/context.cc +++ b/extern/ceres/internal/ceres/context.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/context_impl.cc b/extern/ceres/internal/ceres/context_impl.cc index a4b3c842da1..2b9d9cce248 100644 --- a/extern/ceres/internal/ceres/context_impl.cc +++ b/extern/ceres/internal/ceres/context_impl.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,6 +33,8 @@ #include #include "ceres/internal/config.h" +#include "ceres/stringprintf.h" +#include "ceres/wall_time.h" #ifndef CERES_NO_CUDA #include "cublas_v2.h" @@ -40,69 +42,155 @@ #include "cusolverDn.h" #endif // CERES_NO_CUDA -namespace ceres { -namespace internal { +namespace ceres::internal { ContextImpl::ContextImpl() = default; #ifndef CERES_NO_CUDA -bool ContextImpl::InitCUDA(std::string* message) { - if (cuda_initialized_) { +void ContextImpl::TearDown() { + if (cusolver_handle_ != nullptr) { + cusolverDnDestroy(cusolver_handle_); + cusolver_handle_ = nullptr; + } + if (cublas_handle_ != nullptr) { + cublasDestroy(cublas_handle_); + cublas_handle_ = nullptr; + } + if (cusparse_handle_ != nullptr) { + cusparseDestroy(cusparse_handle_); + cusparse_handle_ = nullptr; + } + for (auto& s : streams_) { + if (s != nullptr) { + cudaStreamDestroy(s); + s = nullptr; + } + } + is_cuda_initialized_ = false; +} + +std::string ContextImpl::CudaConfigAsString() const { + return ceres::internal::StringPrintf( + "======================= CUDA Device Properties ======================\n" + "Cuda version : %d.%d\n" + "Device ID : %d\n" + "Device name : %s\n" + "Total GPU memory : %6.f MiB\n" + "GPU memory available : %6.f MiB\n" + "Compute capability : %d.%d\n" + "Warp size : %d\n" + "Max threads per block : %d\n" + "Max threads per dim : %d %d %d\n" + "Max grid size : %d %d %d\n" + "Multiprocessor count : %d\n" + "cudaMallocAsync supported : %s\n" + "====================================================================", + cuda_version_major_, + cuda_version_minor_, + gpu_device_id_in_use_, + gpu_device_properties_.name, + gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0, + GpuMemoryAvailable() / 1024.0 / 1024.0, + gpu_device_properties_.major, + gpu_device_properties_.minor, + gpu_device_properties_.warpSize, + gpu_device_properties_.maxThreadsPerBlock, + 
gpu_device_properties_.maxThreadsDim[0], + gpu_device_properties_.maxThreadsDim[1], + gpu_device_properties_.maxThreadsDim[2], + gpu_device_properties_.maxGridSize[0], + gpu_device_properties_.maxGridSize[1], + gpu_device_properties_.maxGridSize[2], + gpu_device_properties_.multiProcessorCount, + // In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it + // is not available in older versions + is_cuda_memory_pools_supported_ ? "Yes" : "No"); +} + +size_t ContextImpl::GpuMemoryAvailable() const { + size_t free, total; + cudaMemGetInfo(&free, &total); + return free; +} + +bool ContextImpl::InitCuda(std::string* message) { + if (is_cuda_initialized_) { return true; } + CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess); + int cuda_version; + CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess); + cuda_version_major_ = cuda_version / 1000; + cuda_version_minor_ = (cuda_version % 1000) / 10; + CHECK_EQ( + cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_), + cudaSuccess); +#if CUDART_VERSION >= 11020 + int is_cuda_memory_pools_supported; + CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported, + cudaDevAttrMemoryPoolsSupported, + gpu_device_id_in_use_), + cudaSuccess); + is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1; +#endif + VLOG(3) << "\n" << CudaConfigAsString(); + EventLogger event_logger("InitCuda"); if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { - *message = "cuBLAS::cublasCreate failed."; - cublas_handle_ = nullptr; - return false; - } - if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) { - *message = "cuSolverDN::cusolverDnCreate failed."; - cusolver_handle_ = nullptr; - cublasDestroy(cublas_handle_); - cublas_handle_ = nullptr; - return false; - } - if (cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) != - cudaSuccess) { - *message = "CUDA::cudaStreamCreateWithFlags failed."; - cusolverDnDestroy(cusolver_handle_); - 
cublasDestroy(cublas_handle_); - cusolver_handle_ = nullptr; - cublas_handle_ = nullptr; - stream_ = nullptr; - return false; - } - if (cusolverDnSetStream(cusolver_handle_, stream_) != - CUSOLVER_STATUS_SUCCESS || - cublasSetStream(cublas_handle_, stream_) != CUBLAS_STATUS_SUCCESS) { *message = - "cuSolverDN::cusolverDnSetStream or cuBLAS::cublasSetStream failed."; - cusolverDnDestroy(cusolver_handle_); - cublasDestroy(cublas_handle_); - cudaStreamDestroy(stream_); - cusolver_handle_ = nullptr; + "CUDA initialization failed because cuBLAS::cublasCreate failed."; cublas_handle_ = nullptr; - stream_ = nullptr; return false; } - cuda_initialized_ = true; + event_logger.AddEvent("cublasCreate"); + if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) { + *message = + "CUDA initialization failed because cuSolverDN::cusolverDnCreate " + "failed."; + TearDown(); + return false; + } + event_logger.AddEvent("cusolverDnCreate"); + if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) { + *message = + "CUDA initialization failed because cuSPARSE::cusparseCreate failed."; + TearDown(); + return false; + } + event_logger.AddEvent("cusparseCreate"); + for (auto& s : streams_) { + if (cudaStreamCreateWithFlags(&s, cudaStreamNonBlocking) != cudaSuccess) { + *message = + "CUDA initialization failed because CUDA::cudaStreamCreateWithFlags " + "failed."; + TearDown(); + return false; + } + } + event_logger.AddEvent("cudaStreamCreateWithFlags"); + if (cusolverDnSetStream(cusolver_handle_, DefaultStream()) != + CUSOLVER_STATUS_SUCCESS || + cublasSetStream(cublas_handle_, DefaultStream()) != + CUBLAS_STATUS_SUCCESS || + cusparseSetStream(cusparse_handle_, DefaultStream()) != + CUSPARSE_STATUS_SUCCESS) { + *message = "CUDA initialization failed because SetStream failed."; + TearDown(); + return false; + } + event_logger.AddEvent("SetStream"); + is_cuda_initialized_ = true; return true; } #endif // CERES_NO_CUDA ContextImpl::~ContextImpl() { #ifndef 
CERES_NO_CUDA - if (cuda_initialized_) { - cusolverDnDestroy(cusolver_handle_); - cublasDestroy(cublas_handle_); - cudaStreamDestroy(stream_); - } + TearDown(); #endif // CERES_NO_CUDA } + void ContextImpl::EnsureMinimumThreads(int num_threads) { -#ifdef CERES_USE_CXX_THREADS thread_pool.Resize(num_threads); -#endif // CERES_USE_CXX_THREADS } -} // namespace internal -} // namespace ceres + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/context_impl.h b/extern/ceres/internal/ceres/context_impl.h index 8e9a03fb4ae..46692e6ce20 100644 --- a/extern/ceres/internal/ceres/context_impl.h +++ b/extern/ceres/internal/ceres/context_impl.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,14 +46,12 @@ #include "cublas_v2.h" #include "cuda_runtime.h" #include "cusolverDn.h" +#include "cusparse.h" #endif // CERES_NO_CUDA -#ifdef CERES_USE_CXX_THREADS #include "ceres/thread_pool.h" -#endif // CERES_USE_CXX_THREADS -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT ContextImpl final : public Context { public: @@ -67,30 +65,82 @@ class CERES_NO_EXPORT ContextImpl final : public Context { // defined by the hardware. Otherwise this call is a no-op. void EnsureMinimumThreads(int num_threads); -#ifdef CERES_USE_CXX_THREADS ThreadPool thread_pool; -#endif // CERES_USE_CXX_THREADS #ifndef CERES_NO_CUDA - // Initializes the cuSolverDN context, creates an asynchronous stream, and - // associates the stream with cuSolverDN. Returns true iff initialization was - // successful, else it returns false and a human-readable error message is - // returned. - bool InitCUDA(std::string* message); + // Note on Ceres' use of CUDA Devices on multi-GPU systems: + // 1. 
On a multi-GPU system, if nothing special is done, the "default" CUDA + // device will be used, which is device 0. + // 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment + // variable, Ceres will still use device 0 visible to the program, but + // device 0 will be the first GPU indicated in the environment variable. + // 3. If the user explicitly selects a GPU in the host process before calling + // Ceres, Ceres will use that GPU. + + // Note on Ceres' use of CUDA Streams: + // Most operations on the GPU are performed using a single stream. In + // those cases DefaultStream() should be used. This ensures that operations + // are stream-ordered, and might be concurrent with cpu processing with no + // additional effort. + // + // a. Single-stream workloads + // - Only use default stream + // - Return control to the caller without synchronization whenever possible + // - Stream synchronization occurs only after GPU to CPU transfers, and is + // handled by CudaBuffer + // + // b. Multi-stream workloads + // Multi-stream workloads are more restricted in order to make it harder to + // get a race-condition. + // - Should always synchronize the default stream on entry + // - Should always synchronize all utilized streams on exit + // - Should not make any assumptions on one of streams_[] being default + // + // With those rules in place + // - All single-stream asynchronous workloads are serialized using default + // stream + // - Multiple-stream workloads always wait for single-stream workloads to finish + // and leave no running computations on exit. + // This slightly penalizes multi-stream workloads, but makes it easier to + // avoid race conditions when a multiple-stream workload depends on results of + // any preceding gpu computations. + + // Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates the + // asynchronous CUDA streams, and associates the default stream with the contexts. 
+ // Returns true iff initialization was successful, else it returns false and a + // human-readable error message is returned. + bool InitCuda(std::string* message); + void TearDown(); + inline bool IsCudaInitialized() const { return is_cuda_initialized_; } + // Returns a human-readable string describing the capabilities of the current + // CUDA device. CudaConfigAsString can only be called after InitCuda has been + // called. + std::string CudaConfigAsString() const; + // Returns the number of bytes of available global memory on the current CUDA + // device. If it is called before InitCuda, it returns 0. + size_t GpuMemoryAvailable() const; - // Handle to the cuSOLVER context. cusolverDnHandle_t cusolver_handle_ = nullptr; - // Handle to cuBLAS context. cublasHandle_t cublas_handle_ = nullptr; - // CUDA device stream. - cudaStream_t stream_ = nullptr; - // Indicates whether all the CUDA resources have been initialized. - bool cuda_initialized_ = false; + + // Default stream. + // Kernel invocations and memory copies on this stream can be left without + // synchronization. 
+ cudaStream_t DefaultStream() { return streams_[0]; } + static constexpr int kNumCudaStreams = 2; + cudaStream_t streams_[kNumCudaStreams] = {0}; + + cusparseHandle_t cusparse_handle_ = nullptr; + bool is_cuda_initialized_ = false; + int gpu_device_id_in_use_ = -1; + cudaDeviceProp gpu_device_properties_; + bool is_cuda_memory_pools_supported_ = false; + int cuda_version_major_ = 0; + int cuda_version_minor_ = 0; #endif // CERES_NO_CUDA }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/coordinate_descent_minimizer.cc b/extern/ceres/internal/ceres/coordinate_descent_minimizer.cc index a6e149d1cee..53986ee386e 100644 --- a/extern/ceres/internal/ceres/coordinate_descent_minimizer.cc +++ b/extern/ceres/internal/ceres/coordinate_descent_minimizer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,11 @@ #include #include +#include #include #include +#include +#include #include #include "ceres/evaluator.h" @@ -49,15 +52,7 @@ #include "ceres/trust_region_minimizer.h" #include "ceres/trust_region_strategy.h" -namespace ceres { -namespace internal { - -using std::map; -using std::max; -using std::min; -using std::set; -using std::string; -using std::vector; +namespace ceres::internal { CoordinateDescentMinimizer::CoordinateDescentMinimizer(ContextImpl* context) : context_(context) { @@ -70,15 +65,19 @@ bool CoordinateDescentMinimizer::Init( const Program& program, const ProblemImpl::ParameterMap& parameter_map, const ParameterBlockOrdering& ordering, - string* error) { + std::string* /*error*/) { parameter_blocks_.clear(); independent_set_offsets_.clear(); independent_set_offsets_.push_back(0); // Serialize the OrderedGroups into a vector of parameter block // offsets for parallel access. - map parameter_block_index; - map> group_to_elements = ordering.group_to_elements(); + + // TODO(sameeragarwal): Investigate if parameter_block_index should be an + // ordered or an unordered container. + std::map parameter_block_index; + std::map> group_to_elements = + ordering.group_to_elements(); for (const auto& g_t_e : group_to_elements) { const auto& elements = g_t_e.second; for (double* parameter_block : elements) { @@ -93,7 +92,8 @@ bool CoordinateDescentMinimizer::Init( // The ordering does not have to contain all parameter blocks, so // assign zero offsets/empty independent sets to these parameter // blocks. 
- const vector& parameter_blocks = program.parameter_blocks(); + const std::vector& parameter_blocks = + program.parameter_blocks(); for (auto* parameter_block : parameter_blocks) { if (!ordering.IsMember(parameter_block->mutable_user_state())) { parameter_blocks_.push_back(parameter_block); @@ -104,7 +104,8 @@ bool CoordinateDescentMinimizer::Init( // Compute the set of residual blocks that depend on each parameter // block. residual_blocks_.resize(parameter_block_index.size()); - const vector& residual_blocks = program.residual_blocks(); + const std::vector& residual_blocks = + program.residual_blocks(); for (auto* residual_block : residual_blocks) { const int num_parameter_blocks = residual_block->NumParameterBlocks(); for (int j = 0; j < num_parameter_blocks; ++j) { @@ -126,7 +127,7 @@ bool CoordinateDescentMinimizer::Init( void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, double* parameters, - Solver::Summary* summary) { + Solver::Summary* /*summary*/) { // Set the state and mark all parameter blocks constant. 
for (auto* parameter_block : parameter_blocks_) { parameter_block->SetState(parameters + parameter_block->state_offset()); @@ -135,8 +136,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, std::vector> linear_solvers( options.num_threads); - // std::unique_ptr linear_solvers( - // new LinearSolver*[options.num_threads]); LinearSolver::Options linear_solver_options; linear_solver_options.type = DENSE_QR; @@ -155,9 +154,9 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, } const int num_inner_iteration_threads = - min(options.num_threads, num_problems); + std::min(options.num_threads, num_problems); evaluator_options_.num_threads = - max(1, options.num_threads / num_inner_iteration_threads); + std::max(1, options.num_threads / num_inner_iteration_threads); // The parameter blocks in each independent set can be optimized // in parallel, since they do not co-occur in any residual block. @@ -170,9 +169,11 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, ParameterBlock* parameter_block = parameter_blocks_[j]; const int old_index = parameter_block->index(); const int old_delta_offset = parameter_block->delta_offset(); + const int old_state_offset = parameter_block->state_offset(); parameter_block->SetVarying(); parameter_block->set_index(0); parameter_block->set_delta_offset(0); + parameter_block->set_state_offset(0); Program inner_program; inner_program.mutable_parameter_blocks()->push_back(parameter_block); @@ -189,11 +190,12 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, Solver::Summary inner_summary; Solve(&inner_program, linear_solvers[thread_id].get(), - parameters + parameter_block->state_offset(), + parameters + old_state_offset, &inner_summary); parameter_block->set_index(old_index); parameter_block->set_delta_offset(old_delta_offset); + parameter_block->set_state_offset(old_state_offset); parameter_block->SetState(parameters + 
parameter_block->state_offset()); parameter_block->SetConstant(); @@ -203,10 +205,6 @@ void CoordinateDescentMinimizer::Minimize(const Minimizer::Options& options, for (auto* parameter_block : parameter_blocks_) { parameter_block->SetVarying(); } - - // for (int i = 0; i < options.num_threads; ++i) { - // delete linear_solvers[i]; - //} } // Solve the optimization problem for one parameter block. @@ -218,7 +216,7 @@ void CoordinateDescentMinimizer::Solve(Program* program, summary->initial_cost = 0.0; summary->fixed_cost = 0.0; summary->final_cost = 0.0; - string error; + std::string error; Minimizer::Options minimizer_options; minimizer_options.evaluator = @@ -241,8 +239,10 @@ void CoordinateDescentMinimizer::Solve(Program* program, bool CoordinateDescentMinimizer::IsOrderingValid( const Program& program, const ParameterBlockOrdering& ordering, - string* message) { - const map>& group_to_elements = + std::string* message) { + // TODO(sameeragarwal): Investigate if this should be an ordered or an + // unordered group. + const std::map>& group_to_elements = ordering.group_to_elements(); // Verify that each group is an independent set @@ -270,5 +270,4 @@ CoordinateDescentMinimizer::CreateOrdering(const Program& program) { return ordering; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/coordinate_descent_minimizer.h b/extern/ceres/internal/ceres/coordinate_descent_minimizer.h index 75f26480c88..8fc5dd7464a 100644 --- a/extern/ceres/internal/ceres/coordinate_descent_minimizer.h +++ b/extern/ceres/internal/ceres/coordinate_descent_minimizer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ #ifndef CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_ #define CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_ +#include #include #include @@ -40,8 +41,7 @@ #include "ceres/problem_impl.h" #include "ceres/solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; class LinearSolver; @@ -103,7 +103,6 @@ class CERES_NO_EXPORT CoordinateDescentMinimizer final : public Minimizer { ContextImpl* context_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_COORDINATE_DESCENT_MINIMIZER_H_ diff --git a/extern/ceres/internal/ceres/corrector.cc b/extern/ceres/internal/ceres/corrector.cc index bf3ba9c5714..d9b80cd908a 100644 --- a/extern/ceres/internal/ceres/corrector.cc +++ b/extern/ceres/internal/ceres/corrector.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,7 @@ #include "ceres/internal/eigen.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { Corrector::Corrector(const double sq_norm, const double rho[3]) { CHECK_GE(sq_norm, 0.0); @@ -88,7 +87,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) { // We now require that the first derivative of the loss function be // positive only if the second derivative is positive. This is // because when the second derivative is non-positive, we do not use - // the second order correction suggested by BANS and instead use a + // the second order correction suggested by BAMS and instead use a // simpler first order strategy which does not use a division by the // gradient of the loss function. 
CHECK_GT(rho[1], 0.0); @@ -112,7 +111,7 @@ Corrector::Corrector(const double sq_norm, const double rho[3]) { void Corrector::CorrectResiduals(const int num_rows, double* residuals) { DCHECK(residuals != nullptr); - // Equation 11 in BANS. + // Equation 11 in BAMS. VectorRef(residuals, num_rows) *= residual_scaling_; } @@ -129,7 +128,7 @@ void Corrector::CorrectJacobian(const int num_rows, return; } - // Equation 11 in BANS. + // Equation 11 in BAMS. // // J = sqrt(rho) * (J - alpha^2 r * r' J) // @@ -155,5 +154,4 @@ void Corrector::CorrectJacobian(const int num_rows, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/corrector.h b/extern/ceres/internal/ceres/corrector.h index 44379a3ea7a..2216a967a83 100644 --- a/extern/ceres/internal/ceres/corrector.h +++ b/extern/ceres/internal/ceres/corrector.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ // // Class definition for the object that is responsible for applying a // second order correction to the Gauss-Newton based on the ideas in -// BANS by Triggs et al. +// BAMS by Triggs et al. #ifndef CERES_INTERNAL_CORRECTOR_H_ #define CERES_INTERNAL_CORRECTOR_H_ @@ -38,8 +38,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Corrector is responsible for applying the second order correction // to the residual and jacobian of a least squares problem based on a @@ -48,7 +47,7 @@ namespace internal { // The key idea here is to look at the expressions for the robustified // gauss newton approximation and then take its square root to get the // corresponding corrections to the residual and jacobian. 
For the -// full expressions see Eq. 10 and 11 in BANS by Triggs et al. +// full expressions see Eq. 10 and 11 in BAMS by Triggs et al. class CERES_NO_EXPORT Corrector { public: // The constructor takes the squared norm, the value, the first and @@ -87,8 +86,7 @@ class CERES_NO_EXPORT Corrector { double residual_scaling_; double alpha_sq_norm_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/cost_function.cc b/extern/ceres/internal/ceres/cost_function.cc index 7597b431ec9..abd53dde0ab 100644 --- a/extern/ceres/internal/ceres/cost_function.cc +++ b/extern/ceres/internal/ceres/cost_function.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/covariance.cc b/extern/ceres/internal/ceres/covariance.cc index d63dd3789c3..50da0299470 100644 --- a/extern/ceres/internal/ceres/covariance.cc +++ b/extern/ceres/internal/ceres/covariance.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,9 +39,6 @@ namespace ceres { -using std::pair; -using std::vector; - Covariance::Covariance(const Covariance::Options& options) { impl_ = std::make_unique(options); } @@ -49,14 +46,15 @@ Covariance::Covariance(const Covariance::Options& options) { Covariance::~Covariance() = default; bool Covariance::Compute( - const vector>& covariance_blocks, + const std::vector>& + covariance_blocks, Problem* problem) { - return impl_->Compute(covariance_blocks, problem->impl_.get()); + return impl_->Compute(covariance_blocks, problem->mutable_impl()); } -bool Covariance::Compute(const vector& parameter_blocks, +bool Covariance::Compute(const std::vector& parameter_blocks, Problem* problem) { - return impl_->Compute(parameter_blocks, problem->impl_.get()); + return impl_->Compute(parameter_blocks, problem->mutable_impl()); } bool Covariance::GetCovarianceBlock(const double* parameter_block1, @@ -79,7 +77,7 @@ bool Covariance::GetCovarianceBlockInTangentSpace( } bool Covariance::GetCovarianceMatrix( - const vector& parameter_blocks, + const std::vector& parameter_blocks, double* covariance_matrix) const { return impl_->GetCovarianceMatrixInTangentOrAmbientSpace(parameter_blocks, true, // ambient diff --git a/extern/ceres/internal/ceres/covariance_impl.cc b/extern/ceres/internal/ceres/covariance_impl.cc index 324b5531a04..6e8362dacc3 100644 --- a/extern/ceres/internal/ceres/covariance_impl.cc +++ b/extern/ceres/internal/ceres/covariance_impl.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -57,24 +57,12 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::swap; +namespace ceres::internal { using CovarianceBlocks = std::vector>; CovarianceImpl::CovarianceImpl(const Covariance::Options& options) : options_(options), is_computed_(false), is_valid_(false) { -#ifdef CERES_NO_THREADS - if (options_.num_threads > 1) { - LOG(WARNING) << "No threading support is compiled into this binary; " - << "only options.num_threads = 1 is supported. Switching " - << "to single threaded mode."; - options_.num_threads = 1; - } -#endif - evaluate_options_.num_threads = options_.num_threads; evaluate_options_.apply_loss_function = options_.apply_loss_function; } @@ -176,7 +164,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace( const double* parameter_block2 = original_parameter_block2; const bool transpose = parameter_block1 > parameter_block2; if (transpose) { - swap(parameter_block1, parameter_block2); + std::swap(parameter_block1, parameter_block2); } // Find where in the covariance matrix the block is located. @@ -190,7 +178,7 @@ bool CovarianceImpl::GetCovarianceBlockInTangentOrAmbientSpace( const int* cols_begin = cols + rows[row_begin]; // The only part that requires work is walking the compressed column - // vector to determine where the set of columns correspnding to the + // vector to determine where the set of columns corresponding to the // covariance block begin. int offset = 0; while (cols_begin[offset] != col_begin && offset < row_size) { @@ -322,9 +310,8 @@ bool CovarianceImpl::GetCovarianceMatrixInTangentOrAmbientSpace( // Assemble the blocks in the covariance matrix. 
MatrixRef covariance(covariance_matrix, covariance_size, covariance_size); const int num_threads = options_.num_threads; - std::unique_ptr workspace( - new double[num_threads * max_covariance_block_size * - max_covariance_block_size]); + auto workspace = std::make_unique( + num_threads * max_covariance_block_size * max_covariance_block_size); bool success = true; @@ -481,14 +468,12 @@ bool CovarianceImpl::ComputeCovarianceSparsity( // Iterate over the covariance blocks contained in this row block // and count the number of columns in this row block. int num_col_blocks = 0; - int num_columns = 0; for (int j = i; j < covariance_blocks.size(); ++j, ++num_col_blocks) { const std::pair& block_pair = covariance_blocks[j]; if (block_pair.first != row_block) { break; } - num_columns += problem->ParameterBlockTangentSize(block_pair.second); } // Fill out all the compressed rows for this parameter block. @@ -598,9 +583,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() { cholmod_jacobian.ncol = num_cols; cholmod_jacobian.nzmax = num_nonzeros; cholmod_jacobian.nz = nullptr; - cholmod_jacobian.p = reinterpret_cast(&transpose_rows[0]); - cholmod_jacobian.i = reinterpret_cast(&transpose_cols[0]); - cholmod_jacobian.x = reinterpret_cast(&transpose_values[0]); + cholmod_jacobian.p = reinterpret_cast(transpose_rows.data()); + cholmod_jacobian.i = reinterpret_cast(transpose_cols.data()); + cholmod_jacobian.x = reinterpret_cast(transpose_values.data()); cholmod_jacobian.z = nullptr; cholmod_jacobian.stype = 0; // Matrix is not symmetric. cholmod_jacobian.itype = CHOLMOD_LONG; @@ -628,13 +613,15 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() { // more efficient, both in runtime as well as the quality of // ordering computed. So, it maybe worth doing that analysis // separately. 
- const SuiteSparse_long rank = SuiteSparseQR(SPQR_ORDERING_BESTAMD, - SPQR_DEFAULT_TOL, - cholmod_jacobian.ncol, - &cholmod_jacobian, - &R, - &permutation, - &cc); + const SuiteSparse_long rank = SuiteSparseQR( + SPQR_ORDERING_BESTAMD, + options_.column_pivot_threshold < 0 ? SPQR_DEFAULT_TOL + : options_.column_pivot_threshold, + static_cast(cholmod_jacobian.ncol), + &cholmod_jacobian, + &R, + &permutation, + &cc); event_logger.AddEvent("Numeric Factorization"); if (R == nullptr) { LOG(ERROR) << "Something is wrong. SuiteSparseQR returned R = nullptr."; @@ -678,7 +665,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSuiteSparseQR() { // Since the covariance matrix is symmetric, the i^th row and column // are equal. const int num_threads = options_.num_threads; - std::unique_ptr workspace(new double[num_threads * num_cols]); + auto workspace = std::make_unique(num_threads * num_cols); problem_->context()->EnsureMinimumThreads(num_threads); ParallelFor( @@ -830,19 +817,23 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() { jacobian.values.data()); event_logger.AddEvent("ConvertToSparseMatrix"); - Eigen::SparseQR> qr_solver( - sparse_jacobian); + Eigen::SparseQR> qr; + if (options_.column_pivot_threshold > 0) { + qr.setPivotThreshold(options_.column_pivot_threshold); + } + + qr.compute(sparse_jacobian); event_logger.AddEvent("QRDecomposition"); - if (qr_solver.info() != Eigen::Success) { + if (qr.info() != Eigen::Success) { LOG(ERROR) << "Eigen::SparseQR decomposition failed."; return false; } - if (qr_solver.rank() < jacobian.num_cols) { + if (qr.rank() < jacobian.num_cols) { LOG(ERROR) << "Jacobian matrix is rank deficient. " << "Number of columns: " << jacobian.num_cols - << " rank: " << qr_solver.rank(); + << " rank: " << qr.rank(); return false; } @@ -852,7 +843,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() { // Compute the inverse column permutation used by QR factorization. 
Eigen::PermutationMatrix inverse_permutation = - qr_solver.colsPermutation().inverse(); + qr.colsPermutation().inverse(); // The following loop exploits the fact that the i^th column of A^{-1} // is given by the solution to the linear system @@ -865,7 +856,7 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() { // are equal. const int num_cols = jacobian.num_cols; const int num_threads = options_.num_threads; - std::unique_ptr workspace(new double[num_threads * num_cols]); + auto workspace = std::make_unique(num_threads * num_cols); problem_->context()->EnsureMinimumThreads(num_threads); ParallelFor( @@ -875,9 +866,9 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() { if (row_end != row_begin) { double* solution = workspace.get() + thread_id * num_cols; SolveRTRWithSparseRHS(num_cols, - qr_solver.matrixR().innerIndexPtr(), - qr_solver.matrixR().outerIndexPtr(), - &qr_solver.matrixR().data().value(0), + qr.matrixR().innerIndexPtr(), + qr.matrixR().outerIndexPtr(), + &qr.matrixR().data().value(0), inverse_permutation.indices().coeff(r), solution); @@ -895,5 +886,4 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingEigenSparseQR() { return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/covariance_impl.h b/extern/ceres/internal/ceres/covariance_impl.h index fc029ce25b7..9ff798228c6 100644 --- a/extern/ceres/internal/ceres/covariance_impl.h +++ b/extern/ceres/internal/ceres/covariance_impl.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/problem_impl.h" #include "ceres/suitesparse.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CompressedRowSparseMatrix; @@ -96,8 +95,7 @@ class CERES_NO_EXPORT CovarianceImpl { std::unique_ptr covariance_matrix_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.cc b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.cc new file mode 100644 index 00000000000..7564d5268cf --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.cc @@ -0,0 +1,103 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_block_sparse_crs_view.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_kernels_bsm_to_crs.h" + +namespace ceres::internal { + +CudaBlockSparseCRSView::CudaBlockSparseCRSView(const BlockSparseMatrix& bsm, + ContextImpl* context) + : context_(context) { + block_structure_ = std::make_unique( + *bsm.block_structure(), context); + CudaBuffer rows(context, bsm.num_rows() + 1); + CudaBuffer cols(context, bsm.num_nonzeros()); + FillCRSStructure(block_structure_->num_row_blocks(), + bsm.num_rows(), + block_structure_->first_cell_in_row_block(), + block_structure_->cells(), + block_structure_->row_blocks(), + block_structure_->col_blocks(), + rows.data(), + cols.data(), + context->DefaultStream(), + context->is_cuda_memory_pools_supported_); + is_crs_compatible_ = block_structure_->IsCrsCompatible(); + // if matrix is crs-compatible - we can drop block-structure and don't need + // streamed_buffer_ + if (is_crs_compatible_) { + VLOG(3) << "Block-sparse matrix is compatible with CRS, discarding " + "block-structure"; + block_structure_ = nullptr; + } else { + streamed_buffer_ = std::make_unique>( + context_, kMaxTemporaryArraySize); + } + crs_matrix_ = std::make_unique( + bsm.num_cols(), std::move(rows), std::move(cols), context); + UpdateValues(bsm); +} + +void CudaBlockSparseCRSView::UpdateValues(const BlockSparseMatrix& bsm) { + if 
(is_crs_compatible_) { + // Values of CRS-compatible matrices can be copied as-is + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(crs_matrix_->mutable_values(), + bsm.values(), + bsm.num_nonzeros() * sizeof(double), + cudaMemcpyHostToDevice, + context_->DefaultStream())); + return; + } + streamed_buffer_->CopyToGpu( + bsm.values(), + bsm.num_nonzeros(), + [bs = block_structure_.get(), crs = crs_matrix_.get()]( + const double* values, int num_values, int offset, auto stream) { + PermuteToCRS(offset, + num_values, + bs->num_row_blocks(), + bs->first_cell_in_row_block(), + bs->cells(), + bs->row_blocks(), + bs->col_blocks(), + crs->rows(), + values, + crs->mutable_values(), + stream); + }); +} + +} // namespace ceres::internal +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.h b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.h new file mode 100644 index 00000000000..58ef61857cb --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view.h @@ -0,0 +1,108 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) +// + +#ifndef CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_ +#define CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/cuda_block_structure.h" +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_sparse_matrix.h" +#include "ceres/cuda_streamed_buffer.h" + +namespace ceres::internal { +// We use cuSPARSE library for SpMV operations. However, it does not support +// block-sparse format with varying size of the blocks. Thus, we perform the +// following operations in order to compute products of block-sparse matrices +// and dense vectors on gpu: +// - Once per block-sparse structure update: +// - Compute CRS structure from block-sparse structure and check if values of +// block-sparse matrix would have the same order as values of CRS matrix +// - Once per block-sparse values update: +// - Update values in CRS matrix with values of block-sparse matrix +// +// Only block-sparse matrices with sequential order of cells are supported. 
+// +// UpdateValues method updates values: +// - In a single host-to-device copy for matrices with CRS-compatible value +// layout +// - Simultaneously transferring and permuting values using CudaStreamedBuffer +// otherwise +class CERES_NO_EXPORT CudaBlockSparseCRSView { + public: + // Initializes internal CRS matrix using structure and values of block-sparse + // matrix. For block-sparse matrices that have value layout different from CRS + // block-sparse structure will be stored. + CudaBlockSparseCRSView(const BlockSparseMatrix& bsm, ContextImpl* context); + + const CudaSparseMatrix* crs_matrix() const { return crs_matrix_.get(); } + CudaSparseMatrix* mutable_crs_matrix() { return crs_matrix_.get(); } + + // Update values of crs_matrix_ using values of block-sparse matrix. + // Assumes that bsm has the same block-sparse structure as matrix that was + // used for construction. + void UpdateValues(const BlockSparseMatrix& bsm); + + // Returns true if block-sparse matrix had CRS-compatible value layout + bool IsCrsCompatible() const { return is_crs_compatible_; } + + void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const { + crs_matrix()->LeftMultiplyAndAccumulate(x, y); + } + + void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const { + crs_matrix()->RightMultiplyAndAccumulate(x, y); + } + + private: + // Value permutation kernel performs a single element-wise operation per + // thread, thus performing permutation in blocks of 8 megabytes of + // block-sparse values seems reasonable + static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024; + std::unique_ptr crs_matrix_; + // Only created if block-sparse matrix has non-CRS value layout + std::unique_ptr> streamed_buffer_; + // Only stored if block-sparse matrix has non-CRS value layout + std::unique_ptr block_structure_; + bool is_crs_compatible_; + ContextImpl* context_; +}; + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif //
CERES_INTERNAL_CUDA_BLOCK_SPARSE_CRS_VIEW_H_ diff --git a/extern/ceres/internal/ceres/cuda_block_sparse_crs_view_test.cc b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view_test.cc new file mode 100644 index 00000000000..7d7d46c243b --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_sparse_crs_view_test.cc @@ -0,0 +1,164 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_block_sparse_crs_view.h" + +#include +#include + +#include + +#ifndef CERES_NO_CUDA + +namespace ceres::internal { +class CudaBlockSparseCRSViewTest : public ::testing::Test { + protected: + void SetUp() final { + std::string message; + CHECK(context_.InitCuda(&message)) + << "InitCuda() failed because: " << message; + + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = 1234; + options.min_row_block_size = 1; + options.max_row_block_size = 10; + options.num_col_blocks = 567; + options.min_col_block_size = 1; + options.max_col_block_size = 10; + options.block_density = 0.2; + std::mt19937 rng; + + // Block-sparse matrix with order of values different from CRS + block_sparse_non_crs_compatible_ = + BlockSparseMatrix::CreateRandomMatrix(options, rng, true); + std::iota(block_sparse_non_crs_compatible_->mutable_values(), + block_sparse_non_crs_compatible_->mutable_values() + + block_sparse_non_crs_compatible_->num_nonzeros(), + 1); + + options.max_row_block_size = 1; + // Block-sparse matrix with CRS order of values (row-blocks are rows) + block_sparse_crs_compatible_rows_ = + BlockSparseMatrix::CreateRandomMatrix(options, rng, true); + std::iota(block_sparse_crs_compatible_rows_->mutable_values(), + block_sparse_crs_compatible_rows_->mutable_values() + + block_sparse_crs_compatible_rows_->num_nonzeros(), + 1); + // Block-sparse matrix with CRS order of values (single cell per row-block) + auto bs = std::make_unique( + *block_sparse_non_crs_compatible_->block_structure()); + + int num_nonzeros = 0; + for (auto& r : bs->rows) { + const int num_cells = r.cells.size(); + if (num_cells > 1) { + std::uniform_int_distribution uniform_cell(0, num_cells - 1); + const int selected_cell = uniform_cell(rng); + std::swap(r.cells[0], r.cells[selected_cell]); + r.cells.resize(1); + } + const int row_block_size = r.block.size; + for (auto& c : r.cells) { + c.position = 
num_nonzeros; + const int col_block_size = bs->cols[c.block_id].size; + num_nonzeros += col_block_size * row_block_size; + } + } + block_sparse_crs_compatible_single_cell_ = + std::make_unique(bs.release()); + std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(), + block_sparse_crs_compatible_single_cell_->mutable_values() + + block_sparse_crs_compatible_single_cell_->num_nonzeros(), + 1); + } + + void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) { + ASSERT_EQ(csm.num_cols(), bsm.num_cols()); + ASSERT_EQ(csm.num_rows(), bsm.num_rows()); + ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros()); + const int num_rows = bsm.num_rows(); + const int num_cols = bsm.num_cols(); + Vector x(num_cols); + Vector y(num_rows); + CudaVector x_cuda(&context_, num_cols); + CudaVector y_cuda(&context_, num_rows); + Vector y_cuda_host(num_rows); + + for (int i = 0; i < num_cols; ++i) { + x.setZero(); + y.setZero(); + y_cuda.SetZero(); + x[i] = 1.; + x_cuda.CopyFromCpu(x); + csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda); + bsm.RightMultiplyAndAccumulate( + x.data(), y.data(), &context_, std::thread::hardware_concurrency()); + y_cuda.CopyTo(&y_cuda_host); + // There will be up to 1 non-zero product per row, thus we expect an exact + // match on 32-bit integer indices + EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.); + } + } + + std::unique_ptr block_sparse_non_crs_compatible_; + std::unique_ptr block_sparse_crs_compatible_rows_; + std::unique_ptr block_sparse_crs_compatible_single_cell_; + ContextImpl context_; +}; + +TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) { + auto view = + CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_); + ASSERT_EQ(view.IsCrsCompatible(), false); + + auto matrix = view.crs_matrix(); + Compare(*block_sparse_non_crs_compatible_, *matrix); +} + +TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) { + auto view = + 
CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_); + ASSERT_EQ(view.IsCrsCompatible(), true); + + auto matrix = view.crs_matrix(); + Compare(*block_sparse_crs_compatible_rows_, *matrix); +} + +TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) { + auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_, + &context_); + ASSERT_EQ(view.IsCrsCompatible(), true); + + auto matrix = view.crs_matrix(); + Compare(*block_sparse_crs_compatible_single_cell_, *matrix); +} +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_block_structure.cc b/extern/ceres/internal/ceres/cuda_block_structure.cc new file mode 100644 index 00000000000..3685775b60f --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_structure.cc @@ -0,0 +1,234 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_block_structure.h" + +#ifndef CERES_NO_CUDA + +namespace ceres::internal { +namespace { +// Dimension of a sorted array of blocks +inline int Dimension(const std::vector& blocks) { + if (blocks.empty()) { + return 0; + } + const auto& last = blocks.back(); + return last.size + last.position; +} +} // namespace +CudaBlockSparseStructure::CudaBlockSparseStructure( + const CompressedRowBlockStructure& block_structure, ContextImpl* context) + : CudaBlockSparseStructure(block_structure, 0, context) {} + +CudaBlockSparseStructure::CudaBlockSparseStructure( + const CompressedRowBlockStructure& block_structure, + const int num_col_blocks_e, + ContextImpl* context) + : first_cell_in_row_block_(context), + value_offset_row_block_f_(context), + cells_(context), + row_blocks_(context), + col_blocks_(context) { + // Row blocks extracted from CompressedRowBlockStructure::rows + std::vector row_blocks; + // Column blocks can be reused as-is + const auto& col_blocks = block_structure.cols; + + // Row block offset is an index of the first cell corresponding to row block + std::vector first_cell_in_row_block; + // Offset of the first value in the first non-empty row-block of F sub-matrix + std::vector value_offset_row_block_f; + // Flat array of all cells from all row-blocks + std::vector cells; + + int f_values_offset = -1; + num_nonzeros_e_ = 0; + 
is_crs_compatible_ = true; + num_row_blocks_ = block_structure.rows.size(); + num_col_blocks_ = col_blocks.size(); + + row_blocks.reserve(num_row_blocks_); + first_cell_in_row_block.reserve(num_row_blocks_ + 1); + value_offset_row_block_f.reserve(num_row_blocks_ + 1); + num_nonzeros_ = 0; + // Block-sparse matrices arising from block-jacobian writer are expected to + // have sequential layout (for partitioned matrices - it is expected that both + // E and F sub-matrices have sequential layout). + bool sequential_layout = true; + int row_block_id = 0; + num_row_blocks_e_ = 0; + for (; row_block_id < num_row_blocks_; ++row_block_id) { + const auto& r = block_structure.rows[row_block_id]; + const int row_block_size = r.block.size; + const int num_cells = r.cells.size(); + + if (num_col_blocks_e == 0 || r.cells.size() == 0 || + r.cells[0].block_id >= num_col_blocks_e) { + break; + } + num_row_blocks_e_ = row_block_id + 1; + // In E sub-matrix there is exactly a single E cell in the row + // since E cells are stored separately from F cells, crs-compatiblity of + // F sub-matrix only breaks if there are more than 2 cells in row (that + // is, more than 1 cell in F sub-matrix) + if (num_cells > 2 && row_block_size > 1) { + is_crs_compatible_ = false; + } + row_blocks.emplace_back(r.block); + first_cell_in_row_block.push_back(cells.size()); + + for (int cell_id = 0; cell_id < num_cells; ++cell_id) { + const auto& c = r.cells[cell_id]; + const int col_block_size = col_blocks[c.block_id].size; + const int cell_size = col_block_size * row_block_size; + cells.push_back(c); + if (cell_id == 0) { + DCHECK(c.position == num_nonzeros_e_); + num_nonzeros_e_ += cell_size; + } else { + if (f_values_offset == -1) { + num_nonzeros_ = c.position; + f_values_offset = c.position; + } + sequential_layout &= c.position == num_nonzeros_; + num_nonzeros_ += cell_size; + if (cell_id == 1) { + // Correct value_offset_row_block_f for empty row-blocks of F + // preceding this one + for (auto it = 
value_offset_row_block_f.rbegin(); + it != value_offset_row_block_f.rend(); + ++it) { + if (*it != -1) break; + *it = c.position; + } + value_offset_row_block_f.push_back(c.position); + } + } + } + if (num_cells == 1) { + value_offset_row_block_f.push_back(-1); + } + } + for (; row_block_id < num_row_blocks_; ++row_block_id) { + const auto& r = block_structure.rows[row_block_id]; + const int row_block_size = r.block.size; + const int num_cells = r.cells.size(); + // After num_row_blocks_e_ row-blocks, there should be no cells in E + // sub-matrix. Thus crs-compatibility of F sub-matrix breaks if there is + // more than one cell in the row-block + if (num_cells > 1 && row_block_size > 1) { + is_crs_compatible_ = false; + } + row_blocks.emplace_back(r.block); + first_cell_in_row_block.push_back(cells.size()); + + if (r.cells.empty()) { + value_offset_row_block_f.push_back(-1); + } else { + for (auto it = value_offset_row_block_f.rbegin(); + it != value_offset_row_block_f.rend(); + ++it) { + if (*it != -1) break; + *it = r.cells[0].position; + } + value_offset_row_block_f.push_back(r.cells[0].position); + } + for (const auto& c : r.cells) { + const int col_block_size = col_blocks[c.block_id].size; + const int cell_size = col_block_size * row_block_size; + cells.push_back(c); + DCHECK(c.block_id >= num_col_blocks_e); + if (f_values_offset == -1) { + num_nonzeros_ = c.position; + f_values_offset = c.position; + } + sequential_layout &= c.position == num_nonzeros_; + num_nonzeros_ += cell_size; + } + } + + if (f_values_offset == -1) { + f_values_offset = num_nonzeros_e_; + num_nonzeros_ = num_nonzeros_e_; + } + // Fill non-zero offsets for the last rows of F submatrix + for (auto it = value_offset_row_block_f.rbegin(); + it != value_offset_row_block_f.rend(); + ++it) { + if (*it != -1) break; + *it = num_nonzeros_; + } + value_offset_row_block_f.push_back(num_nonzeros_); + CHECK_EQ(num_nonzeros_e_, f_values_offset); + first_cell_in_row_block.push_back(cells.size()); +
num_cells_ = cells.size(); + + num_rows_ = Dimension(row_blocks); + num_cols_ = Dimension(col_blocks); + + CHECK(sequential_layout); + + if (VLOG_IS_ON(3)) { + const size_t first_cell_in_row_block_size = + first_cell_in_row_block.size() * sizeof(int); + const size_t cells_size = cells.size() * sizeof(Cell); + const size_t row_blocks_size = row_blocks.size() * sizeof(Block); + const size_t col_blocks_size = col_blocks.size() * sizeof(Block); + const size_t total_size = first_cell_in_row_block_size + cells_size + + col_blocks_size + row_blocks_size; + const double ratio = + (100. * total_size) / (num_nonzeros_ * (sizeof(int) + sizeof(double)) + + num_rows_ * sizeof(int)); + VLOG(3) << "\nCudaBlockSparseStructure:\n" + "\tRow block offsets: " + << first_cell_in_row_block_size + << " bytes\n" + "\tColumn blocks: " + << col_blocks_size + << " bytes\n" + "\tRow blocks: " + << row_blocks_size + << " bytes\n" + "\tCells: " + << cells_size << " bytes\n\tTotal: " << total_size + << " bytes of GPU memory (" << ratio << "% of CRS matrix size)"; + } + + first_cell_in_row_block_.CopyFromCpuVector(first_cell_in_row_block); + cells_.CopyFromCpuVector(cells); + row_blocks_.CopyFromCpuVector(row_blocks); + col_blocks_.CopyFromCpuVector(col_blocks); + if (num_col_blocks_e || num_row_blocks_e_) { + value_offset_row_block_f_.CopyFromCpuVector(value_offset_row_block_f); + } +} +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_block_structure.h b/extern/ceres/internal/ceres/cuda_block_structure.h new file mode 100644 index 00000000000..6da6fdd2a6e --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_structure.h @@ -0,0 +1,120 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. 
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_ +#define CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/block_structure.h" +#include "ceres/cuda_buffer.h" + +namespace ceres::internal { +class CudaBlockStructureTest; + +// This class stores a read-only block-sparse structure in gpu memory. 
+// Invariants are the same as those of CompressedRowBlockStructure. +// In order to simplify allocation and copying data to gpu, cells from all +// row-blocks are stored in a single array sequentially. Array +// first_cell_in_row_block of size num_row_blocks + 1 allows to identify range +// of cells corresponding to a row-block. Cells corresponding to i-th row-block +// are stored in sub-array cells[first_cell_in_row_block[i]; ... +// first_cell_in_row_block[i + 1] - 1], and their order is preserved. +class CERES_NO_EXPORT CudaBlockSparseStructure { + public: + // CompressedRowBlockStructure contains a vector of CompressedLists, with + // each CompressedList containing a vector of Cells. We precompute a flat + // array of cells on cpu and transfer it to the gpu. + CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure, + ContextImpl* context); + // In the case of partitioned matrices, number of non-zeros in E and layout of + // F are computed + CudaBlockSparseStructure(const CompressedRowBlockStructure& block_structure, + const int num_col_blocks_e, + ContextImpl* context); + + int num_rows() const { return num_rows_; } + int num_cols() const { return num_cols_; } + int num_cells() const { return num_cells_; } + int num_nonzeros() const { return num_nonzeros_; } + // When partitioned matrix constructor was used, returns number of non-zeros + // in E sub-matrix + int num_nonzeros_e() const { return num_nonzeros_e_; } + int num_row_blocks() const { return num_row_blocks_; } + int num_row_blocks_e() const { return num_row_blocks_e_; } + int num_col_blocks() const { return num_col_blocks_; } + + // Returns true if values from block-sparse matrix (F sub-matrix in + // partitioned case) can be copied to CRS matrix as-is.
This is possible if + // each row-block is stored in CRS order, i.e. one of the following holds: + // - Row-block consists of a single row, or + // - Row-block contains a single cell + bool IsCrsCompatible() const { return is_crs_compatible_; } + + // Device pointer to array of num_row_blocks + 1 indices of the first cell of + // row block + const int* first_cell_in_row_block() const { + return first_cell_in_row_block_.data(); + } + // Device pointer to array of num_row_blocks + 1 indices of the first value in + // this or subsequent row-blocks of submatrix F + const int* value_offset_row_block_f() const { + return value_offset_row_block_f_.data(); + } + // Device pointer to array of num_cells cells, sorted by row-block + const Cell* cells() const { return cells_.data(); } + // Device pointer to array of row blocks + const Block* row_blocks() const { return row_blocks_.data(); } + // Device pointer to array of column blocks + const Block* col_blocks() const { return col_blocks_.data(); } + + private: + int num_rows_; + int num_cols_; + int num_cells_; + int num_nonzeros_; + int num_nonzeros_e_; + int num_row_blocks_; + int num_row_blocks_e_; + int num_col_blocks_; + bool is_crs_compatible_; + CudaBuffer<int> first_cell_in_row_block_; + CudaBuffer<int> value_offset_row_block_f_; + CudaBuffer<Cell> cells_; + CudaBuffer<Block> row_blocks_; + CudaBuffer<Block> col_blocks_; + friend class CudaBlockStructureTest; +}; +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_BLOCK_STRUCTURE_H_ diff --git a/extern/ceres/internal/ceres/cuda_block_structure_test.cc b/extern/ceres/internal/ceres/cuda_block_structure_test.cc new file mode 100644 index 00000000000..daff431680b --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_block_structure_test.cc @@ -0,0 +1,144 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved.
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include +#include + +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/cuda_block_structure.h" + +namespace ceres::internal { + +class CudaBlockStructureTest : public ::testing::Test { + protected: + void SetUp() final { + std::string message; + CHECK(context_.InitCuda(&message)) + << "InitCuda() failed because: " << message; + + BlockSparseMatrix::RandomMatrixOptions options; + options.num_row_blocks = 1234; + options.min_row_block_size = 1; + options.max_row_block_size = 10; + options.num_col_blocks = 567; + options.min_col_block_size = 1; + options.max_col_block_size = 10; + options.block_density = 0.2; + std::mt19937 rng; + A_ = BlockSparseMatrix::CreateRandomMatrix(options, rng); + std::iota( + A_->mutable_values(), A_->mutable_values() + A_->num_nonzeros(), 1); + } + + std::vector GetCells(const CudaBlockSparseStructure& structure) { + const auto& cuda_buffer = structure.cells_; + std::vector cells(cuda_buffer.size()); + cuda_buffer.CopyToCpu(cells.data(), cells.size()); + return cells; + } + std::vector GetRowBlocks(const CudaBlockSparseStructure& structure) { + const auto& cuda_buffer = structure.row_blocks_; + std::vector blocks(cuda_buffer.size()); + cuda_buffer.CopyToCpu(blocks.data(), blocks.size()); + return blocks; + } + std::vector GetColBlocks(const CudaBlockSparseStructure& structure) { + const auto& cuda_buffer = structure.col_blocks_; + std::vector blocks(cuda_buffer.size()); + cuda_buffer.CopyToCpu(blocks.data(), blocks.size()); + return blocks; + } + std::vector GetRowBlockOffsets( + const CudaBlockSparseStructure& structure) { + const auto& cuda_buffer = structure.first_cell_in_row_block_; + std::vector first_cell_in_row_block(cuda_buffer.size()); + cuda_buffer.CopyToCpu(first_cell_in_row_block.data(), + first_cell_in_row_block.size()); + return first_cell_in_row_block; + } + + std::unique_ptr 
A_; + ContextImpl context_; +}; + +TEST_F(CudaBlockStructureTest, StructureIdentity) { + auto block_structure = A_->block_structure(); + const int num_row_blocks = block_structure->rows.size(); + const int num_col_blocks = block_structure->cols.size(); + + CudaBlockSparseStructure cuda_block_structure(*block_structure, &context_); + + ASSERT_EQ(cuda_block_structure.num_rows(), A_->num_rows()); + ASSERT_EQ(cuda_block_structure.num_cols(), A_->num_cols()); + ASSERT_EQ(cuda_block_structure.num_nonzeros(), A_->num_nonzeros()); + ASSERT_EQ(cuda_block_structure.num_row_blocks(), num_row_blocks); + ASSERT_EQ(cuda_block_structure.num_col_blocks(), num_col_blocks); + + std::vector blocks = GetColBlocks(cuda_block_structure); + ASSERT_EQ(blocks.size(), num_col_blocks); + for (int i = 0; i < num_col_blocks; ++i) { + EXPECT_EQ(block_structure->cols[i].position, blocks[i].position); + EXPECT_EQ(block_structure->cols[i].size, blocks[i].size); + } + + std::vector cells = GetCells(cuda_block_structure); + std::vector first_cell_in_row_block = + GetRowBlockOffsets(cuda_block_structure); + blocks = GetRowBlocks(cuda_block_structure); + + ASSERT_EQ(blocks.size(), num_row_blocks); + ASSERT_EQ(first_cell_in_row_block.size(), num_row_blocks + 1); + ASSERT_EQ(first_cell_in_row_block.back(), cells.size()); + + for (int i = 0; i < num_row_blocks; ++i) { + const int num_cells = block_structure->rows[i].cells.size(); + EXPECT_EQ(blocks[i].position, block_structure->rows[i].block.position); + EXPECT_EQ(blocks[i].size, block_structure->rows[i].block.size); + const int first_cell = first_cell_in_row_block[i]; + const int last_cell = first_cell_in_row_block[i + 1]; + ASSERT_EQ(last_cell - first_cell, num_cells); + for (int j = 0; j < num_cells; ++j) { + EXPECT_EQ(cells[first_cell + j].block_id, + block_structure->rows[i].cells[j].block_id); + EXPECT_EQ(cells[first_cell + j].position, + block_structure->rows[i].cells[j].position); + } + } +} + +} // namespace ceres::internal + +#endif // 
CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_buffer.h b/extern/ceres/internal/ceres/cuda_buffer.h index a1cf78420d5..40048fddbe5 100644 --- a/extern/ceres/internal/ceres/cuda_buffer.h +++ b/extern/ceres/internal/ceres/cuda_buffer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ #ifndef CERES_INTERNAL_CUDA_BUFFER_H_ #define CERES_INTERNAL_CUDA_BUFFER_H_ +#include "ceres/context_impl.h" #include "ceres/internal/config.h" #ifndef CERES_NO_CUDA @@ -40,17 +41,27 @@ #include "cuda_runtime.h" #include "glog/logging.h" +namespace ceres::internal { // An encapsulated buffer to maintain GPU memory, and handle transfers between // GPU and system memory. It is the responsibility of the user to ensure that // the appropriate GPU device is selected before each subroutine is called. This // is particularly important when using multiple GPU devices on different CPU // threads, since active Cuda devices are determined by the cuda runtime on a -// per-thread basis. Note that unless otherwise specified, all methods use the -// default stream, and are synchronous. +// per-thread basis. 
template class CudaBuffer { public: - CudaBuffer() = default; + explicit CudaBuffer(ContextImpl* context) : context_(context) {} + CudaBuffer(ContextImpl* context, int size) : context_(context) { + Reserve(size); + } + + CudaBuffer(CudaBuffer&& other) + : data_(other.data_), size_(other.size_), context_(other.context_) { + other.data_ = nullptr; + other.size_ = 0; + } + CudaBuffer(const CudaBuffer&) = delete; CudaBuffer& operator=(const CudaBuffer&) = delete; @@ -67,41 +78,95 @@ class CudaBuffer { if (data_ != nullptr) { CHECK_EQ(cudaFree(data_), cudaSuccess); } - CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess); + CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess) + << "Failed to allocate " << size * sizeof(T) + << " bytes of GPU memory"; size_ = size; } } - // Perform an asynchronous copy from CPU memory to GPU memory using the stream - // provided. - void CopyToGpuAsync(const T* data, const size_t size, cudaStream_t stream) { + // Perform an asynchronous copy from CPU memory to GPU memory managed by this + // CudaBuffer instance using the stream provided. + void CopyFromCpu(const T* data, const size_t size) { Reserve(size); - CHECK_EQ(cudaMemcpyAsync( - data_, data, size * sizeof(T), cudaMemcpyHostToDevice, stream), + CHECK_EQ(cudaMemcpyAsync(data_, + data, + size * sizeof(T), + cudaMemcpyHostToDevice, + context_->DefaultStream()), cudaSuccess); } - // Copy data from the GPU to CPU memory. This is necessarily synchronous since - // any potential GPU kernels that may be writing to the buffer must finish - // before the transfer happens. - void CopyToHost(T* data, const size_t size) { + // Perform an asynchronous copy from a vector in CPU memory to GPU memory + // managed by this CudaBuffer instance. 
+ void CopyFromCpuVector(const std::vector& data) { + Reserve(data.size()); + CHECK_EQ(cudaMemcpyAsync(data_, + data.data(), + data.size() * sizeof(T), + cudaMemcpyHostToDevice, + context_->DefaultStream()), + cudaSuccess); + } + + // Perform an asynchronous copy from another GPU memory array to the GPU + // memory managed by this CudaBuffer instance using the stream provided. + void CopyFromGPUArray(const T* data, const size_t size) { + Reserve(size); + CHECK_EQ(cudaMemcpyAsync(data_, + data, + size * sizeof(T), + cudaMemcpyDeviceToDevice, + context_->DefaultStream()), + cudaSuccess); + } + + // Copy data from the GPU memory managed by this CudaBuffer instance to CPU + // memory. It is the caller's responsibility to ensure that the CPU memory + // pointer is valid, i.e. it is not null, and that it points to memory of + // at least this->size() size. This method ensures all previously dispatched + // GPU operations on the specified stream have completed before copying the + // data to CPU memory. + void CopyToCpu(T* data, const size_t size) const { CHECK(data_ != nullptr); - CHECK_EQ(cudaMemcpy(data, data_, size * sizeof(T), cudaMemcpyDeviceToHost), + CHECK_EQ(cudaMemcpyAsync(data, + data_, + size * sizeof(T), + cudaMemcpyDeviceToHost, + context_->DefaultStream()), + cudaSuccess); + CHECK_EQ(cudaStreamSynchronize(context_->DefaultStream()), cudaSuccess); + } + + // Copy N items from another GPU memory array to the GPU memory managed by + // this CudaBuffer instance, growing this buffer's size if needed. This copy + // is asynchronous, and operates on the stream provided. 
+ void CopyNItemsFrom(int n, const CudaBuffer& other) { + Reserve(n); + CHECK(other.data_ != nullptr); + CHECK(data_ != nullptr); + CHECK_EQ(cudaMemcpyAsync(data_, + other.data_, + n * sizeof(T), // Only n items: size_ may exceed n after Reserve(). + cudaMemcpyDeviceToDevice, + context_->DefaultStream()), cudaSuccess); } - void CopyToGpu(const std::vector& data) { - CopyToGpu(data.data(), data.size()); - } - + // Return a pointer to the GPU memory managed by this CudaBuffer instance. T* data() { return data_; } + const T* data() const { return data_; } + // Return the number of items of type T that can fit in the GPU memory + // allocated so far by this CudaBuffer instance. size_t size() const { return size_; } private: T* data_ = nullptr; size_t size_ = 0; + ContextImpl* context_ = nullptr; }; +} // namespace ceres::internal #endif // CERES_NO_CUDA -#endif // CERES_INTERNAL_CUDA_BUFFER_H_ \ No newline at end of file +#endif // CERES_INTERNAL_CUDA_BUFFER_H_ diff --git a/extern/ceres/internal/ceres/cuda_dense_cholesky_test.cc b/extern/ceres/internal/ceres/cuda_dense_cholesky_test.cc new file mode 100644 index 00000000000..b74a75a9d0f --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_dense_cholesky_test.cc @@ -0,0 +1,332 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc.
nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include + +#include "ceres/dense_cholesky.h" +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres::internal { + +#ifndef CERES_NO_CUDA + +TEST(CUDADenseCholesky, InvalidOptionOnCreate) { + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + auto dense_cuda_solver = CUDADenseCholesky::Create(options); + EXPECT_EQ(dense_cuda_solver, nullptr); +} + +// Tests the CUDA Cholesky solver with a simple 4x4 matrix. 
+TEST(CUDADenseCholesky, Cholesky4x4Matrix) { + Eigen::Matrix4d A; + // clang-format off + A << 4, 12, -16, 0, + 12, 37, -43, 0, + -16, -43, 98, 0, + 0, 0, 0, 1; + // clang-format on + + Vector b = Eigen::Vector4d::Ones(); + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseCholesky::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + Eigen::Vector4d x = Eigen::Vector4d::Zero(); + ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + static const double kEpsilon = std::numeric_limits::epsilon() * 10; + const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0); + EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon); + EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon); + EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon); + EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon); +} + +TEST(CUDADenseCholesky, SingularMatrix) { + Eigen::Matrix3d A; + // clang-format off + A << 1, 0, 0, + 0, 1, 0, + 0, 0, 0; + // clang-format on + + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseCholesky::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string), + LinearSolverTerminationType::FAILURE); +} + +TEST(CUDADenseCholesky, NegativeMatrix) { + Eigen::Matrix3d A; + // clang-format off + A << 1, 0, 0, + 0, 1, 
0, + 0, 0, -1; + // clang-format on + + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseCholesky::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string), + LinearSolverTerminationType::FAILURE); +} + +TEST(CUDADenseCholesky, MustFactorizeBeforeSolve) { + const Eigen::Vector3d b = Eigen::Vector3d::Ones(); + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseCholesky::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ(dense_cuda_solver->Solve(b.data(), nullptr, &error_string), + LinearSolverTerminationType::FATAL_ERROR); +} + +TEST(CUDADenseCholesky, Randomized1600x1600Tests) { + const int kNumCols = 1600; + using LhsType = Eigen::Matrix; + using RhsType = Eigen::Matrix; + using SolutionType = Eigen::Matrix; + + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = ceres::CUDA; + std::unique_ptr dense_cholesky = + CUDADenseCholesky::Create(options); + + const int kNumTrials = 20; + for (int i = 0; i < kNumTrials; ++i) { + LhsType lhs = LhsType::Random(kNumCols, kNumCols); + lhs = lhs.transpose() * lhs; + lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols); + SolutionType x_expected = SolutionType::Random(kNumCols); + RhsType rhs = lhs * x_expected; + SolutionType x_computed = SolutionType::Zero(kNumCols); + // Sanity check the random matrix sizes. 
+ EXPECT_EQ(lhs.rows(), kNumCols); + EXPECT_EQ(lhs.cols(), kNumCols); + EXPECT_EQ(rhs.rows(), kNumCols); + EXPECT_EQ(rhs.cols(), 1); + EXPECT_EQ(x_expected.rows(), kNumCols); + EXPECT_EQ(x_expected.cols(), 1); + EXPECT_EQ(x_computed.rows(), kNumCols); + EXPECT_EQ(x_computed.cols(), 1); + LinearSolver::Summary summary; + summary.termination_type = dense_cholesky->FactorAndSolve( + kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message); + ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS); + static const double kEpsilon = std::numeric_limits::epsilon() * 3e5; + ASSERT_NEAR( + (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon); + } +} + +TEST(CUDADenseCholeskyMixedPrecision, InvalidOptionsOnCreate) { + { + // Did not ask for CUDA, and did not ask for mixed precision. + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + auto solver = CUDADenseCholeskyMixedPrecision::Create(options); + ASSERT_EQ(solver, nullptr); + } + { + // Asked for CUDA, but did not ask for mixed precision. + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = ceres::CUDA; + auto solver = CUDADenseCholeskyMixedPrecision::Create(options); + ASSERT_EQ(solver, nullptr); + } +} + +// Tests the CUDA Cholesky solver with a simple 4x4 matrix. 
+TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix1Step) { + Eigen::Matrix4d A; + // clang-format off + // A common test Cholesky decomposition test matrix, see : + // https://en.wikipedia.org/w/index.php?title=Cholesky_decomposition&oldid=1080607368#Example + A << 4, 12, -16, 0, + 12, 37, -43, 0, + -16, -43, 98, 0, + 0, 0, 0, 1; + // clang-format on + + const Eigen::Vector4d b = Eigen::Vector4d::Ones(); + LinearSolver::Options options; + options.max_num_refinement_iterations = 0; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + options.use_mixed_precision_solves = true; + auto solver = CUDADenseCholeskyMixedPrecision::Create(options); + ASSERT_NE(solver, nullptr); + std::string error_string; + ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + Eigen::Vector4d x = Eigen::Vector4d::Zero(); + ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + // A single step of the mixed precision solver will be equivalent to solving + // in low precision (FP32). Hence the tolerance is defined w.r.t. FP32 epsilon + // instead of FP64 epsilon. + static const double kEpsilon = std::numeric_limits::epsilon() * 10; + const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0); + EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon); + EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon); + EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon); + EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon); +} + +// Tests the CUDA Cholesky solver with a simple 4x4 matrix. 
+TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix4Steps) { + Eigen::Matrix4d A; + // clang-format off + A << 4, 12, -16, 0, + 12, 37, -43, 0, + -16, -43, 98, 0, + 0, 0, 0, 1; + // clang-format on + + const Eigen::Vector4d b = Eigen::Vector4d::Ones(); + LinearSolver::Options options; + options.max_num_refinement_iterations = 3; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + options.use_mixed_precision_solves = true; + auto solver = CUDADenseCholeskyMixedPrecision::Create(options); + ASSERT_NE(solver, nullptr); + std::string error_string; + ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + Eigen::Vector4d x = Eigen::Vector4d::Zero(); + ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + // The error does not reduce beyond four iterations, and stagnates at this + // level of precision. 
+ static const double kEpsilon = std::numeric_limits::epsilon() * 100; + const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0); + EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon); + EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon); + EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon); + EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon); +} + +TEST(CUDADenseCholeskyMixedPrecision, Randomized1600x1600Tests) { + const int kNumCols = 1600; + using LhsType = Eigen::Matrix; + using RhsType = Eigen::Matrix; + using SolutionType = Eigen::Matrix; + + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = ceres::CUDA; + options.use_mixed_precision_solves = true; + options.max_num_refinement_iterations = 20; + std::unique_ptr dense_cholesky = + CUDADenseCholeskyMixedPrecision::Create(options); + + const int kNumTrials = 20; + for (int i = 0; i < kNumTrials; ++i) { + LhsType lhs = LhsType::Random(kNumCols, kNumCols); + lhs = lhs.transpose() * lhs; + lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols); + SolutionType x_expected = SolutionType::Random(kNumCols); + RhsType rhs = lhs * x_expected; + SolutionType x_computed = SolutionType::Zero(kNumCols); + // Sanity check the random matrix sizes. 
+ EXPECT_EQ(lhs.rows(), kNumCols); + EXPECT_EQ(lhs.cols(), kNumCols); + EXPECT_EQ(rhs.rows(), kNumCols); + EXPECT_EQ(rhs.cols(), 1); + EXPECT_EQ(x_expected.rows(), kNumCols); + EXPECT_EQ(x_expected.cols(), 1); + EXPECT_EQ(x_computed.rows(), kNumCols); + EXPECT_EQ(x_computed.cols(), 1); + LinearSolver::Summary summary; + summary.termination_type = dense_cholesky->FactorAndSolve( + kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message); + ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS); + static const double kEpsilon = std::numeric_limits::epsilon() * 1e6; + ASSERT_NEAR( + (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon); + } +} + +#endif // CERES_NO_CUDA + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/cuda_dense_qr_test.cc b/extern/ceres/internal/ceres/cuda_dense_qr_test.cc new file mode 100644 index 00000000000..b1f25e2b6d4 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_dense_qr_test.cc @@ -0,0 +1,177 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include + +#include "ceres/dense_qr.h" +#include "ceres/internal/eigen.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres::internal { + +#ifndef CERES_NO_CUDA + +TEST(CUDADenseQR, InvalidOptionOnCreate) { + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + auto dense_cuda_solver = CUDADenseQR::Create(options); + EXPECT_EQ(dense_cuda_solver, nullptr); +} + +// Tests the CUDA QR solver with a simple 4x4 matrix. 
+TEST(CUDADenseQR, QR4x4Matrix) { + Eigen::Matrix4d A; + // clang-format off + A << 4, 12, -16, 0, + 12, 37, -43, 0, + -16, -43, 98, 0, + 0, 0, 0, 1; + // clang-format on + const Eigen::Vector4d b = Eigen::Vector4d::Ones(); + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseQR::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ( + dense_cuda_solver->Factorize(A.rows(), A.cols(), A.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + Eigen::Vector4d x = Eigen::Vector4d::Zero(); + ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + // Empirically observed accuracy of cuSolverDN's QR solver. + const double kEpsilon = std::numeric_limits::epsilon() * 1500; + const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0); + EXPECT_NEAR((x - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon); +} + +// Tests the CUDA QR solver with a simple 4x2 matrix. 
+TEST(CUDADenseQR, QR4x2Matrix) { + Eigen::Matrix A; + // clang-format off + A << 4, 12, + 12, 37, + -16, -43, + 0, 0; + // clang-format on + + const std::vector b(4, 1.0); + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseQR::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ( + dense_cuda_solver->Factorize(A.rows(), A.cols(), A.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + std::vector x(2, 0); + ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string), + LinearSolverTerminationType::SUCCESS); + // Empirically observed accuracy of cuSolverDN's QR solver. + const double kEpsilon = std::numeric_limits::epsilon() * 10; + // Solution values computed with Octave. + const Eigen::Vector2d x_expected(-1.143410852713177, 0.4031007751937981); + EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon); + EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon); +} + +TEST(CUDADenseQR, MustFactorizeBeforeSolve) { + const Eigen::Vector3d b = Eigen::Vector3d::Ones(); + LinearSolver::Options options; + ContextImpl context; + options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = CUDA; + auto dense_cuda_solver = CUDADenseQR::Create(options); + ASSERT_NE(dense_cuda_solver, nullptr); + std::string error_string; + ASSERT_EQ(dense_cuda_solver->Solve(b.data(), nullptr, &error_string), + LinearSolverTerminationType::FATAL_ERROR); +} + +TEST(CUDADenseQR, Randomized1600x100Tests) { + const int kNumRows = 1600; + const int kNumCols = 100; + using LhsType = Eigen::Matrix; + using RhsType = Eigen::Matrix; + using SolutionType = Eigen::Matrix; + + LinearSolver::Options options; + ContextImpl context; + 
options.context = &context; + std::string error; + EXPECT_TRUE(context.InitCuda(&error)) << error; + options.dense_linear_algebra_library_type = ceres::CUDA; + std::unique_ptr dense_qr = CUDADenseQR::Create(options); + + const int kNumTrials = 20; + for (int i = 0; i < kNumTrials; ++i) { + LhsType lhs = LhsType::Random(kNumRows, kNumCols); + SolutionType x_expected = SolutionType::Random(kNumCols); + RhsType rhs = lhs * x_expected; + SolutionType x_computed = SolutionType::Zero(kNumCols); + // Sanity check the random matrix sizes. + EXPECT_EQ(lhs.rows(), kNumRows); + EXPECT_EQ(lhs.cols(), kNumCols); + EXPECT_EQ(rhs.rows(), kNumRows); + EXPECT_EQ(rhs.cols(), 1); + EXPECT_EQ(x_expected.rows(), kNumCols); + EXPECT_EQ(x_expected.cols(), 1); + EXPECT_EQ(x_computed.rows(), kNumCols); + EXPECT_EQ(x_computed.cols(), 1); + LinearSolver::Summary summary; + summary.termination_type = dense_qr->FactorAndSolve(kNumRows, + kNumCols, + lhs.data(), + rhs.data(), + x_computed.data(), + &summary.message); + ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS); + ASSERT_NEAR((x_computed - x_expected).norm() / x_expected.norm(), + 0.0, + std::numeric_limits::epsilon() * 400); + } +} +#endif // CERES_NO_CUDA + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc b/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc new file mode 100644 index 00000000000..b9ca4cd9823 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.cu.cc @@ -0,0 +1,477 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_kernels_bsm_to_crs.h" + +#include +#include +#include + +#include "ceres/block_structure.h" +#include "ceres/cuda_kernels_utils.h" + +namespace ceres { +namespace internal { + +namespace { +inline auto ThrustCudaStreamExecutionPolicy(cudaStream_t stream) { + // par_nosync execution policy was added in Thrust 1.16 + // https://github.com/NVIDIA/thrust/blob/main/CHANGELOG.md#thrust-1160 +#if THRUST_VERSION < 101700 + return thrust::cuda::par.on(stream); +#else + return thrust::cuda::par_nosync.on(stream); +#endif +} + +void* CudaMalloc(size_t size, + cudaStream_t stream, + bool memory_pools_supported) { + void* data = nullptr; + // Stream-ordered allocation API is available since CUDA 11.2, but might + // not be implemented by a particular device +#if CUDART_VERSION < 11020 +#warning \ + "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+" + cudaMalloc(&data, size); +#else + if (memory_pools_supported) { + cudaMallocAsync(&data, size, stream); + } else { + cudaMalloc(&data, size); + } +#endif + return data; +} + +void CudaFree(void* data, cudaStream_t stream, bool memory_pools_supported) { + // Stream-ordered allocation API is available since CUDA 11.2, but might + // not be implemented by a particular device +#if CUDART_VERSION < 11020 +#warning \ + "Stream-ordered allocations are unavailable, consider updating CUDA toolkit to version 11.2+" + cudaSuccess, cudaFree(data); +#else + if (memory_pools_supported) { + cudaFreeAsync(data, stream); + } else { + cudaFree(data); + } +#endif +} +template +T* CudaAllocate(size_t num_elements, + cudaStream_t stream, + bool memory_pools_supported) { + T* data = static_cast( + CudaMalloc(num_elements * sizeof(T), stream, memory_pools_supported)); + return data; +} +} // namespace + +// Fill row block id and nnz for each row using block-sparse structure +// represented by a set of flat arrays. 
+// Inputs: +// - num_row_blocks: number of row-blocks in block-sparse structure +// - first_cell_in_row_block: index of the first cell of the row-block; size: +// num_row_blocks + 1 +// - cells: cells of block-sparse structure as a contiguous array +// - row_blocks: row blocks of block-sparse structure stored sequentially +// - col_blocks: column blocks of block-sparse structure stored sequentially +// Outputs: +// - rows: rows[i + 1] will contain number of non-zeros in i-th row, rows[0] +// will be set to 0; rows are filled with a shift by one element in order +// to obtain row-index array of CRS matrix with an inclusive scan afterwards +// - row_block_ids: row_block_ids[i] will be set to index of row-block that +// contains i-th row. +// Computation is performed row-block-wise +template +__global__ void RowBlockIdAndNNZ( + const int num_row_blocks, + const int num_col_blocks_e, + const int num_row_blocks_e, + const int* __restrict__ first_cell_in_row_block, + const Cell* __restrict__ cells, + const Block* __restrict__ row_blocks, + const Block* __restrict__ col_blocks, + int* __restrict__ rows_e, + int* __restrict__ rows_f, + int* __restrict__ row_block_ids) { + const int row_block_id = blockIdx.x * blockDim.x + threadIdx.x; + if (row_block_id > num_row_blocks) { + // No synchronization is performed in this kernel, thus it is safe to return + return; + } + if (row_block_id == num_row_blocks) { + // one extra thread sets the first element + rows_f[0] = 0; + if constexpr (partitioned) { + rows_e[0] = 0; + } + return; + } + const auto& row_block = row_blocks[row_block_id]; + auto first_cell = cells + first_cell_in_row_block[row_block_id]; + const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1]; + int row_nnz_e = 0; + if (partitioned && row_block_id < num_row_blocks_e) { + // First cell is a cell from E + row_nnz_e = col_blocks[first_cell->block_id].size; + ++first_cell; + } + int row_nnz_f = 0; + for (auto cell = first_cell; cell < last_cell; 
++cell) { + row_nnz_f += col_blocks[cell->block_id].size; + } + const int first_row = row_block.position; + const int last_row = first_row + row_block.size; + for (int i = first_row; i < last_row; ++i) { + if constexpr (partitioned) { + rows_e[i + 1] = row_nnz_e; + } + rows_f[i + 1] = row_nnz_f; + row_block_ids[i] = row_block_id; + } +} + +// Row-wise creation of CRS structure +// Inputs: +// - num_rows: number of rows in matrix +// - first_cell_in_row_block: index of the first cell of the row-block; size: +// num_row_blocks + 1 +// - cells: cells of block-sparse structure as a contiguous array +// - row_blocks: row blocks of block-sparse structure stored sequentially +// - col_blocks: column blocks of block-sparse structure stored sequentially +// - row_block_ids: index of row-block that corresponds to row +// - rows: row-index array of CRS structure +// Outputs: +// - cols: column-index array of CRS structure +// Computation is performed row-wise +template +__global__ void ComputeColumns(const int num_rows, + const int num_row_blocks_e, + const int num_col_blocks_e, + const int* __restrict__ first_cell_in_row_block, + const Cell* __restrict__ cells, + const Block* __restrict__ row_blocks, + const Block* __restrict__ col_blocks, + const int* __restrict__ row_block_ids, + const int* __restrict__ rows_e, + int* __restrict__ cols_e, + const int* __restrict__ rows_f, + int* __restrict__ cols_f) { + const int row = blockIdx.x * blockDim.x + threadIdx.x; + if (row >= num_rows) { + // No synchronization is performed in this kernel, thus it is safe to return + return; + } + const int row_block_id = row_block_ids[row]; + // position in crs matrix + auto first_cell = cells + first_cell_in_row_block[row_block_id]; + const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1]; + const int num_cols_e = col_blocks[num_col_blocks_e].position; + // For each cell of row-block only current row is being filled + if (partitioned && row_block_id < num_row_blocks_e) { + // 
The first cell is a cell from E + const auto& col_block = col_blocks[first_cell->block_id]; + const int col_block_size = col_block.size; + int column_idx = col_block.position; + int crs_position_e = rows_e[row]; + // Column indices for each element of row_in_block row of current cell + for (int i = 0; i < col_block_size; ++i, ++crs_position_e) { + cols_e[crs_position_e] = column_idx++; + } + ++first_cell; + } + int crs_position_f = rows_f[row]; + for (auto cell = first_cell; cell < last_cell; ++cell) { + const auto& col_block = col_blocks[cell->block_id]; + const int col_block_size = col_block.size; + int column_idx = col_block.position - num_cols_e; + // Column indices for each element of row_in_block row of current cell + for (int i = 0; i < col_block_size; ++i, ++crs_position_f) { + cols_f[crs_position_f] = column_idx++; + } + } +} + +void FillCRSStructure(const int num_row_blocks, + const int num_rows, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + int* rows, + int* cols, + cudaStream_t stream, + bool memory_pools_supported) { + // Set number of non-zeros per row in rows array and row to row-block map in + // row_block_ids array + int* row_block_ids = + CudaAllocate(num_rows, stream, memory_pools_supported); + const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1); + RowBlockIdAndNNZ<<>>( + num_row_blocks, + 0, + 0, + first_cell_in_row_block, + cells, + row_blocks, + col_blocks, + nullptr, + rows, + row_block_ids); + // Finalize row-index array of CRS structure by computing prefix sum + thrust::inclusive_scan( + ThrustCudaStreamExecutionPolicy(stream), rows, rows + num_rows + 1, rows); + + // Fill cols array of CRS structure + const int num_blocks_rowwise = NumBlocksInGrid(num_rows); + ComputeColumns<<>>( + num_rows, + 0, + 0, + first_cell_in_row_block, + cells, + row_blocks, + col_blocks, + row_block_ids, + nullptr, + nullptr, + rows, + cols); + CudaFree(row_block_ids, stream, 
memory_pools_supported); +} + +void FillCRSStructurePartitioned(const int num_row_blocks, + const int num_rows, + const int num_row_blocks_e, + const int num_col_blocks_e, + const int num_nonzeros_e, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + int* rows_e, + int* cols_e, + int* rows_f, + int* cols_f, + cudaStream_t stream, + bool memory_pools_supported) { + // Set number of non-zeros per row in rows array and row to row-block map in + // row_block_ids array + int* row_block_ids = + CudaAllocate(num_rows, stream, memory_pools_supported); + const int num_blocks_blockwise = NumBlocksInGrid(num_row_blocks + 1); + RowBlockIdAndNNZ<<>>( + num_row_blocks, + num_col_blocks_e, + num_row_blocks_e, + first_cell_in_row_block, + cells, + row_blocks, + col_blocks, + rows_e, + rows_f, + row_block_ids); + // Finalize row-index array of CRS structure by computing prefix sum + thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream), + rows_e, + rows_e + num_rows + 1, + rows_e); + thrust::inclusive_scan(ThrustCudaStreamExecutionPolicy(stream), + rows_f, + rows_f + num_rows + 1, + rows_f); + + // Fill cols array of CRS structure + const int num_blocks_rowwise = NumBlocksInGrid(num_rows); + ComputeColumns<<>>( + num_rows, + num_row_blocks_e, + num_col_blocks_e, + first_cell_in_row_block, + cells, + row_blocks, + col_blocks, + row_block_ids, + rows_e, + cols_e, + rows_f, + cols_f); + CudaFree(row_block_ids, stream, memory_pools_supported); +} + +template +__device__ int PartitionPoint(const T* data, + int first, + int last, + Predicate&& predicate) { + if (!predicate(data[first])) { + return first; + } + while (last - first > 1) { + const auto midpoint = first + (last - first) / 2; + if (predicate(data[midpoint])) { + first = midpoint; + } else { + last = midpoint; + } + } + return last; +} + +// Element-wise reordering of block-sparse values +// - first_cell_in_row_block - position of the first cell of 
row-block +// - block_sparse_values - segment of block-sparse values starting from +// block_sparse_offset, containing num_values +template +__global__ void PermuteToCrsKernel( + const int block_sparse_offset, + const int num_values, + const int num_row_blocks, + const int num_row_blocks_e, + const int* __restrict__ first_cell_in_row_block, + const int* __restrict__ value_offset_row_block_f, + const Cell* __restrict__ cells, + const Block* __restrict__ row_blocks, + const Block* __restrict__ col_blocks, + const int* __restrict__ crs_rows, + const double* __restrict__ block_sparse_values, + double* __restrict__ crs_values) { + const int value_id = blockIdx.x * blockDim.x + threadIdx.x; + if (value_id >= num_values) { + return; + } + const int block_sparse_value_id = value_id + block_sparse_offset; + // Find the corresponding row-block with a binary search + const int row_block_id = + (partitioned + ? PartitionPoint(value_offset_row_block_f, + 0, + num_row_blocks, + [block_sparse_value_id] __device__( + const int row_block_offset) { + return row_block_offset <= block_sparse_value_id; + }) + : PartitionPoint(first_cell_in_row_block, + 0, + num_row_blocks, + [cells, block_sparse_value_id] __device__( + const int row_block_offset) { + return cells[row_block_offset].position <= + block_sparse_value_id; + })) - + 1; + // Find cell and calculate offset within the row with a linear scan + const auto& row_block = row_blocks[row_block_id]; + auto first_cell = cells + first_cell_in_row_block[row_block_id]; + const auto last_cell = cells + first_cell_in_row_block[row_block_id + 1]; + const int row_block_size = row_block.size; + int num_cols_before = 0; + if (partitioned && row_block_id < num_row_blocks_e) { + ++first_cell; + } + for (const Cell* cell = first_cell; cell < last_cell; ++cell) { + const auto& col_block = col_blocks[cell->block_id]; + const int col_block_size = col_block.size; + const int cell_size = row_block_size * col_block_size; + if (cell->position + cell_size 
> block_sparse_value_id) { + const int pos_in_cell = block_sparse_value_id - cell->position; + const int row_in_cell = pos_in_cell / col_block_size; + const int col_in_cell = pos_in_cell % col_block_size; + const int row = row_in_cell + row_block.position; + crs_values[crs_rows[row] + num_cols_before + col_in_cell] = + block_sparse_values[value_id]; + break; + } + num_cols_before += col_block_size; + } +} + +void PermuteToCRS(const int block_sparse_offset, + const int num_values, + const int num_row_blocks, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + const int* crs_rows, + const double* block_sparse_values, + double* crs_values, + cudaStream_t stream) { + const int num_blocks_valuewise = NumBlocksInGrid(num_values); + PermuteToCrsKernel + <<>>( + block_sparse_offset, + num_values, + num_row_blocks, + 0, + first_cell_in_row_block, + nullptr, + cells, + row_blocks, + col_blocks, + crs_rows, + block_sparse_values, + crs_values); +} + +void PermuteToCRSPartitionedF(const int block_sparse_offset, + const int num_values, + const int num_row_blocks, + const int num_row_blocks_e, + const int* first_cell_in_row_block, + const int* value_offset_row_block_f, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + const int* crs_rows, + const double* block_sparse_values, + double* crs_values, + cudaStream_t stream) { + const int num_blocks_valuewise = NumBlocksInGrid(num_values); + PermuteToCrsKernel<<>>( + block_sparse_offset, + num_values, + num_row_blocks, + num_row_blocks_e, + first_cell_in_row_block, + value_offset_row_block_f, + cells, + row_blocks, + col_blocks, + crs_rows, + block_sparse_values, + crs_values); +} + +} // namespace internal +} // namespace ceres diff --git a/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.h b/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.h new file mode 100644 index 00000000000..27f4a252191 --- /dev/null +++ 
b/extern/ceres/internal/ceres/cuda_kernels_bsm_to_crs.h @@ -0,0 +1,113 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_ +#define CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include "cuda_runtime.h" + +namespace ceres { +namespace internal { +struct Block; +struct Cell; + +// Compute structure of CRS matrix using block-sparse structure. +// Arrays corresponding to CRS matrix are to be allocated by caller +void FillCRSStructure(const int num_row_blocks, + const int num_rows, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + int* rows, + int* cols, + cudaStream_t stream, + bool memory_pools_supported); + +// Compute structure of partitioned CRS matrix using block-sparse structure. +// Arrays corresponding to CRS matrices are to be allocated by caller +void FillCRSStructurePartitioned(const int num_row_blocks, + const int num_rows, + const int num_row_blocks_e, + const int num_col_blocks_e, + const int num_nonzeros_e, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + int* rows_e, + int* cols_e, + int* rows_f, + int* cols_f, + cudaStream_t stream, + bool memory_pools_supported); + +// Permute segment of values from block-sparse matrix with sequential layout to +// CRS order. Segment starts at block_sparse_offset and has length of num_values +void PermuteToCRS(const int block_sparse_offset, + const int num_values, + const int num_row_blocks, + const int* first_cell_in_row_block, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + const int* crs_rows, + const double* block_sparse_values, + double* crs_values, + cudaStream_t stream); + +// Permute segment of values from F sub-matrix of block-sparse partitioned +// matrix with sequential layout to CRS order. 
Segment starts at +// block_sparse_offset (including the offset induced by values of E submatrix) +// and has length of num_values +void PermuteToCRSPartitionedF(const int block_sparse_offset, + const int num_values, + const int num_row_blocks, + const int num_row_blocks_e, + const int* first_cell_in_row_block, + const int* value_offset_row_block_f, + const Cell* cells, + const Block* row_blocks, + const Block* col_blocks, + const int* crs_rows, + const double* block_sparse_values, + double* crs_values, + cudaStream_t stream); + +} // namespace internal +} // namespace ceres + +#endif // CERES_NO_CUDA + +#endif // CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_ diff --git a/extern/ceres/internal/ceres/parallel_for_nothreads.cc b/extern/ceres/internal/ceres/cuda_kernels_utils.h similarity index 58% rename from extern/ceres/internal/ceres/parallel_for_nothreads.cc rename to extern/ceres/internal/ceres/cuda_kernels_utils.h index 1c1871662c8..4a17bac7fef 100644 --- a/extern/ceres/internal/ceres/parallel_for_nothreads.cc +++ b/extern/ceres/internal/ceres/cuda_kernels_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -26,53 +26,31 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // -// Author: alexs.mac@gmail.com (Alex Stewart) +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) -// This include must come before any #ifndef check on Ceres compile options. 
-#include "ceres/internal/config.h" - -#ifdef CERES_NO_THREADS - -#include "ceres/parallel_for.h" -#include "glog/logging.h" +#ifndef CERES_INTERNAL_CUDA_KERNELS_UTILS_H_ +#define CERES_INTERNAL_CUDA_KERNELS_UTILS_H_ namespace ceres { namespace internal { -int MaxNumThreadsAvailable() { return 1; } +// Parallel execution on CUDA device requires splitting job into blocks of a +// fixed size. We use block-size of kCudaBlockSize for all kernels that do not +// require any specific block size. As the CUDA Toolkit documentation says, +// "although arbitrary in this case, is a common choice". This is determined by +// the warp size, max block size, and multiprocessor sizes of recent GPUs. For +// complex kernels with significant register usage and unusual memory patterns, +// the occupancy calculator API might provide better performance. See "Occupancy +// Calculator" under the CUDA toolkit documentation. +constexpr int kCudaBlockSize = 256; -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK_GT(num_threads, 0); - CHECK(context != nullptr); - if (end <= start) { - return; - } - for (int i = start; i < end; ++i) { - function(i); - } +// Compute number of blocks of kCudaBlockSize that span over 1-d grid with +// dimension size. Note that 1-d grid dimension is limited by 2^31-1 in CUDA, +// thus a signed int is used as an argument. 
+inline int NumBlocksInGrid(int size) { + return (size + kCudaBlockSize - 1) / kCudaBlockSize; } - -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK_GT(num_threads, 0); - CHECK(context != nullptr); - if (end <= start) { - return; - } - const int thread_id = 0; - for (int i = start; i < end; ++i) { - function(thread_id, i); - } -} - } // namespace internal } // namespace ceres -#endif // CERES_NO_THREADS +#endif // CERES_INTERNAL_CUDA_KERNELS_UTILS_H_ diff --git a/extern/ceres/internal/ceres/cuda_kernels_vector_ops.cu.cc b/extern/ceres/internal/ceres/cuda_kernels_vector_ops.cu.cc new file mode 100644 index 00000000000..3199ca6e025 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_kernels_vector_ops.cu.cc @@ -0,0 +1,123 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/cuda_kernels_vector_ops.h" + +#include + +#include "ceres/cuda_kernels_utils.h" + +namespace ceres { +namespace internal { + +template +__global__ void TypeConversionKernel(const SrcType* __restrict__ input, + DstType* __restrict__ output, + const int size) { + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < size) { + output[i] = static_cast(input[i]); + } +} + +void CudaFP64ToFP32(const double* input, + float* output, + const int size, + cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + TypeConversionKernel + <<>>(input, output, size); +} + +void CudaFP32ToFP64(const float* input, + double* output, + const int size, + cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + TypeConversionKernel + <<>>(input, output, size); +} + +template +__global__ void SetZeroKernel(T* __restrict__ output, const int size) { + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < size) { + output[i] = T(0.0); + } +} + +void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + SetZeroKernel<<>>(output, size); +} + +void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + SetZeroKernel + <<>>(output, size); +} + +template +__global__ void XPlusEqualsYKernel(DstType* __restrict__ x, 
+ const SrcType* __restrict__ y, + const int size) { + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < size) { + x[i] = x[i] + DstType(y[i]); + } +} + +void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + XPlusEqualsYKernel + <<>>(x, y, size); +} + +__global__ void CudaDtDxpyKernel(double* __restrict__ y, + const double* D, + const double* __restrict__ x, + const int size) { + const int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < size) { + y[i] = y[i] + D[i] * D[i] * x[i]; + } +} + +void CudaDtDxpy(double* y, + const double* D, + const double* x, + const int size, + cudaStream_t stream) { + const int num_blocks = NumBlocksInGrid(size); + CudaDtDxpyKernel<<>>(y, D, x, size); +} + +} // namespace internal +} // namespace ceres diff --git a/extern/ceres/internal/ceres/cuda_kernels_vector_ops.h b/extern/ceres/internal/ceres/cuda_kernels_vector_ops.h new file mode 100644 index 00000000000..9905657b324 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_kernels_vector_ops.h @@ -0,0 +1,83 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#ifndef CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_ +#define CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include "cuda_runtime.h" + +namespace ceres { +namespace internal { +class Block; +class Cell; + +// Convert an array of double (FP64) values to float (FP32). Both arrays must +// already be on GPU memory. +void CudaFP64ToFP32(const double* input, + float* output, + const int size, + cudaStream_t stream); + +// Convert an array of float (FP32) values to double (FP64). Both arrays must +// already be on GPU memory. +void CudaFP32ToFP64(const float* input, + double* output, + const int size, + cudaStream_t stream); + +// Set all elements of the array to the FP32 value 0. The array must be in GPU +// memory. +void CudaSetZeroFP32(float* output, const int size, cudaStream_t stream); + +// Set all elements of the array to the FP64 value 0. The array must be in GPU +// memory. +void CudaSetZeroFP64(double* output, const int size, cudaStream_t stream); + +// Compute x = x + double(y). Input array is float (FP32), output array is +// double (FP64). 
Both arrays must already be on GPU memory. +void CudaDsxpy(double* x, float* y, const int size, cudaStream_t stream); + +// Compute y[i] = y[i] + d[i]^2 x[i]. All arrays must already be on GPU memory. +void CudaDtDxpy(double* y, + const double* D, + const double* x, + const int size, + cudaStream_t stream); + +} // namespace internal +} // namespace ceres + +#endif // CERES_NO_CUDA + +#endif // CERES_INTERNAL_CUDA_KERNELS_VECTOR_OPS_H_ diff --git a/extern/ceres/internal/ceres/cuda_kernels_vector_ops_test.cc b/extern/ceres/internal/ceres/cuda_kernels_vector_ops_test.cc new file mode 100644 index 00000000000..e6116f7086f --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_kernels_vector_ops_test.cc @@ -0,0 +1,198 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/cuda_kernels_vector_ops.h" + +#include + +#include +#include +#include + +#include "ceres/context_impl.h" +#include "ceres/cuda_buffer.h" +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres { +namespace internal { + +#ifndef CERES_NO_CUDA + +TEST(CudaFP64ToFP32, SimpleConversions) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + CudaBuffer fp64_gpu(&context); + fp64_gpu.CopyFromCpuVector(fp64_cpu); + CudaBuffer fp32_gpu(&context); + fp32_gpu.Reserve(fp64_cpu.size()); + CudaFP64ToFP32(fp64_gpu.data(), + fp32_gpu.data(), + fp64_cpu.size(), + context.DefaultStream()); + std::vector fp32_cpu(fp64_cpu.size()); + fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size()); + for (int i = 0; i < fp32_cpu.size(); ++i) { + EXPECT_EQ(fp32_cpu[i], static_cast(fp64_cpu[i])); + } +} + +TEST(CudaFP64ToFP32, NumericallyExtremeValues) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp64_cpu = { + DBL_MIN, 10.0 * DBL_MIN, DBL_MAX, 0.1 * DBL_MAX}; + // First just make sure that the compiler has represented these values + // accurately as fp64. 
+ EXPECT_GT(fp64_cpu[0], 0.0); + EXPECT_GT(fp64_cpu[1], 0.0); + EXPECT_TRUE(std::isfinite(fp64_cpu[2])); + EXPECT_TRUE(std::isfinite(fp64_cpu[3])); + CudaBuffer fp64_gpu(&context); + fp64_gpu.CopyFromCpuVector(fp64_cpu); + CudaBuffer fp32_gpu(&context); + fp32_gpu.Reserve(fp64_cpu.size()); + CudaFP64ToFP32(fp64_gpu.data(), + fp32_gpu.data(), + fp64_cpu.size(), + context.DefaultStream()); + std::vector fp32_cpu(fp64_cpu.size()); + fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size()); + EXPECT_EQ(fp32_cpu[0], 0.0f); + EXPECT_EQ(fp32_cpu[1], 0.0f); + EXPECT_EQ(fp32_cpu[2], std::numeric_limits::infinity()); + EXPECT_EQ(fp32_cpu[3], std::numeric_limits::infinity()); +} + +TEST(CudaFP32ToFP64, SimpleConversions) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + CudaBuffer fp32_gpu(&context); + fp32_gpu.CopyFromCpuVector(fp32_cpu); + CudaBuffer fp64_gpu(&context); + fp64_gpu.Reserve(fp32_cpu.size()); + CudaFP32ToFP64(fp32_gpu.data(), + fp64_gpu.data(), + fp32_cpu.size(), + context.DefaultStream()); + std::vector fp64_cpu(fp32_cpu.size()); + fp64_gpu.CopyToCpu(fp64_cpu.data(), fp64_cpu.size()); + for (int i = 0; i < fp64_cpu.size(); ++i) { + EXPECT_EQ(fp64_cpu[i], static_cast(fp32_cpu[i])); + } +} + +TEST(CudaSetZeroFP32, NonZeroInput) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + CudaBuffer fp32_gpu(&context); + fp32_gpu.CopyFromCpuVector(fp32_cpu); + CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), context.DefaultStream()); + std::vector fp32_cpu_zero(fp32_cpu.size()); + fp32_gpu.CopyToCpu(fp32_cpu_zero.data(), fp32_cpu_zero.size()); + for (int i = 0; i < fp32_cpu_zero.size(); ++i) { + EXPECT_EQ(fp32_cpu_zero[i], 0.0f); + } +} + +TEST(CudaSetZeroFP64, NonZeroInput) { + ContextImpl context; + 
std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + CudaBuffer fp64_gpu(&context); + fp64_gpu.CopyFromCpuVector(fp64_cpu); + CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), context.DefaultStream()); + std::vector fp64_cpu_zero(fp64_cpu.size()); + fp64_gpu.CopyToCpu(fp64_cpu_zero.data(), fp64_cpu_zero.size()); + for (int i = 0; i < fp64_cpu_zero.size(); ++i) { + EXPECT_EQ(fp64_cpu_zero[i], 0.0); + } +} + +TEST(CudaDsxpy, DoubleValues) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector fp32_cpu_a = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + std::vector fp64_cpu_b = { + 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0}; + CudaBuffer fp32_gpu_a(&context); + fp32_gpu_a.CopyFromCpuVector(fp32_cpu_a); + CudaBuffer fp64_gpu_b(&context); + fp64_gpu_b.CopyFromCpuVector(fp64_cpu_b); + CudaDsxpy(fp64_gpu_b.data(), + fp32_gpu_a.data(), + fp32_gpu_a.size(), + context.DefaultStream()); + fp64_gpu_b.CopyToCpu(fp64_cpu_b.data(), fp64_cpu_b.size()); + for (int i = 0; i < fp64_cpu_b.size(); ++i) { + EXPECT_DOUBLE_EQ(fp64_cpu_b[i], 2.0 * fp32_cpu_a[i]); + } +} + +TEST(CudaDtDxpy, ComputeFourItems) { + ContextImpl context; + std::string cuda_error; + EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error; + std::vector x_cpu = {1, 2, 3, 4}; + std::vector y_cpu = {4, 3, 2, 1}; + std::vector d_cpu = {10, 20, 30, 40}; + CudaBuffer x_gpu(&context); + x_gpu.CopyFromCpuVector(x_cpu); + CudaBuffer y_gpu(&context); + y_gpu.CopyFromCpuVector(y_cpu); + CudaBuffer d_gpu(&context); + d_gpu.CopyFromCpuVector(d_cpu); + CudaDtDxpy(y_gpu.data(), + d_gpu.data(), + x_gpu.data(), + y_gpu.size(), + context.DefaultStream()); + y_gpu.CopyToCpu(y_cpu.data(), y_cpu.size()); + EXPECT_DOUBLE_EQ(y_cpu[0], 4.0 + 10.0 * 10.0 * 1.0); + EXPECT_DOUBLE_EQ(y_cpu[1], 3.0 + 20.0 * 20.0 * 2.0); + EXPECT_DOUBLE_EQ(y_cpu[2], 2.0 + 30.0 * 30.0 
* 3.0); + EXPECT_DOUBLE_EQ(y_cpu[3], 1.0 + 40.0 * 40.0 * 4.0); +} + +#endif // CERES_NO_CUDA + +} // namespace internal +} // namespace ceres diff --git a/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc new file mode 100644 index 00000000000..c0c1dc8c2fb --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.cc @@ -0,0 +1,152 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_partitioned_block_sparse_crs_view.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_block_structure.h" +#include "ceres/cuda_kernels_bsm_to_crs.h" + +namespace ceres::internal { + +CudaPartitionedBlockSparseCRSView::CudaPartitionedBlockSparseCRSView( + const BlockSparseMatrix& bsm, + const int num_col_blocks_e, + ContextImpl* context) + : + + context_(context) { + const auto& bs = *bsm.block_structure(); + block_structure_ = + std::make_unique(bs, num_col_blocks_e, context); + // Determine number of non-zeros in left submatrix + // Row-blocks are at least 1 row high, thus we can use a temporary array of + // num_rows for ComputeNonZerosInColumnBlockSubMatrix; and later reuse it for + // FillCRSStructurePartitioned + const int num_rows = bsm.num_rows(); + const int num_nonzeros_e = block_structure_->num_nonzeros_e(); + const int num_nonzeros_f = bsm.num_nonzeros() - num_nonzeros_e; + + const int num_cols_e = num_col_blocks_e < bs.cols.size() + ? 
bs.cols[num_col_blocks_e].position + : bsm.num_cols(); + const int num_cols_f = bsm.num_cols() - num_cols_e; + + CudaBuffer rows_e(context, num_rows + 1); + CudaBuffer cols_e(context, num_nonzeros_e); + CudaBuffer rows_f(context, num_rows + 1); + CudaBuffer cols_f(context, num_nonzeros_f); + + num_row_blocks_e_ = block_structure_->num_row_blocks_e(); + FillCRSStructurePartitioned(block_structure_->num_row_blocks(), + num_rows, + num_row_blocks_e_, + num_col_blocks_e, + num_nonzeros_e, + block_structure_->first_cell_in_row_block(), + block_structure_->cells(), + block_structure_->row_blocks(), + block_structure_->col_blocks(), + rows_e.data(), + cols_e.data(), + rows_f.data(), + cols_f.data(), + context->DefaultStream(), + context->is_cuda_memory_pools_supported_); + f_is_crs_compatible_ = block_structure_->IsCrsCompatible(); + if (f_is_crs_compatible_) { + block_structure_ = nullptr; + } else { + streamed_buffer_ = std::make_unique>( + context, kMaxTemporaryArraySize); + } + matrix_e_ = std::make_unique( + num_cols_e, std::move(rows_e), std::move(cols_e), context); + matrix_f_ = std::make_unique( + num_cols_f, std::move(rows_f), std::move(cols_f), context); + + CHECK_EQ(bsm.num_nonzeros(), + matrix_e_->num_nonzeros() + matrix_f_->num_nonzeros()); + + UpdateValues(bsm); +} + +void CudaPartitionedBlockSparseCRSView::UpdateValues( + const BlockSparseMatrix& bsm) { + if (f_is_crs_compatible_) { + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(matrix_e_->mutable_values(), + bsm.values(), + matrix_e_->num_nonzeros() * sizeof(double), + cudaMemcpyHostToDevice, + context_->DefaultStream())); + + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(matrix_f_->mutable_values(), + bsm.values() + matrix_e_->num_nonzeros(), + matrix_f_->num_nonzeros() * sizeof(double), + cudaMemcpyHostToDevice, + context_->DefaultStream())); + return; + } + streamed_buffer_->CopyToGpu( + bsm.values(), + bsm.num_nonzeros(), + [block_structure = block_structure_.get(), + num_nonzeros_e = matrix_e_->num_nonzeros(), + 
num_row_blocks_e = num_row_blocks_e_, + values_f = matrix_f_->mutable_values(), + rows_f = matrix_f_->rows()]( + const double* values, int num_values, int offset, auto stream) { + PermuteToCRSPartitionedF(num_nonzeros_e + offset, + num_values, + block_structure->num_row_blocks(), + num_row_blocks_e, + block_structure->first_cell_in_row_block(), + block_structure->value_offset_row_block_f(), + block_structure->cells(), + block_structure->row_blocks(), + block_structure->col_blocks(), + rows_f, + values, + values_f, + stream); + }); + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(matrix_e_->mutable_values(), + bsm.values(), + matrix_e_->num_nonzeros() * sizeof(double), + cudaMemcpyHostToDevice, + context_->DefaultStream())); +} + +} // namespace ceres::internal +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.h b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.h new file mode 100644 index 00000000000..3072deab902 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view.h @@ -0,0 +1,111 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) +// + +#ifndef CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_ +#define CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/cuda_block_structure.h" +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_sparse_matrix.h" +#include "ceres/cuda_streamed_buffer.h" + +namespace ceres::internal { +// We use cuSPARSE library for SpMV operations. However, it does not support +// neither block-sparse format with varying size of the blocks nor +// submatrix-vector products. 
Thus, we perform the following operations in order +// to compute products of partitioned block-sparse matrices and dense vectors on +// gpu: +// - Once per block-sparse structure update: +// - Compute CRS structures of left and right submatrices from block-sparse +// structure +// - Check if values of F sub-matrix can be copied without permutation +// matrices +// - Once per block-sparse values update: +// - Copy values of E sub-matrix +// - Permute or copy values of F sub-matrix +// +// It is assumed that cells of block-sparse matrix are laid out sequentially in +// both of sub-matrices and there is exactly one cell in row-block of E +// sub-matrix in the first num_row_blocks_e_ row blocks, and no cells in E +// sub-matrix below num_row_blocks_e_ row blocks. +// +// This class avoids storing both CRS and block-sparse values in GPU memory. +// Instead, block-sparse values are transferred to gpu memory as a disjoint set +// of small continuous segments with simultaneous permutation of the values into +// correct order using block-structure. +class CERES_NO_EXPORT CudaPartitionedBlockSparseCRSView { + public: + // Initializes internal CRS matrix and block-sparse structure on GPU side + // values. The following objects are stored in gpu memory for the whole + // lifetime of the object + // - matrix_e_: left CRS submatrix + // - matrix_f_: right CRS submatrix + // - block_structure_: copy of block-sparse structure on GPU + // - streamed_buffer_: helper for value updating + CudaPartitionedBlockSparseCRSView(const BlockSparseMatrix& bsm, + const int num_col_blocks_e, + ContextImpl* context); + + // Update values of CRS submatrices using values of block-sparse matrix. + // Assumes that bsm has the same block-sparse structure as matrix that was + // used for construction. 
+ void UpdateValues(const BlockSparseMatrix& bsm); + + const CudaSparseMatrix* matrix_e() const { return matrix_e_.get(); } + const CudaSparseMatrix* matrix_f() const { return matrix_f_.get(); } + CudaSparseMatrix* mutable_matrix_e() { return matrix_e_.get(); } + CudaSparseMatrix* mutable_matrix_f() { return matrix_f_.get(); } + + private: + // Value permutation kernel performs a single element-wise operation per + // thread, thus performing permutation in blocks of 8 megabytes of + // block-sparse values seems reasonable + static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024; + std::unique_ptr matrix_e_; + std::unique_ptr matrix_f_; + std::unique_ptr> streamed_buffer_; + std::unique_ptr block_structure_; + bool f_is_crs_compatible_; + int num_row_blocks_e_; + ContextImpl* context_; +}; + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_ diff --git a/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view_test.cc b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view_test.cc new file mode 100644 index 00000000000..ddfdeef07e4 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_partitioned_block_sparse_crs_view_test.cc @@ -0,0 +1,279 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. 
nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/cuda_partitioned_block_sparse_crs_view.h" + +#include +#include + +#ifndef CERES_NO_CUDA + +namespace ceres::internal { + +namespace { +struct RandomPartitionedMatrixOptions { + int num_row_blocks_e; + int num_row_blocks_f; + int num_col_blocks_e; + int num_col_blocks_f; + int min_row_block_size; + int max_row_block_size; + int min_col_block_size; + int max_col_block_size; + double empty_f_probability; + double cell_probability_f; + int max_cells_f; +}; + +std::unique_ptr CreateRandomPartitionedMatrix( + const RandomPartitionedMatrixOptions& options, std::mt19937& rng) { + const int num_row_blocks = + std::max(options.num_row_blocks_e, options.num_row_blocks_f); + const int num_col_blocks = + options.num_col_blocks_e + options.num_col_blocks_f; + + CompressedRowBlockStructure* block_structure = + new CompressedRowBlockStructure; + block_structure->cols.reserve(num_col_blocks); + block_structure->rows.reserve(num_row_blocks); + + // Create 
column blocks + std::uniform_int_distribution col_size(options.min_col_block_size, + options.max_col_block_size); + int num_cols = 0; + for (int i = 0; i < num_col_blocks; ++i) { + const int size = col_size(rng); + block_structure->cols.emplace_back(size, num_cols); + num_cols += size; + } + + // Prepare column-block indices of E cells + std::vector e_col_block_idx; + e_col_block_idx.reserve(options.num_row_blocks_e); + std::uniform_int_distribution col_e(0, options.num_col_blocks_e - 1); + for (int i = 0; i < options.num_row_blocks_e; ++i) { + e_col_block_idx.emplace_back(col_e(rng)); + } + std::sort(e_col_block_idx.begin(), e_col_block_idx.end()); + + // Prepare cell structure + std::uniform_int_distribution row_size(options.min_row_block_size, + options.max_row_block_size); + std::uniform_real_distribution uniform; + int num_rows = 0; + for (int i = 0; i < num_row_blocks; ++i) { + const int size = row_size(rng); + block_structure->rows.emplace_back(); + auto& row = block_structure->rows.back(); + row.block.size = size; + row.block.position = num_rows; + num_rows += size; + if (i < options.num_row_blocks_e) { + row.cells.emplace_back(e_col_block_idx[i], -1); + if (uniform(rng) < options.empty_f_probability) { + continue; + } + } + if (i >= options.num_row_blocks_f) continue; + const int cells_before = row.cells.size(); + for (int j = options.num_col_blocks_e; j < num_col_blocks; ++j) { + if (uniform(rng) > options.cell_probability_f) { + continue; + } + row.cells.emplace_back(j, -1); + } + if (row.cells.size() > cells_before + options.max_cells_f) { + std::shuffle(row.cells.begin() + cells_before, row.cells.end(), rng); + row.cells.resize(cells_before + options.max_cells_f); + std::sort( + row.cells.begin(), row.cells.end(), [](const auto& a, const auto& b) { + return a.block_id < b.block_id; + }); + } + } + + // Fill positions in E sub-matrix + int num_nonzeros = 0; + for (int i = 0; i < options.num_row_blocks_e; ++i) { + 
CHECK_GE(block_structure->rows[i].cells.size(), 1); + block_structure->rows[i].cells[0].position = num_nonzeros; + const int col_block_size = + block_structure->cols[block_structure->rows[i].cells[0].block_id].size; + const int row_block_size = block_structure->rows[i].block.size; + num_nonzeros += row_block_size * col_block_size; + CHECK_GE(num_nonzeros, 0); + } + // Fill positions in F sub-matrix + for (int i = 0; i < options.num_row_blocks_f; ++i) { + const int row_block_size = block_structure->rows[i].block.size; + for (auto& cell : block_structure->rows[i].cells) { + if (cell.position >= 0) continue; + cell.position = num_nonzeros; + const int col_block_size = block_structure->cols[cell.block_id].size; + num_nonzeros += row_block_size * col_block_size; + CHECK_GE(num_nonzeros, 0); + } + } + // Populate values + auto bsm = std::make_unique(block_structure, true); + for (int i = 0; i < num_nonzeros; ++i) { + bsm->mutable_values()[i] = i + 1; + } + return bsm; +} +} // namespace + +class CudaPartitionedBlockSparseCRSViewTest : public ::testing::Test { + static constexpr int kNumColBlocksE = 456; + + protected: + void SetUp() final { + std::string message; + CHECK(context_.InitCuda(&message)) + << "InitCuda() failed because: " << message; + + RandomPartitionedMatrixOptions options; + options.num_row_blocks_f = 123; + options.num_row_blocks_e = 456; + options.num_col_blocks_f = 123; + options.num_col_blocks_e = kNumColBlocksE; + options.min_row_block_size = 1; + options.max_row_block_size = 4; + options.min_col_block_size = 1; + options.max_col_block_size = 4; + options.empty_f_probability = .1; + options.cell_probability_f = .2; + options.max_cells_f = options.num_col_blocks_f; + + std::mt19937 rng; + short_f_ = CreateRandomPartitionedMatrix(options, rng); + + options.num_row_blocks_e = 123; + options.num_row_blocks_f = 456; + short_e_ = CreateRandomPartitionedMatrix(options, rng); + + options.max_cells_f = 1; + options.num_row_blocks_e = options.num_row_blocks_f; 
+ options.num_row_blocks_e = options.num_row_blocks_f; + f_crs_compatible_ = CreateRandomPartitionedMatrix(options, rng); + } + + void TestMatrix(const BlockSparseMatrix& A_) { + const int num_col_blocks_e = 456; + CudaPartitionedBlockSparseCRSView view(A_, kNumColBlocksE, &context_); + + const int num_rows = A_.num_rows(); + const int num_cols = A_.num_cols(); + + const auto& bs = *A_.block_structure(); + const int num_cols_e = bs.cols[num_col_blocks_e].position; + const int num_cols_f = num_cols - num_cols_e; + + auto matrix_e = view.matrix_e(); + auto matrix_f = view.matrix_f(); + ASSERT_EQ(matrix_e->num_cols(), num_cols_e); + ASSERT_EQ(matrix_e->num_rows(), num_rows); + ASSERT_EQ(matrix_f->num_cols(), num_cols_f); + ASSERT_EQ(matrix_f->num_rows(), num_rows); + + Vector x(num_cols); + Vector x_left(num_cols_e); + Vector x_right(num_cols_f); + Vector y(num_rows); + CudaVector x_cuda(&context_, num_cols); + CudaVector x_left_cuda(&context_, num_cols_e); + CudaVector x_right_cuda(&context_, num_cols_f); + CudaVector y_cuda(&context_, num_rows); + Vector y_cuda_host(num_rows); + + for (int i = 0; i < num_cols_e; ++i) { + x.setZero(); + x_left.setZero(); + y.setZero(); + y_cuda.SetZero(); + x[i] = 1.; + x_left[i] = 1.; + x_left_cuda.CopyFromCpu(x_left); + A_.RightMultiplyAndAccumulate( + x.data(), y.data(), &context_, std::thread::hardware_concurrency()); + matrix_e->RightMultiplyAndAccumulate(x_left_cuda, &y_cuda); + y_cuda.CopyTo(&y_cuda_host); + // There will be up to 1 non-zero product per row, thus we expect an exact + // match on 32-bit integer indices + EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.); + } + for (int i = num_cols_e; i < num_cols_f; ++i) { + x.setZero(); + x_right.setZero(); + y.setZero(); + y_cuda.SetZero(); + x[i] = 1.; + x_right[i - num_cols_e] = 1.; + x_right_cuda.CopyFromCpu(x_right); + A_.RightMultiplyAndAccumulate( + x.data(), y.data(), &context_, std::thread::hardware_concurrency()); + matrix_f->RightMultiplyAndAccumulate(x_right_cuda, 
&y_cuda); + y_cuda.CopyTo(&y_cuda_host); + // There will be up to 1 non-zero product per row, thus we expect an exact + // match on 32-bit integer indices + EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.); + } + } + + // E sub-matrix might have less row-blocks with cells than F sub-matrix. This + // test matrix checks if this case is handled properly + std::unique_ptr short_e_; + // In case of non-crs compatible F matrix, permuting values from block-order + // to crs order involves binary search over row-blocks of F. Having lots of + // row-blocks with no F cells is an edge case for this algorithm. + std::unique_ptr short_f_; + // With F matrix being CRS-compatible, update of the values of partitioned + // matrix view reduces to two host->device memcopies, and uses a separate code + // path + std::unique_ptr f_crs_compatible_; + + ContextImpl context_; +}; + +TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortE) { + TestMatrix(*short_e_); +} + +TEST_F(CudaPartitionedBlockSparseCRSViewTest, CreateUpdateValuesShortF) { + TestMatrix(*short_f_); +} + +TEST_F(CudaPartitionedBlockSparseCRSViewTest, + CreateUpdateValuesCrsCompatibleF) { + TestMatrix(*f_crs_compatible_); +} +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_sparse_matrix.cc b/extern/ceres/internal/ceres/cuda_sparse_matrix.cc new file mode 100644 index 00000000000..33685a400eb --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_sparse_matrix.cc @@ -0,0 +1,226 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A CUDA sparse matrix linear operator. + +// This include must come before any #ifndef check on Ceres compile options. 
+// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include "ceres/cuda_sparse_matrix.h" + +#include + +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/context_impl.h" +#include "ceres/crs_matrix.h" +#include "ceres/internal/export.h" +#include "ceres/types.h" +#include "ceres/wall_time.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels_vector_ops.h" +#include "ceres/cuda_vector.h" +#include "cuda_runtime_api.h" +#include "cusparse.h" + +namespace ceres::internal { +namespace { +// Starting in CUDA 11.2.1, CUSPARSE_MV_ALG_DEFAULT was deprecated in favor of +// CUSPARSE_SPMV_ALG_DEFAULT. +#if CUDART_VERSION >= 11021 +const auto kSpMVAlgorithm = CUSPARSE_SPMV_ALG_DEFAULT; +#else // CUDART_VERSION >= 11021 +const auto kSpMVAlgorithm = CUSPARSE_MV_ALG_DEFAULT; +#endif // CUDART_VERSION >= 11021 +size_t GetTempBufferSizeForOp(const cusparseHandle_t& handle, + const cusparseOperation_t op, + const cusparseDnVecDescr_t& x, + const cusparseDnVecDescr_t& y, + const cusparseSpMatDescr_t& A) { + size_t buffer_size; + const double alpha = 1.0; + const double beta = 1.0; + CHECK_NE(A, nullptr); + CHECK_EQ(cusparseSpMV_bufferSize(handle, + op, + &alpha, + A, + x, + &beta, + y, + CUDA_R_64F, + kSpMVAlgorithm, + &buffer_size), + CUSPARSE_STATUS_SUCCESS); + return buffer_size; +} + +size_t GetTempBufferSize(const cusparseHandle_t& handle, + const cusparseDnVecDescr_t& left, + const cusparseDnVecDescr_t& right, + const cusparseSpMatDescr_t& A) { + CHECK_NE(A, nullptr); + return std::max(GetTempBufferSizeForOp( + handle, CUSPARSE_OPERATION_NON_TRANSPOSE, right, left, A), + GetTempBufferSizeForOp( + handle, CUSPARSE_OPERATION_TRANSPOSE, left, right, A)); +} +} // namespace + +CudaSparseMatrix::CudaSparseMatrix(int num_cols, + CudaBuffer&& rows, + CudaBuffer&& cols, + ContextImpl* context) + : num_rows_(rows.size() - 1), + num_cols_(num_cols), + 
num_nonzeros_(cols.size()), + context_(context), + rows_(std::move(rows)), + cols_(std::move(cols)), + values_(context, num_nonzeros_), + spmv_buffer_(context) { + Initialize(); +} + +CudaSparseMatrix::CudaSparseMatrix(ContextImpl* context, + const CompressedRowSparseMatrix& crs_matrix) + : num_rows_(crs_matrix.num_rows()), + num_cols_(crs_matrix.num_cols()), + num_nonzeros_(crs_matrix.num_nonzeros()), + context_(context), + rows_(context, num_rows_ + 1), + cols_(context, num_nonzeros_), + values_(context, num_nonzeros_), + spmv_buffer_(context) { + rows_.CopyFromCpu(crs_matrix.rows(), num_rows_ + 1); + cols_.CopyFromCpu(crs_matrix.cols(), num_nonzeros_); + values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_); + Initialize(); +} + +CudaSparseMatrix::~CudaSparseMatrix() { + CHECK_EQ(cusparseDestroySpMat(descr_), CUSPARSE_STATUS_SUCCESS); + descr_ = nullptr; + CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_left_)); + CHECK_EQ(CUSPARSE_STATUS_SUCCESS, cusparseDestroyDnVec(descr_vec_right_)); +} + +void CudaSparseMatrix::CopyValuesFromCpu( + const CompressedRowSparseMatrix& crs_matrix) { + // There is no quick and easy way to verify that the structure is unchanged, + // but at least we can check that the size of the matrix and the number of + // nonzeros is unchanged. 
+ CHECK_EQ(num_rows_, crs_matrix.num_rows()); + CHECK_EQ(num_cols_, crs_matrix.num_cols()); + CHECK_EQ(num_nonzeros_, crs_matrix.num_nonzeros()); + values_.CopyFromCpu(crs_matrix.values(), num_nonzeros_); +} + +void CudaSparseMatrix::Initialize() { + CHECK(context_->IsCudaInitialized()); + CHECK_EQ(CUSPARSE_STATUS_SUCCESS, + cusparseCreateCsr(&descr_, + num_rows_, + num_cols_, + num_nonzeros_, + rows_.data(), + cols_.data(), + values_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); + + // Note: values_.data() is used as non-zero pointer to device memory + // When there is no non-zero values, data-pointer of values_ array will be a + // nullptr; but in this case left/right products are trivial and temporary + // buffer (and vector descriptors) is not required + if (!num_nonzeros_) return; + + CHECK_EQ(CUSPARSE_STATUS_SUCCESS, + cusparseCreateDnVec( + &descr_vec_left_, num_rows_, values_.data(), CUDA_R_64F)); + CHECK_EQ(CUSPARSE_STATUS_SUCCESS, + cusparseCreateDnVec( + &descr_vec_right_, num_cols_, values_.data(), CUDA_R_64F)); + size_t buffer_size = GetTempBufferSize( + context_->cusparse_handle_, descr_vec_left_, descr_vec_right_, descr_); + spmv_buffer_.Reserve(buffer_size); +} + +void CudaSparseMatrix::SpMv(cusparseOperation_t op, + const cusparseDnVecDescr_t& x, + const cusparseDnVecDescr_t& y) const { + const double alpha = 1.0; + const double beta = 1.0; + + CHECK_EQ(cusparseSpMV(context_->cusparse_handle_, + op, + &alpha, + descr_, + x, + &beta, + y, + CUDA_R_64F, + kSpMVAlgorithm, + spmv_buffer_.data()), + CUSPARSE_STATUS_SUCCESS); +} + +void CudaSparseMatrix::RightMultiplyAndAccumulate(const CudaVector& x, + CudaVector* y) const { + DCHECK(GetTempBufferSize( + context_->cusparse_handle_, y->descr(), x.descr(), descr_) <= + spmv_buffer_.size()); + SpMv(CUSPARSE_OPERATION_NON_TRANSPOSE, x.descr(), y->descr()); +} + +void CudaSparseMatrix::LeftMultiplyAndAccumulate(const CudaVector& x, + CudaVector* y) const { + 
// TODO(Joydeep Biswas): We should consider storing a transposed copy of the + // matrix by converting CSR to CSC. From the cuSPARSE documentation: + // "In general, opA == CUSPARSE_OPERATION_NON_TRANSPOSE is 3x faster than opA + // != CUSPARSE_OPERATION_NON_TRANSPOSE" + DCHECK(GetTempBufferSize( + context_->cusparse_handle_, x.descr(), y->descr(), descr_) <= + spmv_buffer_.size()); + SpMv(CUSPARSE_OPERATION_TRANSPOSE, x.descr(), y->descr()); +} + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_sparse_matrix.h b/extern/ceres/internal/ceres/cuda_sparse_matrix.h new file mode 100644 index 00000000000..2940d1d6f35 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_sparse_matrix.h @@ -0,0 +1,143 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A CUDA sparse matrix linear operator. + +#ifndef CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_ +#define CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_ + +// This include must come before any #ifndef check on Ceres compile options. +// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include +#include +#include + +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/context_impl.h" +#include "ceres/internal/export.h" +#include "ceres/types.h" + +#ifndef CERES_NO_CUDA +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_vector.h" +#include "cusparse.h" + +namespace ceres::internal { + +// A sparse matrix hosted on the GPU in compressed row sparse format, with +// CUDA-accelerated operations. +// The user of the class must ensure that ContextImpl::InitCuda() has already +// been successfully called before using this class. +class CERES_NO_EXPORT CudaSparseMatrix { + public: + // Create a GPU copy of the matrix provided. + CudaSparseMatrix(ContextImpl* context, + const CompressedRowSparseMatrix& crs_matrix); + + // Create matrix from existing row and column index buffers. + // Values are left uninitialized. 
+ CudaSparseMatrix(int num_cols, + CudaBuffer&& rows, + CudaBuffer&& cols, + ContextImpl* context); + + ~CudaSparseMatrix(); + + // Left/right products are using internal buffer and are not thread-safe + // y = y + Ax; + void RightMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const; + // y = y + A'x; + void LeftMultiplyAndAccumulate(const CudaVector& x, CudaVector* y) const; + + int num_rows() const { return num_rows_; } + int num_cols() const { return num_cols_; } + int num_nonzeros() const { return num_nonzeros_; } + + const int32_t* rows() const { return rows_.data(); } + const int32_t* cols() const { return cols_.data(); } + const double* values() const { return values_.data(); } + + int32_t* mutable_rows() { return rows_.data(); } + int32_t* mutable_cols() { return cols_.data(); } + double* mutable_values() { return values_.data(); } + + // If subsequent uses of this matrix involve only numerical changes and no + // structural changes, then this method can be used to copy the updated + // non-zero values -- the row and column index arrays are kept the same. It + // is the caller's responsibility to ensure that the sparsity structure of the + // matrix is unchanged. + void CopyValuesFromCpu(const CompressedRowSparseMatrix& crs_matrix); + + const cusparseSpMatDescr_t& descr() const { return descr_; } + + private: + // Disable copy and assignment. + CudaSparseMatrix(const CudaSparseMatrix&) = delete; + CudaSparseMatrix& operator=(const CudaSparseMatrix&) = delete; + + // Allocate temporary buffer for left/right products, create cuSPARSE + // descriptors + void Initialize(); + + // y = y + op(M)x. op must be either CUSPARSE_OPERATION_NON_TRANSPOSE or + // CUSPARSE_OPERATION_TRANSPOSE. + void SpMv(cusparseOperation_t op, + const cusparseDnVecDescr_t& x, + const cusparseDnVecDescr_t& y) const; + + int num_rows_ = 0; + int num_cols_ = 0; + int num_nonzeros_ = 0; + + ContextImpl* context_ = nullptr; + // CSR row indices. 
+ CudaBuffer rows_; + // CSR column indices. + CudaBuffer cols_; + // CSR values. + CudaBuffer values_; + + // CuSparse object that describes this matrix. + cusparseSpMatDescr_t descr_ = nullptr; + + // Dense vector descriptors for pointer interface + cusparseDnVecDescr_t descr_vec_left_ = nullptr; + cusparseDnVecDescr_t descr_vec_right_ = nullptr; + + mutable CudaBuffer spmv_buffer_; +}; + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_SPARSE_MATRIX_H_ diff --git a/extern/ceres/internal/ceres/cuda_sparse_matrix_test.cc b/extern/ceres/internal/ceres/cuda_sparse_matrix_test.cc new file mode 100644 index 00000000000..774829bc458 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_sparse_matrix_test.cc @@ -0,0 +1,286 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/cuda_sparse_matrix.h" + +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/casts.h" +#include "ceres/cuda_vector.h" +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "ceres/linear_least_squares_problems.h" +#include "ceres/triplet_sparse_matrix.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres { +namespace internal { + +#ifndef CERES_NO_CUDA + +class CudaSparseMatrixTest : public ::testing::Test { + protected: + void SetUp() final { + std::string message; + CHECK(context_.InitCuda(&message)) + << "InitCuda() failed because: " << message; + std::unique_ptr problem = + CreateLinearLeastSquaresProblemFromId(2); + CHECK(problem != nullptr); + A_.reset(down_cast(problem->A.release())); + CHECK(A_ != nullptr); + CHECK(problem->b != nullptr); + CHECK(problem->x != nullptr); + b_.resize(A_->num_rows()); + for (int i = 0; i < A_->num_rows(); ++i) { + b_[i] = problem->b[i]; + } + x_.resize(A_->num_cols()); + for (int i = 0; i < A_->num_cols(); ++i) { + x_[i] = problem->x[i]; + } + CHECK_EQ(A_->num_rows(), b_.rows()); + CHECK_EQ(A_->num_cols(), x_.rows()); + } + + std::unique_ptr A_; + Vector x_; + Vector b_; + ContextImpl context_; +}; + +TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) { + std::string message; + auto A_crs = A_->ToCompressedRowSparseMatrix(); + CudaSparseMatrix 
A_gpu(&context_, *A_crs); + CudaVector x_gpu(&context_, A_gpu.num_cols()); + CudaVector res_gpu(&context_, A_gpu.num_rows()); + x_gpu.CopyFromCpu(x_); + + const Vector minus_b = -b_; + // res = -b + res_gpu.CopyFromCpu(minus_b); + // res += A * x + A_gpu.RightMultiplyAndAccumulate(x_gpu, &res_gpu); + + Vector res; + res_gpu.CopyTo(&res); + + Vector res_expected = minus_b; + A_->RightMultiplyAndAccumulate(x_.data(), res_expected.data()); + + EXPECT_LE((res - res_expected).norm(), + std::numeric_limits::epsilon() * 1e3); +} + +TEST(CudaSparseMatrix, CopyValuesFromCpu) { + // A1: + // [ 1 1 0 0 + // 0 1 1 0] + // A2: + // [ 1 2 0 0 + // 0 3 4 0] + // b: [1 2 3 4]' + // A1 * b = [3 5]' + // A2 * b = [5 18]' + TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1}); + TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4}); + Vector b(4); + b << 1, 2, 3, 4; + + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1); + CudaSparseMatrix A_gpu(&context, *A1_crs); + CudaVector b_gpu(&context, A1.num_cols()); + CudaVector x_gpu(&context, A1.num_rows()); + b_gpu.CopyFromCpu(b); + x_gpu.SetZero(); + + Vector x_expected(2); + x_expected << 3, 5; + A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu); + Vector x_computed; + x_gpu.CopyTo(&x_computed); + EXPECT_EQ(x_computed, x_expected); + + auto A2_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A2); + A_gpu.CopyValuesFromCpu(*A2_crs); + x_gpu.SetZero(); + x_expected << 5, 18; + A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu); + x_gpu.CopyTo(&x_computed); + EXPECT_EQ(x_computed, x_expected); +} + +TEST(CudaSparseMatrix, RightMultiplyAndAccumulate) { + // A: + // [ 1 2 0 0 + // 0 3 4 0] + // b: [1 2 3 4]' + // A * b = [5 18]' + TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4}); + Vector b(4); + b << 1, 2, 3, 4; + Vector x_expected(2); + 
x_expected << 5, 18; + + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A); + CudaSparseMatrix A_gpu(&context, *A_crs); + CudaVector b_gpu(&context, A.num_cols()); + CudaVector x_gpu(&context, A.num_rows()); + b_gpu.CopyFromCpu(b); + x_gpu.SetZero(); + + A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu); + + Vector x_computed; + x_gpu.CopyTo(&x_computed); + + EXPECT_EQ(x_computed, x_expected); +} + +TEST(CudaSparseMatrix, LeftMultiplyAndAccumulate) { + // A: + // [ 1 2 0 0 + // 0 3 4 0] + // b: [1 2]' + // A'* b = [1 8 8 0]' + TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4}); + Vector b(2); + b << 1, 2; + Vector x_expected(4); + x_expected << 1, 8, 8, 0; + + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A); + CudaSparseMatrix A_gpu(&context, *A_crs); + CudaVector b_gpu(&context, A.num_rows()); + CudaVector x_gpu(&context, A.num_cols()); + b_gpu.CopyFromCpu(b); + x_gpu.SetZero(); + + A_gpu.LeftMultiplyAndAccumulate(b_gpu, &x_gpu); + + Vector x_computed; + x_gpu.CopyTo(&x_computed); + + EXPECT_EQ(x_computed, x_expected); +} + +// If there are numerical errors due to synchronization issues, they will show +// up when testing with large matrices, since each operation will take +// significant time, thus hopefully revealing any potential synchronization +// issues. +TEST(CudaSparseMatrix, LargeMultiplyAndAccumulate) { + // Create a large NxN matrix A that has the following structure: + // In row i, only columns i and i+1 are non-zero. + // A_{i, i} = A_{i, i+1} = 1. + // There will be 2 * N - 1 non-zero elements in A. 
+ // X = [1:N] + // Right multiply test: + // b = A * X + // Left multiply test: + // b = A' * X + + const int N = 10 * 1000 * 1000; + const int num_non_zeros = 2 * N - 1; + std::vector row_indices(num_non_zeros); + std::vector col_indices(num_non_zeros); + std::vector values(num_non_zeros); + + for (int i = 0; i < N; ++i) { + row_indices[2 * i] = i; + col_indices[2 * i] = i; + values[2 * i] = 1.0; + if (i + 1 < N) { + col_indices[2 * i + 1] = i + 1; + row_indices[2 * i + 1] = i; + values[2 * i + 1] = 1; + } + } + TripletSparseMatrix A(N, N, row_indices, col_indices, values); + Vector x(N); + for (int i = 0; i < N; ++i) { + x[i] = i + 1; + } + + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A); + CudaSparseMatrix A_gpu(&context, *A_crs); + CudaVector b_gpu(&context, N); + CudaVector x_gpu(&context, N); + x_gpu.CopyFromCpu(x); + + // First check RightMultiply. + { + b_gpu.SetZero(); + A_gpu.RightMultiplyAndAccumulate(x_gpu, &b_gpu); + Vector b_computed; + b_gpu.CopyTo(&b_computed); + for (int i = 0; i < N; ++i) { + if (i + 1 < N) { + EXPECT_EQ(b_computed[i], 2 * (i + 1) + 1); + } else { + EXPECT_EQ(b_computed[i], i + 1); + } + } + } + + // Next check LeftMultiply. 
+ { + b_gpu.SetZero(); + A_gpu.LeftMultiplyAndAccumulate(x_gpu, &b_gpu); + Vector b_computed; + b_gpu.CopyTo(&b_computed); + for (int i = 0; i < N; ++i) { + if (i > 0) { + EXPECT_EQ(b_computed[i], 2 * (i + 1) - 1); + } else { + EXPECT_EQ(b_computed[i], i + 1); + } + } + } +} + +#endif // CERES_NO_CUDA + +} // namespace internal +} // namespace ceres diff --git a/extern/ceres/internal/ceres/cuda_streamed_buffer.h b/extern/ceres/internal/ceres/cuda_streamed_buffer.h new file mode 100644 index 00000000000..8761ef43275 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_streamed_buffer.h @@ -0,0 +1,335 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_ +#define CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_ + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA +#include "ceres/cuda_buffer.h" + +namespace ceres::internal { + +// Most contemporary CUDA devices are capable of simultaneous code execution and +// host-to-device transfer. This class copies batches of data to GPU memory and +// executes processing of copied data in parallel (asynchronously). +// Data is copied to a fixed-size buffer on GPU (containing at most +// max_buffer_size values), and this memory is re-used when the previous +// batch of values is processed by user-provided callback +// Host-to-device copy uses a temporary buffer if required. Each batch of values +// has size of kValuesPerBatch, except the last one. +template +class CERES_NO_EXPORT CudaStreamedBuffer { + public: + // If hardware supports only one host-to-device copy or one host-to-device + // copy is able to reach peak bandwidth, two streams are sufficient to reach + // maximum efficiency: + // - If transferring batch of values takes more time, than processing it on + // gpu, then at every moment of time one of the streams will be transferring + // data and other stream will be either processing data or idle; the whole + // process will be bounded by host-to-device copy. 
+ // - If transferring batch of values takes less time, than processing it on + // gpu, then at every moment of time one of the streams will be processing + // data and other stream will be either performing computations or + // transferring data, and the whole process will be bounded by computations. + static constexpr int kNumBatches = 2; + // max_buffer_size is the maximal size (in elements of type T) of array + // to be pre-allocated in gpu memory. The size of array determines size of + // batch of values for simultaneous copying and processing. It should be large + // enough to allow highly-parallel execution of user kernels; making it too + // large increases latency. + CudaStreamedBuffer(ContextImpl* context, const int max_buffer_size) + : kValuesPerBatch(max_buffer_size / kNumBatches), + context_(context), + values_gpu_(context, kValuesPerBatch * kNumBatches) { + static_assert(ContextImpl::kNumCudaStreams >= kNumBatches); + CHECK_GE(max_buffer_size, kNumBatches); + // Pre-allocate a buffer of page-locked memory for transfers from a regular + // cpu memory. Because we will be only writing into that buffer from cpu, + // memory is allocated with cudaHostAllocWriteCombined flag. + CHECK_EQ(cudaSuccess, + cudaHostAlloc(&values_cpu_pinned_, + sizeof(T) * kValuesPerBatch * kNumBatches, + cudaHostAllocWriteCombined)); + for (auto& e : copy_finished_) { + CHECK_EQ(cudaSuccess, + cudaEventCreateWithFlags(&e, cudaEventDisableTiming)); + } + } + + CudaStreamedBuffer(const CudaStreamedBuffer&) = delete; + + ~CudaStreamedBuffer() { + CHECK_EQ(cudaSuccess, cudaFreeHost(values_cpu_pinned_)); + for (auto& e : copy_finished_) { + CHECK_EQ(cudaSuccess, cudaEventDestroy(e)); + } + } + + // Transfer num_values at host-memory pointer from, calling + // callback(device_pointer, size_of_batch, offset_of_batch, stream_to_use) + // after scheduling transfer of each batch of data. 
User-provided callback + // should perform processing of data at device_pointer only in + // stream_to_use stream (device_pointer will be re-used in the next + // callback invocation with the same stream). + // + // Two diagrams below describe operation in two possible scenarios, depending + // on input data being stored in page-locked memory. In this example we will + // have max_buffer_size = 2 * K, num_values = N * K and callback + // scheduling a single asynchronous launch of + // Kernel<<..., stream_to_use>>(device_pointer, + // size_of_batch, + // offset_of_batch) + // + // a. Copying from page-locked memory + // In this case no copy on the host-side is necessary, and this method just + // schedules a bunch of interleaved memory copies and callback invocations: + // + // cudaStreamSynchronize(context->DefaultStream()); + // - Iteration #0: + // - cudaMemcpyAsync(values_gpu_, from, K * sizeof(T), H->D, stream_0) + // - callback(values_gpu_, K, 0, stream_0) + // - Iteration #1: + // - cudaMemcpyAsync(values_gpu_ + K, from + K, K * sizeof(T), H->D, + // stream_1) + // - callback(values_gpu_ + K, K, K, stream_1) + // - Iteration #2: + // - cudaMemcpyAsync(values_gpu_, from + 2 * K, K * sizeof(T), H->D, + // stream_0) + // - callback(values_gpu_, K, 2 * K, stream_0) + // - Iteration #3: + // - cudaMemcpyAsync(values_gpu_ + K, from + 3 * K, K * sizeof(T), H->D, + // stream_1) + // - callback(values_gpu_ + K, K, 3 * K, stream_1) + // ... + // - Iteration #i: + // - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, from + i * K, K * + // sizeof(T), H->D, stream_(i % 2)) + // - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2) + // ... 
+ // cudaStreamSynchronize(stream_0) + // cudaStreamSynchronize(stream_1) + // + // This sequence of calls results in the following activity on gpu (assuming that + // kernel invoked by callback takes less time than host-to-device copy): + // +-------------------+-------------------+ + // | Stream #0 | Stream #1 | + // +-------------------+-------------------+ + // | Copy host->device | | + // | | | + // | | | + // +-------------------+-------------------+ + // | Kernel | Copy host->device | + // +-------------------+ | + // | | | + // +-------------------+-------------------+ + // | Copy host->device | Kernel | + // | +-------------------+ + // | | | + // +-------------------+-------------------+ + // | Kernel | Copy host->device | + // | ... | + // +---------------------------------------+ + // + // b. Copying from regular memory + // In this case a copy from regular memory to page-locked memory is required + // in order to get asynchronous operation. Because pinned memory on host-side + // is reused, additional synchronization is required. 
On each iteration method + // the following actions are performed: + // - Wait till previous copy operation in stream is completed + // - Copy batch of values from input array into pinned memory + // - Asynchronously launch host-to-device copy + // - Setup event for synchronization on copy completion + // - Invoke callback (that launches kernel asynchronously) + // + // Invocations are performed with the following arguments + // cudaStreamSynchronize(context->DefaultStream()); + // - Iteration #0: + // - cudaEventSynchronize(copy_finished_0) + // - std::copy_n(from, K, values_cpu_pinned_) + // - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D, + // stream_0) + // - cudaEventRecord(copy_finished_0, stream_0) + // - callback(values_gpu_, K, 0, stream_0) + // - Iteration #1: + // - cudaEventSynchronize(copy_finished_1) + // - std::copy_n(from + K, K, values_cpu_pinned_ + K) + // - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K * + // sizeof(T), H->D, stream_1) + // - cudaEventRecord(copy_finished_1, stream_1) + // - callback(values_gpu_ + K, K, K, stream_1) + // - Iteration #2: + // - cudaEventSynchronize(copy_finished_0) + // - std::copy_n(from + 2 * K, K, values_cpu_pinned_) + // - cudaMemcpyAsync(values_gpu_, values_cpu_pinned_, K * sizeof(T), H->D, + // stream_0) + // - cudaEventRecord(copy_finished_0, stream_0) + // - callback(values_gpu_, K, 2 * K, stream_0) + // - Iteration #3: + // - cudaEventSynchronize(copy_finished_1) + // - std::copy_n(from + 3 * K, K, values_cpu_pinned_ + K) + // - cudaMemcpyAsync(values_gpu_ + K, values_cpu_pinned_ + K, K * + // sizeof(T), H->D, stream_1) + // - cudaEventRecord(copy_finished_1, stream_1) + // - callback(values_gpu_ + K, K, 3 * K, stream_1) + // ... 
+ // - Iteration #i: + // - cudaEventSynchronize(copy_finished_(i % 2)) + // - std::copy_n(from + i * K, K, values_cpu_pinned_ + (i % 2) * K) + // - cudaMemcpyAsync(values_gpu_ + (i % 2) * K, values_cpu_pinned_ + (i % + // 2) * K, K * sizeof(T), H->D, stream_(i % 2)) + // - cudaEventRecord(copy_finished_(i % 2), stream_(i % 2)) + // - callback(values_gpu_ + (i % 2) * K, K, i * K, stream_(i % 2)) + // ... + // cudaStreamSynchronize(stream_0) + // cudaStreamSynchronize(stream_1) + // + // This sequence of calls results in the following activity on cpu and gpu + // (assuming that kernel invoked by callback takes less time than + // host-to-device copy and copy in cpu memory, and copy in cpu memory is + // faster than host-to-device copy): + // +----------------------------+-------------------+-------------------+ + // | CPU | Stream #0 | Stream #1 | + // +----------------------------+-------------------+-------------------+ + // | Copy to pinned memory | | | + // | | | | + // +----------------------------+-------------------| | + // | Copy to pinned memory | Copy host->device | | + // | | | | + // +----------------------------+ | | + // | Waiting previous h->d copy | | | + // +----------------------------+-------------------+-------------------+ + // | Copy to pinned memory | Kernel | Copy host->device | + // | +-------------------+ | + // +----------------------------+ | | + // | Waiting previous h->d copy | | | + // +----------------------------+-------------------+-------------------+ + // | Copy to pinned memory | Copy host->device | Kernel | + // | | +-------------------+ + // | ... ... 
| + // +----------------------------+---------------------------------------+ + // + template + void CopyToGpu(const T* from, const int num_values, Fun&& callback) { + // This synchronization is not required in some cases, but we perform it in + // order to avoid situation when user callback depends on data that is + // still to be computed in default stream + CHECK_EQ(cudaSuccess, cudaStreamSynchronize(context_->DefaultStream())); + + // If pointer to input data does not correspond to page-locked memory, + // host-to-device memory copy might be executed synchrnonously (with a copy + // to pinned memory happening inside the driver). In that case we perform + // copy to a pre-allocated array of page-locked memory. + const bool copy_to_pinned_memory = MemoryTypeResultsInSynchronousCopy(from); + T* batch_values_gpu[kNumBatches]; + T* batch_values_cpu[kNumBatches]; + auto streams = context_->streams_; + for (int i = 0; i < kNumBatches; ++i) { + batch_values_gpu[i] = values_gpu_.data() + kValuesPerBatch * i; + batch_values_cpu[i] = values_cpu_pinned_ + kValuesPerBatch * i; + } + int batch_id = 0; + for (int offset = 0; offset < num_values; offset += kValuesPerBatch) { + const int num_values_batch = + std::min(num_values - offset, kValuesPerBatch); + const T* batch_from = from + offset; + T* batch_to = batch_values_gpu[batch_id]; + auto stream = streams[batch_id]; + auto copy_finished = copy_finished_[batch_id]; + + if (copy_to_pinned_memory) { + // Copying values to a temporary buffer should be started only after the + // previous copy from temporary buffer to device is completed. 
+ CHECK_EQ(cudaSuccess, cudaEventSynchronize(copy_finished)); + std::copy_n(batch_from, num_values_batch, batch_values_cpu[batch_id]); + batch_from = batch_values_cpu[batch_id]; + } + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(batch_to, + batch_from, + sizeof(T) * num_values_batch, + cudaMemcpyHostToDevice, + stream)); + if (copy_to_pinned_memory) { + // Next copy to a temporary buffer can start straight after asynchronous + // copy is completed (and might be started before kernels asynchronously + // executed in stream by user-supplied callback are completed). + // No explicit synchronization is required when copying data from + // page-locked memory, because memory copy and user kernel execution + // with corresponding part of values_gpu_ array is serialized using + // stream + CHECK_EQ(cudaSuccess, cudaEventRecord(copy_finished, stream)); + } + callback(batch_to, num_values_batch, offset, stream); + batch_id = (batch_id + 1) % kNumBatches; + } + // Explicitly synchronize on all CUDA streams that were utilized. + for (int i = 0; i < kNumBatches; ++i) { + CHECK_EQ(cudaSuccess, cudaStreamSynchronize(streams[i])); + } + } + + private: + // It is necessary to have all host-to-device copies to be completely + // asynchronous. This requires source memory to be allocated in page-locked + // memory. + static bool MemoryTypeResultsInSynchronousCopy(const void* ptr) { + cudaPointerAttributes attributes; + auto status = cudaPointerGetAttributes(&attributes, ptr); +#if CUDART_VERSION < 11000 + // In CUDA versions prior 11 call to cudaPointerGetAttributes with host + // pointer will return cudaErrorInvalidValue + if (status == cudaErrorInvalidValue) { + return true; + } +#endif + CHECK_EQ(status, cudaSuccess); + // This class only supports cpu memory as a source + CHECK_NE(attributes.type, cudaMemoryTypeDevice); + // If host memory was allocated (or registered) with CUDA API, or is a + // managed memory, then call to cudaMemcpyAsync will be asynchrnous. 
In case + // of managed memory it might be slightly better to perform a single call of + // user-provided call-back (and hope that page migration will provide a + // similar throughput with zero efforts from our side). + return attributes.type == cudaMemoryTypeUnregistered; + } + + const int kValuesPerBatch; + ContextImpl* context_ = nullptr; + CudaBuffer values_gpu_; + T* values_cpu_pinned_ = nullptr; + cudaEvent_t copy_finished_[kNumBatches] = {nullptr}; +}; + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_STREAMED_BUFFER_H_ diff --git a/extern/ceres/internal/ceres/cuda_streamed_buffer_test.cc b/extern/ceres/internal/ceres/cuda_streamed_buffer_test.cc new file mode 100644 index 00000000000..4837005f3cf --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_streamed_buffer_test.cc @@ -0,0 +1,169 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#include "ceres/internal/config.h" + +#ifndef CERES_NO_CUDA + +#include +#include + +#include + +#include "ceres/cuda_streamed_buffer.h" + +namespace ceres::internal { + +TEST(CudaStreamedBufferTest, IntegerCopy) { + // Offsets and sizes of batches supplied to callback + std::vector> batches; + const int kMaxTemporaryArraySize = 16; + const int kInputSize = kMaxTemporaryArraySize * 7 + 3; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + + std::vector inputs(kInputSize); + std::vector outputs(kInputSize, -1); + std::iota(inputs.begin(), inputs.end(), 0); + + CudaStreamedBuffer streamed_buffer(&context, kMaxTemporaryArraySize); + streamed_buffer.CopyToGpu(inputs.data(), + kInputSize, + [&outputs, &batches](const int* device_pointer, + int size, + int offset, + cudaStream_t stream) { + batches.emplace_back(offset, size); + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(outputs.data() + offset, + device_pointer, + sizeof(int) * size, + cudaMemcpyDeviceToHost, + stream)); + }); + // All operations in all streams should be completed when CopyToGpu returns + // control to the callee + for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) { + CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i])); + } + + // Check if every element was visited + for (int i = 0; i < kInputSize; ++i) { + 
CHECK_EQ(outputs[i], i); + } + + // Check if there is no overlap between batches + std::sort(batches.begin(), batches.end()); + const int num_batches = batches.size(); + for (int i = 0; i < num_batches; ++i) { + const auto [begin, size] = batches[i]; + const int end = begin + size; + CHECK_GE(begin, 0); + CHECK_LT(begin, kInputSize); + + CHECK_GT(size, 0); + CHECK_LE(end, kInputSize); + + if (i + 1 == num_batches) continue; + CHECK_EQ(end, batches[i + 1].first); + } +} + +TEST(CudaStreamedBufferTest, IntegerNoCopy) { + // Offsets and sizes of batches supplied to callback + std::vector> batches; + const int kMaxTemporaryArraySize = 16; + const int kInputSize = kMaxTemporaryArraySize * 7 + 3; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + + int* inputs; + int* outputs; + CHECK_EQ(cudaSuccess, + cudaHostAlloc( + &inputs, sizeof(int) * kInputSize, cudaHostAllocWriteCombined)); + CHECK_EQ( + cudaSuccess, + cudaHostAlloc(&outputs, sizeof(int) * kInputSize, cudaHostAllocDefault)); + + std::fill(outputs, outputs + kInputSize, -1); + std::iota(inputs, inputs + kInputSize, 0); + + CudaStreamedBuffer streamed_buffer(&context, kMaxTemporaryArraySize); + streamed_buffer.CopyToGpu(inputs, + kInputSize, + [outputs, &batches](const int* device_pointer, + int size, + int offset, + cudaStream_t stream) { + batches.emplace_back(offset, size); + CHECK_EQ(cudaSuccess, + cudaMemcpyAsync(outputs + offset, + device_pointer, + sizeof(int) * size, + cudaMemcpyDeviceToHost, + stream)); + }); + // All operations in all streams should be completed when CopyToGpu returns + // control to the callee + for (int i = 0; i < ContextImpl::kNumCudaStreams; ++i) { + CHECK_EQ(cudaSuccess, cudaStreamQuery(context.streams_[i])); + } + + // Check if every element was visited + for (int i = 0; i < kInputSize; ++i) { + CHECK_EQ(outputs[i], i); + } + + // Check if there is no overlap between batches + std::sort(batches.begin(), 
batches.end()); + const int num_batches = batches.size(); + for (int i = 0; i < num_batches; ++i) { + const auto [begin, size] = batches[i]; + const int end = begin + size; + CHECK_GE(begin, 0); + CHECK_LT(begin, kInputSize); + + CHECK_GT(size, 0); + CHECK_LE(end, kInputSize); + + if (i + 1 == num_batches) continue; + CHECK_EQ(end, batches[i + 1].first); + } + + CHECK_EQ(cudaSuccess, cudaFreeHost(inputs)); + CHECK_EQ(cudaSuccess, cudaFreeHost(outputs)); +} + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_vector.cc b/extern/ceres/internal/ceres/cuda_vector.cc new file mode 100644 index 00000000000..08217b28286 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_vector.cc @@ -0,0 +1,185 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A simple CUDA vector class. + +// This include must come before any #ifndef check on Ceres compile options. +// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include + +#include "ceres/context_impl.h" +#include "ceres/internal/export.h" +#include "ceres/types.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels_vector_ops.h" +#include "ceres/cuda_vector.h" +#include "cublas_v2.h" + +namespace ceres::internal { + +CudaVector::CudaVector(ContextImpl* context, int size) + : context_(context), data_(context, size) { + DCHECK_NE(context, nullptr); + DCHECK(context->IsCudaInitialized()); + Resize(size); +} + +CudaVector::CudaVector(CudaVector&& other) + : num_rows_(other.num_rows_), + context_(other.context_), + data_(std::move(other.data_)), + descr_(other.descr_) { + other.num_rows_ = 0; + other.descr_ = nullptr; +} + +CudaVector& CudaVector::operator=(const CudaVector& other) { + if (this != &other) { + Resize(other.num_rows()); + data_.CopyFromGPUArray(other.data_.data(), num_rows_); + } + return *this; +} + +void CudaVector::DestroyDescriptor() { + if (descr_ != nullptr) { + CHECK_EQ(cusparseDestroyDnVec(descr_), CUSPARSE_STATUS_SUCCESS); + descr_ = nullptr; + } +} + +CudaVector::~CudaVector() { DestroyDescriptor(); } + +void CudaVector::Resize(int size) { + data_.Reserve(size); + num_rows_ = size; + 
DestroyDescriptor(); + CHECK_EQ(cusparseCreateDnVec(&descr_, num_rows_, data_.data(), CUDA_R_64F), + CUSPARSE_STATUS_SUCCESS); +} + +double CudaVector::Dot(const CudaVector& x) const { + double result = 0; + CHECK_EQ(cublasDdot(context_->cublas_handle_, + num_rows_, + data_.data(), + 1, + x.data(), + 1, + &result), + CUBLAS_STATUS_SUCCESS) + << "CuBLAS cublasDdot failed."; + return result; +} + +double CudaVector::Norm() const { + double result = 0; + CHECK_EQ(cublasDnrm2( + context_->cublas_handle_, num_rows_, data_.data(), 1, &result), + CUBLAS_STATUS_SUCCESS) + << "CuBLAS cublasDnrm2 failed."; + return result; +} + +void CudaVector::CopyFromCpu(const double* x) { + data_.CopyFromCpu(x, num_rows_); +} + +void CudaVector::CopyFromCpu(const Vector& x) { + if (x.rows() != num_rows_) { + Resize(x.rows()); + } + CopyFromCpu(x.data()); +} + +void CudaVector::CopyTo(Vector* x) const { + CHECK(x != nullptr); + x->resize(num_rows_); + data_.CopyToCpu(x->data(), num_rows_); +} + +void CudaVector::CopyTo(double* x) const { + CHECK(x != nullptr); + data_.CopyToCpu(x, num_rows_); +} + +void CudaVector::SetZero() { + // Allow empty vector to be zeroed + if (num_rows_ == 0) return; + CHECK(data_.data() != nullptr); + CudaSetZeroFP64(data_.data(), num_rows_, context_->DefaultStream()); +} + +void CudaVector::Axpby(double a, const CudaVector& x, double b) { + if (&x == this) { + Scale(a + b); + return; + } + CHECK_EQ(num_rows_, x.num_rows_); + if (b != 1.0) { + // First scale y by b. + CHECK_EQ( + cublasDscal(context_->cublas_handle_, num_rows_, &b, data_.data(), 1), + CUBLAS_STATUS_SUCCESS) + << "CuBLAS cublasDscal failed."; + } + // Then add a * x to y. 
+ CHECK_EQ(cublasDaxpy(context_->cublas_handle_, + num_rows_, + &a, + x.data(), + 1, + data_.data(), + 1), + CUBLAS_STATUS_SUCCESS) + << "CuBLAS cublasDaxpy failed."; +} + +void CudaVector::DtDxpy(const CudaVector& D, const CudaVector& x) { + CudaDtDxpy( + data_.data(), D.data(), x.data(), num_rows_, context_->DefaultStream()); +} + +void CudaVector::Scale(double s) { + CHECK_EQ( + cublasDscal(context_->cublas_handle_, num_rows_, &s, data_.data(), 1), + CUBLAS_STATUS_SUCCESS) + << "CuBLAS cublasDscal failed."; +} + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA diff --git a/extern/ceres/internal/ceres/cuda_vector.h b/extern/ceres/internal/ceres/cuda_vector.h new file mode 100644 index 00000000000..8db5649fe8b --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_vector.h @@ -0,0 +1,193 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) +// +// A simple CUDA vector class. + +#ifndef CERES_INTERNAL_CUDA_VECTOR_H_ +#define CERES_INTERNAL_CUDA_VECTOR_H_ + +// This include must come before any #ifndef check on Ceres compile options. +// clang-format off +#include "ceres/internal/config.h" +// clang-format on + +#include + +#include +#include + +#include "ceres/context_impl.h" +#include "ceres/internal/export.h" +#include "ceres/types.h" + +#ifndef CERES_NO_CUDA + +#include "ceres/cuda_buffer.h" +#include "ceres/cuda_kernels_vector_ops.h" +#include "ceres/internal/eigen.h" +#include "cublas_v2.h" +#include "cusparse.h" + +namespace ceres::internal { + +// An Nx1 vector, denoted y hosted on the GPU, with CUDA-accelerated operations. +class CERES_NO_EXPORT CudaVector { + public: + // Create a pre-allocated vector of size N and return a pointer to it. The + // caller must ensure that InitCuda() has already been successfully called on + // context before calling this method. + CudaVector(ContextImpl* context, int size); + + CudaVector(CudaVector&& other); + + ~CudaVector(); + + void Resize(int size); + + // Perform a deep copy of the vector. + CudaVector& operator=(const CudaVector&); + + // Return the inner product x' * y. + double Dot(const CudaVector& x) const; + + // Return the L2 norm of the vector (||y||_2). + double Norm() const; + + // Set all elements to zero. + void SetZero(); + + // Copy from Eigen vector. 
+ void CopyFromCpu(const Vector& x); + + // Copy from CPU memory array. + void CopyFromCpu(const double* x); + + // Copy to Eigen vector. + void CopyTo(Vector* x) const; + + // Copy to CPU memory array. It is the caller's responsibility to ensure + // that the array is large enough. + void CopyTo(double* x) const; + + // y = a * x + b * y. + void Axpby(double a, const CudaVector& x, double b); + + // y = diag(d)' * diag(d) * x + y. + void DtDxpy(const CudaVector& D, const CudaVector& x); + + // y = s * y. + void Scale(double s); + + int num_rows() const { return num_rows_; } + int num_cols() const { return 1; } + + const double* data() const { return data_.data(); } + double* mutable_data() { return data_.data(); } + + const cusparseDnVecDescr_t& descr() const { return descr_; } + + private: + CudaVector(const CudaVector&) = delete; + void DestroyDescriptor(); + + int num_rows_ = 0; + ContextImpl* context_ = nullptr; + CudaBuffer data_; + // CuSparse object that describes this dense vector. + cusparseDnVecDescr_t descr_ = nullptr; +}; + +// Blas1 operations on Cuda vectors. These functions are needed as an +// abstraction layer so that we can use different versions of a vector style +// object in the conjugate gradients linear solver. +// Context and num_threads arguments are not used by CUDA implementation, +// context embedded into CudaVector is used instead. 
+inline double Norm(const CudaVector& x, + ContextImpl* context = nullptr, + int num_threads = 1) { + (void)context; + (void)num_threads; + return x.Norm(); +} +inline void SetZero(CudaVector& x, + ContextImpl* context = nullptr, + int num_threads = 1) { + (void)context; + (void)num_threads; + x.SetZero(); +} +inline void Axpby(double a, + const CudaVector& x, + double b, + const CudaVector& y, + CudaVector& z, + ContextImpl* context = nullptr, + int num_threads = 1) { + (void)context; + (void)num_threads; + if (&x == &y && &y == &z) { + // z = (a + b) * z; + z.Scale(a + b); + } else if (&x == &z) { + // x is aliased to z. + // z = x + // = b * y + a * x; + z.Axpby(b, y, a); + } else if (&y == &z) { + // y is aliased to z. + // z = y = a * x + b * y; + z.Axpby(a, x, b); + } else { + // General case: z aliases neither x nor y (x may alias y). + z = y; + z.Axpby(a, x, b); + } +} +inline double Dot(const CudaVector& x, + const CudaVector& y, + ContextImpl* context = nullptr, + int num_threads = 1) { + (void)context; + (void)num_threads; + return x.Dot(y); +} +inline void Copy(const CudaVector& from, + CudaVector& to, + ContextImpl* context = nullptr, + int num_threads = 1) { + (void)context; + (void)num_threads; + to = from; +} + +} // namespace ceres::internal + +#endif // CERES_NO_CUDA +#endif // CERES_INTERNAL_CUDA_VECTOR_H_ diff --git a/extern/ceres/internal/ceres/cuda_vector_test.cc b/extern/ceres/internal/ceres/cuda_vector_test.cc new file mode 100644 index 00000000000..8dcb4b7b976 --- /dev/null +++ b/extern/ceres/internal/ceres/cuda_vector_test.cc @@ -0,0 +1,423 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. 
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/cuda_vector.h" + +#include + +#include "ceres/internal/config.h" +#include "ceres/internal/eigen.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace ceres { +namespace internal { + +#ifndef CERES_NO_CUDA + +TEST(CudaVector, Creation) { + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x(&context, 1000); + EXPECT_EQ(x.num_rows(), 1000); + EXPECT_NE(x.data(), nullptr); +} + +TEST(CudaVector, CopyVector) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector y(&context, 10); + y.CopyFromCpu(x); + EXPECT_EQ(y.num_rows(), 3); + + Vector z(3); + z << 0, 0, 0; + y.CopyTo(&z); + EXPECT_EQ(x, z); +} + +TEST(CudaVector, Move) { + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector y(&context, 10); + const auto y_data = y.data(); + const auto y_descr = y.descr(); + EXPECT_EQ(y.num_rows(), 10); + CudaVector z(std::move(y)); + EXPECT_EQ(y.data(), nullptr); + EXPECT_EQ(y.descr(), nullptr); + EXPECT_EQ(y.num_rows(), 0); + + EXPECT_EQ(z.data(), y_data); + EXPECT_EQ(z.descr(), y_descr); +} + +TEST(CudaVector, DeepCopy) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 3); + x_gpu.CopyFromCpu(x); + + CudaVector y_gpu(&context, 3); + y_gpu.SetZero(); + EXPECT_EQ(y_gpu.Norm(), 0.0); + + y_gpu = x_gpu; + Vector y(3); + y << 0, 0, 0; + y_gpu.CopyTo(&y); + EXPECT_EQ(x, y); +} + +TEST(CudaVector, Dot) { + Vector x(3); + Vector y(3); + x << 1, 2, 3; + y << 100, 10, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed 
because: " << message; + CudaVector x_gpu(&context, 10); + CudaVector y_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + EXPECT_EQ(x_gpu.Dot(y_gpu), 123.0); + EXPECT_EQ(Dot(x_gpu, y_gpu), 123.0); +} + +TEST(CudaVector, Norm) { + Vector x(3); + x << 1, 2, 3; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + EXPECT_NEAR(x_gpu.Norm(), + sqrt(1.0 + 4.0 + 9.0), + std::numeric_limits::epsilon()); + + EXPECT_NEAR(Norm(x_gpu), + sqrt(1.0 + 4.0 + 9.0), + std::numeric_limits::epsilon()); +} + +TEST(CudaVector, SetZero) { + Vector x(4); + x << 1, 1, 1, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits::epsilon()); + + x_gpu.SetZero(); + EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits::epsilon()); + + x_gpu.CopyFromCpu(x); + EXPECT_NEAR(x_gpu.Norm(), 2.0, std::numeric_limits::epsilon()); + SetZero(x_gpu); + EXPECT_NEAR(x_gpu.Norm(), 0.0, std::numeric_limits::epsilon()); +} + +TEST(CudaVector, Resize) { + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 10); + EXPECT_EQ(x_gpu.num_rows(), 10); + x_gpu.Resize(4); + EXPECT_EQ(x_gpu.num_rows(), 4); +} + +TEST(CudaVector, Axpy) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, 
expected); +} + +TEST(CudaVector, AxpbyBEquals1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberFunctionBNotEqual1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 3.0); + Vector result; + Vector expected(4); + expected << 203, 23, 5, 3; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberFunctionBEqual1) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + x_gpu.Axpby(2.0, y_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 201, 21, 3, 1; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyMemberXAliasesY) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.SetZero(); + + x_gpu.Axpby(2.0, x_gpu, 1.0); + Vector result; + Vector expected(4); + expected << 300, 30, 3, 0; + 
x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodNoAliases) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + CudaVector z_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + z_gpu.Resize(4); + z_gpu.SetZero(); + + Axpby(2.0, x_gpu, 3.0, y_gpu, z_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + z_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesY) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector z_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + z_gpu.SetZero(); + + Axpby(2.0, x_gpu, 3.0, x_gpu, z_gpu); + Vector result; + Vector expected(4); + expected << 500, 50, 5, 0; + z_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesZ) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 10); + CudaVector y_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + Axpby(2.0, x_gpu, 3.0, y_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodYAliasesZ) { + Vector x(4); + Vector y(4); + x << 1, 1, 1, 1; + y << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector 
y_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + + Axpby(2.0, x_gpu, 3.0, y_gpu, y_gpu); + Vector result; + Vector expected(4); + expected << 302, 32, 5, 2; + y_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, AxpbyNonMemberMethodXAliasesYAliasesZ) { + Vector x(4); + x << 100, 10, 1, 0; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 10); + x_gpu.CopyFromCpu(x); + + Axpby(2.0, x_gpu, 3.0, x_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 500, 50, 5, 0; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, DtDxpy) { + Vector x(4); + Vector y(4); + Vector D(4); + x << 1, 2, 3, 4; + y << 100, 10, 1, 0; + D << 4, 3, 2, 1; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + CudaVector y_gpu(&context, 4); + CudaVector D_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + y_gpu.CopyFromCpu(y); + D_gpu.CopyFromCpu(D); + + y_gpu.DtDxpy(D_gpu, x_gpu); + Vector result; + Vector expected(4); + expected << 116, 28, 13, 4; + y_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +TEST(CudaVector, Scale) { + Vector x(4); + x << 1, 2, 3, 4; + ContextImpl context; + std::string message; + CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message; + CudaVector x_gpu(&context, 4); + x_gpu.CopyFromCpu(x); + + x_gpu.Scale(-3.0); + + Vector result; + Vector expected(4); + expected << -3.0, -6.0, -9.0, -12.0; + x_gpu.CopyTo(&result); + EXPECT_EQ(result, expected); +} + +#endif // CERES_NO_CUDA + +} // namespace internal +} // namespace ceres diff --git a/extern/ceres/internal/ceres/cxsparse.cc b/extern/ceres/internal/ceres/cxsparse.cc deleted file mode 100644 index b1eb2055e35..00000000000 --- a/extern/ceres/internal/ceres/cxsparse.cc +++ /dev/null @@ -1,284 +0,0 @@ -// 
Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: strandmark@google.com (Petter Strandmark) - -// This include must come before any #ifndef check on Ceres compile options. 
-#include "ceres/internal/config.h" - -#ifndef CERES_NO_CXSPARSE - -#include -#include -#include - -#include "ceres/compressed_col_sparse_matrix_utils.h" -#include "ceres/compressed_row_sparse_matrix.h" -#include "ceres/cxsparse.h" -#include "ceres/triplet_sparse_matrix.h" -#include "glog/logging.h" - -namespace ceres { -namespace internal { - -using std::vector; - -CXSparse::CXSparse() : scratch_(nullptr), scratch_size_(0) {} - -CXSparse::~CXSparse() { - if (scratch_size_ > 0) { - cs_di_free(scratch_); - } -} - -csn* CXSparse::Cholesky(cs_di* A, cs_dis* symbolic_factor) { - return cs_di_chol(A, symbolic_factor); -} - -void CXSparse::Solve(cs_dis* symbolic_factor, csn* numeric_factor, double* b) { - // Make sure we have enough scratch space available. - const int num_cols = numeric_factor->L->n; - if (scratch_size_ < num_cols) { - if (scratch_size_ > 0) { - cs_di_free(scratch_); - } - scratch_ = - reinterpret_cast(cs_di_malloc(num_cols, sizeof(CS_ENTRY))); - scratch_size_ = num_cols; - } - - // When the Cholesky factor succeeded, these methods are - // guaranteed to succeeded as well. In the comments below, "x" - // refers to the scratch space. - // - // Set x = P * b. - CHECK(cs_di_ipvec(symbolic_factor->pinv, b, scratch_, num_cols)); - // Set x = L \ x. - CHECK(cs_di_lsolve(numeric_factor->L, scratch_)); - // Set x = L' \ x. - CHECK(cs_di_ltsolve(numeric_factor->L, scratch_)); - // Set b = P' * x. - CHECK(cs_di_pvec(symbolic_factor->pinv, scratch_, b, num_cols)); -} - -bool CXSparse::SolveCholesky(cs_di* lhs, double* rhs_and_solution) { - return cs_cholsol(1, lhs, rhs_and_solution); -} - -cs_dis* CXSparse::AnalyzeCholesky(cs_di* A) { - // order = 1 for Cholesky factor. - return cs_schol(1, A); -} - -cs_dis* CXSparse::AnalyzeCholeskyWithNaturalOrdering(cs_di* A) { - // order = 0 for Natural ordering. 
- return cs_schol(0, A); -} - -cs_dis* CXSparse::BlockAnalyzeCholesky(cs_di* A, - const vector& row_blocks, - const vector& col_blocks) { - const int num_row_blocks = row_blocks.size(); - const int num_col_blocks = col_blocks.size(); - - vector block_rows; - vector block_cols; - CompressedColumnScalarMatrixToBlockMatrix( - A->i, A->p, row_blocks, col_blocks, &block_rows, &block_cols); - cs_di block_matrix; - block_matrix.m = num_row_blocks; - block_matrix.n = num_col_blocks; - block_matrix.nz = -1; - block_matrix.nzmax = block_rows.size(); - block_matrix.p = &block_cols[0]; - block_matrix.i = &block_rows[0]; - block_matrix.x = nullptr; - - int* ordering = cs_amd(1, &block_matrix); - vector block_ordering(num_row_blocks, -1); - std::copy(ordering, ordering + num_row_blocks, &block_ordering[0]); - cs_free(ordering); - - vector scalar_ordering; - BlockOrderingToScalarOrdering(row_blocks, block_ordering, &scalar_ordering); - - auto* symbolic_factor = - reinterpret_cast(cs_calloc(1, sizeof(cs_dis))); - symbolic_factor->pinv = cs_pinv(&scalar_ordering[0], A->n); - cs* permuted_A = cs_symperm(A, symbolic_factor->pinv, 0); - - symbolic_factor->parent = cs_etree(permuted_A, 0); - int* postordering = cs_post(symbolic_factor->parent, A->n); - int* column_counts = - cs_counts(permuted_A, symbolic_factor->parent, postordering, 0); - cs_free(postordering); - cs_spfree(permuted_A); - - symbolic_factor->cp = static_cast(cs_malloc(A->n + 1, sizeof(int))); - symbolic_factor->lnz = cs_cumsum(symbolic_factor->cp, column_counts, A->n); - symbolic_factor->unz = symbolic_factor->lnz; - - cs_free(column_counts); - - if (symbolic_factor->lnz < 0) { - cs_sfree(symbolic_factor); - symbolic_factor = nullptr; - } - - return symbolic_factor; -} - -cs_di CXSparse::CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A) { - cs_di At; - At.m = A->num_cols(); - At.n = A->num_rows(); - At.nz = -1; - At.nzmax = A->num_nonzeros(); - At.p = A->mutable_rows(); - At.i = A->mutable_cols(); - At.x = 
A->mutable_values(); - return At; -} - -cs_di* CXSparse::CreateSparseMatrix(TripletSparseMatrix* tsm) { - cs_di_sparse tsm_wrapper; - tsm_wrapper.nzmax = tsm->num_nonzeros(); - tsm_wrapper.nz = tsm->num_nonzeros(); - tsm_wrapper.m = tsm->num_rows(); - tsm_wrapper.n = tsm->num_cols(); - tsm_wrapper.p = tsm->mutable_cols(); - tsm_wrapper.i = tsm->mutable_rows(); - tsm_wrapper.x = tsm->mutable_values(); - - return cs_compress(&tsm_wrapper); -} - -void CXSparse::ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering) { - int* cs_ordering = cs_amd(1, A); - std::copy(cs_ordering, cs_ordering + A->m, ordering); - cs_free(cs_ordering); -} - -cs_di* CXSparse::TransposeMatrix(cs_di* A) { return cs_di_transpose(A, 1); } - -cs_di* CXSparse::MatrixMatrixMultiply(cs_di* A, cs_di* B) { - return cs_di_multiply(A, B); -} - -void CXSparse::Free(cs_di* sparse_matrix) { cs_di_spfree(sparse_matrix); } - -void CXSparse::Free(cs_dis* symbolic_factor) { cs_di_sfree(symbolic_factor); } - -void CXSparse::Free(csn* numeric_factor) { cs_di_nfree(numeric_factor); } - -std::unique_ptr CXSparseCholesky::Create( - const OrderingType ordering_type) { - return std::unique_ptr(new CXSparseCholesky(ordering_type)); -} - -CompressedRowSparseMatrix::StorageType CXSparseCholesky::StorageType() const { - return CompressedRowSparseMatrix::LOWER_TRIANGULAR; -} - -CXSparseCholesky::CXSparseCholesky(const OrderingType ordering_type) - : ordering_type_(ordering_type), - symbolic_factor_(nullptr), - numeric_factor_(nullptr) {} - -CXSparseCholesky::~CXSparseCholesky() { - FreeSymbolicFactorization(); - FreeNumericFactorization(); -} - -LinearSolverTerminationType CXSparseCholesky::Factorize( - CompressedRowSparseMatrix* lhs, std::string* message) { - CHECK_EQ(lhs->storage_type(), StorageType()); - if (lhs == nullptr) { - *message = "Failure: Input lhs is nullptr."; - return LINEAR_SOLVER_FATAL_ERROR; - } - - cs_di cs_lhs = cs_.CreateSparseMatrixTransposeView(lhs); - - if (symbolic_factor_ == nullptr) { - if 
(ordering_type_ == NATURAL) { - symbolic_factor_ = cs_.AnalyzeCholeskyWithNaturalOrdering(&cs_lhs); - } else { - if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) { - symbolic_factor_ = cs_.BlockAnalyzeCholesky( - &cs_lhs, lhs->col_blocks(), lhs->row_blocks()); - } else { - symbolic_factor_ = cs_.AnalyzeCholesky(&cs_lhs); - } - } - - if (symbolic_factor_ == nullptr) { - *message = "CXSparse Failure : Symbolic factorization failed."; - return LINEAR_SOLVER_FATAL_ERROR; - } - } - - FreeNumericFactorization(); - numeric_factor_ = cs_.Cholesky(&cs_lhs, symbolic_factor_); - if (numeric_factor_ == nullptr) { - *message = "CXSparse Failure : Numeric factorization failed."; - return LINEAR_SOLVER_FAILURE; - } - - return LINEAR_SOLVER_SUCCESS; -} - -LinearSolverTerminationType CXSparseCholesky::Solve(const double* rhs, - double* solution, - std::string* message) { - CHECK(numeric_factor_ != nullptr) - << "Solve called without a call to Factorize first."; - const int num_cols = numeric_factor_->L->n; - memcpy(solution, rhs, num_cols * sizeof(*solution)); - cs_.Solve(symbolic_factor_, numeric_factor_, solution); - return LINEAR_SOLVER_SUCCESS; -} - -void CXSparseCholesky::FreeSymbolicFactorization() { - if (symbolic_factor_ != nullptr) { - cs_.Free(symbolic_factor_); - symbolic_factor_ = nullptr; - } -} - -void CXSparseCholesky::FreeNumericFactorization() { - if (numeric_factor_ != nullptr) { - cs_.Free(numeric_factor_); - numeric_factor_ = nullptr; - } -} - -} // namespace internal -} // namespace ceres - -#endif // CERES_NO_CXSPARSE diff --git a/extern/ceres/internal/ceres/cxsparse.h b/extern/ceres/internal/ceres/cxsparse.h deleted file mode 100644 index 97fc0459464..00000000000 --- a/extern/ceres/internal/ceres/cxsparse.h +++ /dev/null @@ -1,182 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. 
-// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: strandmark@google.com (Petter Strandmark) - -#ifndef CERES_INTERNAL_CXSPARSE_H_ -#define CERES_INTERNAL_CXSPARSE_H_ - -// This include must come before any #ifndef check on Ceres compile options. 
-#include "ceres/internal/config.h" - -#ifndef CERES_NO_CXSPARSE - -#include -#include -#include - -#include "ceres/internal/disable_warnings.h" -#include "ceres/linear_solver.h" -#include "ceres/sparse_cholesky.h" -#include "cs.h" - -namespace ceres { -namespace internal { - -class CompressedRowSparseMatrix; -class TripletSparseMatrix; - -// This object provides access to solving linear systems using Cholesky -// factorization with a known symbolic factorization. This features does not -// explicitly exist in CXSparse. The methods in the class are nonstatic because -// the class manages internal scratch space. -class CERES_NO_EXPORT CXSparse { - public: - CXSparse(); - ~CXSparse(); - - // Solve the system lhs * solution = rhs in place by using an - // approximate minimum degree fill reducing ordering. - bool SolveCholesky(cs_di* lhs, double* rhs_and_solution); - - // Solves a linear system given its symbolic and numeric factorization. - void Solve(cs_dis* symbolic_factor, - csn* numeric_factor, - double* rhs_and_solution); - - // Compute the numeric Cholesky factorization of A, given its - // symbolic factorization. - // - // Caller owns the result. - csn* Cholesky(cs_di* A, cs_dis* symbolic_factor); - - // Creates a sparse matrix from a compressed-column form. No memory is - // allocated or copied; the structure A is filled out with info from the - // argument. - cs_di CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A); - - // Creates a new matrix from a triplet form. Deallocate the returned matrix - // with Free. May return nullptr if the compression or allocation fails. - cs_di* CreateSparseMatrix(TripletSparseMatrix* A); - - // B = A' - // - // The returned matrix should be deallocated with Free when not used - // anymore. - cs_di* TransposeMatrix(cs_di* A); - - // C = A * B - // - // The returned matrix should be deallocated with Free when not used - // anymore. 
- cs_di* MatrixMatrixMultiply(cs_di* A, cs_di* B); - - // Computes a symbolic factorization of A that can be used in SolveCholesky. - // - // The returned matrix should be deallocated with Free when not used anymore. - cs_dis* AnalyzeCholesky(cs_di* A); - - // Computes a symbolic factorization of A that can be used in - // SolveCholesky, but does not compute a fill-reducing ordering. - // - // The returned matrix should be deallocated with Free when not used anymore. - cs_dis* AnalyzeCholeskyWithNaturalOrdering(cs_di* A); - - // Computes a symbolic factorization of A that can be used in - // SolveCholesky. The difference from AnalyzeCholesky is that this - // function first detects the block sparsity of the matrix using - // information about the row and column blocks and uses this block - // sparse matrix to find a fill-reducing ordering. This ordering is - // then used to find a symbolic factorization. This can result in a - // significant performance improvement AnalyzeCholesky on block - // sparse matrices. - // - // The returned matrix should be deallocated with Free when not used - // anymore. - cs_dis* BlockAnalyzeCholesky(cs_di* A, - const std::vector& row_blocks, - const std::vector& col_blocks); - - // Compute an fill-reducing approximate minimum degree ordering of - // the matrix A. ordering should be non-nullptr and should point to - // enough memory to hold the ordering for the rows of A. - void ApproximateMinimumDegreeOrdering(cs_di* A, int* ordering); - - void Free(cs_di* sparse_matrix); - void Free(cs_dis* symbolic_factorization); - void Free(csn* numeric_factorization); - - private: - // Cached scratch space - CS_ENTRY* scratch_; - int scratch_size_; -}; - -// An implementation of SparseCholesky interface using the CXSparse -// library. -class CERES_NO_EXPORT CXSparseCholesky final : public SparseCholesky { - public: - // Factory - static std::unique_ptr Create(OrderingType ordering_type); - - // SparseCholesky interface. 
- ~CXSparseCholesky() override; - CompressedRowSparseMatrix::StorageType StorageType() const final; - LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs, - std::string* message) final; - LinearSolverTerminationType Solve(const double* rhs, - double* solution, - std::string* message) final; - - private: - explicit CXSparseCholesky(const OrderingType ordering_type); - void FreeSymbolicFactorization(); - void FreeNumericFactorization(); - - const OrderingType ordering_type_; - CXSparse cs_; - cs_dis* symbolic_factor_; - csn* numeric_factor_; -}; - -} // namespace internal -} // namespace ceres - -#include "ceres/internal/reenable_warnings.h" - -#else - -typedef void cs_dis; - -class CXSparse { - public: - void Free(void* arg) {} -}; -#endif // CERES_NO_CXSPARSE - -#endif // CERES_INTERNAL_CXSPARSE_H_ diff --git a/extern/ceres/internal/ceres/dense_cholesky.cc b/extern/ceres/internal/ceres/dense_cholesky.cc index 0e0bba7873b..5a3e7e2cad1 100644 --- a/extern/ceres/internal/ceres/dense_cholesky.cc +++ b/extern/ceres/internal/ceres/dense_cholesky.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,12 +33,15 @@ #include #include #include +#include #include #include "ceres/internal/config.h" +#include "ceres/iterative_refiner.h" #ifndef CERES_NO_CUDA #include "ceres/context_impl.h" +#include "ceres/cuda_kernels_vector_ops.h" #include "cuda_runtime.h" #include "cusolverDn.h" #endif // CERES_NO_CUDA @@ -57,10 +60,21 @@ extern "C" void dpotrs_(const char* uplo, double* b, const int* ldb, int* info); + +extern "C" void spotrf_( + const char* uplo, const int* n, float* a, const int* lda, int* info); + +extern "C" void spotrs_(const char* uplo, + const int* n, + const int* nrhs, + const float* a, + const int* lda, + float* b, + const int* ldb, + int* info); #endif -namespace ceres { -namespace internal { +namespace ceres::internal { DenseCholesky::~DenseCholesky() = default; @@ -70,12 +84,22 @@ std::unique_ptr DenseCholesky::Create( switch (options.dense_linear_algebra_library_type) { case EIGEN: - dense_cholesky = std::make_unique(); + // Eigen mixed precision solver not yet implemented. + if (options.use_mixed_precision_solves) { + dense_cholesky = std::make_unique(); + } else { + dense_cholesky = std::make_unique(); + } break; case LAPACK: #ifndef CERES_NO_LAPACK - dense_cholesky = std::make_unique(); + // LAPACK mixed precision solver not yet implemented. 
+ if (options.use_mixed_precision_solves) { + dense_cholesky = std::make_unique(); + } else { + dense_cholesky = std::make_unique(); + } break; #else LOG(FATAL) << "Ceres was compiled without support for LAPACK."; @@ -83,7 +107,11 @@ std::unique_ptr DenseCholesky::Create( case CUDA: #ifndef CERES_NO_CUDA - dense_cholesky = CUDADenseCholesky::Create(options); + if (options.use_mixed_precision_solves) { + dense_cholesky = CUDADenseCholeskyMixedPrecision::Create(options); + } else { + dense_cholesky = CUDADenseCholesky::Create(options); + } break; #else LOG(FATAL) << "Ceres was compiled without support for CUDA."; @@ -94,6 +122,14 @@ std::unique_ptr DenseCholesky::Create( << DenseLinearAlgebraLibraryTypeToString( options.dense_linear_algebra_library_type); } + + if (options.max_num_refinement_iterations > 0) { + auto refiner = std::make_unique( + options.max_num_refinement_iterations); + dense_cholesky = std::make_unique( + std::move(dense_cholesky), std::move(refiner)); + } + return dense_cholesky; } @@ -105,7 +141,7 @@ LinearSolverTerminationType DenseCholesky::FactorAndSolve( std::string* message) { LinearSolverTerminationType termination_type = Factorize(num_cols, lhs, message); - if (termination_type == LINEAR_SOLVER_SUCCESS) { + if (termination_type == LinearSolverTerminationType::SUCCESS) { termination_type = Solve(rhs, solution, message); } return termination_type; @@ -117,11 +153,11 @@ LinearSolverTerminationType EigenDenseCholesky::Factorize( llt_ = std::make_unique(m); if (llt_->info() != Eigen::Success) { *message = "Eigen failure. 
Unable to perform dense Cholesky factorization."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } *message = "Success."; - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs, @@ -129,13 +165,41 @@ LinearSolverTerminationType EigenDenseCholesky::Solve(const double* rhs, std::string* message) { if (llt_->info() != Eigen::Success) { *message = "Eigen failure. Unable to perform dense Cholesky factorization."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } VectorRef(solution, llt_->cols()) = llt_->solve(ConstVectorRef(rhs, llt_->cols())); *message = "Success."; - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; +} + +LinearSolverTerminationType FloatEigenDenseCholesky::Factorize( + int num_cols, double* lhs, std::string* message) { + // TODO(sameeragarwal): Check if this causes a double allocation. + lhs_ = Eigen::Map(lhs, num_cols, num_cols).cast(); + llt_ = std::make_unique(lhs_); + if (llt_->info() != Eigen::Success) { + *message = "Eigen failure. Unable to perform dense Cholesky factorization."; + return LinearSolverTerminationType::FAILURE; + } + + *message = "Success."; + return LinearSolverTerminationType::SUCCESS; +} + +LinearSolverTerminationType FloatEigenDenseCholesky::Solve( + const double* rhs, double* solution, std::string* message) { + if (llt_->info() != Eigen::Success) { + *message = "Eigen failure. 
Unable to perform dense Cholesky factorization."; + return LinearSolverTerminationType::FAILURE; + } + + rhs_ = ConstVectorRef(rhs, llt_->cols()).cast(); + solution_ = llt_->solve(rhs_); + VectorRef(solution, llt_->cols()) = solution_.cast(); + *message = "Success."; + return LinearSolverTerminationType::SUCCESS; } #ifndef CERES_NO_LAPACK @@ -149,19 +213,19 @@ LinearSolverTerminationType LAPACKDenseCholesky::Factorize( dpotrf_(&uplo, &num_cols_, lhs_, &num_cols_, &info); if (info < 0) { - termination_type_ = LINEAR_SOLVER_FATAL_ERROR; + termination_type_ = LinearSolverTerminationType::FATAL_ERROR; LOG(FATAL) << "Congratulations, you found a bug in Ceres. " << "Please report it. " << "LAPACK::dpotrf fatal error. " << "Argument: " << -info << " is invalid."; } else if (info > 0) { - termination_type_ = LINEAR_SOLVER_FAILURE; + termination_type_ = LinearSolverTerminationType::FAILURE; *message = StringPrintf( "LAPACK::dpotrf numerical failure. " "The leading minor of order %d is not positive definite.", info); } else { - termination_type_ = LINEAR_SOLVER_SUCCESS; + termination_type_ = LinearSolverTerminationType::SUCCESS; *message = "Success."; } return termination_type_; @@ -174,12 +238,12 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs, const int nrhs = 1; int info = 0; - std::copy_n(rhs, num_cols_, solution); + VectorRef(solution, num_cols_) = ConstVectorRef(rhs, num_cols_); dpotrs_( &uplo, &num_cols_, &nrhs, lhs_, &num_cols_, solution, &num_cols_, &info); if (info < 0) { - termination_type_ = LINEAR_SOLVER_FATAL_ERROR; + termination_type_ = LinearSolverTerminationType::FATAL_ERROR; LOG(FATAL) << "Congratulations, you found a bug in Ceres. " << "Please report it. " << "LAPACK::dpotrs fatal error. 
" @@ -187,35 +251,118 @@ LinearSolverTerminationType LAPACKDenseCholesky::Solve(const double* rhs, } *message = "Success"; - termination_type_ = LINEAR_SOLVER_SUCCESS; + termination_type_ = LinearSolverTerminationType::SUCCESS; return termination_type_; } +LinearSolverTerminationType FloatLAPACKDenseCholesky::Factorize( + int num_cols, double* lhs, std::string* message) { + num_cols_ = num_cols; + lhs_ = Eigen::Map(lhs, num_cols, num_cols).cast(); + + const char uplo = 'L'; + int info = 0; + spotrf_(&uplo, &num_cols_, lhs_.data(), &num_cols_, &info); + + if (info < 0) { + termination_type_ = LinearSolverTerminationType::FATAL_ERROR; + LOG(FATAL) << "Congratulations, you found a bug in Ceres. " + << "Please report it. " + << "LAPACK::spotrf fatal error. " + << "Argument: " << -info << " is invalid."; + } else if (info > 0) { + termination_type_ = LinearSolverTerminationType::FAILURE; + *message = StringPrintf( + "LAPACK::spotrf numerical failure. " + "The leading minor of order %d is not positive definite.", + info); + } else { + termination_type_ = LinearSolverTerminationType::SUCCESS; + *message = "Success."; + } + return termination_type_; +} + +LinearSolverTerminationType FloatLAPACKDenseCholesky::Solve( + const double* rhs, double* solution, std::string* message) { + const char uplo = 'L'; + const int nrhs = 1; + int info = 0; + rhs_and_solution_ = ConstVectorRef(rhs, num_cols_).cast(); + spotrs_(&uplo, + &num_cols_, + &nrhs, + lhs_.data(), + &num_cols_, + rhs_and_solution_.data(), + &num_cols_, + &info); + + if (info < 0) { + termination_type_ = LinearSolverTerminationType::FATAL_ERROR; + LOG(FATAL) << "Congratulations, you found a bug in Ceres. " + << "Please report it. " + << "LAPACK::dpotrs fatal error. 
" + << "Argument: " << -info << " is invalid."; + } + + *message = "Success"; + termination_type_ = LinearSolverTerminationType::SUCCESS; + VectorRef(solution, num_cols_) = + rhs_and_solution_.head(num_cols_).cast(); + return termination_type_; +} + #endif // CERES_NO_LAPACK +RefinedDenseCholesky::RefinedDenseCholesky( + std::unique_ptr dense_cholesky, + std::unique_ptr iterative_refiner) + : dense_cholesky_(std::move(dense_cholesky)), + iterative_refiner_(std::move(iterative_refiner)) {} + +RefinedDenseCholesky::~RefinedDenseCholesky() = default; + +LinearSolverTerminationType RefinedDenseCholesky::Factorize( + const int num_cols, double* lhs, std::string* message) { + lhs_ = lhs; + num_cols_ = num_cols; + return dense_cholesky_->Factorize(num_cols, lhs, message); +} + +LinearSolverTerminationType RefinedDenseCholesky::Solve(const double* rhs, + double* solution, + std::string* message) { + CHECK(lhs_ != nullptr); + auto termination_type = dense_cholesky_->Solve(rhs, solution, message); + if (termination_type != LinearSolverTerminationType::SUCCESS) { + return termination_type; + } + + iterative_refiner_->Refine( + num_cols_, lhs_, rhs, dense_cholesky_.get(), solution); + return LinearSolverTerminationType::SUCCESS; +} + #ifndef CERES_NO_CUDA -bool CUDADenseCholesky::Init(ContextImpl* context, std::string* message) { - if (!context->InitCUDA(message)) { - return false; - } - cusolver_handle_ = context->cusolver_handle_; - stream_ = context->stream_; - error_.Reserve(1); - *message = "CUDADenseCholesky::Init Success."; - return true; -} +CUDADenseCholesky::CUDADenseCholesky(ContextImpl* context) + : context_(context), + lhs_{context}, + rhs_{context}, + device_workspace_{context}, + error_(context, 1) {} LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols, double* lhs, std::string* message) { - factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + factorize_result_ = LinearSolverTerminationType::FATAL_ERROR; 
lhs_.Reserve(num_cols * num_cols); num_cols_ = num_cols; - lhs_.CopyToGpuAsync(lhs, num_cols * num_cols, stream_); + lhs_.CopyFromCpu(lhs, num_cols * num_cols); int device_workspace_size = 0; - if (cusolverDnDpotrf_bufferSize(cusolver_handle_, + if (cusolverDnDpotrf_bufferSize(context_->cusolver_handle_, CUBLAS_FILL_MODE_LOWER, num_cols, lhs_.data(), @@ -223,10 +370,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols, &device_workspace_size) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDpotrf_bufferSize failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } device_workspace_.Reserve(device_workspace_size); - if (cusolverDnDpotrf(cusolver_handle_, + if (cusolverDnDpotrf(context_->cusolver_handle_, CUBLAS_FILL_MODE_LOWER, num_cols, lhs_.data(), @@ -235,15 +382,10 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols, device_workspace_.size(), error_.data()) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDpotrf failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; - } - if (cudaDeviceSynchronize() != cudaSuccess || - cudaStreamSynchronize(stream_) != cudaSuccess) { - *message = "Cuda device synchronization failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } int error = 0; - error_.CopyToHost(&error, 1); + error_.CopyToCpu(&error, 1); if (error < 0) { LOG(FATAL) << "Congratulations, you found a bug in Ceres - " << "please report it. " @@ -251,29 +393,29 @@ LinearSolverTerminationType CUDADenseCholesky::Factorize(int num_cols, << "Argument: " << -error << " is invalid."; // The following line is unreachable, but return failure just to be // pedantic, since the compiler does not know that. 
- return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } else if (error > 0) { *message = StringPrintf( "cuSolverDN::cusolverDnDpotrf numerical failure. " "The leading minor of order %d is not positive definite.", error); - factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FAILURE; - return LinearSolverTerminationType::LINEAR_SOLVER_FAILURE; + factorize_result_ = LinearSolverTerminationType::FAILURE; + return LinearSolverTerminationType::FAILURE; } *message = "Success"; - factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; - return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; + factorize_result_ = LinearSolverTerminationType::SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs, double* solution, std::string* message) { - if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) { - *message = "Factorize did not complete succesfully previously."; + if (factorize_result_ != LinearSolverTerminationType::SUCCESS) { + *message = "Factorize did not complete successfully previously."; return factorize_result_; } - rhs_.CopyToGpuAsync(rhs, num_cols_, stream_); - if (cusolverDnDpotrs(cusolver_handle_, + rhs_.CopyFromCpu(rhs, num_cols_); + if (cusolverDnDpotrs(context_->cusolver_handle_, CUBLAS_FILL_MODE_LOWER, num_cols_, 1, @@ -283,45 +425,221 @@ LinearSolverTerminationType CUDADenseCholesky::Solve(const double* rhs, num_cols_, error_.data()) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDpotrs failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; - } - if (cudaDeviceSynchronize() != cudaSuccess || - cudaStreamSynchronize(stream_) != cudaSuccess) { - *message = "Cuda device synchronization failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } int error = 0; - 
error_.CopyToHost(&error, 1); + error_.CopyToCpu(&error, 1); if (error != 0) { LOG(FATAL) << "Congratulations, you found a bug in Ceres. " << "Please report it." << "cuSolverDN::cusolverDnDpotrs fatal error. " << "Argument: " << -error << " is invalid."; } - rhs_.CopyToHost(solution, num_cols_); + rhs_.CopyToCpu(solution, num_cols_); *message = "Success"; - return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } std::unique_ptr CUDADenseCholesky::Create( const LinearSolver::Options& options) { - if (options.dense_linear_algebra_library_type != CUDA) { - // The user called the wrong factory method. + if (options.dense_linear_algebra_library_type != CUDA || + options.context == nullptr || !options.context->IsCudaInitialized()) { return nullptr; } - auto cuda_dense_cholesky = - std::unique_ptr(new CUDADenseCholesky()); - std::string cuda_error; - if (cuda_dense_cholesky->Init(options.context, &cuda_error)) { - return cuda_dense_cholesky; + return std::unique_ptr( + new CUDADenseCholesky(options.context)); +} + +std::unique_ptr +CUDADenseCholeskyMixedPrecision::Create(const LinearSolver::Options& options) { + if (options.dense_linear_algebra_library_type != CUDA || + !options.use_mixed_precision_solves || options.context == nullptr || + !options.context->IsCudaInitialized()) { + return nullptr; } - // Initialization failed, destroy the object (done automatically) and return a - // nullptr. 
- LOG(ERROR) << "CUDADenseCholesky::Init failed: " << cuda_error; - return nullptr; + return std::unique_ptr( + new CUDADenseCholeskyMixedPrecision( + options.context, options.max_num_refinement_iterations)); +} + +LinearSolverTerminationType +CUDADenseCholeskyMixedPrecision::CudaCholeskyFactorize(std::string* message) { + int device_workspace_size = 0; + if (cusolverDnSpotrf_bufferSize(context_->cusolver_handle_, + CUBLAS_FILL_MODE_LOWER, + num_cols_, + lhs_fp32_.data(), + num_cols_, + &device_workspace_size) != + CUSOLVER_STATUS_SUCCESS) { + *message = "cuSolverDN::cusolverDnSpotrf_bufferSize failed."; + return LinearSolverTerminationType::FATAL_ERROR; + } + device_workspace_.Reserve(device_workspace_size); + if (cusolverDnSpotrf(context_->cusolver_handle_, + CUBLAS_FILL_MODE_LOWER, + num_cols_, + lhs_fp32_.data(), + num_cols_, + device_workspace_.data(), + device_workspace_.size(), + error_.data()) != CUSOLVER_STATUS_SUCCESS) { + *message = "cuSolverDN::cusolverDnSpotrf failed."; + return LinearSolverTerminationType::FATAL_ERROR; + } + int error = 0; + error_.CopyToCpu(&error, 1); + if (error < 0) { + LOG(FATAL) << "Congratulations, you found a bug in Ceres - " + << "please report it. " + << "cuSolverDN::cusolverDnSpotrf fatal error. " + << "Argument: " << -error << " is invalid."; + // The following line is unreachable, but return failure just to be + // pedantic, since the compiler does not know that. + return LinearSolverTerminationType::FATAL_ERROR; + } + if (error > 0) { + *message = StringPrintf( + "cuSolverDN::cusolverDnSpotrf numerical failure. 
" + "The leading minor of order %d is not positive definite.", + error); + factorize_result_ = LinearSolverTerminationType::FAILURE; + return LinearSolverTerminationType::FAILURE; + } + *message = "Success"; + return LinearSolverTerminationType::SUCCESS; +} + +LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::CudaCholeskySolve( + std::string* message) { + CHECK_EQ(cudaMemcpyAsync(correction_fp32_.data(), + residual_fp32_.data(), + num_cols_ * sizeof(float), + cudaMemcpyDeviceToDevice, + context_->DefaultStream()), + cudaSuccess); + if (cusolverDnSpotrs(context_->cusolver_handle_, + CUBLAS_FILL_MODE_LOWER, + num_cols_, + 1, + lhs_fp32_.data(), + num_cols_, + correction_fp32_.data(), + num_cols_, + error_.data()) != CUSOLVER_STATUS_SUCCESS) { + *message = "cuSolverDN::cusolverDnDpotrs failed."; + return LinearSolverTerminationType::FATAL_ERROR; + } + int error = 0; + error_.CopyToCpu(&error, 1); + if (error != 0) { + LOG(FATAL) << "Congratulations, you found a bug in Ceres. " + << "Please report it." + << "cuSolverDN::cusolverDnDpotrs fatal error. " + << "Argument: " << -error << " is invalid."; + } + *message = "Success"; + return LinearSolverTerminationType::SUCCESS; +} + +CUDADenseCholeskyMixedPrecision::CUDADenseCholeskyMixedPrecision( + ContextImpl* context, int max_num_refinement_iterations) + : context_(context), + lhs_fp64_{context}, + rhs_fp64_{context}, + lhs_fp32_{context}, + device_workspace_{context}, + error_(context, 1), + x_fp64_{context}, + correction_fp32_{context}, + residual_fp32_{context}, + residual_fp64_{context}, + max_num_refinement_iterations_(max_num_refinement_iterations) {} + +LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Factorize( + int num_cols, double* lhs, std::string* message) { + num_cols_ = num_cols; + + // Copy fp64 version of lhs to GPU. + lhs_fp64_.Reserve(num_cols * num_cols); + lhs_fp64_.CopyFromCpu(lhs, num_cols * num_cols); + + // Create an fp32 copy of lhs, lhs_fp32. 
+ lhs_fp32_.Reserve(num_cols * num_cols); + CudaFP64ToFP32(lhs_fp64_.data(), + lhs_fp32_.data(), + num_cols * num_cols, + context_->DefaultStream()); + + // Factorize lhs_fp32. + factorize_result_ = CudaCholeskyFactorize(message); + return factorize_result_; +} + +LinearSolverTerminationType CUDADenseCholeskyMixedPrecision::Solve( + const double* rhs, double* solution, std::string* message) { + // If factorization failed, return failure. + if (factorize_result_ != LinearSolverTerminationType::SUCCESS) { + *message = "Factorize did not complete successfully previously."; + return factorize_result_; + } + + // Reserve memory for all arrays. + rhs_fp64_.Reserve(num_cols_); + x_fp64_.Reserve(num_cols_); + correction_fp32_.Reserve(num_cols_); + residual_fp32_.Reserve(num_cols_); + residual_fp64_.Reserve(num_cols_); + + // Initialize x = 0. + CudaSetZeroFP64(x_fp64_.data(), num_cols_, context_->DefaultStream()); + + // Initialize residual = rhs. + rhs_fp64_.CopyFromCpu(rhs, num_cols_); + residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_); + + for (int i = 0; i <= max_num_refinement_iterations_; ++i) { + // Cast residual from fp64 to fp32. + CudaFP64ToFP32(residual_fp64_.data(), + residual_fp32_.data(), + num_cols_, + context_->DefaultStream()); + // [fp32] c = lhs^-1 * residual. + auto result = CudaCholeskySolve(message); + if (result != LinearSolverTerminationType::SUCCESS) { + return result; + } + // [fp64] x += c. + CudaDsxpy(x_fp64_.data(), + correction_fp32_.data(), + num_cols_, + context_->DefaultStream()); + if (i < max_num_refinement_iterations_) { + // [fp64] residual = rhs - lhs * x + // This is done in two steps: + // 1. [fp64] residual = rhs + residual_fp64_.CopyFromGPUArray(rhs_fp64_.data(), num_cols_); + // 2. 
[fp64] residual = residual - lhs * x + double alpha = -1.0; + double beta = 1.0; + cublasDsymv(context_->cublas_handle_, + CUBLAS_FILL_MODE_LOWER, + num_cols_, + &alpha, + lhs_fp64_.data(), + num_cols_, + x_fp64_.data(), + 1, + &beta, + residual_fp64_.data(), + 1); + } + } + x_fp64_.CopyToCpu(solution, num_cols_); + *message = "Success."; + return LinearSolverTerminationType::SUCCESS; } #endif // CERES_NO_CUDA -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dense_cholesky.h b/extern/ceres/internal/ceres/dense_cholesky.h index 655a2f815ee..04a5dd558c5 100644 --- a/extern/ceres/internal/ceres/dense_cholesky.h +++ b/extern/ceres/internal/ceres/dense_cholesky.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ #include #include "Eigen/Dense" +#include "ceres/context_impl.h" #include "ceres/cuda_buffer.h" #include "ceres/linear_solver.h" #include "glog/logging.h" @@ -49,8 +50,7 @@ #include "cusolverDn.h" #endif // CERES_NO_CUDA -namespace ceres { -namespace internal { +namespace ceres::internal { // An interface that abstracts away the internal details of various dense linear // algebra libraries and offers a simple API for solving dense symmetric @@ -88,7 +88,7 @@ class CERES_NO_EXPORT DenseCholesky { std::string* message) = 0; // Convenience method which combines a call to Factorize and Solve. Solve is - // only called if Factorize returns LINEAR_SOLVER_SUCCESS. + // only called if Factorize returns LinearSolverTerminationType::SUCCESS. // // The input matrix lhs may be modified by the implementation to store the // factorization, irrespective of whether the method succeeds or not. 
It is @@ -115,6 +115,23 @@ class CERES_NO_EXPORT EigenDenseCholesky final : public DenseCholesky { std::unique_ptr llt_; }; +class CERES_NO_EXPORT FloatEigenDenseCholesky final : public DenseCholesky { + public: + LinearSolverTerminationType Factorize(int num_cols, + double* lhs, + std::string* message) override; + LinearSolverTerminationType Solve(const double* rhs, + double* solution, + std::string* message) override; + + private: + Eigen::MatrixXf lhs_; + Eigen::VectorXf rhs_; + Eigen::VectorXf solution_; + using LLTType = Eigen::LLT; + std::unique_ptr llt_; +}; + #ifndef CERES_NO_LAPACK class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky { public: @@ -128,10 +145,53 @@ class CERES_NO_EXPORT LAPACKDenseCholesky final : public DenseCholesky { private: double* lhs_ = nullptr; int num_cols_ = -1; - LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR; + LinearSolverTerminationType termination_type_ = + LinearSolverTerminationType::FATAL_ERROR; +}; + +class CERES_NO_EXPORT FloatLAPACKDenseCholesky final : public DenseCholesky { + public: + LinearSolverTerminationType Factorize(int num_cols, + double* lhs, + std::string* message) override; + LinearSolverTerminationType Solve(const double* rhs, + double* solution, + std::string* message) override; + + private: + Eigen::MatrixXf lhs_; + Eigen::VectorXf rhs_and_solution_; + int num_cols_ = -1; + LinearSolverTerminationType termination_type_ = + LinearSolverTerminationType::FATAL_ERROR; }; #endif // CERES_NO_LAPACK +class DenseIterativeRefiner; + +// Computes an initial solution using the given instance of +// DenseCholesky, and then refines it using the DenseIterativeRefiner. 
+class CERES_NO_EXPORT RefinedDenseCholesky final : public DenseCholesky { + public: + RefinedDenseCholesky( + std::unique_ptr dense_cholesky, + std::unique_ptr iterative_refiner); + ~RefinedDenseCholesky() override; + + LinearSolverTerminationType Factorize(int num_cols, + double* lhs, + std::string* message) override; + LinearSolverTerminationType Solve(const double* rhs, + double* solution, + std::string* message) override; + + private: + std::unique_ptr dense_cholesky_; + std::unique_ptr iterative_refiner_; + double* lhs_ = nullptr; + int num_cols_; +}; + #ifndef CERES_NO_CUDA // CUDA implementation of DenseCholesky using the cuSolverDN library using the // 32-bit legacy interface for maximum compatibility. @@ -149,16 +209,9 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky { std::string* message) override; private: - CUDADenseCholesky() = default; - // Picks up the cuSolverDN and cuStream handles from the context. If - // the context is unable to initialize CUDA, returns false with a - // human-readable message indicating the reason. - bool Init(ContextImpl* context, std::string* message); + explicit CUDADenseCholesky(ContextImpl* context); - // Handle to the cuSOLVER context. - cusolverDnHandle_t cusolver_handle_ = nullptr; - // CUDA device stream. - cudaStream_t stream_ = nullptr; + ContextImpl* context_ = nullptr; // Number of columns in the A matrix, to be cached between calls to *Factorize // and *Solve. size_t num_cols_ = 0; @@ -171,13 +224,85 @@ class CERES_NO_EXPORT CUDADenseCholesky final : public DenseCholesky { // Required for error handling with cuSOLVER. CudaBuffer error_; // Cache the result of Factorize to ensure that when Solve is called, the - // factiorization of lhs is valid. - LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR; + // factorization of lhs is valid. 
+ LinearSolverTerminationType factorize_result_ = + LinearSolverTerminationType::FATAL_ERROR; +}; + +// A mixed-precision iterative refinement dense Cholesky solver using FP32 CUDA +// Dense Cholesky for inner iterations, and FP64 outer refinements. +// This class implements a modified version of the "Classical iterative +// refinement" (Algorithm 4.1) from the following paper: +// Haidar, Azzam, Harun Bayraktar, Stanimire Tomov, Jack Dongarra, and Nicholas +// J. Higham. "Mixed-precision iterative refinement using tensor cores on GPUs +// to accelerate solution of linear systems." Proceedings of the Royal Society A +// 476, no. 2243 (2020): 20200110. +// +// The two key modifications from Algorithm 4.1 in the paper are: +// 1. We use Cholesky factorization instead of LU factorization since our A is +// symmetric positive definite. +// 2. During the solution update, the up-cast and accumulation is performed in +// one step with a custom kernel. +class CERES_NO_EXPORT CUDADenseCholeskyMixedPrecision final + : public DenseCholesky { + public: + static std::unique_ptr Create( + const LinearSolver::Options& options); + CUDADenseCholeskyMixedPrecision(const CUDADenseCholeskyMixedPrecision&) = + delete; + CUDADenseCholeskyMixedPrecision& operator=( + const CUDADenseCholeskyMixedPrecision&) = delete; + LinearSolverTerminationType Factorize(int num_cols, + double* lhs, + std::string* message) override; + LinearSolverTerminationType Solve(const double* rhs, + double* solution, + std::string* message) override; + + private: + CUDADenseCholeskyMixedPrecision(ContextImpl* context, + int max_num_refinement_iterations); + + // Helper function to wrap Cuda boilerplate needed to call Spotrf. + LinearSolverTerminationType CudaCholeskyFactorize(std::string* message); + // Helper function to wrap Cuda boilerplate needed to call Spotrs.
+ LinearSolverTerminationType CudaCholeskySolve(std::string* message); + // Picks up the cuSolverDN and cuStream handles from the context in the + // options, and the number of refinement iterations from the options. If + // the context is unable to initialize CUDA, returns false with a + // human-readable message indicating the reason. + bool Init(const LinearSolver::Options& options, std::string* message); + + ContextImpl* context_ = nullptr; + // Number of columns in the A matrix, to be cached between calls to *Factorize + // and *Solve. + size_t num_cols_ = 0; + CudaBuffer lhs_fp64_; + CudaBuffer rhs_fp64_; + CudaBuffer lhs_fp32_; + // Scratch space for cuSOLVER on the GPU. + CudaBuffer device_workspace_; + // Required for error handling with cuSOLVER. + CudaBuffer error_; + + // Solution to lhs * x = rhs. + CudaBuffer x_fp64_; + // Incremental correction to x. + CudaBuffer correction_fp32_; + // Residual to iterative refinement. + CudaBuffer residual_fp32_; + CudaBuffer residual_fp64_; + + // Number of inner refinement iterations to perform. + int max_num_refinement_iterations_ = 0; + // Cache the result of Factorize to ensure that when Solve is called, the + // factorization of lhs is valid. + LinearSolverTerminationType factorize_result_ = + LinearSolverTerminationType::FATAL_ERROR; }; #endif // CERES_NO_CUDA -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_DENSE_CHOLESKY_H_ diff --git a/extern/ceres/internal/ceres/dense_jacobian_writer.h b/extern/ceres/internal/ceres/dense_jacobian_writer.h index 0020937124e..d0f2c89a7cc 100644 --- a/extern/ceres/internal/ceres/dense_jacobian_writer.h +++ b/extern/ceres/internal/ceres/dense_jacobian_writer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -75,8 +75,8 @@ class CERES_NO_EXPORT DenseJacobianWriter { DenseSparseMatrix* dense_jacobian = down_cast(jacobian); const ResidualBlock* residual_block = program_->residual_blocks()[residual_id]; - int num_parameter_blocks = residual_block->NumParameterBlocks(); - int num_residuals = residual_block->NumResiduals(); + const int num_parameter_blocks = residual_block->NumParameterBlocks(); + const int num_residuals = residual_block->NumResiduals(); // Now copy the jacobians for each parameter into the dense jacobian matrix. for (int j = 0; j < num_parameter_blocks; ++j) { diff --git a/extern/ceres/internal/ceres/dense_normal_cholesky_solver.cc b/extern/ceres/internal/ceres/dense_normal_cholesky_solver.cc index 30a0c023f51..f6d5e5a98f4 100644 --- a/extern/ceres/internal/ceres/dense_normal_cholesky_solver.cc +++ b/extern/ceres/internal/ceres/dense_normal_cholesky_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/types.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { DenseNormalCholeskySolver::DenseNormalCholeskySolver( LinearSolver::Options options) @@ -87,5 +86,4 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl( return summary; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dense_normal_cholesky_solver.h b/extern/ceres/internal/ceres/dense_normal_cholesky_solver.h index 5b3c74069f0..c6aa2aff10f 100644 --- a/extern/ceres/internal/ceres/dense_normal_cholesky_solver.h +++ b/extern/ceres/internal/ceres/dense_normal_cholesky_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -41,8 +41,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class DenseSparseMatrix; @@ -94,8 +93,7 @@ class CERES_NO_EXPORT DenseNormalCholeskySolver std::unique_ptr cholesky_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dense_qr.cc b/extern/ceres/internal/ceres/dense_qr.cc index 4b9c8a4a035..fbbcadc87dc 100644 --- a/extern/ceres/internal/ceres/dense_qr.cc +++ b/extern/ceres/internal/ceres/dense_qr.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ #include #include #include + #ifndef CERES_NO_CUDA #include "ceres/context_impl.h" #include "cublas_v2.h" @@ -98,7 +99,7 @@ extern "C" void dormqr_(const char* side, const char* trans, const int* m, // a is a column major lda x n. // b is a column major matrix of ldb x nrhs // -// info = 0 succesful. +// info = 0 successful. // = -i < 0 i^th argument is an illegal value. // = i > 0, i^th diagonal element of A is zero. extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag, @@ -108,8 +109,7 @@ extern "C" void dtrtrs_(const char* uplo, const char* trans, const char* diag, #endif -namespace ceres { -namespace internal { +namespace ceres::internal { DenseQR::~DenseQR() = default; @@ -153,7 +153,7 @@ LinearSolverTerminationType DenseQR::FactorAndSolve(int num_rows, std::string* message) { LinearSolverTerminationType termination_type = Factorize(num_rows, num_cols, lhs, message); - if (termination_type == LINEAR_SOLVER_SUCCESS) { + if (termination_type == LinearSolverTerminationType::SUCCESS) { termination_type = Solve(rhs, solution, message); } return termination_type; @@ -166,7 +166,7 @@ LinearSolverTerminationType EigenDenseQR::Factorize(int num_rows, Eigen::Map m(lhs, num_rows, num_cols); qr_ = std::make_unique(m); *message = "Success."; - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs, @@ -175,7 +175,7 @@ LinearSolverTerminationType EigenDenseQR::Solve(const double* rhs, VectorRef(solution, qr_->cols()) = qr_->solve(ConstVectorRef(rhs, qr_->rows())); *message = "Success."; - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } #ifndef CERES_NO_LAPACK @@ -237,7 +237,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows, << "Argument: " << -info << " is invalid."; } - termination_type_ = 
LINEAR_SOLVER_SUCCESS; + termination_type_ = LinearSolverTerminationType::SUCCESS; *message = "Success."; return termination_type_; } @@ -245,7 +245,7 @@ LinearSolverTerminationType LAPACKDenseQR::Factorize(int num_rows, LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs, double* solution, std::string* message) { - if (termination_type_ != LINEAR_SOLVER_SUCCESS) { + if (termination_type_ != LinearSolverTerminationType::SUCCESS) { *message = "QR factorization failed and solve called."; return termination_type_; } @@ -298,10 +298,10 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs, *message = "QR factorization failure. The factorization is not full rank. R has " "zeros on the diagonal."; - termination_type_ = LINEAR_SOLVER_FAILURE; + termination_type_ = LinearSolverTerminationType::FAILURE; } else { std::copy_n(q_transpose_rhs_.data(), num_cols_, solution); - termination_type_ = LINEAR_SOLVER_SUCCESS; + termination_type_ = LinearSolverTerminationType::SUCCESS; } return termination_type_; @@ -311,30 +311,26 @@ LinearSolverTerminationType LAPACKDenseQR::Solve(const double* rhs, #ifndef CERES_NO_CUDA -bool CUDADenseQR::Init(ContextImpl* context, std::string* message) { - if (!context->InitCUDA(message)) { - return false; - } - cublas_handle_ = context->cublas_handle_; - cusolver_handle_ = context->cusolver_handle_; - stream_ = context->stream_; - error_.Reserve(1); - *message = "CUDADenseQR::Init Success."; - return true; -} +CUDADenseQR::CUDADenseQR(ContextImpl* context) + : context_(context), + lhs_{context}, + rhs_{context}, + tau_{context}, + device_workspace_{context}, + error_(context, 1) {} LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows, int num_cols, double* lhs, std::string* message) { - factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + factorize_result_ = LinearSolverTerminationType::FATAL_ERROR; lhs_.Reserve(num_rows * num_cols); tau_.Reserve(std::min(num_rows, num_cols)); 
num_rows_ = num_rows; num_cols_ = num_cols; - lhs_.CopyToGpuAsync(lhs, num_rows * num_cols, stream_); + lhs_.CopyFromCpu(lhs, num_rows * num_cols); int device_workspace_size = 0; - if (cusolverDnDgeqrf_bufferSize(cusolver_handle_, + if (cusolverDnDgeqrf_bufferSize(context_->cusolver_handle_, num_rows, num_cols, lhs_.data(), @@ -342,10 +338,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows, &device_workspace_size) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDgeqrf_bufferSize failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } device_workspace_.Reserve(device_workspace_size); - if (cusolverDnDgeqrf(cusolver_handle_, + if (cusolverDnDgeqrf(context_->cusolver_handle_, num_rows, num_cols, lhs_.data(), @@ -355,15 +351,10 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows, device_workspace_.size(), error_.data()) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDgeqrf failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; - } - if (cudaDeviceSynchronize() != cudaSuccess || - cudaStreamSynchronize(stream_) != cudaSuccess) { - *message = "Cuda device synchronization failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } int error = 0; - error_.CopyToHost(&error, 1); + error_.CopyToCpu(&error, 1); if (error < 0) { LOG(FATAL) << "Congratulations, you found a bug in Ceres - " << "please report it. " @@ -371,24 +362,24 @@ LinearSolverTerminationType CUDADenseQR::Factorize(int num_rows, << "Argument: " << -error << " is invalid."; // The following line is unreachable, but return failure just to be // pedantic, since the compiler does not know that. 
- return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } *message = "Success"; - factorize_result_ = LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; - return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; + factorize_result_ = LinearSolverTerminationType::SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs, double* solution, std::string* message) { - if (factorize_result_ != LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS) { - *message = "Factorize did not complete succesfully previously."; + if (factorize_result_ != LinearSolverTerminationType::SUCCESS) { + *message = "Factorize did not complete successfully previously."; return factorize_result_; } - rhs_.CopyToGpuAsync(rhs, num_rows_, stream_); + rhs_.CopyFromCpu(rhs, num_rows_); int device_workspace_size = 0; - if (cusolverDnDormqr_bufferSize(cusolver_handle_, + if (cusolverDnDormqr_bufferSize(context_->cusolver_handle_, CUBLAS_SIDE_LEFT, CUBLAS_OP_T, num_rows_, @@ -402,12 +393,12 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs, &device_workspace_size) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDormqr_bufferSize failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } device_workspace_.Reserve(device_workspace_size); // Compute rhs = Q^T * rhs, assuming that lhs has already been factorized. // The result of factorization would have stored Q in a packed form in lhs_. 
- if (cusolverDnDormqr(cusolver_handle_, + if (cusolverDnDormqr(context_->cusolver_handle_, CUBLAS_SIDE_LEFT, CUBLAS_OP_T, num_rows_, @@ -422,10 +413,10 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs, device_workspace_.size(), error_.data()) != CUSOLVER_STATUS_SUCCESS) { *message = "cuSolverDN::cusolverDnDormqr failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } int error = 0; - error_.CopyToHost(&error, 1); + error_.CopyToCpu(&error, 1); if (error < 0) { LOG(FATAL) << "Congratulations, you found a bug in Ceres. " << "Please report it." @@ -434,7 +425,7 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs, } // Compute the solution vector as x = R \ (Q^T * rhs). Since the previous step // replaced rhs by (Q^T * rhs), this is just x = R \ rhs. - if (cublasDtrsv(cublas_handle_, + if (cublasDtrsv(context_->cublas_handle_, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, @@ -444,38 +435,22 @@ LinearSolverTerminationType CUDADenseQR::Solve(const double* rhs, rhs_.data(), 1) != CUBLAS_STATUS_SUCCESS) { *message = "cuBLAS::cublasDtrsv failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } - if (cudaDeviceSynchronize() != cudaSuccess || - cudaStreamSynchronize(stream_) != cudaSuccess) { - *message = "Cuda device synchronization failed."; - return LinearSolverTerminationType::LINEAR_SOLVER_FATAL_ERROR; - } - rhs_.CopyToHost(solution, num_cols_); + rhs_.CopyToCpu(solution, num_cols_); *message = "Success"; - return LinearSolverTerminationType::LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } std::unique_ptr CUDADenseQR::Create( const LinearSolver::Options& options) { - if (options.dense_linear_algebra_library_type != CUDA) { - // The user called the wrong factory method. 
+ if (options.dense_linear_algebra_library_type != CUDA || + options.context == nullptr || !options.context->IsCudaInitialized()) { return nullptr; } - auto cuda_dense_qr = std::unique_ptr(new CUDADenseQR()); - std::string cuda_error; - if (cuda_dense_qr->Init(options.context, &cuda_error)) { - return cuda_dense_qr; - } - // Initialization failed, destroy the object (done automatically) and return a - // nullptr. - LOG(ERROR) << "CUDADenseQR::Init failed: " << cuda_error; - return nullptr; + return std::unique_ptr(new CUDADenseQR(options.context)); } -CUDADenseQR::CUDADenseQR() = default; - #endif // CERES_NO_CUDA -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dense_qr.h b/extern/ceres/internal/ceres/dense_qr.h index 7a2ffb52ae6..0ba17c4df94 100644 --- a/extern/ceres/internal/ceres/dense_qr.h +++ b/extern/ceres/internal/ceres/dense_qr.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ #include #include "Eigen/Dense" +#include "ceres/context_impl.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" @@ -54,8 +55,7 @@ #include "cusolverDn.h" #endif // CERES_NO_CUDA -namespace ceres { -namespace internal { +namespace ceres::internal { // An interface that abstracts away the internal details of various dense linear // algebra libraries and offers a simple API for solving dense linear systems @@ -92,7 +92,7 @@ class CERES_NO_EXPORT DenseQR { std::string* message) = 0; // Convenience method which combines a call to Factorize and Solve. Solve is - // only called if Factorize returns LINEAR_SOLVER_SUCCESS. + // only called if Factorize returns LinearSolverTerminationType::SUCCESS. 
// // The input matrix lhs may be modified by the implementation to store the // factorization, irrespective of whether the method succeeds or not. It is @@ -136,7 +136,8 @@ class CERES_NO_EXPORT LAPACKDenseQR final : public DenseQR { double* lhs_ = nullptr; int num_rows_; int num_cols_; - LinearSolverTerminationType termination_type_ = LINEAR_SOLVER_FATAL_ERROR; + LinearSolverTerminationType termination_type_ = + LinearSolverTerminationType::FATAL_ERROR; Vector work_; Vector tau_; Vector q_transpose_rhs_; @@ -164,18 +165,9 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR { std::string* message) override; private: - CUDADenseQR(); - // Picks up the cuSolverDN, cuBLAS, and cuStream handles from the context. If - // the context is unable to initialize CUDA, returns false with a - // human-readable message indicating the reason. - bool Init(ContextImpl* context, std::string* message); + explicit CUDADenseQR(ContextImpl* context); - // Handle to the cuSOLVER context. - cusolverDnHandle_t cusolver_handle_ = nullptr; - // Handle to cuBLAS context. - cublasHandle_t cublas_handle_ = nullptr; - // CUDA device stream. - cudaStream_t stream_ = nullptr; + ContextImpl* context_ = nullptr; // Number of rowns in the A matrix, to be cached between calls to *Factorize // and *Solve. size_t num_rows_ = 0; @@ -194,13 +186,13 @@ class CERES_NO_EXPORT CUDADenseQR final : public DenseQR { CudaBuffer error_; // Cache the result of Factorize to ensure that when Solve is called, the // factiorization of lhs is valid. 
- LinearSolverTerminationType factorize_result_ = LINEAR_SOLVER_FATAL_ERROR; + LinearSolverTerminationType factorize_result_ = + LinearSolverTerminationType::FATAL_ERROR; }; #endif // CERES_NO_CUDA -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dense_qr_solver.cc b/extern/ceres/internal/ceres/dense_qr_solver.cc index 24cb25abd8e..92652b4d6d4 100644 --- a/extern/ceres/internal/ceres/dense_qr_solver.cc +++ b/extern/ceres/internal/ceres/dense_qr_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/types.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options) : options_(options), dense_qr_(DenseQR::Create(options)) {} @@ -81,5 +80,4 @@ LinearSolver::Summary DenseQRSolver::SolveImpl( return summary; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dense_qr_solver.h b/extern/ceres/internal/ceres/dense_qr_solver.h index 39922a2692b..12db52f0c48 100644 --- a/extern/ceres/internal/ceres/dense_qr_solver.h +++ b/extern/ceres/internal/ceres/dense_qr_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class DenseSparseMatrix; @@ -112,8 +111,7 @@ class CERES_NO_EXPORT DenseQRSolver final : public DenseSparseMatrixSolver { std::unique_ptr dense_qr_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dense_sparse_matrix.cc b/extern/ceres/internal/ceres/dense_sparse_matrix.cc index 8b967f2ade7..e0c917cc5b0 100644 --- a/extern/ceres/internal/ceres/dense_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/dense_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { DenseSparseMatrix::DenseSparseMatrix(int num_rows, int num_cols) : m_(Matrix(num_rows, num_cols)) {} @@ -60,17 +59,31 @@ DenseSparseMatrix::DenseSparseMatrix(Matrix m) : m_(std::move(m)) {} void DenseSparseMatrix::SetZero() { m_.setZero(); } -void DenseSparseMatrix::RightMultiply(const double* x, double* y) const { - VectorRef(y, num_rows()) += matrix() * ConstVectorRef(x, num_cols()); +void DenseSparseMatrix::RightMultiplyAndAccumulate(const double* x, + double* y) const { + VectorRef(y, num_rows()).noalias() += m_ * ConstVectorRef(x, num_cols()); } -void DenseSparseMatrix::LeftMultiply(const double* x, double* y) const { - VectorRef(y, num_cols()) += - matrix().transpose() * ConstVectorRef(x, num_rows()); +void 
DenseSparseMatrix::LeftMultiplyAndAccumulate(const double* x, + double* y) const { + VectorRef(y, num_cols()).noalias() += + m_.transpose() * ConstVectorRef(x, num_rows()); } void DenseSparseMatrix::SquaredColumnNorm(double* x) const { - VectorRef(x, num_cols()) = m_.colwise().squaredNorm(); + // This implementation is 3x faster than the naive version + // x = m_.colwise().square().sum(), likely because m_ + // is a row major matrix. + + const int num_rows = m_.rows(); + const int num_cols = m_.cols(); + std::fill_n(x, num_cols, 0.0); + const double* m = m_.data(); + for (int i = 0; i < num_rows; ++i) { + for (int j = 0; j < num_cols; ++j, ++m) { + x[j] += (*m) * (*m); + } + } } void DenseSparseMatrix::ScaleColumns(const double* scale) { @@ -100,5 +113,4 @@ void DenseSparseMatrix::ToTextFile(FILE* file) const { } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dense_sparse_matrix.h b/extern/ceres/internal/ceres/dense_sparse_matrix.h index 655cbb8a3db..dc066d5b84a 100644 --- a/extern/ceres/internal/ceres/dense_sparse_matrix.h +++ b/extern/ceres/internal/ceres/dense_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/sparse_matrix.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class TripletSparseMatrix; @@ -54,8 +53,8 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix { // SparseMatrix interface. 
void SetZero() final; - void RightMultiply(const double* x, double* y) const final; - void LeftMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + void LeftMultiplyAndAccumulate(const double* x, double* y) const final; void SquaredColumnNorm(double* x) const final; void ScaleColumns(const double* scale) final; void ToDenseMatrix(Matrix* dense_matrix) const final; @@ -73,8 +72,7 @@ class CERES_NO_EXPORT DenseSparseMatrix final : public SparseMatrix { Matrix m_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/detect_structure.cc b/extern/ceres/internal/ceres/detect_structure.cc index 4aac4452153..e82d70fb21a 100644 --- a/extern/ceres/internal/ceres/detect_structure.cc +++ b/extern/ceres/internal/ceres/detect_structure.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ #include "ceres/internal/eigen.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { void DetectStructure(const CompressedRowBlockStructure& bs, const int num_eliminate_blocks, @@ -119,5 +118,4 @@ void DetectStructure(const CompressedRowBlockStructure& bs, // clang-format on } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/detect_structure.h b/extern/ceres/internal/ceres/detect_structure.h index 6151c042256..3237d1051f7 100644 --- a/extern/ceres/internal/ceres/detect_structure.h +++ b/extern/ceres/internal/ceres/detect_structure.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. 
All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Detect static blocks in the problem sparsity. For rows containing // e_blocks, we are interested in detecting if the size of the row @@ -63,8 +62,7 @@ void CERES_NO_EXPORT DetectStructure(const CompressedRowBlockStructure& bs, int* e_block_size, int* f_block_size); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dogleg_strategy.cc b/extern/ceres/internal/ceres/dogleg_strategy.cc index 65f7ccd8480..877d8d9e3f4 100644 --- a/extern/ceres/internal/ceres/dogleg_strategy.cc +++ b/extern/ceres/internal/ceres/dogleg_strategy.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,8 +44,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { namespace { const double kMaxMu = 1.0; const double kMinMu = 1e-8; @@ -101,7 +100,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep( } TrustRegionStrategy::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; return summary; } @@ -138,11 +137,13 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep( summary.num_iterations = linear_solver_summary.num_iterations; summary.termination_type = linear_solver_summary.termination_type; - if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) { + if (linear_solver_summary.termination_type == + LinearSolverTerminationType::FATAL_ERROR) { return summary; } - if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) { + if (linear_solver_summary.termination_type != + LinearSolverTerminationType::FAILURE) { switch (dogleg_type_) { // Interpolate the Cauchy point and the Gauss-Newton step. case TRADITIONAL_DOGLEG: @@ -153,7 +154,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep( // Cauchy point and the (Gauss-)Newton step. 
case SUBSPACE_DOGLEG: if (!ComputeSubspaceModel(jacobian)) { - summary.termination_type = LINEAR_SOLVER_FAILURE; + summary.termination_type = LinearSolverTerminationType::FAILURE; break; } ComputeSubspaceDoglegStep(step); @@ -174,7 +175,7 @@ TrustRegionStrategy::Summary DoglegStrategy::ComputeStep( void DoglegStrategy::ComputeGradient(SparseMatrix* jacobian, const double* residuals) { gradient_.setZero(); - jacobian->LeftMultiply(residuals, gradient_.data()); + jacobian->LeftMultiplyAndAccumulate(residuals, gradient_.data()); gradient_.array() /= diagonal_.array(); } @@ -187,7 +188,7 @@ void DoglegStrategy::ComputeCauchyPoint(SparseMatrix* jacobian) { // The Jacobian is scaled implicitly by computing J * (D^-1 * (D^-1 * g)) // instead of (J * D^-1) * (D^-1 * g). Vector scaled_gradient = (gradient_.array() / diagonal_.array()).matrix(); - jacobian->RightMultiply(scaled_gradient.data(), Jg.data()); + jacobian->RightMultiplyAndAccumulate(scaled_gradient.data(), Jg.data()); alpha_ = gradient_.squaredNorm() / Jg.squaredNorm(); } @@ -518,7 +519,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep( const double* residuals) { const int n = jacobian->num_cols(); LinearSolver::Summary linear_solver_summary; - linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE; + linear_solver_summary.termination_type = LinearSolverTerminationType::FAILURE; // The Jacobian matrix is often quite poorly conditioned. Thus it is // necessary to add a diagonal matrix at the bottom to prevent the @@ -531,7 +532,7 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep( // If the solve fails, the multiplier to the diagonal is increased // up to max_mu_ by a factor of mu_increase_factor_ every time. If // the linear solver is still not successful, the strategy returns - // with LINEAR_SOLVER_FAILURE. + // with LinearSolverTerminationType::FAILURE. // // Next time when a new Gauss-Newton step is requested, the // multiplier starts out from the last successful solve. 
@@ -582,21 +583,25 @@ LinearSolver::Summary DoglegStrategy::ComputeGaussNewtonStep( } } - if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) { + if (linear_solver_summary.termination_type == + LinearSolverTerminationType::FATAL_ERROR) { return linear_solver_summary; } - if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE || + if (linear_solver_summary.termination_type == + LinearSolverTerminationType::FAILURE || !IsArrayValid(n, gauss_newton_step_.data())) { mu_ *= mu_increase_factor_; VLOG(2) << "Increasing mu " << mu_; - linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE; + linear_solver_summary.termination_type = + LinearSolverTerminationType::FAILURE; continue; } break; } - if (linear_solver_summary.termination_type != LINEAR_SOLVER_FAILURE) { + if (linear_solver_summary.termination_type != + LinearSolverTerminationType::FAILURE) { // The scaled Gauss-Newton step is D * GN: // // - (D^-1 J^T J D^-1)^-1 (D^-1 g) @@ -627,7 +632,7 @@ void DoglegStrategy::StepAccepted(double step_quality) { reuse_ = false; } -void DoglegStrategy::StepRejected(double step_quality) { +void DoglegStrategy::StepRejected(double /*step_quality*/) { radius_ *= 0.5; reuse_ = true; } @@ -701,14 +706,13 @@ bool DoglegStrategy::ComputeSubspaceModel(SparseMatrix* jacobian) { Vector tmp; tmp = (subspace_basis_.col(0).array() / diagonal_.array()).matrix(); - jacobian->RightMultiply(tmp.data(), Jb.row(0).data()); + jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(0).data()); tmp = (subspace_basis_.col(1).array() / diagonal_.array()).matrix(); - jacobian->RightMultiply(tmp.data(), Jb.row(1).data()); + jacobian->RightMultiplyAndAccumulate(tmp.data(), Jb.row(1).data()); subspace_B_ = Jb * Jb.transpose(); return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dogleg_strategy.h b/extern/ceres/internal/ceres/dogleg_strategy.h index 1d219afe8bc..b4d29c92d37 100644 --- 
a/extern/ceres/internal/ceres/dogleg_strategy.h +++ b/extern/ceres/internal/ceres/dogleg_strategy.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,7 @@ #include "ceres/linear_solver.h" #include "ceres/trust_region_strategy.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Dogleg step computation and trust region sizing strategy based on // on "Methods for Nonlinear Least Squares" by K. Madsen, H.B. Nielsen @@ -159,8 +158,7 @@ class CERES_NO_EXPORT DoglegStrategy final : public TrustRegionStrategy { Matrix2d subspace_B_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dynamic_compressed_row_finalizer.h b/extern/ceres/internal/ceres/dynamic_compressed_row_finalizer.h index fedee3b7a83..9da73c0c7c9 100644 --- a/extern/ceres/internal/ceres/dynamic_compressed_row_finalizer.h +++ b/extern/ceres/internal/ceres/dynamic_compressed_row_finalizer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -28,15 +28,14 @@ // // Author: richie.stebbing@gmail.com (Richard Stebbing) -#ifndef CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_ -#define CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALIZER_H_ +#ifndef CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_ +#define CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_ #include "ceres/casts.h" #include "ceres/dynamic_compressed_row_sparse_matrix.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer { void operator()(SparseMatrix* base_jacobian, int num_parameters) { @@ -46,7 +45,6 @@ struct CERES_NO_EXPORT DynamicCompressedRowJacobianFinalizer { } }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal -#endif // CERES_INTERNAL_DYNAMIC_COMPRESED_ROW_FINALISER_H_ +#endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_FINALIZER_H_ diff --git a/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.cc b/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.cc index 8c254e98f46..790a5fb7567 100644 --- a/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.cc +++ b/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,6 +31,8 @@ #include "ceres/dynamic_compressed_row_jacobian_writer.h" #include +#include +#include #include "ceres/casts.h" #include "ceres/compressed_row_jacobian_writer.h" @@ -39,11 +41,7 @@ #include "ceres/program.h" #include "ceres/residual_block.h" -namespace ceres { -namespace internal { - -using std::pair; -using std::vector; +namespace ceres::internal { std::unique_ptr DynamicCompressedRowJacobianWriter::CreateEvaluatePreparers(int num_threads) { @@ -69,7 +67,7 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id, program_->residual_blocks()[residual_id]; const int num_residuals = residual_block->NumResiduals(); - vector> evaluated_jacobian_blocks; + std::vector> evaluated_jacobian_blocks; CompressedRowJacobianWriter::GetOrderedParameterBlocks( program_, residual_id, &evaluated_jacobian_blocks); @@ -100,5 +98,4 @@ void DynamicCompressedRowJacobianWriter::Write(int residual_id, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.h b/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.h index 794a9b4c1e6..489197f47bb 100644 --- a/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.h +++ b/extern/ceres/internal/ceres/dynamic_compressed_row_jacobian_writer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/internal/export.h" #include "ceres/scratch_evaluate_preparer.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; class SparseMatrix; @@ -68,7 +67,7 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter { // Write only the non-zero jacobian entries for a residual block // (specified by `residual_id`) into `base_jacobian`, starting at the row - // specifed by `residual_offset`. + // specified by `residual_offset`. // // This method is thread-safe over residual blocks (each `residual_id`). void Write(int residual_id, @@ -80,7 +79,6 @@ class CERES_NO_EXPORT DynamicCompressedRowJacobianWriter { Program* program_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_DYNAMIC_COMPRESSED_ROW_JACOBIAN_WRITER_H_ diff --git a/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.cc b/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.cc index 7185e14a411..4081c9c2a17 100644 --- a/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,7 @@ #include -namespace ceres { -namespace internal { +namespace ceres::internal { DynamicCompressedRowSparseMatrix::DynamicCompressedRowSparseMatrix( int num_rows, int num_cols, int initial_max_num_nonzeros) @@ -99,5 +98,4 @@ void DynamicCompressedRowSparseMatrix::Finalize(int num_additional_elements) { << "the number of jacobian nonzeros. 
Please contact the developers!"; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.h b/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.h index 5b4c402f830..6dafe598e47 100644 --- a/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.h +++ b/extern/ceres/internal/ceres/dynamic_compressed_row_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,13 +47,12 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final : public CompressedRowSparseMatrix { public: - // Set the number of rows and columns for the underlyig + // Set the number of rows and columns for the underlying // `CompressedRowSparseMatrix` and set the initial number of maximum non-zero // entries. Note that following the insertion of entries, when `Finalize` // is called the number of non-zeros is determined and all internal @@ -74,7 +73,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final // Insert an entry at a given row and column position. This method is // thread-safe across rows i.e. different threads can insert values - // simultaneously into different rows. It should be emphasised that this + // simultaneously into different rows. It should be emphasized that this // method always inserts a new entry and does not check for existing // entries at the specified row and column position. 
Duplicate entries // for a given row and column position will result in undefined @@ -98,8 +97,7 @@ class CERES_NO_EXPORT DynamicCompressedRowSparseMatrix final std::vector> dynamic_values_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.cc b/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.cc index 5e907e18d51..d77d7f7b007 100644 --- a/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.cc +++ b/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,7 +39,6 @@ #include "Eigen/SparseCore" #include "ceres/compressed_row_sparse_matrix.h" -#include "ceres/cxsparse.h" #include "ceres/internal/config.h" #include "ceres/internal/eigen.h" #include "ceres/linear_solver.h" @@ -52,8 +51,7 @@ #include "Eigen/SparseCholesky" #endif -namespace ceres { -namespace internal { +namespace ceres::internal { DynamicSparseNormalCholeskySolver::DynamicSparseNormalCholeskySolver( LinearSolver::Options options) @@ -66,7 +64,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl( double* x) { const int num_cols = A->num_cols(); VectorRef(x, num_cols).setZero(); - A->LeftMultiply(b, x); + A->LeftMultiplyAndAccumulate(b, x); if (per_solve_options.D != nullptr) { // Temporarily append a diagonal block to the A matrix, but undo @@ -87,9 +85,6 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImpl( case SUITE_SPARSE: summary = SolveImplUsingSuiteSparse(A, x); break; - case CX_SPARSE: - summary = SolveImplUsingCXSparse(A, x); - break; case EIGEN_SPARSE: summary = 
SolveImplUsingEigen(A, x); break; @@ -113,7 +108,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_FATAL_ERROR; + summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; summary.message = "SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE " "because Ceres was not built with support for " @@ -138,7 +133,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( LinearSolver::Summary summary; summary.num_iterations = 1; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; solver.analyzePattern(lhs); @@ -150,7 +145,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( event_logger.AddEvent("Analyze"); if (solver.info() != Eigen::Success) { - summary.termination_type = LINEAR_SOLVER_FATAL_ERROR; + summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; summary.message = "Eigen failure. Unable to find symbolic factorization."; return summary; } @@ -158,7 +153,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( solver.factorize(lhs); event_logger.AddEvent("Factorize"); if (solver.info() != Eigen::Success) { - summary.termination_type = LINEAR_SOLVER_FAILURE; + summary.termination_type = LinearSolverTerminationType::FAILURE; summary.message = "Eigen failure. Unable to find numeric factorization."; return summary; } @@ -167,7 +162,7 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( VectorRef(rhs_and_solution, lhs.cols()) = solver.solve(rhs); event_logger.AddEvent("Solve"); if (solver.info() != Eigen::Success) { - summary.termination_type = LINEAR_SOLVER_FAILURE; + summary.termination_type = LinearSolverTerminationType::FAILURE; summary.message = "Eigen failure. 
Unable to do triangular solve."; return summary; } @@ -176,66 +171,16 @@ LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingEigen( #endif // CERES_USE_EIGEN_SPARSE } -LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingCXSparse( - CompressedRowSparseMatrix* A, double* rhs_and_solution) { -#ifdef CERES_NO_CXSPARSE - - LinearSolver::Summary summary; - summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_FATAL_ERROR; - summary.message = - "SPARSE_NORMAL_CHOLESKY cannot be used with CX_SPARSE " - "because Ceres was not built with support for CXSparse. " - "This requires enabling building with -DCXSPARSE=ON."; - - return summary; - -#else - EventLogger event_logger( - "DynamicSparseNormalCholeskySolver::CXSparse::Solve"); - - LinearSolver::Summary summary; - summary.num_iterations = 1; - summary.termination_type = LINEAR_SOLVER_SUCCESS; - summary.message = "Success."; - - CXSparse cxsparse; - - // Wrap the augmented Jacobian in a compressed sparse column matrix. - cs_di a_transpose = cxsparse.CreateSparseMatrixTransposeView(A); - - // Compute the normal equations. J'J delta = J'f and solve them - // using a sparse Cholesky factorization. Notice that when compared - // to SuiteSparse we have to explicitly compute the transpose of Jt, - // and then the normal equations before they can be - // factorized. CHOLMOD/SuiteSparse on the other hand can just work - // off of Jt to compute the Cholesky factorization of the normal - // equations. 
- cs_di* a = cxsparse.TransposeMatrix(&a_transpose); - cs_di* lhs = cxsparse.MatrixMatrixMultiply(&a_transpose, a); - cxsparse.Free(a); - event_logger.AddEvent("NormalEquations"); - - if (!cxsparse.SolveCholesky(lhs, rhs_and_solution)) { - summary.termination_type = LINEAR_SOLVER_FAILURE; - summary.message = "CXSparse::SolveCholesky failed"; - } - event_logger.AddEvent("Solve"); - - cxsparse.Free(lhs); - event_logger.AddEvent("TearDown"); - return summary; -#endif -} - LinearSolver::Summary DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse( CompressedRowSparseMatrix* A, double* rhs_and_solution) { #ifdef CERES_NO_SUITESPARSE + (void)A; + (void)rhs_and_solution; LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_FATAL_ERROR; + summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; summary.message = "SPARSE_NORMAL_CHOLESKY cannot be used with SUITE_SPARSE " "because Ceres was not built with support for SuiteSparse. 
" @@ -247,7 +192,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse( EventLogger event_logger( "DynamicSparseNormalCholeskySolver::SuiteSparse::Solve"); LinearSolver::Summary summary; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.num_iterations = 1; summary.message = "Success."; @@ -255,16 +200,17 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse( const int num_cols = A->num_cols(); cholmod_sparse lhs = ss.CreateSparseMatrixTransposeView(A); event_logger.AddEvent("Setup"); - cholmod_factor* factor = ss.AnalyzeCholesky(&lhs, &summary.message); + cholmod_factor* factor = + ss.AnalyzeCholesky(&lhs, options_.ordering_type, &summary.message); event_logger.AddEvent("Analysis"); if (factor == nullptr) { - summary.termination_type = LINEAR_SOLVER_FATAL_ERROR; + summary.termination_type = LinearSolverTerminationType::FATAL_ERROR; return summary; } summary.termination_type = ss.Cholesky(&lhs, factor, &summary.message); - if (summary.termination_type == LINEAR_SOLVER_SUCCESS) { + if (summary.termination_type == LinearSolverTerminationType::SUCCESS) { cholmod_dense cholmod_rhs = ss.CreateDenseVectorView(rhs_and_solution, num_cols); cholmod_dense* solution = ss.Solve(factor, &cholmod_rhs, &summary.message); @@ -274,7 +220,7 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse( rhs_and_solution, solution->x, num_cols * sizeof(*rhs_and_solution)); ss.Free(solution); } else { - summary.termination_type = LINEAR_SOLVER_FAILURE; + summary.termination_type = LinearSolverTerminationType::FAILURE; } } @@ -285,5 +231,4 @@ DynamicSparseNormalCholeskySolver::SolveImplUsingSuiteSparse( #endif } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.h b/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.h index 6f73c961212..022b38e4fd2 100644 --- 
a/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.h +++ b/extern/ceres/internal/ceres/dynamic_sparse_normal_cholesky_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,8 +42,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CompressedRowSparseMatrix; @@ -77,7 +76,6 @@ class CERES_NO_EXPORT DynamicSparseNormalCholeskySolver const LinearSolver::Options options_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_DYNAMIC_SPARSE_NORMAL_CHOLESKY_SOLVER_H_ diff --git a/extern/ceres/internal/ceres/eigen_vector_ops.h b/extern/ceres/internal/ceres/eigen_vector_ops.h new file mode 100644 index 00000000000..6ebff8891ef --- /dev/null +++ b/extern/ceres/internal/ceres/eigen_vector_ops.h @@ -0,0 +1,105 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_INTERNAL_EIGEN_VECTOR_OPS_H_ +#define CERES_INTERNAL_EIGEN_VECTOR_OPS_H_ + +#include <numeric> + +#include "ceres/internal/eigen.h" +#include "ceres/internal/fixed_array.h" +#include "ceres/parallel_for.h" +#include "ceres/parallel_vector_ops.h" + +namespace ceres::internal { + +// Blas1 operations on Eigen vectors. These functions are needed as an +// abstraction layer so that we can use different versions of a vector style +// object in the conjugate gradients linear solver. 
+template +inline double Norm(const Eigen::DenseBase& x, + ContextImpl* context, + int num_threads) { + FixedArray norms(num_threads, 0.); + ParallelFor( + context, + 0, + x.rows(), + num_threads, + [&x, &norms](int thread_id, std::tuple range) { + auto [start, end] = range; + norms[thread_id] += x.segment(start, end - start).squaredNorm(); + }, + kMinBlockSizeParallelVectorOps); + return std::sqrt(std::accumulate(norms.begin(), norms.end(), 0.)); +} +inline void SetZero(Vector& x, ContextImpl* context, int num_threads) { + ParallelSetZero(context, num_threads, x); +} +inline void Axpby(double a, + const Vector& x, + double b, + const Vector& y, + Vector& z, + ContextImpl* context, + int num_threads) { + ParallelAssign(context, num_threads, z, a * x + b * y); +} +template +inline double Dot(const VectorLikeX& x, + const VectorLikeY& y, + ContextImpl* context, + int num_threads) { + FixedArray dots(num_threads, 0.); + ParallelFor( + context, + 0, + x.rows(), + num_threads, + [&x, &y, &dots](int thread_id, std::tuple range) { + auto [start, end] = range; + const int block_size = end - start; + const auto& x_block = x.segment(start, block_size); + const auto& y_block = y.segment(start, block_size); + dots[thread_id] += x_block.dot(y_block); + }, + kMinBlockSizeParallelVectorOps); + return std::accumulate(dots.begin(), dots.end(), 0.); +} +inline void Copy(const Vector& from, + Vector& to, + ContextImpl* context, + int num_threads) { + ParallelAssign(context, num_threads, to, from); +} + +} // namespace ceres::internal + +#endif // CERES_INTERNAL_EIGEN_VECTOR_OPS_H_ diff --git a/extern/ceres/internal/ceres/eigensparse.cc b/extern/ceres/internal/ceres/eigensparse.cc index 81668c82e67..7ed401d46b2 100644 --- a/extern/ceres/internal/ceres/eigensparse.cc +++ b/extern/ceres/internal/ceres/eigensparse.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. 
All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,22 +36,25 @@ #include +#ifndef CERES_NO_EIGEN_METIS +#include // This is needed because MetisSupport depends on iostream. + +#include "Eigen/MetisSupport" +#endif + #include "Eigen/SparseCholesky" #include "Eigen/SparseCore" #include "ceres/compressed_row_sparse_matrix.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -// TODO(sameeragarwal): Use enable_if to clean up the implementations -// for when Scalar == double. template class EigenSparseCholeskyTemplate final : public SparseCholesky { public: EigenSparseCholeskyTemplate() = default; CompressedRowSparseMatrix::StorageType StorageType() const final { - return CompressedRowSparseMatrix::LOWER_TRIANGULAR; + return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR; } LinearSolverTerminationType Factorize( @@ -68,7 +71,7 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky { if (solver_.info() != Eigen::Success) { *message = "Eigen failure. Unable to find symbolic factorization."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } analyzed_ = true; @@ -77,9 +80,9 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky { solver_.factorize(lhs); if (solver_.info() != Eigen::Success) { *message = "Eigen failure. 
Unable to find numeric factorization."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType Solve(const double* rhs_ptr, @@ -87,23 +90,23 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky { std::string* message) override { CHECK(analyzed_) << "Solve called without a call to Factorize first."; - scalar_rhs_ = ConstVectorRef(rhs_ptr, solver_.cols()) - .template cast(); - - // The two casts are needed if the Scalar in this class is not - // double. For code simplicity we are going to assume that Eigen - // is smart enough to figure out that casting a double Vector to a - // double Vector is a straight copy. If this turns into a - // performance bottleneck (unlikely), we can revisit this. - scalar_solution_ = solver_.solve(scalar_rhs_); - VectorRef(solution_ptr, solver_.cols()) = - scalar_solution_.template cast(); + // Avoid copying when the scalar type is double + if constexpr (std::is_same_v) { + ConstVectorRef scalar_rhs(rhs_ptr, solver_.cols()); + VectorRef(solution_ptr, solver_.cols()) = solver_.solve(scalar_rhs); + } else { + auto scalar_rhs = ConstVectorRef(rhs_ptr, solver_.cols()) + .template cast(); + auto scalar_solution = solver_.solve(scalar_rhs); + VectorRef(solution_ptr, solver_.cols()) = + scalar_solution.template cast(); + } if (solver_.info() != Eigen::Success) { *message = "Eigen failure. 
Unable to do triangular solve."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } LinearSolverTerminationType Factorize(CompressedRowSparseMatrix* lhs, @@ -111,9 +114,8 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky { CHECK_EQ(lhs->storage_type(), StorageType()); typename Solver::Scalar* values_ptr = nullptr; - if (std::is_same::value) { - values_ptr = - reinterpret_cast(lhs->mutable_values()); + if constexpr (std::is_same_v) { + values_ptr = lhs->mutable_values(); } else { // In the case where the scalar used in this class is not // double. In that case, make a copy of the values array in the @@ -123,19 +125,20 @@ class EigenSparseCholeskyTemplate final : public SparseCholesky { values_ptr = values_.data(); } - Eigen::Map> + Eigen::Map< + const Eigen::SparseMatrix> eigen_lhs(lhs->num_rows(), lhs->num_rows(), lhs->num_nonzeros(), - lhs->mutable_rows(), - lhs->mutable_cols(), + lhs->rows(), + lhs->cols(), values_ptr); return Factorize(eigen_lhs, message); } private: - Eigen::Matrix values_, - scalar_rhs_, scalar_solution_; + Eigen::Matrix values_; + bool analyzed_{false}; Solver solver_; }; @@ -150,11 +153,22 @@ std::unique_ptr EigenSparseCholesky::Create( Eigen::Upper, Eigen::NaturalOrdering>; - if (ordering_type == AMD) { + if (ordering_type == OrderingType::AMD) { return std::make_unique>(); - } else { - return std::make_unique>(); + } else if (ordering_type == OrderingType::NESDIS) { +#ifndef CERES_NO_EIGEN_METIS + using WithMetisOrdering = Eigen::SimplicialLDLT, + Eigen::Upper, + Eigen::MetisOrdering>; + return std::make_unique>(); +#else + LOG(FATAL) + << "Congratulations you have found a bug in Ceres Solver. 
Please " + "report it to the Ceres Solver developers."; + return nullptr; +#endif // CERES_NO_EIGEN_METIS } + return std::make_unique>(); } EigenSparseCholesky::~EigenSparseCholesky() = default; @@ -168,16 +182,26 @@ std::unique_ptr FloatEigenSparseCholesky::Create( Eigen::SimplicialLDLT, Eigen::Upper, Eigen::NaturalOrdering>; - if (ordering_type == AMD) { + if (ordering_type == OrderingType::AMD) { return std::make_unique>(); - } else { - return std::make_unique>(); + } else if (ordering_type == OrderingType::NESDIS) { +#ifndef CERES_NO_EIGEN_METIS + using WithMetisOrdering = Eigen::SimplicialLDLT, + Eigen::Upper, + Eigen::MetisOrdering>; + return std::make_unique>(); +#else + LOG(FATAL) + << "Congratulations you have found a bug in Ceres Solver. Please " + "report it to the Ceres Solver developers."; + return nullptr; +#endif // CERES_NO_EIGEN_METIS } + return std::make_unique>(); } FloatEigenSparseCholesky::~FloatEigenSparseCholesky() = default; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_USE_EIGEN_SPARSE diff --git a/extern/ceres/internal/ceres/eigensparse.h b/extern/ceres/internal/ceres/eigensparse.h index c4a4142e586..f16e8f2fa9b 100644 --- a/extern/ceres/internal/ceres/eigensparse.h +++ b/extern/ceres/internal/ceres/eigensparse.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,8 +46,18 @@ #include "ceres/linear_solver.h" #include "ceres/sparse_cholesky.h" -namespace ceres { -namespace internal { +namespace ceres::internal { + +class EigenSparse { + public: + static constexpr bool IsNestedDissectionAvailable() noexcept { +#ifdef CERES_NO_EIGEN_METIS + return false; +#else + return true; +#endif + } +}; class CERES_NO_EXPORT EigenSparseCholesky : public SparseCholesky { public: @@ -83,8 +93,18 @@ class CERES_NO_EXPORT FloatEigenSparseCholesky : public SparseCholesky { std::string* message) override = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal + +#else + +namespace ceres::internal { + +class EigenSparse { + public: + static constexpr bool IsNestedDissectionAvailable() noexcept { return false; } +}; + +} // namespace ceres::internal #endif // CERES_USE_EIGEN_SPARSE diff --git a/extern/ceres/internal/ceres/evaluation_callback.cc b/extern/ceres/internal/ceres/evaluation_callback.cc index 77591a8c621..5ac66451541 100644 --- a/extern/ceres/internal/ceres/evaluation_callback.cc +++ b/extern/ceres/internal/ceres/evaluation_callback.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/evaluator.cc b/extern/ceres/internal/ceres/evaluator.cc index 52d0f09e5df..64eb4c5b3ec 100644 --- a/extern/ceres/internal/ceres/evaluator.cc +++ b/extern/ceres/internal/ceres/evaluator.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,8 +46,7 @@ #include "ceres/scratch_evaluate_preparer.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { Evaluator::~Evaluator() = default; @@ -65,10 +64,17 @@ std::unique_ptr Evaluator::Create(const Evaluator::Options& options, case DENSE_SCHUR: case SPARSE_SCHUR: case ITERATIVE_SCHUR: - case CGNR: - return std::make_unique< - ProgramEvaluator>( - options, program); + case CGNR: { + if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) { + return std::make_unique>( + options, program); + } else { + return std::make_unique< + ProgramEvaluator>( + options, program); + } + } case SPARSE_NORMAL_CHOLESKY: if (options.dynamic_sparsity) { return std::make_unique< @@ -88,5 +94,4 @@ std::unique_ptr Evaluator::Create(const Evaluator::Options& options, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/evaluator.h b/extern/ceres/internal/ceres/evaluator.h index 68a4fb28a55..dcb3cf6c75b 100644 --- a/extern/ceres/internal/ceres/evaluator.h +++ b/extern/ceres/internal/ceres/evaluator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -65,6 +65,8 @@ class CERES_NO_EXPORT Evaluator { int num_threads = 1; int num_eliminate_blocks = -1; LinearSolverType linear_solver_type = DENSE_QR; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type = + NO_SPARSE; bool dynamic_sparsity = false; ContextImpl* context = nullptr; EvaluationCallback* evaluation_callback = nullptr; diff --git a/extern/ceres/internal/ceres/execution_summary.h b/extern/ceres/internal/ceres/execution_summary.h index fbee75fc0cb..accc5e47d15 100644 --- a/extern/ceres/internal/ceres/execution_summary.h +++ b/extern/ceres/internal/ceres/execution_summary.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/internal/export.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct CallStatistics { CallStatistics() = default; @@ -85,7 +84,6 @@ class ScopedExecutionTimer { ExecutionSummary* summary_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_EXECUTION_SUMMARY_H_ diff --git a/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.cc b/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.cc new file mode 100644 index 00000000000..efe4d8d7a74 --- /dev/null +++ b/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.cc @@ -0,0 +1,120 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. 
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas) + +#include "ceres/fake_bundle_adjustment_jacobian.h" + +#include +#include +#include +#include + +#include "Eigen/Dense" +#include "ceres/block_sparse_matrix.h" +#include "ceres/internal/eigen.h" + +namespace ceres::internal { + +std::unique_ptr CreateFakeBundleAdjustmentJacobian( + int num_cameras, + int num_points, + int camera_size, + int point_size, + double visibility, + std::mt19937& prng) { + constexpr int kResidualSize = 2; + + CompressedRowBlockStructure* bs = new CompressedRowBlockStructure; + int c = 0; + // Add column blocks for each point + for (int i = 0; i < num_points; ++i) { + bs->cols.push_back(Block(point_size, c)); + c += point_size; + } + + // Add column blocks for each camera. + for (int i = 0; i < num_cameras; ++i) { + bs->cols.push_back(Block(camera_size, c)); + c += camera_size; + } + + std::bernoulli_distribution visibility_distribution(visibility); + int row_pos = 0; + int cell_pos = 0; + for (int i = 0; i < num_points; ++i) { + for (int j = 0; j < num_cameras; ++j) { + if (!visibility_distribution(prng)) { + continue; + } + bs->rows.emplace_back(); + auto& row = bs->rows.back(); + row.block.position = row_pos; + row.block.size = kResidualSize; + auto& cells = row.cells; + cells.resize(2); + + cells[0].block_id = i; + cells[0].position = cell_pos; + cell_pos += kResidualSize * point_size; + + cells[1].block_id = num_points + j; + cells[1].position = cell_pos; + cell_pos += kResidualSize * camera_size; + + row_pos += kResidualSize; + } + } + + auto jacobian = std::make_unique(bs); + VectorRef(jacobian->mutable_values(), jacobian->num_nonzeros()).setRandom(); + return jacobian; +} + +std::pair< + std::unique_ptr>, + std::unique_ptr> +CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras, + int num_points, + int camera_size, + int landmark_size, + double visibility, + std::mt19937& rng) { + using PartitionedView = + PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>; + 
auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian( + num_cameras, num_points, camera_size, landmark_size, visibility, rng); + LinearSolver::Options options; + options.elimination_groups.push_back(num_points); + auto partitioned_view = + std::make_unique(options, *block_sparse_matrix); + return std::make_pair(std::move(partitioned_view), + std::move(block_sparse_matrix)); +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.h b/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.h new file mode 100644 index 00000000000..ced1b161871 --- /dev/null +++ b/extern/ceres/internal/ceres/fake_bundle_adjustment_jacobian.h @@ -0,0 +1,78 @@ + +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN +#define CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN + +#include +#include + +#include "ceres/block_sparse_matrix.h" +#include "ceres/partitioned_matrix_view.h" + +namespace ceres::internal { +std::unique_ptr CreateFakeBundleAdjustmentJacobian( + int num_cameras, + int num_points, + int camera_size, + int point_size, + double visibility, + std::mt19937& prng); + +template +std::pair>, + std::unique_ptr> +CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras, + int num_points, + double visibility, + std::mt19937& rng) { + using PartitionedView = PartitionedMatrixView<2, kEBlockSize, kFBlockSize>; + auto block_sparse_matrix = CreateFakeBundleAdjustmentJacobian( + num_cameras, num_points, kFBlockSize, kEBlockSize, visibility, rng); + auto partitioned_view = + std::make_unique(*block_sparse_matrix, num_points); + return std::make_pair(std::move(partitioned_view), + std::move(block_sparse_matrix)); +} + +std::pair< + std::unique_ptr>, + std::unique_ptr> +CreateFakeBundleAdjustmentPartitionedJacobian(int num_cameras, + int num_points, + int camera_size, + int landmark_size, + double visibility, + std::mt19937& rng); + +} // namespace ceres::internal + +#endif // CERES_INTERNAL_FAKE_BUNDLE_ADJUSTMENT_JACOBIAN diff --git a/extern/ceres/internal/ceres/file.cc b/extern/ceres/internal/ceres/file.cc index 
94f21355a2b..60d35fac95d 100644 --- a/extern/ceres/internal/ceres/file.cc +++ b/extern/ceres/internal/ceres/file.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,15 +33,14 @@ #include "ceres/file.h" #include +#include #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -using std::string; - -void WriteStringToFileOrDie(const string& data, const string& filename) { +void WriteStringToFileOrDie(const std::string& data, + const std::string& filename) { FILE* file_descriptor = fopen(filename.c_str(), "wb"); if (!file_descriptor) { LOG(FATAL) << "Couldn't write to file: " << filename; @@ -50,7 +49,7 @@ void WriteStringToFileOrDie(const string& data, const string& filename) { fclose(file_descriptor); } -void ReadFileToStringOrDie(const string& filename, string* data) { +void ReadFileToStringOrDie(const std::string& filename, std::string* data) { FILE* file_descriptor = fopen(filename.c_str(), "r"); if (!file_descriptor) { @@ -59,12 +58,12 @@ void ReadFileToStringOrDie(const string& filename, string* data) { // Resize the input buffer appropriately. fseek(file_descriptor, 0L, SEEK_END); - int num_bytes = ftell(file_descriptor); + int64_t num_bytes = ftell(file_descriptor); data->resize(num_bytes); // Read the data. 
fseek(file_descriptor, 0L, SEEK_SET); - int num_read = + int64_t num_read = fread(&((*data)[0]), sizeof((*data)[0]), num_bytes, file_descriptor); if (num_read != num_bytes) { LOG(FATAL) << "Couldn't read all of " << filename @@ -74,7 +73,7 @@ void ReadFileToStringOrDie(const string& filename, string* data) { fclose(file_descriptor); } -string JoinPath(const string& dirname, const string& basename) { +std::string JoinPath(const std::string& dirname, const std::string& basename) { #ifdef _WIN32 static const char separator = '\\'; #else @@ -86,9 +85,8 @@ string JoinPath(const string& dirname, const string& basename) { } else if (dirname[dirname.size() - 1] == separator) { return dirname + basename; } else { - return dirname + string(&separator, 1) + basename; + return dirname + std::string(&separator, 1) + basename; } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/file.h b/extern/ceres/internal/ceres/file.h index bd13128aedf..b21f1cae884 100644 --- a/extern/ceres/internal/ceres/file.h +++ b/extern/ceres/internal/ceres/file.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { CERES_NO_EXPORT void WriteStringToFileOrDie(const std::string& data, @@ -52,8 +51,7 @@ void ReadFileToStringOrDie(const std::string& filename, std::string* data); CERES_NO_EXPORT std::string JoinPath(const std::string& dirname, const std::string& basename); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/first_order_function.cc b/extern/ceres/internal/ceres/first_order_function.cc index 26f13488a1d..267b8ef64dc 100644 --- a/extern/ceres/internal/ceres/first_order_function.cc +++ b/extern/ceres/internal/ceres/first_order_function.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/float_suitesparse.cc b/extern/ceres/internal/ceres/float_suitesparse.cc index dc1d0e45bdb..6016badc67e 100644 --- a/extern/ceres/internal/ceres/float_suitesparse.cc +++ b/extern/ceres/internal/ceres/float_suitesparse.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,7 @@ #if !defined(CERES_NO_SUITESPARSE) -namespace ceres { -namespace internal { +namespace ceres::internal { std::unique_ptr FloatSuiteSparseCholesky::Create( OrderingType ordering_type) { @@ -43,7 +42,6 @@ std::unique_ptr FloatSuiteSparseCholesky::Create( return {}; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // !defined(CERES_NO_SUITESPARSE) diff --git a/extern/ceres/internal/ceres/float_suitesparse.h b/extern/ceres/internal/ceres/float_suitesparse.h index 7e76799e241..b9d298ea9c2 100644 --- a/extern/ceres/internal/ceres/float_suitesparse.h +++ b/extern/ceres/internal/ceres/float_suitesparse.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #if !defined(CERES_NO_SUITESPARSE) -namespace ceres { -namespace internal { +namespace ceres::internal { // Fake implementation of a single precision Sparse Cholesky using // SuiteSparse. @@ -53,8 +52,7 @@ class CERES_NO_EXPORT FloatSuiteSparseCholesky : public SparseCholesky { static std::unique_ptr Create(OrderingType ordering_type); }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // !defined(CERES_NO_SUITESPARSE) diff --git a/extern/ceres/internal/ceres/function_sample.cc b/extern/ceres/internal/ceres/function_sample.cc index 3e0ae60ca5d..bb4bcffc751 100644 --- a/extern/ceres/internal/ceres/function_sample.cc +++ b/extern/ceres/internal/ceres/function_sample.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,7 @@ #include "ceres/stringprintf.h" -namespace ceres { -namespace internal { +namespace ceres::internal { FunctionSample::FunctionSample() : x(0.0), @@ -75,5 +74,4 @@ std::string FunctionSample::ToDebugString() const { gradient_is_valid); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/function_sample.h b/extern/ceres/internal/ceres/function_sample.h index 63ffc8ff8fc..058276974a1 100644 --- a/extern/ceres/internal/ceres/function_sample.h +++ b/extern/ceres/internal/ceres/function_sample.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // FunctionSample is used by the line search routines to store and // communicate the value and (optionally) the gradient of the function @@ -83,13 +82,12 @@ struct CERES_NO_EXPORT FunctionSample { // // where d is the search direction. double gradient; - // True if the evaluation of the gradient was sucessful and the + // True if the evaluation of the gradient was successful and the // value is a finite number. 
bool gradient_is_valid; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/generate_bundle_adjustment_tests.py b/extern/ceres/internal/ceres/generate_bundle_adjustment_tests.py new file mode 100644 index 00000000000..ac83bc30f62 --- /dev/null +++ b/extern/ceres/internal/ceres/generate_bundle_adjustment_tests.py @@ -0,0 +1,305 @@ +# Ceres Solver - A fast non-linear least squares minimizer +# Copyright 2023 Google Inc. All rights reserved. +# http://ceres-solver.org/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Google Inc. nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: keir@google.com (Keir Mierle) +# +# Generate bundle adjustment tests as separate binaries. Since the bundle +# adjustment tests are fairly processing intensive, serializing them makes the +# tests take forever to run. Splitting them into separate binaries makes it +# easier to parallelize in continuous integration systems, and makes local +# processing on multi-core workstations much faster. + +# Product of ORDERINGS, THREAD_CONFIGS, and SOLVER_CONFIGS is the full set of +# tests to generate. 
+ORDERINGS = ["kAutomaticOrdering", "kUserOrdering"] +SINGLE_THREADED = "1" +MULTI_THREADED = "4" +THREAD_CONFIGS = [SINGLE_THREADED, MULTI_THREADED] + +DENSE_SOLVER_CONFIGS = [ + # Linear solver Dense backend + ('DENSE_SCHUR', 'EIGEN'), + ('DENSE_SCHUR', 'LAPACK'), + ('DENSE_SCHUR', 'CUDA'), +] + +SPARSE_SOLVER_CONFIGS = [ + # Linear solver Sparse backend + ('SPARSE_NORMAL_CHOLESKY', 'SUITE_SPARSE'), + ('SPARSE_NORMAL_CHOLESKY', 'EIGEN_SPARSE'), + ('SPARSE_NORMAL_CHOLESKY', 'ACCELERATE_SPARSE'), + ('SPARSE_SCHUR', 'SUITE_SPARSE'), + ('SPARSE_SCHUR', 'EIGEN_SPARSE'), + ('SPARSE_SCHUR', 'ACCELERATE_SPARSE'), +] + +ITERATIVE_SOLVER_CONFIGS = [ + # Linear solver Sparse backend Preconditioner + ('ITERATIVE_SCHUR', 'NO_SPARSE', 'JACOBI'), + ('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_JACOBI'), + ('ITERATIVE_SCHUR', 'NO_SPARSE', 'SCHUR_POWER_SERIES_EXPANSION'), + ('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_JACOBI'), + ('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_JACOBI'), + ('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_JACOBI'), + ('ITERATIVE_SCHUR', 'SUITE_SPARSE', 'CLUSTER_TRIDIAGONAL'), + ('ITERATIVE_SCHUR', 'EIGEN_SPARSE', 'CLUSTER_TRIDIAGONAL'), + ('ITERATIVE_SCHUR', 'ACCELERATE_SPARSE','CLUSTER_TRIDIAGONAL'), +] + +FILENAME_SHORTENING_MAP = dict( + DENSE_SCHUR='denseschur', + ITERATIVE_SCHUR='iterschur', + SPARSE_NORMAL_CHOLESKY='sparsecholesky', + SPARSE_SCHUR='sparseschur', + EIGEN='eigen', + LAPACK='lapack', + CUDA='cuda', + NO_SPARSE='', # Omit sparse reference entirely for dense tests. + SUITE_SPARSE='suitesparse', + EIGEN_SPARSE='eigensparse', + ACCELERATE_SPARSE='acceleratesparse', + IDENTITY='identity', + JACOBI='jacobi', + SCHUR_JACOBI='schurjacobi', + CLUSTER_JACOBI='clustjacobi', + CLUSTER_TRIDIAGONAL='clusttri', + SCHUR_POWER_SERIES_EXPANSION='spse', + kAutomaticOrdering='auto', + kUserOrdering='user', +) + +COPYRIGHT_HEADER = ( +"""// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. 
+// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ======================================== +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. 
+// ======================================== +// +// This file is generated using generate_bundle_adjustment_tests.py.""") + +BUNDLE_ADJUSTMENT_TEST_TEMPLATE = (COPYRIGHT_HEADER + """ + +#include "ceres/bundle_adjustment_test_util.h" +#include "ceres/internal/config.h" +#include "gtest/gtest.h" +%(preprocessor_conditions_begin)s +namespace ceres::internal { + +TEST_F(BundleAdjustmentTest, + %(test_class_name)s) { // NOLINT + BundleAdjustmentProblem bundle_adjustment_problem; + Solver::Options* options = bundle_adjustment_problem.mutable_solver_options(); + options->eta = 0.01; + options->num_threads = %(num_threads)s; + options->linear_solver_type = %(linear_solver)s; + options->dense_linear_algebra_library_type = %(dense_backend)s; + options->sparse_linear_algebra_library_type = %(sparse_backend)s; + options->preconditioner_type = %(preconditioner)s; + if (%(ordering)s) { + options->linear_solver_ordering = nullptr; + } + Problem* problem = bundle_adjustment_problem.mutable_problem(); + RunSolverForConfigAndExpectResidualsMatch(*options, problem); +} + +} // namespace ceres::internal +%(preprocessor_conditions_end)s""") + +def camelcasify(token): + """Convert capitalized underscore tokens to camel case""" + return ''.join([x.lower().capitalize() for x in token.split('_')]) + + +def generate_bundle_test(linear_solver, + dense_backend, + sparse_backend, + preconditioner, + ordering, + thread_config): + """Generate a bundle adjustment test executable configured appropriately""" + + # Preconditioner only makes sense for iterative schur; drop it otherwise. + preconditioner_tag = preconditioner + if linear_solver != 'ITERATIVE_SCHUR': + preconditioner_tag = '' + + dense_backend_tag = dense_backend + if linear_solver != 'DENSE_SCHUR': + dense_backend_tag='' + + # Omit references to the sparse backend when one is not in use. 
+ sparse_backend_tag = sparse_backend + if sparse_backend == 'NO_SPARSE': + sparse_backend_tag = '' + + # Use a double underscore; otherwise the names are harder to understand. + test_class_name = '_'.join(filter(lambda x: x, [ + camelcasify(linear_solver), + camelcasify(dense_backend_tag), + camelcasify(sparse_backend_tag), + camelcasify(preconditioner_tag), + ordering[1:], # Strip 'k' + 'Threads' if thread_config == MULTI_THREADED else ''])) + + # Initial template parameters (augmented more below). + template_parameters = dict( + linear_solver=linear_solver, + dense_backend=dense_backend, + sparse_backend=sparse_backend, + preconditioner=preconditioner, + ordering=ordering, + num_threads=thread_config, + test_class_name=test_class_name) + + # Accumulate appropriate #ifdef/#ifndefs for the solver's sparse backend. + preprocessor_conditions_begin = [] + preprocessor_conditions_end = [] + if sparse_backend == 'SUITE_SPARSE': + preprocessor_conditions_begin.append('#ifndef CERES_NO_SUITESPARSE') + preprocessor_conditions_end.insert(0, '#endif // CERES_NO_SUITESPARSE') + elif sparse_backend == 'ACCELERATE_SPARSE': + preprocessor_conditions_begin.append('#ifndef CERES_NO_ACCELERATE_SPARSE') + preprocessor_conditions_end.insert(0, '#endif // CERES_NO_ACCELERATE_SPARSE') + elif sparse_backend == 'EIGEN_SPARSE': + preprocessor_conditions_begin.append('#ifdef CERES_USE_EIGEN_SPARSE') + preprocessor_conditions_end.insert(0, '#endif // CERES_USE_EIGEN_SPARSE') + + if dense_backend == "LAPACK": + preprocessor_conditions_begin.append('#ifndef CERES_NO_LAPACK') + preprocessor_conditions_end.insert(0, '#endif // CERES_NO_LAPACK') + elif dense_backend == "CUDA": + preprocessor_conditions_begin.append('#ifndef CERES_NO_CUDA') + preprocessor_conditions_end.insert(0, '#endif // CERES_NO_CUDA') + + # If there are #ifdefs, put newlines around them. 
+ if preprocessor_conditions_begin: + preprocessor_conditions_begin.insert(0, '') + preprocessor_conditions_begin.append('') + preprocessor_conditions_end.insert(0, '') + preprocessor_conditions_end.append('') + + # Put #ifdef/#ifndef stacks into the template parameters. + template_parameters['preprocessor_conditions_begin'] = '\n'.join( + preprocessor_conditions_begin) + template_parameters['preprocessor_conditions_end'] = '\n'.join( + preprocessor_conditions_end) + + # Substitute variables into the test template, and write the result to a file. + filename_tag = '_'.join(FILENAME_SHORTENING_MAP.get(x) for x in [ + linear_solver, + dense_backend_tag, + sparse_backend_tag, + preconditioner_tag, + ordering] + if FILENAME_SHORTENING_MAP.get(x)) + + if (thread_config == MULTI_THREADED): + filename_tag += '_threads' + + filename = ('generated_bundle_adjustment_tests/ba_%s_test.cc' % + filename_tag.lower()) + with open(filename, 'w') as fd: + fd.write(BUNDLE_ADJUSTMENT_TEST_TEMPLATE % template_parameters) + + # All done. + print('Generated', filename) + + return filename + + +if __name__ == '__main__': + # Iterate over all the possible configurations and generate the tests. + generated_files = [] + + for ordering in ORDERINGS: + for thread_config in THREAD_CONFIGS: + for linear_solver, dense_backend in DENSE_SOLVER_CONFIGS: + generated_files.append( + generate_bundle_test(linear_solver, + dense_backend, + 'NO_SPARSE', + 'IDENTITY', + ordering, + thread_config)) + + for linear_solver, sparse_backend, in SPARSE_SOLVER_CONFIGS: + generated_files.append( + generate_bundle_test(linear_solver, + 'EIGEN', + sparse_backend, + 'IDENTITY', + ordering, + thread_config)) + + for linear_solver, sparse_backend, preconditioner, in ITERATIVE_SOLVER_CONFIGS: + generated_files.append( + generate_bundle_test(linear_solver, + 'EIGEN', + sparse_backend, + preconditioner, + ordering, + thread_config)) + + + # Generate the CMakeLists.txt as well. 
+ with open('generated_bundle_adjustment_tests/CMakeLists.txt', 'w') as fd: + fd.write(COPYRIGHT_HEADER.replace('//', '#').replace('http:#', 'http://')) + fd.write('\n') + fd.write('\n') + for generated_file in generated_files: + fd.write('ceres_test(%s)\n' % + generated_file.split('/')[1].replace('_test.cc', '')) diff --git a/extern/ceres/internal/ceres/generate_template_specializations.py b/extern/ceres/internal/ceres/generate_template_specializations.py new file mode 100644 index 00000000000..12cf0b0ffb8 --- /dev/null +++ b/extern/ceres/internal/ceres/generate_template_specializations.py @@ -0,0 +1,246 @@ +# Ceres Solver - A fast non-linear least squares minimizer +# Copyright 2023 Google Inc. All rights reserved. +# http://ceres-solver.org/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Google Inc. nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: sameeragarwal@google.com (Sameer Agarwal) +# +# Script for explicitly generating template specialization of the +# SchurEliminator class. It is a rather large class +# and the number of explicit instantiations is also large. Explicitly +# generating these instantiations in separate .cc files breaks the +# compilation into separate compilation unit rather than one large cc +# file which takes 2+GB of RAM to compile. +# +# This script creates three sets of files. +# +# 1. schur_eliminator_x_x_x.cc and partitioned_matrix_view_x_x_x.cc +# where, the x indicates the template parameters and +# +# 2. schur_eliminator.cc & partitioned_matrix_view.cc +# +# that contains a factory function for instantiating these classes +# based on runtime parameters. +# +# 3. schur_templates.cc +# +# that contains a function which can be queried to determine what +# template specializations are available. +# +# The following list of tuples, specializations indicates the set of +# specializations that is generated. 
+SPECIALIZATIONS = [(2, 2, 2), + (2, 2, 3), + (2, 2, 4), + (2, 2, "Eigen::Dynamic"), + (2, 3, 3), + (2, 3, 4), + (2, 3, 6), + (2, 3, 9), + (2, 3, "Eigen::Dynamic"), + (2, 4, 3), + (2, 4, 4), + (2, 4, 6), + (2, 4, 8), + (2, 4, 9), + (2, 4, "Eigen::Dynamic"), + (2, "Eigen::Dynamic", "Eigen::Dynamic"), + (3, 3, 3), + (4, 4, 2), + (4, 4, 3), + (4, 4, 4), + (4, 4, "Eigen::Dynamic")] + +import schur_eliminator_template +import partitioned_matrix_view_template +import os +import glob + +def SuffixForSize(size): + if size == "Eigen::Dynamic": + return "d" + return str(size) + +def SpecializationFilename(prefix, row_block_size, e_block_size, f_block_size): + return "_".join([prefix] + list(map(SuffixForSize, (row_block_size, + e_block_size, + f_block_size)))) + +def GenerateFactoryConditional(row_block_size, e_block_size, f_block_size): + conditionals = [] + if (row_block_size != "Eigen::Dynamic"): + conditionals.append("(options.row_block_size == %s)" % row_block_size) + if (e_block_size != "Eigen::Dynamic"): + conditionals.append("(options.e_block_size == %s)" % e_block_size) + if (f_block_size != "Eigen::Dynamic"): + conditionals.append("(options.f_block_size == %s)" % f_block_size) + if (len(conditionals) == 0): + return "%s" + + if (len(conditionals) == 1): + return " if " + conditionals[0] + " {\n %s\n }\n" + + return " if (" + " &&\n ".join(conditionals) + ") {\n %s\n }\n" + +def Specialize(name, data): + """ + Generate specialization code and the conditionals to instantiate it. + """ + + # Specialization files + for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS: + output = SpecializationFilename("generated/" + name, + row_block_size, + e_block_size, + f_block_size) + ".cc" + + with open(output, "w") as f: + f.write(data["HEADER"]) + f.write(data["SPECIALIZATION_FILE"] % + (row_block_size, e_block_size, f_block_size)) + + # Generate the _d_d_d specialization. 
+ output = SpecializationFilename("generated/" + name, + "Eigen::Dynamic", + "Eigen::Dynamic", + "Eigen::Dynamic") + ".cc" + with open(output, "w") as f: + f.write(data["HEADER"]) + f.write(data["DYNAMIC_FILE"] % + ("Eigen::Dynamic", "Eigen::Dynamic", "Eigen::Dynamic")) + + # Factory + with open(name + ".cc", "w") as f: + f.write(data["HEADER"]) + f.write(data["FACTORY_FILE_HEADER"]) + for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS: + factory_conditional = GenerateFactoryConditional( + row_block_size, e_block_size, f_block_size) + factory = data["FACTORY"] % (row_block_size, e_block_size, f_block_size) + f.write(factory_conditional % factory); + f.write(data["FACTORY_FOOTER"]) + +QUERY_HEADER = """// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// What template specializations are available. +// +// ======================================== +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +//========================================= +// +// This file is generated using generate_template_specializations.py. +""" + +QUERY_FILE_HEADER = """ +#include "ceres/internal/eigen.h" +#include "ceres/schur_templates.h" + +namespace ceres { +namespace internal { + +void GetBestSchurTemplateSpecialization(int* row_block_size, + int* e_block_size, + int* f_block_size) { + LinearSolver::Options options; + options.row_block_size = *row_block_size; + options.e_block_size = *e_block_size; + options.f_block_size = *f_block_size; + *row_block_size = Eigen::Dynamic; + *e_block_size = Eigen::Dynamic; + *f_block_size = Eigen::Dynamic; +#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION +""" + +QUERY_FOOTER = """ +#endif + return; +} + +} // namespace internal +} // namespace ceres +""" + +QUERY_ACTION = """ *row_block_size = %s; + *e_block_size = %s; + *f_block_size = %s; + return;""" + +def GenerateQueryFile(): + """ + Generate file that allows querying for available template specializations. 
+ """ + + with open("schur_templates.cc", "w") as f: + f.write(QUERY_HEADER) + f.write(QUERY_FILE_HEADER) + for row_block_size, e_block_size, f_block_size in SPECIALIZATIONS: + factory_conditional = GenerateFactoryConditional( + row_block_size, e_block_size, f_block_size) + action = QUERY_ACTION % (row_block_size, e_block_size, f_block_size) + f.write(factory_conditional % action) + f.write(QUERY_FOOTER) + + +if __name__ == "__main__": + for f in glob.glob("generated/*"): + os.remove(f) + + Specialize("schur_eliminator", + schur_eliminator_template.__dict__) + Specialize("partitioned_matrix_view", + partitioned_matrix_view_template.__dict__) + GenerateQueryFile() diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_2.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_2.cc index 7b4ed167d05..c37dbf09bc3 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_2.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_2.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 2, 2>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_3.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_3.cc index 0f012515a95..d856df633c9 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_3.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 2, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_4.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_4.cc index bdbe91c43f6..a62a436d3b0 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_4.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 2, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_d.cc index 71f293b5512..f8b708931cc 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_2_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 2, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_3.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_3.cc index a6ea7761c9a..cd5bb9152aa 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_3.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 3, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_4.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_4.cc index e712678a28a..51af0f7bef2 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_4.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 3, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_6.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_6.cc index 3aff26e657b..39b920a39c3 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_6.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_6.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 3, 6>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_9.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_9.cc index 6cd239bfd9a..3f211b9540f 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_9.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_9.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 3, 9>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_d.cc index 68c50552d42..a33d2e3e875 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_3_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 3, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_3.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_3.cc index 88c5e29c6f8..14b91b313fa 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_3.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_4.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_4.cc index b9487834441..be1c2342f81 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_4.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_6.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_6.cc index 7f044ef628b..b4ad6159a0b 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_6.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_6.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, 6>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_8.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_8.cc index 7394e7998e7..b505f562799 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_8.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_8.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, 8>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_9.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_9.cc index 263f1fb36f1..f2f1469fbe5 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_9.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_9.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, 9>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_d.cc index d47634e0f40..a0e250c4b92 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_4_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, 4, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_d_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_d_d.cc index 0944cdcbfda..6878963ca47 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_d_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_2_d_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<2, Eigen::Dynamic, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_3_3_3.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_3_3_3.cc index 23674031bb9..2e6b81a3b5d 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_3_3_3.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_3_3_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<3, 3, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_2.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_2.cc index d5268cac481..8b09f751c38 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_2.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_2.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<4, 4, 2>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_3.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_3.cc index 67e098fc6f3..e857daa1239 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_3.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<4, 4, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_4.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_4.cc index 5fe28caee8c..f51a642d237 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_4.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<4, 4, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_d.cc index d87c76d0aa4..5e27e2eb3c4 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_4_4_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<4, 4, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_d_d_d.cc b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_d_d_d.cc index 1e124797598..6e788add016 100644 --- a/extern/ceres/internal/ceres/generated/partitioned_matrix_view_d_d_d.cc +++ b/extern/ceres/internal/ceres/generated/partitioned_matrix_view_d_d_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -41,12 +41,10 @@ #include "ceres/partitioned_matrix_view_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class PartitionedMatrixView<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_2.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_2.cc index dc47a2e6d8e..de29abeb4b1 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_2.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_2.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 2, 2>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_3.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_3.cc index e2df6f63d2f..38e24022341 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_3.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 2, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_4.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_4.cc index 0b1ae949a09..edf48eebbf2 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_4.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 2, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_d.cc index 0f7b6d78c7f..48a83011a3b 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_2_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 2, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_3.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_3.cc index e4ab8eb19bf..49a450d25fa 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_3.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 3, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_4.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_4.cc index d73d466b04c..730d2b19df1 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_4.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 3, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_6.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_6.cc index 800ee536bbf..84b83af3211 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_6.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_6.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 3, 6>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_9.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_9.cc index d38cd566082..bfb903f1667 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_9.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_9.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 3, 9>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_d.cc index 4ac4b8ac8b7..041b7ac0878 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_3_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 3, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_3.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_3.cc index d5f5dbea4b4..c7827d11d54 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_3.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_4.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_4.cc index d50a6d4002b..9429d4c78ea 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_4.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_6.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_6.cc index f79fa4dd2f0..ba14b0871b8 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_6.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_6.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, 6>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_8.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_8.cc index 972b000f1ba..9210d9dc468 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_8.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_8.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, 8>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_9.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_9.cc index aa33e479bc5..ea45d0f0b70 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_9.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_9.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, 9>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_d.cc index a28ef15a522..8ba7c8cec11 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_4_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, 4, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_2_d_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_2_d_d.cc index 43924279a39..1f407877350 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_2_d_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_2_d_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<2, Eigen::Dynamic, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_3_3_3.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_3_3_3.cc index 7ff2a62341c..909fb79b23f 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_3_3_3.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_3_3_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<3, 3, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_2.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_2.cc index 9008b816843..5ca6fcab3c9 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_2.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_2.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<4, 4, 2>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_3.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_3.cc index 8e37df51bee..9d0862afc8d 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_3.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_3.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<4, 4, 3>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_4.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_4.cc index 09d50813a8a..b04ab666a78 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_4.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_4.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<4, 4, 4>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_d.cc index 089df2d7e3e..8e7554319d1 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_4_4_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,12 +46,10 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<4, 4, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_RESTRICT_SCHUR_SPECIALIZATION diff --git a/extern/ceres/internal/ceres/generated/schur_eliminator_d_d_d.cc b/extern/ceres/internal/ceres/generated/schur_eliminator_d_d_d.cc index ca598fe5eca..49c40e86efb 100644 --- a/extern/ceres/internal/ceres/generated/schur_eliminator_d_d_d.cc +++ b/extern/ceres/internal/ceres/generated/schur_eliminator_d_d_d.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -41,10 +41,8 @@ #include "ceres/schur_eliminator_impl.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template class SchurEliminator<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic>; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/gradient_checker.cc b/extern/ceres/internal/ceres/gradient_checker.cc index 777001e013c..f49803caf96 100644 --- a/extern/ceres/internal/ceres/gradient_checker.cc +++ b/extern/ceres/internal/ceres/gradient_checker.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,7 +40,6 @@ #include #include "ceres/is_close.h" -#include "ceres/manifold_adapter.h" #include "ceres/stringprintf.h" #include "ceres/types.h" @@ -49,8 +48,6 @@ namespace ceres { using internal::IsClose; using internal::StringAppendF; using internal::StringPrintf; -using std::string; -using std::vector; namespace { // Evaluate the cost function and transform the returned Jacobians to @@ -65,12 +62,12 @@ bool EvaluateCostFunction(const CostFunction* function, CHECK(jacobians != nullptr); CHECK(local_jacobians != nullptr); - const vector& block_sizes = function->parameter_block_sizes(); + const std::vector& block_sizes = function->parameter_block_sizes(); const int num_parameter_blocks = block_sizes.size(); // Allocate Jacobian matrices in tangent space. local_jacobians->resize(num_parameter_blocks); - vector local_jacobian_data(num_parameter_blocks); + std::vector local_jacobian_data(num_parameter_blocks); for (int i = 0; i < num_parameter_blocks; ++i) { int block_size = block_sizes.at(i); if (manifolds.at(i) != nullptr) { @@ -83,7 +80,7 @@ bool EvaluateCostFunction(const CostFunction* function, // Allocate Jacobian matrices in ambient space. 
jacobians->resize(num_parameter_blocks); - vector jacobian_data(num_parameter_blocks); + std::vector jacobian_data(num_parameter_blocks); for (int i = 0; i < num_parameter_blocks; ++i) { jacobians->at(i).resize(function->num_residuals(), block_sizes.at(i)); jacobians->at(i).setZero(); @@ -116,39 +113,8 @@ bool EvaluateCostFunction(const CostFunction* function, } } // namespace -GradientChecker::GradientChecker( - const CostFunction* function, - const vector* local_parameterizations, - const NumericDiffOptions& options) - : delete_manifolds_(true), function_(function) { - CHECK(function != nullptr); - manifolds_.resize(function->parameter_block_sizes().size(), nullptr); - - // Wrap the local parameterization into manifold objects using - // ManifoldAdapter. - for (int i = 0; i < manifolds_.size(); ++i) { - const LocalParameterization* local_param = local_parameterizations->at(i); - if (local_param == nullptr) { - continue; - } - manifolds_[i] = new internal::ManifoldAdapter(local_param); - } - - auto finite_diff_cost_function = - std::make_unique>( - function, DO_NOT_TAKE_OWNERSHIP, options); - const vector& parameter_block_sizes = - function->parameter_block_sizes(); - for (int32_t parameter_block_size : parameter_block_sizes) { - finite_diff_cost_function->AddParameterBlock(parameter_block_size); - } - finite_diff_cost_function->SetNumResiduals(function->num_residuals()); - - finite_diff_cost_function_ = std::move(finite_diff_cost_function); -} - GradientChecker::GradientChecker(const CostFunction* function, - const vector* manifolds, + const std::vector* manifolds, const NumericDiffOptions& options) : function_(function) { CHECK(function != nullptr); @@ -161,7 +127,7 @@ GradientChecker::GradientChecker(const CostFunction* function, auto finite_diff_cost_function = std::make_unique>( function, DO_NOT_TAKE_OWNERSHIP, options); - const vector& parameter_block_sizes = + const std::vector& parameter_block_sizes = function->parameter_block_sizes(); const int 
num_parameter_blocks = parameter_block_sizes.size(); for (int i = 0; i < num_parameter_blocks; ++i) { @@ -172,14 +138,6 @@ GradientChecker::GradientChecker(const CostFunction* function, finite_diff_cost_function_ = std::move(finite_diff_cost_function); } -GradientChecker::~GradientChecker() { - if (delete_manifolds_) { - for (const auto m : manifolds_) { - delete m; - } - } -} - bool GradientChecker::Probe(double const* const* parameters, double relative_precision, ProbeResults* results_param) const { @@ -204,8 +162,8 @@ bool GradientChecker::Probe(double const* const* parameters, results->return_value = true; // Evaluate the derivative using the user supplied code. - vector& jacobians = results->jacobians; - vector& local_jacobians = results->local_jacobians; + std::vector& jacobians = results->jacobians; + std::vector& local_jacobians = results->local_jacobians; if (!EvaluateCostFunction(function_, parameters, manifolds_, @@ -217,8 +175,9 @@ bool GradientChecker::Probe(double const* const* parameters, } // Evaluate the derivative using numeric derivatives. - vector& numeric_jacobians = results->numeric_jacobians; - vector& local_numeric_jacobians = results->local_numeric_jacobians; + std::vector& numeric_jacobians = results->numeric_jacobians; + std::vector& local_numeric_jacobians = + results->local_numeric_jacobians; Vector finite_diff_residuals; if (!EvaluateCostFunction(finite_diff_cost_function_.get(), parameters, @@ -258,7 +217,7 @@ bool GradientChecker::Probe(double const* const* parameters, // Accumulate the error message for all the jacobians, since it won't get // output if there are no bad jacobian components. - string error_log; + std::string error_log; for (int k = 0; k < function_->parameter_block_sizes().size(); k++) { StringAppendF(&error_log, "========== " @@ -312,7 +271,7 @@ bool GradientChecker::Probe(double const* const* parameters, // Since there were some bad errors, dump comprehensive debug info. 
if (num_bad_jacobian_components) { - string header = StringPrintf( + std::string header = StringPrintf( "\nDetected %d bad Jacobian component(s). " "Worst relative error was %g.\n", num_bad_jacobian_components, diff --git a/extern/ceres/internal/ceres/gradient_checking_cost_function.cc b/extern/ceres/internal/ceres/gradient_checking_cost_function.cc index 1c3b318ed04..8ca449b4e26 100644 --- a/extern/ceres/internal/ceres/gradient_checking_cost_function.cc +++ b/extern/ceres/internal/ceres/gradient_checking_cost_function.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -52,13 +52,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::abs; -using std::max; -using std::string; -using std::vector; +namespace ceres::internal { namespace { @@ -68,7 +62,7 @@ class GradientCheckingCostFunction final : public CostFunction { const std::vector* manifolds, const NumericDiffOptions& options, double relative_precision, - string extra_info, + std::string extra_info, GradientCheckingIterationCallback* callback) : function_(function), gradient_checker_(function, manifolds, options), @@ -76,7 +70,7 @@ class GradientCheckingCostFunction final : public CostFunction { extra_info_(std::move(extra_info)), callback_(callback) { CHECK(callback_ != nullptr); - const vector& parameter_block_sizes = + const std::vector& parameter_block_sizes = function->parameter_block_sizes(); *mutable_parameter_block_sizes() = parameter_block_sizes; set_num_residuals(function->num_residuals()); @@ -105,7 +99,8 @@ class GradientCheckingCostFunction final : public CostFunction { MatrixRef(residuals, num_residuals, 1) = results.residuals; // Copy the original jacobian blocks into the jacobians array. 
- const vector& block_sizes = function_->parameter_block_sizes(); + const std::vector& block_sizes = + function_->parameter_block_sizes(); for (int k = 0; k < block_sizes.size(); k++) { if (jacobians[k] != nullptr) { MatrixRef(jacobians[k], @@ -127,7 +122,7 @@ class GradientCheckingCostFunction final : public CostFunction { const CostFunction* function_; GradientChecker gradient_checker_; double relative_precision_; - string extra_info_; + std::string extra_info_; GradientCheckingIterationCallback* callback_; }; @@ -137,7 +132,7 @@ GradientCheckingIterationCallback::GradientCheckingIterationCallback() : gradient_error_detected_(false) {} CallbackReturnType GradientCheckingIterationCallback::operator()( - const IterationSummary& summary) { + const IterationSummary& /*summary*/) { if (gradient_error_detected_) { LOG(ERROR) << "Gradient error detected. Terminating solver."; return SOLVER_ABORT; @@ -198,7 +193,8 @@ std::unique_ptr CreateGradientCheckingProblemImpl( // For every ParameterBlock in problem_impl, create a new parameter block with // the same manifold and constancy. - const vector& parameter_blocks = program->parameter_blocks(); + const std::vector& parameter_blocks = + program->parameter_blocks(); for (auto* parameter_block : parameter_blocks) { gradient_checking_problem_impl->AddParameterBlock( parameter_block->mutable_user_state(), @@ -225,17 +221,18 @@ std::unique_ptr CreateGradientCheckingProblemImpl( // For every ResidualBlock in problem_impl, create a new // ResidualBlock by wrapping its CostFunction inside a // GradientCheckingCostFunction. - const vector& residual_blocks = program->residual_blocks(); + const std::vector& residual_blocks = + program->residual_blocks(); for (int i = 0; i < residual_blocks.size(); ++i) { ResidualBlock* residual_block = residual_blocks[i]; // Build a human readable string which identifies the // ResidualBlock. This is used by the GradientCheckingCostFunction // when logging debugging information. 
- string extra_info = + std::string extra_info = StringPrintf("Residual block id %d; depends on parameters [", i); - vector parameter_blocks; - vector manifolds; + std::vector parameter_blocks; + std::vector manifolds; parameter_blocks.reserve(residual_block->NumParameterBlocks()); manifolds.reserve(residual_block->NumParameterBlocks()); for (int j = 0; j < residual_block->NumParameterBlocks(); ++j) { @@ -280,5 +277,4 @@ std::unique_ptr CreateGradientCheckingProblemImpl( return gradient_checking_problem_impl; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/gradient_checking_cost_function.h b/extern/ceres/internal/ceres/gradient_checking_cost_function.h index 0caafafa8fa..4ad3b6c754d 100644 --- a/extern/ceres/internal/ceres/gradient_checking_cost_function.h +++ b/extern/ceres/internal/ceres/gradient_checking_cost_function.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,8 +42,7 @@ #include "ceres/iteration_callback.h" #include "ceres/manifold.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class ProblemImpl; @@ -109,8 +108,7 @@ CERES_NO_EXPORT std::unique_ptr CreateGradientCheckingProblemImpl( double relative_precision, GradientCheckingIterationCallback* callback); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/gradient_problem.cc b/extern/ceres/internal/ceres/gradient_problem.cc index cdd472fe87f..ee228b842d7 100644 --- a/extern/ceres/internal/ceres/gradient_problem.cc +++ b/extern/ceres/internal/ceres/gradient_problem.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,6 @@ #include -#include "ceres/local_parameterization.h" -#include "ceres/manifold_adapter.h" #include "glog/logging.h" namespace ceres { @@ -46,22 +44,6 @@ GradientProblem::GradientProblem(FirstOrderFunction* function) CHECK(function != nullptr); } -GradientProblem::GradientProblem(FirstOrderFunction* function, - LocalParameterization* parameterization) - : function_(function), - parameterization_(parameterization), - scratch_(new double[function_->NumParameters()]) { - CHECK(function != nullptr); - if (parameterization != nullptr) { - manifold_ = - std::make_unique(parameterization_.get()); - } else { - manifold_ = std::make_unique>( - function_->NumParameters()); - } - CHECK_EQ(function_->NumParameters(), manifold_->AmbientSize()); -} - GradientProblem::GradientProblem(FirstOrderFunction* function, Manifold* manifold) : function_(function), scratch_(new 
double[function_->NumParameters()]) { diff --git a/extern/ceres/internal/ceres/gradient_problem_evaluator.h b/extern/ceres/internal/ceres/gradient_problem_evaluator.h index efbb257ec75..fe99767587d 100644 --- a/extern/ceres/internal/ceres/gradient_problem_evaluator.h +++ b/extern/ceres/internal/ceres/gradient_problem_evaluator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/sparse_matrix.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator { public: @@ -53,10 +52,10 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator { std::unique_ptr CreateJacobian() const final { return nullptr; } - bool Evaluate(const EvaluateOptions& evaluate_options, + bool Evaluate(const EvaluateOptions& /*evaluate_options*/, const double* state, double* cost, - double* residuals, + double* /*residuals*/, double* gradient, SparseMatrix* jacobian) final { CHECK(jacobian == nullptr); @@ -83,7 +82,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator { int NumParameters() const final { return problem_.NumParameters(); } int NumEffectiveParameters() const final { - return problem_.NumLocalParameters(); + return problem_.NumTangentParameters(); } int NumResiduals() const final { return 1; } @@ -97,8 +96,7 @@ class CERES_NO_EXPORT GradientProblemEvaluator final : public Evaluator { ::ceres::internal::ExecutionSummary execution_summary_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/gradient_problem_solver.cc 
b/extern/ceres/internal/ceres/gradient_problem_solver.cc index 9382556d292..ad2ea136b4a 100644 --- a/extern/ceres/internal/ceres/gradient_problem_solver.cc +++ b/extern/ceres/internal/ceres/gradient_problem_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,7 +30,9 @@ #include "ceres/gradient_problem_solver.h" +#include #include +#include #include "ceres/callbacks.h" #include "ceres/gradient_problem.h" @@ -48,7 +50,6 @@ namespace ceres { using internal::StringAppendF; using internal::StringPrintf; -using std::string; namespace { @@ -112,7 +113,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options, *summary = Summary(); // clang-format off summary->num_parameters = problem.NumParameters(); - summary->num_local_parameters = problem.NumLocalParameters(); + summary->num_tangent_parameters = problem.NumTangentParameters(); summary->line_search_direction_type = options.line_search_direction_type; // NOLINT summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT summary->line_search_type = options.line_search_type; @@ -180,7 +181,7 @@ void GradientProblemSolver::Solve(const GradientProblemSolver::Options& options, SetSummaryFinalCost(summary); } - const std::map& evaluator_statistics = + const std::map& evaluator_statistics = minimizer_options.evaluator->Statistics(); { const CallStatistics& call_stats = FindWithDefault( @@ -203,7 +204,7 @@ bool GradientProblemSolver::Summary::IsSolutionUsable() const { return internal::IsSolutionUsable(*this); } -string GradientProblemSolver::Summary::BriefReport() const { +std::string GradientProblemSolver::Summary::BriefReport() const { return StringPrintf( "Ceres GradientProblemSolver Report: " "Iterations: %d, " @@ -216,17 
+217,20 @@ string GradientProblemSolver::Summary::BriefReport() const { TerminationTypeToString(termination_type)); } -string GradientProblemSolver::Summary::FullReport() const { +std::string GradientProblemSolver::Summary::FullReport() const { using internal::VersionString; - string report = string("\nSolver Summary (v " + VersionString() + ")\n\n"); + // NOTE operator+ is not usable for concatenating a string and a string_view. + std::string report = + std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n"; StringAppendF(&report, "Parameters % 25d\n", num_parameters); - if (num_local_parameters != num_parameters) { - StringAppendF(&report, "Local parameters % 25d\n", num_local_parameters); + if (num_tangent_parameters != num_parameters) { + StringAppendF( + &report, "Tangent parameters % 25d\n", num_tangent_parameters); } - string line_search_direction_string; + std::string line_search_direction_string; if (line_search_direction_type == LBFGS) { line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank); } else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) { @@ -241,7 +245,7 @@ string GradientProblemSolver::Summary::FullReport() const { "Line search direction %19s\n", line_search_direction_string.c_str()); - const string line_search_type_string = StringPrintf( + const std::string line_search_type_string = StringPrintf( "%s %s", LineSearchInterpolationTypeToString(line_search_interpolation_type), LineSearchTypeToString(line_search_type)); diff --git a/extern/ceres/internal/ceres/graph.h b/extern/ceres/internal/ceres/graph.h index 6a6f8f01c00..4f8dfb98f70 100644 --- a/extern/ceres/internal/ceres/graph.h +++ b/extern/ceres/internal/ceres/graph.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,8 +42,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A unweighted undirected graph templated over the vertex ids. Vertex // should be hashable. @@ -206,7 +205,6 @@ class WeightedGraph { edge_weights_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_GRAPH_H_ diff --git a/extern/ceres/internal/ceres/graph_algorithms.h b/extern/ceres/internal/ceres/graph_algorithms.h index 5299f80d963..4ebc8b38a91 100644 --- a/extern/ceres/internal/ceres/graph_algorithms.h +++ b/extern/ceres/internal/ceres/graph_algorithms.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -45,8 +45,7 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Compare two vertices of a graph by their degrees, if the degrees // are equal then order them by their ids. @@ -340,7 +339,6 @@ std::unique_ptr> Degree2MaximumSpanningForest( return forest; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_GRAPH_ALGORITHMS_H_ diff --git a/extern/ceres/internal/ceres/implicit_schur_complement.cc b/extern/ceres/internal/ceres/implicit_schur_complement.cc index 677d767fa93..a63352916f4 100644 --- a/extern/ceres/internal/ceres/implicit_schur_complement.cc +++ b/extern/ceres/internal/ceres/implicit_schur_complement.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,15 +35,16 @@ #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" #include "ceres/linear_solver.h" +#include "ceres/parallel_for.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { ImplicitSchurComplement::ImplicitSchurComplement( const LinearSolver::Options& options) - : options_(options), D_(nullptr), b_(nullptr) {} + : options_(options) {} void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, const double* D, @@ -57,11 +58,16 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, D_ = D; b_ = b; + compute_ftf_inverse_ = + options_.use_spse_initialization || + options_.preconditioner_type == JACOBI || + options_.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION; + // Initialize temporary storage and compute the block diagonals of // E'E and F'E. if (block_diagonal_EtE_inverse_ == nullptr) { block_diagonal_EtE_inverse_ = A_->CreateBlockDiagonalEtE(); - if (options_.preconditioner_type == JACOBI) { + if (compute_ftf_inverse_) { block_diagonal_FtF_inverse_ = A_->CreateBlockDiagonalFtF(); } rhs_.resize(A_->num_cols_f()); @@ -72,7 +78,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, tmp_f_cols_.resize(A_->num_cols_f()); } else { A_->UpdateBlockDiagonalEtE(block_diagonal_EtE_inverse_.get()); - if (options_.preconditioner_type == JACOBI) { + if (compute_ftf_inverse_) { A_->UpdateBlockDiagonalFtF(block_diagonal_FtF_inverse_.get()); } } @@ -81,7 +87,7 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, // contributions from the diagonal D if it is non-null. Add that to // the block diagonals and invert them. AddDiagonalAndInvert(D_, block_diagonal_EtE_inverse_.get()); - if (options_.preconditioner_type == JACOBI) { + if (compute_ftf_inverse_) { AddDiagonalAndInvert((D_ == nullptr) ? 
nullptr : D_ + A_->num_cols_e(), block_diagonal_FtF_inverse_.get()); } @@ -97,36 +103,74 @@ void ImplicitSchurComplement::Init(const BlockSparseMatrix& A, // By breaking it down into individual matrix vector products // involving the matrices E and F. This is implemented using a // PartitionedMatrixView of the input matrix A. -void ImplicitSchurComplement::RightMultiply(const double* x, double* y) const { +void ImplicitSchurComplement::RightMultiplyAndAccumulate(const double* x, + double* y) const { // y1 = F x - tmp_rows_.setZero(); - A_->RightMultiplyF(x, tmp_rows_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); // y2 = E' y1 - tmp_e_cols_.setZero(); - A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); // y3 = -(E'E)^-1 y2 - tmp_e_cols_2_.setZero(); - block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), - tmp_e_cols_2_.data()); - tmp_e_cols_2_ *= -1.0; + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); + block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), + tmp_e_cols_2_.data(), + options_.context, + options_.num_threads); + + ParallelAssign( + options_.context, options_.num_threads, tmp_e_cols_2_, -tmp_e_cols_2_); // y1 = y1 + E y3 - A_->RightMultiplyE(tmp_e_cols_2_.data(), tmp_rows_.data()); + A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); // y5 = D * x if (D_ != nullptr) { ConstVectorRef Dref(D_ + A_->num_cols_e(), num_cols()); - VectorRef(y, num_cols()) = - (Dref.array().square() * ConstVectorRef(x, num_cols()).array()) - .matrix(); + VectorRef y_cols(y, num_cols()); + ParallelAssign( + options_.context, + options_.num_threads, + y_cols, + (Dref.array().square() * ConstVectorRef(x, num_cols()).array())); } else { - VectorRef(y, 
num_cols()).setZero(); + ParallelSetZero(options_.context, options_.num_threads, y, num_cols()); } // y = y5 + F' y1 - A_->LeftMultiplyF(tmp_rows_.data(), y); + A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), y); +} + +void ImplicitSchurComplement::InversePowerSeriesOperatorRightMultiplyAccumulate( + const double* x, double* y) const { + CHECK(compute_ftf_inverse_); + // y1 = F x + ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); + + // y2 = E' y1 + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); + + // y3 = (E'E)^-1 y2 + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); + block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), + tmp_e_cols_2_.data(), + options_.context, + options_.num_threads); + // y1 = E y3 + ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); + A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); + + // y4 = F' y1 + ParallelSetZero(options_.context, options_.num_threads, tmp_f_cols_); + A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), tmp_f_cols_.data()); + + // y += (F'F)^-1 y4 + block_diagonal_FtF_inverse_->RightMultiplyAndAccumulate( + tmp_f_cols_.data(), y, options_.context, options_.num_threads); } // Given a block diagonal matrix and an optional array of diagonal @@ -136,26 +180,31 @@ void ImplicitSchurComplement::AddDiagonalAndInvert( const double* D, BlockSparseMatrix* block_diagonal) { const CompressedRowBlockStructure* block_diagonal_structure = block_diagonal->block_structure(); - for (const auto& row : block_diagonal_structure->rows) { - const int row_block_pos = row.block.position; - const int row_block_size = row.block.size; - const Cell& cell = row.cells[0]; - MatrixRef m(block_diagonal->mutable_values() + cell.position, - row_block_size, - row_block_size); + 
ParallelFor(options_.context, + 0, + block_diagonal_structure->rows.size(), + options_.num_threads, + [block_diagonal_structure, D, block_diagonal](int row_block_id) { + auto& row = block_diagonal_structure->rows[row_block_id]; + const int row_block_pos = row.block.position; + const int row_block_size = row.block.size; + const Cell& cell = row.cells[0]; + MatrixRef m(block_diagonal->mutable_values() + cell.position, + row_block_size, + row_block_size); - if (D != nullptr) { - ConstVectorRef d(D + row_block_pos, row_block_size); - m += d.array().square().matrix().asDiagonal(); - } + if (D != nullptr) { + ConstVectorRef d(D + row_block_pos, row_block_size); + m += d.array().square().matrix().asDiagonal(); + } - m = m.selfadjointView().llt().solve( - Matrix::Identity(row_block_size, row_block_size)); - } + m = m.selfadjointView().llt().solve( + Matrix::Identity(row_block_size, row_block_size)); + }); } -// Similar to RightMultiply, use the block structure of the matrix A -// to compute y = (E'E)^-1 (E'b - E'F x). +// Similar to RightMultiplyAndAccumulate, use the block structure of the matrix +// A to compute y = (E'E)^-1 (E'b - E'F x). 
void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) { const int num_cols_e = A_->num_cols_e(); const int num_cols_f = A_->num_cols_f(); @@ -163,26 +212,34 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) { const int num_rows = A_->num_rows(); // y1 = F x - tmp_rows_.setZero(); - A_->RightMultiplyF(x, tmp_rows_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); + A_->RightMultiplyAndAccumulateF(x, tmp_rows_.data()); // y2 = b - y1 - tmp_rows_ = ConstVectorRef(b_, num_rows) - tmp_rows_; + ParallelAssign(options_.context, + options_.num_threads, + tmp_rows_, + ConstVectorRef(b_, num_rows) - tmp_rows_); // y3 = E' y2 - tmp_e_cols_.setZero(); - A_->LeftMultiplyE(tmp_rows_.data(), tmp_e_cols_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); + A_->LeftMultiplyAndAccumulateE(tmp_rows_.data(), tmp_e_cols_.data()); // y = (E'E)^-1 y3 - VectorRef(y, num_cols).setZero(); - block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y); + ParallelSetZero(options_.context, options_.num_threads, y, num_cols); + block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate( + tmp_e_cols_.data(), y, options_.context, options_.num_threads); // The full solution vector y has two blocks. The first block of // variables corresponds to the eliminated variables, which we just // computed via back substitution. The second block of variables // corresponds to the Schur complement system, so we just copy those // values from the solution to the Schur complement. - VectorRef(y + num_cols_e, num_cols_f) = ConstVectorRef(x, num_cols_f); + VectorRef y_cols_f(y + num_cols_e, num_cols_f); + ParallelAssign(options_.context, + options_.num_threads, + y_cols_f, + ConstVectorRef(x, num_cols_f)); } // Compute the RHS of the Schur complement system. @@ -193,24 +250,29 @@ void ImplicitSchurComplement::BackSubstitute(const double* x, double* y) { // this using a series of matrix vector products. 
void ImplicitSchurComplement::UpdateRhs() { // y1 = E'b - tmp_e_cols_.setZero(); - A_->LeftMultiplyE(b_, tmp_e_cols_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_); + A_->LeftMultiplyAndAccumulateE(b_, tmp_e_cols_.data()); // y2 = (E'E)^-1 y1 - Vector y2 = Vector::Zero(A_->num_cols_e()); - block_diagonal_EtE_inverse_->RightMultiply(tmp_e_cols_.data(), y2.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_e_cols_2_); + block_diagonal_EtE_inverse_->RightMultiplyAndAccumulate(tmp_e_cols_.data(), + tmp_e_cols_2_.data(), + options_.context, + options_.num_threads); // y3 = E y2 - tmp_rows_.setZero(); - A_->RightMultiplyE(y2.data(), tmp_rows_.data()); + ParallelSetZero(options_.context, options_.num_threads, tmp_rows_); + A_->RightMultiplyAndAccumulateE(tmp_e_cols_2_.data(), tmp_rows_.data()); // y3 = b - y3 - tmp_rows_ = ConstVectorRef(b_, A_->num_rows()) - tmp_rows_; + ParallelAssign(options_.context, + options_.num_threads, + tmp_rows_, + ConstVectorRef(b_, A_->num_rows()) - tmp_rows_); // rhs = F' y3 - rhs_.setZero(); - A_->LeftMultiplyF(tmp_rows_.data(), rhs_.data()); + ParallelSetZero(options_.context, options_.num_threads, rhs_); + A_->LeftMultiplyAndAccumulateF(tmp_rows_.data(), rhs_.data()); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/implicit_schur_complement.h b/extern/ceres/internal/ceres/implicit_schur_complement.h index 598d48411aa..b4eb0b072aa 100644 --- a/extern/ceres/internal/ceres/implicit_schur_complement.h +++ b/extern/ceres/internal/ceres/implicit_schur_complement.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,8 +44,7 @@ #include "ceres/partitioned_matrix_view.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; @@ -82,13 +81,13 @@ class BlockSparseMatrix; // (which for our purposes is an easily inverted block diagonal // matrix), it can be done in terms of matrix vector products with E, // F and (E'E)^-1. This class implements this functionality and other -// auxilliary bits needed to implement a CG solver on the Schur +// auxiliary bits needed to implement a CG solver on the Schur // complement using the PartitionedMatrixView object. // -// THREAD SAFETY: This class is nqot thread safe. In particular, the -// RightMultiply (and the LeftMultiply) methods are not thread safe as -// they depend on mutable arrays used for the temporaries needed to -// compute the product y += Sx; +// THREAD SAFETY: This class is not thread safe. In particular, the +// RightMultiplyAndAccumulate (and the LeftMultiplyAndAccumulate) methods are +// not thread safe as they depend on mutable arrays used for the temporaries +// needed to compute the product y += Sx; class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator { public: // num_eliminate_blocks is the number of E blocks in the matrix @@ -115,14 +114,20 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator { void Init(const BlockSparseMatrix& A, const double* D, const double* b); // y += Sx, where S is the Schur complement. - void RightMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; // The Schur complement is a symmetric positive definite matrix, // thus the left and right multiply operators are the same. 
- void LeftMultiply(const double* x, double* y) const final { - RightMultiply(x, y); + void LeftMultiplyAndAccumulate(const double* x, double* y) const final { + RightMultiplyAndAccumulate(x, y); } + // Following is useful for approximation of S^-1 via power series expansion. + // Z = (F'F)^-1 F'E (E'E)^-1 E'F + // y += Zx + void InversePowerSeriesOperatorRightMultiplyAccumulate(const double* x, + double* y) const; + // y = (E'E)^-1 (E'b - E'F x). Given an estimate of the solution to // the Schur complement system, this method computes the value of // the e_block variables that were eliminated to form the Schur @@ -138,6 +143,7 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator { } const BlockSparseMatrix* block_diagonal_FtF_inverse() const { + CHECK(compute_ftf_inverse_); return block_diagonal_FtF_inverse_.get(); } @@ -146,25 +152,24 @@ class CERES_NO_EXPORT ImplicitSchurComplement final : public LinearOperator { void UpdateRhs(); const LinearSolver::Options& options_; - + bool compute_ftf_inverse_ = false; std::unique_ptr A_; - const double* D_; - const double* b_; + const double* D_ = nullptr; + const double* b_ = nullptr; std::unique_ptr block_diagonal_EtE_inverse_; std::unique_ptr block_diagonal_FtF_inverse_; Vector rhs_; - // Temporary storage vectors used to implement RightMultiply. + // Temporary storage vectors used to implement RightMultiplyAndAccumulate. 
mutable Vector tmp_rows_; mutable Vector tmp_e_cols_; mutable Vector tmp_e_cols_2_; mutable Vector tmp_f_cols_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/inner_product_computer.cc b/extern/ceres/internal/ceres/inner_product_computer.cc index fbc43bfed8b..59b5d94727b 100644 --- a/extern/ceres/internal/ceres/inner_product_computer.cc +++ b/extern/ceres/internal/ceres/inner_product_computer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "ceres/small_blas.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Create the CompressedRowSparseMatrix matrix that will contain the // inner product. @@ -52,16 +51,9 @@ InnerProductComputer::CreateResultMatrix( auto matrix = std::make_unique( m_.num_cols(), m_.num_cols(), num_nonzeros); matrix->set_storage_type(storage_type); - const CompressedRowBlockStructure* bs = m_.block_structure(); - const std::vector& blocks = bs->cols; - matrix->mutable_row_blocks()->resize(blocks.size()); - matrix->mutable_col_blocks()->resize(blocks.size()); - for (int i = 0; i < blocks.size(); ++i) { - (*(matrix->mutable_row_blocks()))[i] = blocks[i].size; - (*(matrix->mutable_col_blocks()))[i] = blocks[i].size; - } - + *matrix->mutable_row_blocks() = bs->cols; + *matrix->mutable_col_blocks() = bs->cols; return matrix; } @@ -78,6 +70,10 @@ int InnerProductComputer::ComputeNonzeros( row_nnz->resize(blocks.size()); std::fill(row_nnz->begin(), row_nnz->end(), 0); + if (product_terms.empty()) { + return 0; + } + // First product term. 
(*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size; int num_nonzeros = @@ -130,8 +126,10 @@ std::unique_ptr InnerProductComputer::Create( const int start_row_block, const int end_row_block, CompressedRowSparseMatrix::StorageType product_storage_type) { - CHECK(product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR || - product_storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR); + CHECK(product_storage_type == + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR || + product_storage_type == + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); CHECK_GT(m.num_nonzeros(), 0) << "Congratulations, you found a bug in Ceres. Please report it."; std::unique_ptr inner_product_computer( @@ -157,7 +155,8 @@ void InnerProductComputer::Init( for (int c1 = 0; c1 < row.cells.size(); ++c1) { const Cell& cell1 = row.cells[c1]; int c2_begin, c2_end; - if (product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) { + if (product_storage_type == + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) { c2_begin = 0; c2_end = c1 + 1; } else { @@ -195,6 +194,10 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix( *(crsm_rows + 1) = *crsm_rows + row_block_nnz[i]; } } + result_offsets_.resize(product_terms.size()); + if (num_nonzeros == 0) { + return; + } // The following macro FILL_CRSM_COL_BLOCK is key to understanding // how this class works. @@ -241,12 +244,11 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix( } \ } - result_offsets_.resize(product_terms.size()); int col_nnz = 0; int nnz = 0; // Process the first term. - const InnerProductComputer::ProductTerm* current = &product_terms[0]; + const InnerProductComputer::ProductTerm* current = product_terms.data(); FILL_CRSM_COL_BLOCK; // Process the rest of the terms. 
@@ -264,7 +266,7 @@ void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix( if (previous->row == current->row) { // if the current and previous terms are in the same row block, // then they differ in the column block, in which case advance - // col_nnz by the column size of the prevous term. + // col_nnz by the column size of the previous term. col_nnz += col_blocks[previous->col].size; } else { // If we have moved to a new row-block , then col_nnz is zero, @@ -302,7 +304,8 @@ void InnerProductComputer::Compute() { rows[bs->cols[cell1.block_id].position]; int c2_begin, c2_end; - if (storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) { + if (storage_type == + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) { c2_begin = 0; c2_end = c1 + 1; } else { @@ -330,5 +333,4 @@ void InnerProductComputer::Compute() { CHECK_EQ(cursor, result_offsets_.size()); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/inner_product_computer.h b/extern/ceres/internal/ceres/inner_product_computer.h index c6ed0b23e87..c1c0a3489ec 100644 --- a/extern/ceres/internal/ceres/inner_product_computer.h +++ b/extern/ceres/internal/ceres/inner_product_computer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // This class is used to repeatedly compute the inner product // @@ -153,8 +152,7 @@ class CERES_NO_EXPORT InnerProductComputer { std::vector result_offsets_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/invert_psd_matrix.h b/extern/ceres/internal/ceres/invert_psd_matrix.h index ac8808b5a04..bc749009169 100644 --- a/extern/ceres/internal/ceres/invert_psd_matrix.h +++ b/extern/ceres/internal/ceres/invert_psd_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "ceres/internal/eigen.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Helper routine to compute the inverse or pseudo-inverse of a // symmetric positive semi-definite matrix. @@ -73,7 +72,6 @@ typename EigenTypes::Matrix InvertPSDMatrix( return svd.solve(MType::Identity(size, size)); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_INVERT_PSD_MATRIX_H_ diff --git a/extern/ceres/internal/ceres/is_close.cc b/extern/ceres/internal/ceres/is_close.cc index 0becf5546a0..575918bc6a7 100644 --- a/extern/ceres/internal/ceres/is_close.cc +++ b/extern/ceres/internal/ceres/is_close.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ #include #include -namespace ceres { -namespace internal { +namespace ceres::internal { bool IsClose(double x, double y, double relative_precision, @@ -57,5 +56,4 @@ bool IsClose(double x, } return *relative_error < std::fabs(relative_precision); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/is_close.h b/extern/ceres/internal/ceres/is_close.h index a1e4e2f6721..1f6c82fc122 100644 --- a/extern/ceres/internal/ceres/is_close.h +++ b/extern/ceres/internal/ceres/is_close.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Returns true if x and y have a relative (unsigned) difference less than // relative_precision and false otherwise. Stores the relative and absolute // difference in relative/absolute_error if non-nullptr. If one of the two @@ -48,8 +47,7 @@ CERES_NO_EXPORT bool IsClose(double x, double relative_precision, double* relative_error, double* absolute_error); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/iteration_callback.cc b/extern/ceres/internal/ceres/iteration_callback.cc index 804811d2807..0cec07142b7 100644 --- a/extern/ceres/internal/ceres/iteration_callback.cc +++ b/extern/ceres/internal/ceres/iteration_callback.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/iterative_refiner.cc b/extern/ceres/internal/ceres/iterative_refiner.cc index 18154690597..54d48f30572 100644 --- a/extern/ceres/internal/ceres/iterative_refiner.cc +++ b/extern/ceres/internal/ceres/iterative_refiner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,43 +33,69 @@ #include #include "Eigen/Core" +#include "ceres/dense_cholesky.h" #include "ceres/sparse_cholesky.h" #include "ceres/sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -IterativeRefiner::IterativeRefiner(const int max_num_iterations) +SparseIterativeRefiner::SparseIterativeRefiner(const int max_num_iterations) : max_num_iterations_(max_num_iterations) {} -IterativeRefiner::~IterativeRefiner() = default; +SparseIterativeRefiner::~SparseIterativeRefiner() = default; -void IterativeRefiner::Allocate(int num_cols) { +void SparseIterativeRefiner::Allocate(int num_cols) { residual_.resize(num_cols); correction_.resize(num_cols); lhs_x_solution_.resize(num_cols); } -void IterativeRefiner::Refine(const SparseMatrix& lhs, - const double* rhs_ptr, - SparseCholesky* sparse_cholesky, - double* solution_ptr) { +void SparseIterativeRefiner::Refine(const SparseMatrix& lhs, + const double* rhs_ptr, + SparseCholesky* cholesky, + double* solution_ptr) { const int num_cols = lhs.num_cols(); Allocate(num_cols); ConstVectorRef rhs(rhs_ptr, num_cols); VectorRef solution(solution_ptr, num_cols); + std::string ignored_message; for (int i = 0; i < max_num_iterations_; ++i) { // residual = rhs - lhs * solution lhs_x_solution_.setZero(); - 
lhs.RightMultiply(solution_ptr, lhs_x_solution_.data()); + lhs.RightMultiplyAndAccumulate(solution_ptr, lhs_x_solution_.data()); residual_ = rhs - lhs_x_solution_; // solution += lhs^-1 residual - std::string ignored_message; - sparse_cholesky->Solve( - residual_.data(), correction_.data(), &ignored_message); + cholesky->Solve(residual_.data(), correction_.data(), &ignored_message); solution += correction_; } }; -} // namespace internal -} // namespace ceres +DenseIterativeRefiner::DenseIterativeRefiner(const int max_num_iterations) + : max_num_iterations_(max_num_iterations) {} + +DenseIterativeRefiner::~DenseIterativeRefiner() = default; + +void DenseIterativeRefiner::Allocate(int num_cols) { + residual_.resize(num_cols); + correction_.resize(num_cols); +} + +void DenseIterativeRefiner::Refine(const int num_cols, + const double* lhs_ptr, + const double* rhs_ptr, + DenseCholesky* cholesky, + double* solution_ptr) { + Allocate(num_cols); + ConstMatrixRef lhs(lhs_ptr, num_cols, num_cols); + ConstVectorRef rhs(rhs_ptr, num_cols); + VectorRef solution(solution_ptr, num_cols); + std::string ignored_message; + for (int i = 0; i < max_num_iterations_; ++i) { + residual_ = rhs - lhs * solution; + // solution += lhs^-1 residual + cholesky->Solve(residual_.data(), correction_.data(), &ignored_message); + solution += correction_; + } +}; + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/iterative_refiner.h b/extern/ceres/internal/ceres/iterative_refiner.h index 837af178ab4..660726897ab 100644 --- a/extern/ceres/internal/ceres/iterative_refiner.h +++ b/extern/ceres/internal/ceres/iterative_refiner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,9 +39,9 @@ #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { +class DenseCholesky; class SparseCholesky; class SparseMatrix; @@ -58,20 +58,20 @@ class SparseMatrix; // Definite linear systems. // // The above iterative loop is run until max_num_iterations is reached. -class CERES_NO_EXPORT IterativeRefiner { +class CERES_NO_EXPORT SparseIterativeRefiner { public: // max_num_iterations is the number of refinement iterations to // perform. - explicit IterativeRefiner(int max_num_iterations); + explicit SparseIterativeRefiner(int max_num_iterations); // Needed for mocking. - virtual ~IterativeRefiner(); + virtual ~SparseIterativeRefiner(); // Given an initial estimate of the solution of lhs * x = rhs, use // max_num_iterations rounds of iterative refinement to improve it. // - // sparse_cholesky is assumed to contain an already computed - // factorization (or approximation thereof) of lhs. + // cholesky is assumed to contain an already computed factorization (or + // an approximation thereof) of lhs. // // solution is expected to contain a approximation to the solution // to lhs * x = rhs. It can be zero. @@ -79,7 +79,7 @@ class CERES_NO_EXPORT IterativeRefiner { // This method is virtual to facilitate mocking. virtual void Refine(const SparseMatrix& lhs, const double* rhs, - SparseCholesky* sparse_cholesky, + SparseCholesky* cholesky, double* solution); private: @@ -91,7 +91,39 @@ class CERES_NO_EXPORT IterativeRefiner { Vector lhs_x_solution_; }; -} // namespace internal -} // namespace ceres +class CERES_NO_EXPORT DenseIterativeRefiner { + public: + // max_num_iterations is the number of refinement iterations to + // perform. + explicit DenseIterativeRefiner(int max_num_iterations); + + // Needed for mocking. 
+ virtual ~DenseIterativeRefiner(); + + // Given an initial estimate of the solution of lhs * x = rhs, use + // max_num_iterations rounds of iterative refinement to improve it. + // + // cholesky is assumed to contain an already computed factorization (or + // an approximation thereof) of lhs. + // + // solution is expected to contain a approximation to the solution + // to lhs * x = rhs. It can be zero. + // + // This method is virtual to facilitate mocking. + virtual void Refine(int num_cols, + const double* lhs, + const double* rhs, + DenseCholesky* cholesky, + double* solution); + + private: + void Allocate(int num_cols); + + int max_num_iterations_; + Vector residual_; + Vector correction_; +}; + +} // namespace ceres::internal #endif // CERES_INTERNAL_ITERATIVE_REFINER_H_ diff --git a/extern/ceres/internal/ceres/iterative_schur_complement_solver.cc b/extern/ceres/internal/ceres/iterative_schur_complement_solver.cc index bc22d68bc55..bcfb6e41eb4 100644 --- a/extern/ceres/internal/ceres/iterative_schur_complement_solver.cc +++ b/extern/ceres/internal/ceres/iterative_schur_complement_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ #include "ceres/implicit_schur_complement.h" #include "ceres/internal/eigen.h" #include "ceres/linear_solver.h" +#include "ceres/power_series_expansion_preconditioner.h" #include "ceres/preconditioner.h" #include "ceres/schur_jacobi_preconditioner.h" #include "ceres/triplet_sparse_matrix.h" @@ -51,8 +52,7 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { IterativeSchurComplementSolver::IterativeSchurComplementSolver( LinearSolver::Options options) @@ -68,6 +68,8 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( EventLogger event_logger("IterativeSchurComplementSolver::Solve"); CHECK(A->block_structure() != nullptr); + CHECK(A->transpose_block_structure() != nullptr); + const int num_eliminate_blocks = options_.elimination_groups[0]; // Initialize a ImplicitSchurComplement object. if (schur_complement_ == nullptr) { @@ -86,45 +88,66 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( VLOG(2) << "No parameter blocks left in the schur complement."; LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; schur_complement_->BackSubstitute(nullptr, x); return summary; } - // Initialize the solution to the Schur complement system to zero. + // Initialize the solution to the Schur complement system. 
reduced_linear_system_solution_.resize(schur_complement_->num_rows()); reduced_linear_system_solution_.setZero(); - - LinearSolver::Options cg_options; - cg_options.min_num_iterations = options_.min_num_iterations; - cg_options.max_num_iterations = options_.max_num_iterations; - ConjugateGradientsSolver cg_solver(cg_options); - - LinearSolver::PerSolveOptions cg_per_solve_options; - cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance; - cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance; + if (options_.use_spse_initialization) { + Preconditioner::Options preconditioner_options(options_); + preconditioner_options.type = SCHUR_POWER_SERIES_EXPANSION; + PowerSeriesExpansionPreconditioner pse_solver( + schur_complement_.get(), + options_.max_num_spse_iterations, + options_.spse_tolerance, + preconditioner_options); + pse_solver.RightMultiplyAndAccumulate( + schur_complement_->rhs().data(), + reduced_linear_system_solution_.data()); + } CreatePreconditioner(A); - if (preconditioner_.get() != nullptr) { + if (preconditioner_ != nullptr) { if (!preconditioner_->Update(*A, per_solve_options.D)) { LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_FAILURE; + summary.termination_type = LinearSolverTerminationType::FAILURE; summary.message = "Preconditioner update failed."; return summary; } - - cg_per_solve_options.preconditioner = preconditioner_.get(); } + ConjugateGradientsSolverOptions cg_options; + cg_options.min_num_iterations = options_.min_num_iterations; + cg_options.max_num_iterations = options_.max_num_iterations; + cg_options.residual_reset_period = options_.residual_reset_period; + cg_options.q_tolerance = per_solve_options.q_tolerance; + cg_options.r_tolerance = per_solve_options.r_tolerance; + + LinearOperatorAdapter lhs(*schur_complement_); + LinearOperatorAdapter preconditioner(*preconditioner_); + + Vector scratch[4]; + for (int i = 0; i < 4; ++i) { + 
scratch[i].resize(schur_complement_->num_cols()); + } + Vector* scratch_ptr[4] = {&scratch[0], &scratch[1], &scratch[2], &scratch[3]}; + event_logger.AddEvent("Setup"); + LinearSolver::Summary summary = - cg_solver.Solve(schur_complement_.get(), - schur_complement_->rhs().data(), - cg_per_solve_options, - reduced_linear_system_solution_.data()); - if (summary.termination_type != LINEAR_SOLVER_FAILURE && - summary.termination_type != LINEAR_SOLVER_FATAL_ERROR) { + ConjugateGradientsSolver(cg_options, + lhs, + schur_complement_->rhs(), + preconditioner, + scratch_ptr, + reduced_linear_system_solution_); + + if (summary.termination_type != LinearSolverTerminationType::FAILURE && + summary.termination_type != LinearSolverTerminationType::FATAL_ERROR) { schur_complement_->BackSubstitute(reduced_linear_system_solution_.data(), x); } @@ -134,29 +157,31 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( void IterativeSchurComplementSolver::CreatePreconditioner( BlockSparseMatrix* A) { - if (options_.preconditioner_type == IDENTITY || - preconditioner_.get() != nullptr) { + if (preconditioner_ != nullptr) { return; } - Preconditioner::Options preconditioner_options; - preconditioner_options.type = options_.preconditioner_type; - preconditioner_options.visibility_clustering_type = - options_.visibility_clustering_type; - preconditioner_options.sparse_linear_algebra_library_type = - options_.sparse_linear_algebra_library_type; - preconditioner_options.num_threads = options_.num_threads; - preconditioner_options.row_block_size = options_.row_block_size; - preconditioner_options.e_block_size = options_.e_block_size; - preconditioner_options.f_block_size = options_.f_block_size; - preconditioner_options.elimination_groups = options_.elimination_groups; + Preconditioner::Options preconditioner_options(options_); CHECK(options_.context != nullptr); - preconditioner_options.context = options_.context; switch (options_.preconditioner_type) { + case IDENTITY: + 
preconditioner_ = std::make_unique( + schur_complement_->num_cols()); + break; case JACOBI: preconditioner_ = std::make_unique( - schur_complement_->block_diagonal_FtF_inverse()); + schur_complement_->block_diagonal_FtF_inverse(), + preconditioner_options); + break; + case SCHUR_POWER_SERIES_EXPANSION: + // Ignoring the value of spse_tolerance to ensure preconditioner stays + // fixed during the iterations of cg. + preconditioner_ = std::make_unique( + schur_complement_.get(), + options_.max_num_spse_iterations, + 0, + preconditioner_options); break; case SCHUR_JACOBI: preconditioner_ = std::make_unique( @@ -172,5 +197,4 @@ void IterativeSchurComplementSolver::CreatePreconditioner( } }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/iterative_schur_complement_solver.h b/extern/ceres/internal/ceres/iterative_schur_complement_solver.h index 50f469484f8..a4b6b53bb77 100644 --- a/extern/ceres/internal/ceres/iterative_schur_complement_solver.h +++ b/extern/ceres/internal/ceres/iterative_schur_complement_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/linear_solver.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; class ImplicitSchurComplement; @@ -53,7 +52,7 @@ class Preconditioner; // The algorithm used by this solver was developed in a series of // papers - "Agarwal et al, Bundle Adjustment in the Large, ECCV 2010" // and "Wu et al, Multicore Bundle Adjustment, submitted to CVPR -// 2011" at the Univeristy of Washington. +// 2011" at the University of Washington. 
// // The key idea is that one can run Conjugate Gradients on the Schur // Complement system without explicitly forming the Schur Complement @@ -94,8 +93,7 @@ class CERES_NO_EXPORT IterativeSchurComplementSolver final Vector reduced_linear_system_solution_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/levenberg_marquardt_strategy.cc b/extern/ceres/internal/ceres/levenberg_marquardt_strategy.cc index 2445f5bb99a..37bc6f47513 100644 --- a/extern/ceres/internal/ceres/levenberg_marquardt_strategy.cc +++ b/extern/ceres/internal/ceres/levenberg_marquardt_strategy.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,13 +38,13 @@ #include "ceres/internal/eigen.h" #include "ceres/linear_least_squares_problems.h" #include "ceres/linear_solver.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/sparse_matrix.h" #include "ceres/trust_region_strategy.h" #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { LevenbergMarquardtStrategy::LevenbergMarquardtStrategy( const TrustRegionStrategy::Options& options) @@ -54,7 +54,9 @@ LevenbergMarquardtStrategy::LevenbergMarquardtStrategy( min_diagonal_(options.min_lm_diagonal), max_diagonal_(options.max_lm_diagonal), decrease_factor_(2.0), - reuse_diagonal_(false) { + reuse_diagonal_(false), + context_(options.context), + num_threads_(options.num_threads) { CHECK(linear_solver_ != nullptr); CHECK_GT(min_diagonal_, 0.0); CHECK_LE(min_diagonal_, max_diagonal_); @@ -78,14 +80,18 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep( diagonal_.resize(num_parameters, 1); } - 
jacobian->SquaredColumnNorm(diagonal_.data()); - for (int i = 0; i < num_parameters; ++i) { - diagonal_[i] = - std::min(std::max(diagonal_[i], min_diagonal_), max_diagonal_); - } + jacobian->SquaredColumnNorm(diagonal_.data(), context_, num_threads_); + ParallelAssign(context_, + num_threads_, + diagonal_, + diagonal_.array().max(min_diagonal_).min(max_diagonal_)); } - lm_diagonal_ = (diagonal_ / radius_).array().sqrt(); + if (lm_diagonal_.size() == 0) { + lm_diagonal_.resize(num_parameters); + } + ParallelAssign( + context_, num_threads_, lm_diagonal_, (diagonal_ / radius_).cwiseSqrt()); LinearSolver::PerSolveOptions solve_options; solve_options.D = lm_diagonal_.data(); @@ -99,7 +105,7 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep( // Invalidate the output array lm_step, so that we can detect if // the linear solver generated numerical garbage. This is known // to happen for the DENSE_QR and then DENSE_SCHUR solver when - // the Jacobin is severely rank deficient and mu is too small. + // the Jacobian is severely rank deficient and mu is too small. InvalidateArray(num_parameters, step); // Instead of solving Jx = -r, solve Jy = r. @@ -108,17 +114,21 @@ TrustRegionStrategy::Summary LevenbergMarquardtStrategy::ComputeStep( LinearSolver::Summary linear_solver_summary = linear_solver_->Solve(jacobian, residuals, solve_options, step); - if (linear_solver_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) { + if (linear_solver_summary.termination_type == + LinearSolverTerminationType::FATAL_ERROR) { LOG(WARNING) << "Linear solver fatal error: " << linear_solver_summary.message; - } else if (linear_solver_summary.termination_type == LINEAR_SOLVER_FAILURE) { + } else if (linear_solver_summary.termination_type == + LinearSolverTerminationType::FAILURE) { LOG(WARNING) << "Linear solver failure. Failed to compute a step: " << linear_solver_summary.message; } else if (!IsArrayValid(num_parameters, step)) { LOG(WARNING) << "Linear solver failure. 
Failed to compute a finite step."; - linear_solver_summary.termination_type = LINEAR_SOLVER_FAILURE; + linear_solver_summary.termination_type = + LinearSolverTerminationType::FAILURE; } else { - VectorRef(step, num_parameters) *= -1.0; + VectorRef step_vec(step, num_parameters); + ParallelAssign(context_, num_threads_, step_vec, -step_vec); } reuse_diagonal_ = true; @@ -153,7 +163,7 @@ void LevenbergMarquardtStrategy::StepAccepted(double step_quality) { reuse_diagonal_ = false; } -void LevenbergMarquardtStrategy::StepRejected(double step_quality) { +void LevenbergMarquardtStrategy::StepRejected(double /*step_quality*/) { radius_ = radius_ / decrease_factor_; decrease_factor_ *= 2.0; reuse_diagonal_ = true; @@ -161,5 +171,4 @@ void LevenbergMarquardtStrategy::StepRejected(double step_quality) { double LevenbergMarquardtStrategy::Radius() const { return radius_; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/levenberg_marquardt_strategy.h b/extern/ceres/internal/ceres/levenberg_marquardt_strategy.h index 4383a493cde..1b341c126d1 100644 --- a/extern/ceres/internal/ceres/levenberg_marquardt_strategy.h +++ b/extern/ceres/internal/ceres/levenberg_marquardt_strategy.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,9 @@ #include "ceres/internal/export.h" #include "ceres/trust_region_strategy.h" -namespace ceres { -namespace internal { +namespace ceres::internal { + +class ContextImpl; // Levenberg-Marquardt step computation and trust region sizing // strategy based on on "Methods for Nonlinear Least Squares" by @@ -82,10 +83,11 @@ class CERES_NO_EXPORT LevenbergMarquardtStrategy final // allocations in every iteration and reuse when a step fails and // ComputeStep is called again. Vector lm_diagonal_; // lm_diagonal_ = sqrt(diagonal_ / radius_); + ContextImpl* context_; + int num_threads_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/line_search.cc b/extern/ceres/internal/ceres/line_search.cc index 7e7d97f6d93..eb2c7c903e8 100644 --- a/extern/ceres/internal/ceres/line_search.cc +++ b/extern/ceres/internal/ceres/line_search.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,11 @@ #include #include #include -#include // NOLINT +#include #include +#include // NOLINT +#include +#include #include "ceres/evaluator.h" #include "ceres/function_sample.h" @@ -45,23 +48,17 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::map; -using std::ostream; -using std::string; -using std::vector; +namespace ceres::internal { namespace { // Precision used for floating point values in error message output. 
const int kErrorMessageNumericPrecision = 8; } // namespace -ostream& operator<<(ostream& os, const FunctionSample& sample); +std::ostream& operator<<(std::ostream& os, const FunctionSample& sample); // Convenience stream operator for pushing FunctionSamples into log messages. -ostream& operator<<(ostream& os, const FunctionSample& sample) { +std::ostream& operator<<(std::ostream& os, const FunctionSample& sample) { os << sample.ToDebugString(); return os; } @@ -74,16 +71,16 @@ LineSearch::LineSearch(const LineSearch::Options& options) std::unique_ptr LineSearch::Create( const LineSearchType line_search_type, const LineSearch::Options& options, - string* error) { + std::string* error) { switch (line_search_type) { case ceres::ARMIJO: return std::make_unique(options); case ceres::WOLFE: return std::make_unique(options); default: - *error = string("Invalid line search algorithm type: ") + + *error = std::string("Invalid line search algorithm type: ") + LineSearchTypeToString(line_search_type) + - string(", unable to create line search."); + std::string(", unable to create line search."); } return nullptr; } @@ -150,7 +147,7 @@ double LineSearchFunction::DirectionInfinityNorm() const { } void LineSearchFunction::ResetTimeStatistics() { - const map evaluator_statistics = + const std::map evaluator_statistics = evaluator_->Statistics(); initial_evaluator_residual_time_in_seconds = @@ -166,7 +163,7 @@ void LineSearchFunction::ResetTimeStatistics() { void LineSearchFunction::TimeStatistics( double* cost_evaluation_time_in_seconds, double* gradient_evaluation_time_in_seconds) const { - const map evaluator_time_statistics = + const std::map evaluator_time_statistics = evaluator_->Statistics(); *cost_evaluation_time_in_seconds = FindWithDefault( @@ -243,7 +240,7 @@ double LineSearch::InterpolatingPolynomialMinimizingStepSize( // Select step size by interpolating the function and gradient values // and minimizing the corresponding polynomial. 
- vector samples; + std::vector samples; samples.push_back(lowerbound); if (interpolation_type == QUADRATIC) { @@ -427,7 +424,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate, // shrank the bracket width until it was below our minimum tolerance. // As these are 'artificial' constraints, and we would otherwise fail to // produce a valid point when ArmijoLineSearch would succeed, we return the - // point with the lowest cost found thus far which satsifies the Armijo + // point with the lowest cost found thus far which satisfies the Armijo // condition (but not the Wolfe conditions). summary->optimal_point = bracket_low; summary->success = true; @@ -449,8 +446,8 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate, // defined by bracket_low & bracket_high, which satisfy: // // 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x - // contains step sizes that satsify the strong Wolfe conditions. - // 2. bracket_low.x is of all the step sizes evaluated *which satisifed the + // contains step sizes that satisfy the strong Wolfe conditions. + // 2. bracket_low.x is of all the step sizes evaluated *which satisfied the // Armijo sufficient decrease condition*, the one which generated the // smallest function value, i.e. bracket_low.value < // f(all other steps satisfying Armijo). @@ -494,7 +491,7 @@ void WolfeLineSearch::DoSearch(const double step_size_estimate, // Or, searching was stopped due to an 'artificial' constraint, i.e. not // a condition imposed / required by the underlying algorithm, but instead an // engineering / implementation consideration. But a step which exceeds the -// minimum step size, and satsifies the Armijo condition was still found, +// minimum step size, and satisfies the Armijo condition was still found, // and should thus be used [zoom not required]. 
// // Returns false if no step size > minimum step size was found which @@ -518,7 +515,7 @@ bool WolfeLineSearch::BracketingPhase(const FunctionSample& initial_position, // As we require the gradient to evaluate the Wolfe condition, we always // calculate it together with the value, irrespective of the interpolation // type. As opposed to only calculating the gradient after the Armijo - // condition is satisifed, as the computational saving from this approach + // condition is satisfied, as the computational saving from this approach // would be slight (perhaps even negative due to the extra call). Also, // always calculating the value & gradient together protects against us // reporting invalid solutions if the cost function returns slightly different @@ -821,7 +818,7 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position, // As we require the gradient to evaluate the Wolfe condition, we always // calculate it together with the value, irrespective of the interpolation // type. As opposed to only calculating the gradient after the Armijo - // condition is satisifed, as the computational saving from this approach + // condition is satisfied, as the computational saving from this approach // would be slight (perhaps even negative due to the extra call). Also, // always calculating the value & gradient together protects against us // reporting invalid solutions if the cost function returns slightly @@ -883,5 +880,4 @@ bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/line_search.h b/extern/ceres/internal/ceres/line_search.h index c2c744afe00..acf85c00235 100644 --- a/extern/ceres/internal/ceres/line_search.h +++ b/extern/ceres/internal/ceres/line_search.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,8 +42,7 @@ #include "ceres/internal/export.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Evaluator; class LineSearchFunction; @@ -302,7 +301,6 @@ class CERES_NO_EXPORT WolfeLineSearch final : public LineSearch { Summary* summary) const final; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_LINE_SEARCH_H_ diff --git a/extern/ceres/internal/ceres/line_search_direction.cc b/extern/ceres/internal/ceres/line_search_direction.cc index 98e335a8029..62fcc81c70f 100644 --- a/extern/ceres/internal/ceres/line_search_direction.cc +++ b/extern/ceres/internal/ceres/line_search_direction.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,12 +38,11 @@ #include "ceres/low_rank_inverse_hessian.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT SteepestDescent final : public LineSearchDirection { public: - bool NextDirection(const LineSearchMinimizer::State& previous, + bool NextDirection(const LineSearchMinimizer::State& /*previous*/, const LineSearchMinimizer::State& current, Vector* search_direction) override { *search_direction = -current.gradient; @@ -121,8 +120,8 @@ class CERES_NO_EXPORT LBFGS final : public LineSearchDirection { current.gradient - previous.gradient); search_direction->setZero(); - low_rank_inverse_hessian_.RightMultiply(current.gradient.data(), - search_direction->data()); + low_rank_inverse_hessian_.RightMultiplyAndAccumulate( + current.gradient.data(), search_direction->data()); *search_direction *= -1.0; if (search_direction->dot(current.gradient) >= 0.0) { @@ -242,7 +241,7 @@ class CERES_NO_EXPORT BFGS final : public LineSearchDirection { // // The original origin of this rescaling trick is somewhat unclear, the // earliest reference appears to be Oren [1], however it is widely - // discussed without specific attributation in various texts including + // discussed without specific attribution in various texts including // [2] (p143). 
// // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms @@ -367,5 +366,4 @@ std::unique_ptr LineSearchDirection::Create( return nullptr; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/line_search_direction.h b/extern/ceres/internal/ceres/line_search_direction.h index 47b256d7133..671684056cf 100644 --- a/extern/ceres/internal/ceres/line_search_direction.h +++ b/extern/ceres/internal/ceres/line_search_direction.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/line_search_minimizer.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT LineSearchDirection { public: @@ -61,7 +60,6 @@ class CERES_NO_EXPORT LineSearchDirection { Vector* search_direction) = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_LINE_SEARCH_DIRECTION_H_ diff --git a/extern/ceres/internal/ceres/line_search_minimizer.cc b/extern/ceres/internal/ceres/line_search_minimizer.cc index ad1e1852386..58a4bf9a871 100644 --- a/extern/ceres/internal/ceres/line_search_minimizer.cc +++ b/extern/ceres/internal/ceres/line_search_minimizer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ // // Generic loop for line search based optimization algorithms. 
// -// This is primarily inpsired by the minFunc packaged written by Mark +// This is primarily inspired by the minFunc package written by Mark // Schmidt. // // http://www.di.ens.fr/~mschmidt/Software/minFunc.html @@ -59,8 +59,7 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { namespace { bool EvaluateGradientNorms(Evaluator* evaluator, @@ -473,5 +472,4 @@ void LineSearchMinimizer::Minimize(const Minimizer::Options& options, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/line_search_minimizer.h b/extern/ceres/internal/ceres/line_search_minimizer.h index 9a0e994dcfc..f3621d92b47 100644 --- a/extern/ceres/internal/ceres/line_search_minimizer.h +++ b/extern/ceres/internal/ceres/line_search_minimizer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Generic line search minimization algorithm.
// @@ -47,7 +46,7 @@ namespace internal { class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer { public: struct State { - State(int num_parameters, int num_effective_parameters) + State(int /*num_parameters*/, int num_effective_parameters) : cost(0.0), gradient(num_effective_parameters), gradient_squared_norm(0.0), @@ -69,7 +68,6 @@ class CERES_NO_EXPORT LineSearchMinimizer final : public Minimizer { Solver::Summary* summary) final; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_LINE_SEARCH_MINIMIZER_H_ diff --git a/extern/ceres/internal/ceres/line_search_preprocessor.cc b/extern/ceres/internal/ceres/line_search_preprocessor.cc index 26b8d99a4c7..3109c48b6e0 100644 --- a/extern/ceres/internal/ceres/line_search_preprocessor.cc +++ b/extern/ceres/internal/ceres/line_search_preprocessor.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -41,8 +41,7 @@ #include "ceres/program.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { namespace { bool IsProgramValid(const Program& program, std::string* error) { @@ -102,5 +101,4 @@ bool LineSearchPreprocessor::Preprocess(const Solver::Options& options, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/line_search_preprocessor.h b/extern/ceres/internal/ceres/line_search_preprocessor.h index 27e9c2db9b9..0ffdba173a5 100644 --- a/extern/ceres/internal/ceres/line_search_preprocessor.h +++ b/extern/ceres/internal/ceres/line_search_preprocessor.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "ceres/internal/export.h" #include "ceres/preprocessor.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor { public: @@ -45,8 +44,7 @@ class CERES_NO_EXPORT LineSearchPreprocessor final : public Preprocessor { PreprocessedProblem* preprocessed_problem) final; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/linear_least_squares_problems.cc b/extern/ceres/internal/ceres/linear_least_squares_problems.cc index 2d415af338f..36cffec0278 100644 --- a/extern/ceres/internal/ceres/linear_least_squares_problems.cc +++ b/extern/ceres/internal/ceres/linear_least_squares_problems.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,10 +44,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::string; +namespace ceres::internal { std::unique_ptr CreateLinearLeastSquaresProblemFromId(int id) { @@ -62,6 +59,10 @@ CreateLinearLeastSquaresProblemFromId(int id) { return LinearLeastSquaresProblem3(); case 4: return LinearLeastSquaresProblem4(); + case 5: + return LinearLeastSquaresProblem5(); + case 6: + return LinearLeastSquaresProblem6(); default: LOG(FATAL) << "Unknown problem id requested " << id; } @@ -87,8 +88,7 @@ x_D = [1.78448275; 2.82327586;] */ std::unique_ptr LinearLeastSquaresProblem0() { - std::unique_ptr problem = - std::make_unique(); + auto problem = std::make_unique(); auto A = std::make_unique(3, 2, 6); problem->b = std::make_unique(3); @@ -161,13 +161,15 @@ std::unique_ptr LinearLeastSquaresProblem0() { 12 0 1 17 1 0 30 1 1 37] + cond(A'A) = 200.36 + S = [ 42.3419 -1.4000 -11.5806 -1.4000 2.6000 1.0000 -11.5806 1.0000 31.1935] r = [ 4.3032 5.4000 - 5.0323] + 4.0323] S\r = [ 0.2102 2.1367 @@ -187,14 +189,21 @@ std::unique_ptr LinearLeastSquaresProblem1() { int num_rows = 6; int num_cols = 5; - std::unique_ptr problem = - std::make_unique(); + auto problem = std::make_unique(); + auto A = std::make_unique( num_rows, num_cols, num_rows * num_cols); problem->b = std::make_unique(num_rows); problem->D = std::make_unique(num_cols); problem->num_eliminate_blocks = 2; + problem->x = std::make_unique(num_cols); + problem->x[0] = -2.3061; + problem->x[1] = 0.3172; + problem->x[2] = 0.2102; + problem->x[3] = 2.1367; + problem->x[4] = 0.1388; + int* rows = A->mutable_rows(); int* cols = A->mutable_cols(); double* values = A->mutable_values(); @@ -292,16 +301,21 @@ std::unique_ptr LinearLeastSquaresProblem2() { int num_rows = 6; int num_cols = 5; - std::unique_ptr problem = - std::make_unique(); + auto problem = std::make_unique(); 
problem->b = std::make_unique(num_rows); problem->D = std::make_unique(num_cols); problem->num_eliminate_blocks = 2; + problem->x = std::make_unique(num_cols); + problem->x[0] = -2.3061; + problem->x[1] = 0.3172; + problem->x[2] = 0.2102; + problem->x[3] = 2.1367; + problem->x[4] = 0.1388; + auto* bs = new CompressedRowBlockStructure; - std::unique_ptr values = - std::make_unique(num_rows * num_cols); + auto values = std::make_unique(num_rows * num_cols); for (int c = 0; c < num_cols; ++c) { bs->cols.emplace_back(); @@ -427,16 +441,14 @@ std::unique_ptr LinearLeastSquaresProblem3() { int num_rows = 5; int num_cols = 2; - std::unique_ptr problem = - std::make_unique(); + auto problem = std::make_unique(); problem->b = std::make_unique(num_rows); problem->D = std::make_unique(num_cols); problem->num_eliminate_blocks = 2; auto* bs = new CompressedRowBlockStructure; - std::unique_ptr values = - std::make_unique(num_rows * num_cols); + auto values = std::make_unique(num_rows * num_cols); for (int c = 0; c < num_cols; ++c) { bs->cols.emplace_back(); @@ -536,16 +548,14 @@ std::unique_ptr LinearLeastSquaresProblem4() { int num_rows = 3; int num_cols = 7; - std::unique_ptr problem = - std::make_unique(); + auto problem = std::make_unique(); problem->b = std::make_unique(num_rows); problem->D = std::make_unique(num_cols); problem->num_eliminate_blocks = 1; auto* bs = new CompressedRowBlockStructure; - std::unique_ptr values = - std::make_unique(num_rows * num_cols); + auto values = std::make_unique(num_rows * num_cols); // Column block structure bs->cols.emplace_back(); @@ -614,12 +624,313 @@ std::unique_ptr LinearLeastSquaresProblem4() { return problem; } +/* +A problem with block-diagonal F'F. 
+ + A = [-1 0 | 0 0 2 + 3 0 | 0 0 4 + 0 -1 | 0 1 0 + 0 -3 | 0 1 0 + 0 -1 | 3 0 0 + 0 -2 | 1 0 0] + + b = [0 + 1 + 2 + 3 + 4 + 5] + + c = A'* b = [ 3 + -25 + 17 + 5 + 4] + + A'A = [10 0 0 0 10 + 0 15 -5 -4 0 + 0 -5 10 0 0 + 0 -4 0 2 0 + 10 0 0 0 20] + + cond(A'A) = 41.402 + + S = [ 8.3333 -1.3333 0 + -1.3333 0.9333 0 + 0 0 10.0000] + + r = [ 8.6667 + -1.6667 + 1.0000] + + S\r = [ 0.9778 + -0.3889 + 0.1000] + + A\b = [ 0.2 + -1.4444 + 0.9777 + -0.3888 + 0.1] +*/ + +std::unique_ptr LinearLeastSquaresProblem5() { + int num_rows = 6; + int num_cols = 5; + + auto problem = std::make_unique(); + problem->b = std::make_unique(num_rows); + problem->D = std::make_unique(num_cols); + problem->num_eliminate_blocks = 2; + + // Reference solution x = A\b, using the approximate values listed above. + problem->x = std::make_unique(num_cols); + problem->x[0] = 0.2; + problem->x[1] = -1.4444; + problem->x[2] = 0.9777; + problem->x[3] = -0.3888; + problem->x[4] = 0.1; + + auto* bs = new CompressedRowBlockStructure; + auto values = std::make_unique(num_rows * num_cols); + + for (int c = 0; c < num_cols; ++c) { + bs->cols.emplace_back(); + bs->cols.back().size = 1; + bs->cols.back().position = c; + } + + int nnz = 0; + + // Row 1 + { + values[nnz++] = -1; + values[nnz++] = 2; + + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 0; + row.cells.emplace_back(0, 0); + row.cells.emplace_back(4, 1); + } + + // Row 2 + { + values[nnz++] = 3; + values[nnz++] = 4; + + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 1; + row.cells.emplace_back(0, 2); + row.cells.emplace_back(4, 3); + } + + // Row 3 + { + values[nnz++] = -1; + values[nnz++] = 1; + + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 2; + row.cells.emplace_back(1, 4); + row.cells.emplace_back(3, 5); + } + + // Row 4 + { + values[nnz++] = -3; + values[nnz++] = 1; + + bs->rows.emplace_back(); + CompressedRow&
row = bs->rows.back(); + row.block.size = 1; + row.block.position = 3; + row.cells.emplace_back(1, 6); + row.cells.emplace_back(3, 7); + } + + // Row 5 + { + values[nnz++] = -1; + values[nnz++] = 3; + + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 4; + row.cells.emplace_back(1, 8); + row.cells.emplace_back(2, 9); + } + + // Row 6 + { + // values[nnz++] = 2; + values[nnz++] = -2; + values[nnz++] = 1; + + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 5; + // row.cells.emplace_back(0, 10); + row.cells.emplace_back(1, 10); + row.cells.emplace_back(2, 11); + } + + auto A = std::make_unique(bs); + memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values())); + + for (int i = 0; i < num_cols; ++i) { + problem->D.get()[i] = 1; + } + + for (int i = 0; i < num_rows; ++i) { + problem->b.get()[i] = i; + } + + problem->A = std::move(A); + + return problem; +} + +/* + A = [1 2 0 0 0 1 1 + 1 4 0 0 0 5 6 + 3 4 0 0 0 7 8 + 5 6 0 0 0 9 0 + 0 0 9 0 0 3 1] + + b = [0 + 1 + 2 + 3 + 4] +*/ +// BlockSparseMatrix version +// +// This problem has the unique property that it has two different +// sized f-blocks, but only one of them occurs in the rows involving +// the one e-block. So performing Schur elimination on this problem +// tests the Schur Eliminator's ability to handle non-e-block rows +// correctly when their structure does not conform to the static +// structure determined by DetectStructure. +// +// Additionally, this problem has the first row of the last row block of E being +// larger than number of row blocks in E +// +// NOTE: This problem is too small and rank deficient to be solved without +// the diagonal regularization. 
+std::unique_ptr LinearLeastSquaresProblem6() { + int num_rows = 5; + int num_cols = 7; + + auto problem = std::make_unique(); + + problem->b = std::make_unique(num_rows); + problem->D = std::make_unique(num_cols); + problem->num_eliminate_blocks = 1; + + auto* bs = new CompressedRowBlockStructure; + auto values = std::make_unique(num_rows * num_cols); + + // Column block structure + bs->cols.emplace_back(); + bs->cols.back().size = 2; + bs->cols.back().position = 0; + + bs->cols.emplace_back(); + bs->cols.back().size = 3; + bs->cols.back().position = 2; + + bs->cols.emplace_back(); + bs->cols.back().size = 2; + bs->cols.back().position = 5; + + int nnz = 0; + + // Row 1 & 2 + { + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 2; + row.block.position = 0; + + row.cells.emplace_back(0, nnz); + values[nnz++] = 1; + values[nnz++] = 2; + values[nnz++] = 1; + values[nnz++] = 4; + + row.cells.emplace_back(2, nnz); + values[nnz++] = 1; + values[nnz++] = 1; + values[nnz++] = 5; + values[nnz++] = 6; + } + + // Row 3 & 4 + { + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 2; + row.block.position = 2; + + row.cells.emplace_back(0, nnz); + values[nnz++] = 3; + values[nnz++] = 4; + values[nnz++] = 5; + values[nnz++] = 6; + + row.cells.emplace_back(2, nnz); + values[nnz++] = 7; + values[nnz++] = 8; + values[nnz++] = 9; + values[nnz++] = 0; + } + + // Row 5 + { + bs->rows.emplace_back(); + CompressedRow& row = bs->rows.back(); + row.block.size = 1; + row.block.position = 4; + + row.cells.emplace_back(1, nnz); + values[nnz++] = 9; + values[nnz++] = 0; + values[nnz++] = 0; + + row.cells.emplace_back(2, nnz); + values[nnz++] = 3; + values[nnz++] = 1; + } + + auto A = std::make_unique(bs); + memcpy(A->mutable_values(), values.get(), nnz * sizeof(*A->values())); + + for (int i = 0; i < num_cols; ++i) { + problem->D.get()[i] = (i + 1) * 100; + } + + for (int i = 0; i < num_rows; ++i) { + problem->b.get()[i] = i; 
+ } + + problem->A = std::move(A); + return problem; +} + namespace { bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A, const double* D, const double* b, const double* x, - int num_eliminate_blocks) { + int /*num_eliminate_blocks*/) { CHECK(A != nullptr); Matrix AA; A->ToDenseMatrix(&AA); @@ -639,7 +950,7 @@ bool DumpLinearLeastSquaresProblemToConsole(const SparseMatrix* A, return true; } -void WriteArrayToFileOrDie(const string& filename, +void WriteArrayToFileOrDie(const std::string& filename, const double* x, const int size) { CHECK(x != nullptr); @@ -652,23 +963,23 @@ void WriteArrayToFileOrDie(const string& filename, fclose(fptr); } -bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base, +bool DumpLinearLeastSquaresProblemToTextFile(const std::string& filename_base, const SparseMatrix* A, const double* D, const double* b, const double* x, - int num_eliminate_blocks) { + int /*num_eliminate_blocks*/) { CHECK(A != nullptr); LOG(INFO) << "writing to: " << filename_base << "*"; - string matlab_script; + std::string matlab_script; StringAppendF(&matlab_script, "function lsqp = load_trust_region_problem()\n"); StringAppendF(&matlab_script, "lsqp.num_rows = %d;\n", A->num_rows()); StringAppendF(&matlab_script, "lsqp.num_cols = %d;\n", A->num_cols()); { - string filename = filename_base + "_A.txt"; + std::string filename = filename_base + "_A.txt"; FILE* fptr = fopen(filename.c_str(), "w"); CHECK(fptr != nullptr); A->ToTextFile(fptr); @@ -683,33 +994,33 @@ bool DumpLinearLeastSquaresProblemToTextFile(const string& filename_base, } if (D != nullptr) { - string filename = filename_base + "_D.txt"; + std::string filename = filename_base + "_D.txt"; WriteArrayToFileOrDie(filename, D, A->num_cols()); StringAppendF( &matlab_script, "lsqp.D = load('%s', '-ascii');\n", filename.c_str()); } if (b != nullptr) { - string filename = filename_base + "_b.txt"; + std::string filename = filename_base + "_b.txt"; WriteArrayToFileOrDie(filename, b, 
A->num_rows()); StringAppendF( &matlab_script, "lsqp.b = load('%s', '-ascii');\n", filename.c_str()); } if (x != nullptr) { - string filename = filename_base + "_x.txt"; + std::string filename = filename_base + "_x.txt"; WriteArrayToFileOrDie(filename, x, A->num_cols()); StringAppendF( &matlab_script, "lsqp.x = load('%s', '-ascii');\n", filename.c_str()); } - string matlab_filename = filename_base + ".m"; + std::string matlab_filename = filename_base + ".m"; WriteStringToFileOrDie(matlab_script, matlab_filename); return true; } } // namespace -bool DumpLinearLeastSquaresProblem(const string& filename_base, +bool DumpLinearLeastSquaresProblem(const std::string& filename_base, DumpFormatType dump_format_type, const SparseMatrix* A, const double* D, @@ -730,5 +1041,4 @@ bool DumpLinearLeastSquaresProblem(const string& filename_base, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/linear_least_squares_problems.h b/extern/ceres/internal/ceres/linear_least_squares_problems.h index a1f67eb306e..9d01adddc52 100644 --- a/extern/ceres/internal/ceres/linear_least_squares_problems.h +++ b/extern/ceres/internal/ceres/linear_least_squares_problems.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/internal/export.h" #include "ceres/sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Structure defining a linear least squares problem and if possible // ground truth solutions. To be used by various LinearSolver tests. 
@@ -74,6 +73,10 @@ CERES_NO_EXPORT std::unique_ptr LinearLeastSquaresProblem3(); CERES_NO_EXPORT std::unique_ptr LinearLeastSquaresProblem4(); +CERES_NO_EXPORT +std::unique_ptr LinearLeastSquaresProblem5(); +CERES_NO_EXPORT +std::unique_ptr LinearLeastSquaresProblem6(); // Write the linear least squares problem to disk. The exact format // depends on dump_format_type. @@ -85,8 +88,7 @@ bool DumpLinearLeastSquaresProblem(const std::string& filename_base, const double* b, const double* x, int num_eliminate_blocks); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/linear_operator.cc b/extern/ceres/internal/ceres/linear_operator.cc index 88b7cc752d4..f4c2c5e41ba 100644 --- a/extern/ceres/internal/ceres/linear_operator.cc +++ b/extern/ceres/internal/ceres/linear_operator.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,10 +30,34 @@ #include "ceres/linear_operator.h" -namespace ceres { -namespace internal { +#include + +namespace ceres::internal { + +void LinearOperator::RightMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const { + (void)context; + if (num_threads != 1) { + VLOG(3) << "Parallel right product is not supported by linear operator " + "implementation"; + } + RightMultiplyAndAccumulate(x, y); +} + +void LinearOperator::LeftMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const { + (void)context; + if (num_threads != 1) { + VLOG(3) << "Parallel left product is not supported by linear operator " + "implementation"; + } + LeftMultiplyAndAccumulate(x, y); +} LinearOperator::~LinearOperator() = default; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/linear_operator.h b/extern/ceres/internal/ceres/linear_operator.h index c9e6188e2e8..aafc58464c8 100644 --- a/extern/ceres/internal/ceres/linear_operator.h +++ b/extern/ceres/internal/ceres/linear_operator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,11 +33,13 @@ #ifndef CERES_INTERNAL_LINEAR_OPERATOR_H_ #define CERES_INTERNAL_LINEAR_OPERATOR_H_ +#include "ceres/internal/eigen.h" #include "ceres/internal/export.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { + +class ContextImpl; // This is an abstract base class for linear operators. It supports // access to size information and left and right multiply operators. 
@@ -46,15 +48,44 @@ class CERES_NO_EXPORT LinearOperator { virtual ~LinearOperator(); // y = y + Ax; - virtual void RightMultiply(const double* x, double* y) const = 0; + virtual void RightMultiplyAndAccumulate(const double* x, double* y) const = 0; + virtual void RightMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const; // y = y + A'x; - virtual void LeftMultiply(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulate(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulate(const double* x, + double* y, + ContextImpl* context, + int num_threads) const; + + virtual void RightMultiplyAndAccumulate(const Vector& x, Vector& y) const { + RightMultiplyAndAccumulate(x.data(), y.data()); + } + + virtual void LeftMultiplyAndAccumulate(const Vector& x, Vector& y) const { + LeftMultiplyAndAccumulate(x.data(), y.data()); + } + + virtual void RightMultiplyAndAccumulate(const Vector& x, + Vector& y, + ContextImpl* context, + int num_threads) const { + RightMultiplyAndAccumulate(x.data(), y.data(), context, num_threads); + } + + virtual void LeftMultiplyAndAccumulate(const Vector& x, + Vector& y, + ContextImpl* context, + int num_threads) const { + LeftMultiplyAndAccumulate(x.data(), y.data(), context, num_threads); + } virtual int num_rows() const = 0; virtual int num_cols() const = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_LINEAR_OPERATOR_H_ diff --git a/extern/ceres/internal/ceres/linear_solver.cc b/extern/ceres/internal/ceres/linear_solver.cc index fe324f81301..4ba0b75fb28 100644 --- a/extern/ceres/internal/ceres/linear_solver.cc +++ b/extern/ceres/internal/ceres/linear_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { LinearSolver::~LinearSolver() = default; @@ -77,8 +76,15 @@ std::unique_ptr LinearSolver::Create( CHECK(options.context != nullptr); switch (options.type) { - case CGNR: + case CGNR: { +#ifndef CERES_NO_CUDA + if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) { + std::string error; + return CudaCgnrSolver::Create(options, &error); + } +#endif return std::make_unique(options); + } break; case SPARSE_NORMAL_CHOLESKY: #if defined(CERES_NO_SPARSE) @@ -120,5 +126,4 @@ std::unique_ptr LinearSolver::Create( } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/linear_solver.h b/extern/ceres/internal/ceres/linear_solver.h index 2f709c297e5..1d5338f4b66 100644 --- a/extern/ceres/internal/ceres/linear_solver.h +++ b/extern/ceres/internal/ceres/linear_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -52,39 +52,81 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -enum LinearSolverTerminationType { +enum class LinearSolverTerminationType { // Termination criterion was met. - LINEAR_SOLVER_SUCCESS, + SUCCESS, // Solver ran for max_num_iterations and terminated before the // termination tolerance could be satisfied. - LINEAR_SOLVER_NO_CONVERGENCE, + NO_CONVERGENCE, // Solver was terminated due to numerical problems, generally due to // the linear system being poorly conditioned. 
- LINEAR_SOLVER_FAILURE, + FAILURE, // Solver failed with a fatal error that cannot be recovered from, // e.g. CHOLMOD ran out of memory when computing the symbolic or // numeric factorization or an underlying library was called with // the wrong arguments. - LINEAR_SOLVER_FATAL_ERROR + FATAL_ERROR }; +inline std::ostream& operator<<(std::ostream& s, + LinearSolverTerminationType type) { + switch (type) { + case LinearSolverTerminationType::SUCCESS: + s << "LINEAR_SOLVER_SUCCESS"; + break; + case LinearSolverTerminationType::NO_CONVERGENCE: + s << "LINEAR_SOLVER_NO_CONVERGENCE"; + break; + case LinearSolverTerminationType::FAILURE: + s << "LINEAR_SOLVER_FAILURE"; + break; + case LinearSolverTerminationType::FATAL_ERROR: + s << "LINEAR_SOLVER_FATAL_ERROR"; + break; + default: + s << "UNKNOWN LinearSolverTerminationType"; + } + return s; +} + // This enum controls the fill-reducing ordering a sparse linear // algebra library should use before computing a sparse factorization // (usually Cholesky). -enum OrderingType { +// +// TODO(sameeragarwal): Add support for nested dissection +enum class OrderingType { NATURAL, // Do not re-order the matrix. This is useful when the // matrix has been ordered using a fill-reducing ordering // already. - AMD // Use the Approximate Minimum Degree algorithm to re-order - // the matrix. + + AMD, // Use the Approximate Minimum Degree algorithm to re-order + // the matrix. + + NESDIS, // Use the Nested Dissection algorithm to re-order the matrix. 
}; +inline std::ostream& operator<<(std::ostream& s, OrderingType type) { + switch (type) { + case OrderingType::NATURAL: + s << "NATURAL"; + break; + case OrderingType::AMD: + s << "AMD"; + break; + case OrderingType::NESDIS: + s << "NESDIS"; + break; + default: + s << "UNKNOWN OrderingType"; + } + return s; +} + class LinearOperator; // Abstract base class for objects that implement algorithms for @@ -112,9 +154,9 @@ class CERES_NO_EXPORT LinearSolver { DenseLinearAlgebraLibraryType dense_linear_algebra_library_type = EIGEN; SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type = SUITE_SPARSE; + OrderingType ordering_type = OrderingType::NATURAL; // See solver.h for information about these flags. - bool use_postordering = false; bool dynamic_sparsity = false; bool use_explicit_schur_complement = false; @@ -123,6 +165,23 @@ class CERES_NO_EXPORT LinearSolver { int min_num_iterations = 1; int max_num_iterations = 1; + // Maximum number of iterations performed by SCHUR_POWER_SERIES_EXPANSION. + // This value controls the maximum number of iterations whether it is used + // as a preconditioner or just to initialize the solution for + // ITERATIVE_SCHUR. + int max_num_spse_iterations = 5; + + // Use SCHUR_POWER_SERIES_EXPANSION to initialize the solution for + // ITERATIVE_SCHUR. This option can be set true regardless of what + // preconditioner is being used. + bool use_spse_initialization = false; + + // When use_spse_initialization is true, this parameter along with + // max_num_spse_iterations controls the number of + // SCHUR_POWER_SERIES_EXPANSION iterations performed for initialization. It + // is not used to control the preconditioner. + double spse_tolerance = 0.1; + // If possible, how many threads can the solver use. 
int num_threads = 1; @@ -261,7 +320,8 @@ class CERES_NO_EXPORT LinearSolver { struct Summary { double residual_norm = -1.0; int num_iterations = -1; - LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE; + LinearSolverTerminationType termination_type = + LinearSolverTerminationType::FAILURE; std::string message; }; @@ -329,17 +389,16 @@ class TypedLinearSolver : public LinearSolver { ExecutionSummary execution_summary_; }; -// Linear solvers that depend on acccess to the low level structure of +// Linear solvers that depend on access to the low level structure of // a SparseMatrix. // clang-format off -typedef TypedLinearSolver BlockSparseMatrixSolver; // NOLINT -typedef TypedLinearSolver CompressedRowSparseMatrixSolver; // NOLINT -typedef TypedLinearSolver DenseSparseMatrixSolver; // NOLINT -typedef TypedLinearSolver TripletSparseMatrixSolver; // NOLINT +using BlockSparseMatrixSolver = TypedLinearSolver; // NOLINT +using CompressedRowSparseMatrixSolver = TypedLinearSolver; // NOLINT +using DenseSparseMatrixSolver = TypedLinearSolver; // NOLINT +using TripletSparseMatrixSolver = TypedLinearSolver; // NOLINT // clang-format on -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/local_parameterization.cc b/extern/ceres/internal/ceres/local_parameterization.cc deleted file mode 100644 index db6f95a1984..00000000000 --- a/extern/ceres/internal/ceres/local_parameterization.cc +++ /dev/null @@ -1,349 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -// Author: sameeragarwal@google.com (Sameer Agarwal) - -#include "ceres/local_parameterization.h" - -#include - -#include "Eigen/Geometry" -#include "ceres/internal/eigen.h" -#include "ceres/internal/fixed_array.h" -#include "ceres/internal/householder_vector.h" -#include "ceres/rotation.h" -#include "glog/logging.h" - -namespace ceres { - -using std::vector; - -LocalParameterization::~LocalParameterization() = default; - -bool LocalParameterization::MultiplyByJacobian(const double* x, - const int num_rows, - const double* global_matrix, - double* local_matrix) const { - if (LocalSize() == 0) { - return true; - } - - Matrix jacobian(GlobalSize(), LocalSize()); - if (!ComputeJacobian(x, jacobian.data())) { - return false; - } - - MatrixRef(local_matrix, num_rows, LocalSize()) = - ConstMatrixRef(global_matrix, num_rows, GlobalSize()) * jacobian; - return true; -} - -IdentityParameterization::IdentityParameterization(const int size) - : size_(size) { - CHECK_GT(size, 0); -} - -bool IdentityParameterization::Plus(const double* x, - const double* delta, - double* x_plus_delta) const { - VectorRef(x_plus_delta, size_) = - ConstVectorRef(x, size_) + ConstVectorRef(delta, size_); - return true; -} - -bool IdentityParameterization::ComputeJacobian(const double* x, - double* jacobian) const { - MatrixRef(jacobian, size_, size_).setIdentity(); - return true; -} - -bool IdentityParameterization::MultiplyByJacobian(const double* x, - const int num_cols, - const double* global_matrix, - double* local_matrix) const { - std::copy( - global_matrix, global_matrix + num_cols * GlobalSize(), local_matrix); - return true; -} - -SubsetParameterization::SubsetParameterization( - int size, const vector& constant_parameters) - : local_size_(size - constant_parameters.size()), constancy_mask_(size, 0) { - if (constant_parameters.empty()) { - return; - } - - vector constant = constant_parameters; - std::sort(constant.begin(), constant.end()); - CHECK_GE(constant.front(), 0) << "Indices 
indicating constant parameter must " - "be greater than equal to zero."; - CHECK_LT(constant.back(), size) - << "Indices indicating constant parameter must be less than the size " - << "of the parameter block."; - CHECK(std::adjacent_find(constant.begin(), constant.end()) == constant.end()) - << "The set of constant parameters cannot contain duplicates"; - for (int parameter : constant_parameters) { - constancy_mask_[parameter] = 1; - } -} - -bool SubsetParameterization::Plus(const double* x, - const double* delta, - double* x_plus_delta) const { - const int global_size = GlobalSize(); - for (int i = 0, j = 0; i < global_size; ++i) { - if (constancy_mask_[i]) { - x_plus_delta[i] = x[i]; - } else { - x_plus_delta[i] = x[i] + delta[j++]; - } - } - return true; -} - -bool SubsetParameterization::ComputeJacobian(const double* x, - double* jacobian) const { - if (local_size_ == 0) { - return true; - } - - const int global_size = GlobalSize(); - MatrixRef m(jacobian, global_size, local_size_); - m.setZero(); - for (int i = 0, j = 0; i < global_size; ++i) { - if (!constancy_mask_[i]) { - m(i, j++) = 1.0; - } - } - return true; -} - -bool SubsetParameterization::MultiplyByJacobian(const double* x, - const int num_cols, - const double* global_matrix, - double* local_matrix) const { - if (local_size_ == 0) { - return true; - } - - const int global_size = GlobalSize(); - for (int col = 0; col < num_cols; ++col) { - for (int i = 0, j = 0; i < global_size; ++i) { - if (!constancy_mask_[i]) { - local_matrix[col * local_size_ + j++] = - global_matrix[col * global_size + i]; - } - } - } - return true; -} - -bool QuaternionParameterization::Plus(const double* x, - const double* delta, - double* x_plus_delta) const { - const double norm_delta = - sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]); - if (norm_delta > 0.0) { - const double sin_delta_by_delta = (sin(norm_delta) / norm_delta); - double q_delta[4]; - q_delta[0] = cos(norm_delta); - q_delta[1] = 
sin_delta_by_delta * delta[0]; - q_delta[2] = sin_delta_by_delta * delta[1]; - q_delta[3] = sin_delta_by_delta * delta[2]; - QuaternionProduct(q_delta, x, x_plus_delta); - } else { - for (int i = 0; i < 4; ++i) { - x_plus_delta[i] = x[i]; - } - } - return true; -} - -bool QuaternionParameterization::ComputeJacobian(const double* x, - double* jacobian) const { - // clang-format off - jacobian[0] = -x[1]; jacobian[1] = -x[2]; jacobian[2] = -x[3]; - jacobian[3] = x[0]; jacobian[4] = x[3]; jacobian[5] = -x[2]; - jacobian[6] = -x[3]; jacobian[7] = x[0]; jacobian[8] = x[1]; - jacobian[9] = x[2]; jacobian[10] = -x[1]; jacobian[11] = x[0]; - // clang-format on - return true; -} - -bool EigenQuaternionParameterization::Plus(const double* x_ptr, - const double* delta, - double* x_plus_delta_ptr) const { - Eigen::Map x_plus_delta(x_plus_delta_ptr); - Eigen::Map x(x_ptr); - - const double norm_delta = - sqrt(delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]); - if (norm_delta > 0.0) { - const double sin_delta_by_delta = sin(norm_delta) / norm_delta; - - // Note, in the constructor w is first. 
- Eigen::Quaterniond delta_q(cos(norm_delta), - sin_delta_by_delta * delta[0], - sin_delta_by_delta * delta[1], - sin_delta_by_delta * delta[2]); - x_plus_delta = delta_q * x; - } else { - x_plus_delta = x; - } - - return true; -} - -bool EigenQuaternionParameterization::ComputeJacobian(const double* x, - double* jacobian) const { - // clang-format off - jacobian[0] = x[3]; jacobian[1] = x[2]; jacobian[2] = -x[1]; - jacobian[3] = -x[2]; jacobian[4] = x[3]; jacobian[5] = x[0]; - jacobian[6] = x[1]; jacobian[7] = -x[0]; jacobian[8] = x[3]; - jacobian[9] = -x[0]; jacobian[10] = -x[1]; jacobian[11] = -x[2]; - // clang-format on - return true; -} - -HomogeneousVectorParameterization::HomogeneousVectorParameterization(int size) - : size_(size) { - CHECK_GT(size_, 1) << "The size of the homogeneous vector needs to be " - << "greater than 1."; -} - -bool HomogeneousVectorParameterization::Plus(const double* x_ptr, - const double* delta_ptr, - double* x_plus_delta_ptr) const { - ConstVectorRef x(x_ptr, size_); - ConstVectorRef delta(delta_ptr, size_ - 1); - VectorRef x_plus_delta(x_plus_delta_ptr, size_); - - const double norm_delta = delta.norm(); - - if (norm_delta == 0.0) { - x_plus_delta = x; - return true; - } - - // Map the delta from the minimum representation to the over parameterized - // homogeneous vector. See section A6.9.2 on page 624 of Hartley & Zisserman - // (2nd Edition) for a detailed description. Note there is a typo on Page - // 625, line 4 so check the book errata. - const double norm_delta_div_2 = 0.5 * norm_delta; - const double sin_delta_by_delta = - std::sin(norm_delta_div_2) / norm_delta_div_2; - - Vector y(size_); - y.head(size_ - 1) = 0.5 * sin_delta_by_delta * delta; - y(size_ - 1) = std::cos(norm_delta_div_2); - - Vector v(size_); - double beta; - - // NOTE: The explicit template arguments are needed here because - // ComputeHouseholderVector is templated and some versions of MSVC - // have trouble deducing the type of v automatically. 
- internal::ComputeHouseholderVector( - x, &v, &beta); - - // Apply the delta update to remain on the unit sphere. See section A6.9.3 - // on page 625 of Hartley & Zisserman (2nd Edition) for a detailed - // description. - x_plus_delta = x.norm() * (y - v * (beta * (v.transpose() * y))); - - return true; -} - -bool HomogeneousVectorParameterization::ComputeJacobian( - const double* x_ptr, double* jacobian_ptr) const { - ConstVectorRef x(x_ptr, size_); - MatrixRef jacobian(jacobian_ptr, size_, size_ - 1); - - Vector v(size_); - double beta; - - // NOTE: The explicit template arguments are needed here because - // ComputeHouseholderVector is templated and some versions of MSVC - // have trouble deducing the type of v automatically. - internal::ComputeHouseholderVector( - x, &v, &beta); - - // The Jacobian is equal to J = 0.5 * H.leftCols(size_ - 1) where H is the - // Householder matrix (H = I - beta * v * v'). - for (int i = 0; i < size_ - 1; ++i) { - jacobian.col(i) = -0.5 * beta * v(i) * v; - jacobian.col(i)(i) += 0.5; - } - jacobian *= x.norm(); - - return true; -} - -bool ProductParameterization::Plus(const double* x, - const double* delta, - double* x_plus_delta) const { - int x_cursor = 0; - int delta_cursor = 0; - for (const auto& param : local_params_) { - if (!param->Plus( - x + x_cursor, delta + delta_cursor, x_plus_delta + x_cursor)) { - return false; - } - delta_cursor += param->LocalSize(); - x_cursor += param->GlobalSize(); - } - - return true; -} - -bool ProductParameterization::ComputeJacobian(const double* x, - double* jacobian_ptr) const { - MatrixRef jacobian(jacobian_ptr, GlobalSize(), LocalSize()); - jacobian.setZero(); - internal::FixedArray buffer(buffer_size_); - - int x_cursor = 0; - int delta_cursor = 0; - for (const auto& param : local_params_) { - const int local_size = param->LocalSize(); - const int global_size = param->GlobalSize(); - - if (!param->ComputeJacobian(x + x_cursor, buffer.data())) { - return false; - } - 
jacobian.block(x_cursor, delta_cursor, global_size, local_size) = - MatrixRef(buffer.data(), global_size, local_size); - - delta_cursor += local_size; - x_cursor += global_size; - } - - return true; -} - -} // namespace ceres diff --git a/extern/ceres/internal/ceres/loss_function.cc b/extern/ceres/internal/ceres/loss_function.cc index 3392b3b7f81..82563c820ef 100644 --- a/extern/ceres/internal/ceres/loss_function.cc +++ b/extern/ceres/internal/ceres/loss_function.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/low_rank_inverse_hessian.cc b/extern/ceres/internal/ceres/low_rank_inverse_hessian.cc index 2fd1ac83f00..14559b677fc 100644 --- a/extern/ceres/internal/ceres/low_rank_inverse_hessian.cc +++ b/extern/ceres/internal/ceres/low_rank_inverse_hessian.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,10 +35,7 @@ #include "ceres/internal/eigen.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::list; +namespace ceres::internal { // The (L)BFGS algorithm explicitly requires that the secant equation: // @@ -117,8 +114,8 @@ bool LowRankInverseHessian::Update(const Vector& delta_x, return true; } -void LowRankInverseHessian::RightMultiply(const double* x_ptr, - double* y_ptr) const { +void LowRankInverseHessian::RightMultiplyAndAccumulate(const double* x_ptr, + double* y_ptr) const { ConstVectorRef gradient(x_ptr, num_parameters_); VectorRef search_direction(y_ptr, num_parameters_); @@ -159,7 +156,7 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr, // // The original origin of this rescaling trick is somewhat unclear, the // earliest reference appears to be Oren [1], however it is widely discussed - // without specific attributation in various texts including [2] (p143/178). + // without specific attribution in various texts including [2] (p143/178). // // [1] Oren S.S., Self-scaling variable metric (SSVM) algorithms Part II: // Implementation and experiments, Management Science, @@ -179,5 +176,4 @@ void LowRankInverseHessian::RightMultiply(const double* x_ptr, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/low_rank_inverse_hessian.h b/extern/ceres/internal/ceres/low_rank_inverse_hessian.h index 36519360262..72f6f6509b8 100644 --- a/extern/ceres/internal/ceres/low_rank_inverse_hessian.h +++ b/extern/ceres/internal/ceres/low_rank_inverse_hessian.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_operator.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // LowRankInverseHessian is a positive definite approximation to the // Hessian using the limited memory variant of the @@ -65,7 +64,7 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator { // num_parameters is the row/column size of the Hessian. // max_num_corrections is the rank of the Hessian approximation. // use_approximate_eigenvalue_scaling controls whether the initial - // inverse Hessian used during Right/LeftMultiply() is scaled by + // inverse Hessian used during Right/LeftMultiplyAndAccumulate() is scaled by // the approximate eigenvalue of the true inverse Hessian at the // current operating point. // The approximation uses: @@ -84,9 +83,9 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator { bool Update(const Vector& delta_x, const Vector& delta_gradient); // LinearOperator interface - void RightMultiply(const double* x, double* y) const final; - void LeftMultiply(const double* x, double* y) const final { - RightMultiply(x, y); + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + void LeftMultiplyAndAccumulate(const double* x, double* y) const final { + RightMultiplyAndAccumulate(x, y); } int num_rows() const final { return num_parameters_; } int num_cols() const final { return num_parameters_; } @@ -102,7 +101,6 @@ class CERES_NO_EXPORT LowRankInverseHessian final : public LinearOperator { std::list indices_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_LOW_RANK_INVERSE_HESSIAN_H_ diff --git a/extern/ceres/internal/ceres/manifold.cc b/extern/ceres/internal/ceres/manifold.cc index f412a793f93..c4895fd48ce 100644 --- a/extern/ceres/internal/ceres/manifold.cc +++ 
b/extern/ceres/internal/ceres/manifold.cc @@ -30,13 +30,11 @@ inline void QuaternionPlusImpl(const double* x, double* x_plus_delta) { // x_plus_delta = QuaternionProduct(q_delta, x), where q_delta is the // quaternion constructed from delta. - const double norm_delta = std::sqrt( - delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]); + const double norm_delta = std::hypot(delta[0], delta[1], delta[2]); - if (norm_delta == 0.0) { - for (int i = 0; i < 4; ++i) { - x_plus_delta[i] = x[i]; - } + if (std::fpclassify(norm_delta) == FP_ZERO) { + // No change in rotation: return the quaternion as is. + std::copy_n(x, 4, x_plus_delta); return; } @@ -100,19 +98,16 @@ inline void QuaternionMinusImpl(const double* y, -y[Order::kW] * x[Order::kZ] - y[Order::kX] * x[Order::kY] + y[Order::kY] * x[Order::kX] + y[Order::kZ] * x[Order::kW]; - const double u_norm = - std::sqrt(ambient_y_minus_x[Order::kX] * ambient_y_minus_x[Order::kX] + - ambient_y_minus_x[Order::kY] * ambient_y_minus_x[Order::kY] + - ambient_y_minus_x[Order::kZ] * ambient_y_minus_x[Order::kZ]); - if (u_norm > 0.0) { + const double u_norm = std::hypot(ambient_y_minus_x[Order::kX], + ambient_y_minus_x[Order::kY], + ambient_y_minus_x[Order::kZ]); + if (std::fpclassify(u_norm) != FP_ZERO) { const double theta = std::atan2(u_norm, ambient_y_minus_x[Order::kW]); y_minus_x[0] = theta * ambient_y_minus_x[Order::kX] / u_norm; y_minus_x[1] = theta * ambient_y_minus_x[Order::kY] / u_norm; y_minus_x[2] = theta * ambient_y_minus_x[Order::kZ] / u_norm; } else { - y_minus_x[0] = 0.0; - y_minus_x[1] = 0.0; - y_minus_x[2] = 0.0; + std::fill_n(y_minus_x, 3, 0.0); } } @@ -201,7 +196,7 @@ bool SubsetManifold::Plus(const double* x, return true; } -bool SubsetManifold::PlusJacobian(const double* x, +bool SubsetManifold::PlusJacobian(const double* /*x*/, double* plus_jacobian) const { if (tangent_size_ == 0) { return true; @@ -218,7 +213,7 @@ bool SubsetManifold::PlusJacobian(const double* x, return true; } -bool 
SubsetManifold::RightMultiplyByPlusJacobian(const double* x, +bool SubsetManifold::RightMultiplyByPlusJacobian(const double* /*x*/, const int num_rows, const double* ambient_matrix, double* tangent_matrix) const { @@ -254,7 +249,7 @@ bool SubsetManifold::Minus(const double* y, return true; } -bool SubsetManifold::MinusJacobian(const double* x, +bool SubsetManifold::MinusJacobian(const double* /*x*/, double* minus_jacobian) const { const int ambient_size = AmbientSize(); MatrixRef m(minus_jacobian, tangent_size_, ambient_size); diff --git a/extern/ceres/internal/ceres/manifold_adapter.h b/extern/ceres/internal/ceres/manifold_adapter.h deleted file mode 100644 index 9a21456a731..00000000000 --- a/extern/ceres/internal/ceres/manifold_adapter.h +++ /dev/null @@ -1,60 +0,0 @@ -#include "ceres/internal/export.h" -#include "ceres/local_parameterization.h" -#include "ceres/manifold.h" -#include "glog/logging.h" - -namespace ceres { -namespace internal { - -// Adapter to wrap LocalParameterization and make them look like Manifolds. -// -// ManifoldAdapter NEVER takes ownership of local_parameterization. 
-class CERES_NO_EXPORT ManifoldAdapter final : public Manifold { - public: - explicit ManifoldAdapter(const LocalParameterization* local_parameterization) - : local_parameterization_(local_parameterization) { - CHECK(local_parameterization != nullptr); - } - - bool Plus(const double* x, - const double* delta, - double* x_plus_delta) const override { - return local_parameterization_->Plus(x, delta, x_plus_delta); - } - - bool PlusJacobian(const double* x, double* jacobian) const override { - return local_parameterization_->ComputeJacobian(x, jacobian); - } - - bool RightMultiplyByPlusJacobian(const double* x, - const int num_rows, - const double* ambient_matrix, - double* tangent_matrix) const override { - return local_parameterization_->MultiplyByJacobian( - x, num_rows, ambient_matrix, tangent_matrix); - } - - bool Minus(const double* y, const double* x, double* delta) const override { - LOG(FATAL) << "This should never be called."; - return false; - } - - bool MinusJacobian(const double* x, double* jacobian) const override { - LOG(FATAL) << "This should never be called."; - return false; - } - - int AmbientSize() const override { - return local_parameterization_->GlobalSize(); - } - - int TangentSize() const override { - return local_parameterization_->LocalSize(); - } - - private: - const LocalParameterization* local_parameterization_; -}; - -} // namespace internal -} // namespace ceres diff --git a/extern/ceres/internal/ceres/map_util.h b/extern/ceres/internal/ceres/map_util.h index 5632c22e916..aee2bf58f40 100644 --- a/extern/ceres/internal/ceres/map_util.h +++ b/extern/ceres/internal/ceres/map_util.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/minimizer.cc b/extern/ceres/internal/ceres/minimizer.cc index 449c728774d..531738829ee 100644 --- a/extern/ceres/internal/ceres/minimizer.cc +++ b/extern/ceres/internal/ceres/minimizer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { std::unique_ptr Minimizer::Create(MinimizerType minimizer_type) { if (minimizer_type == TRUST_REGION) { @@ -89,5 +88,4 @@ bool Minimizer::RunCallbacks(const Minimizer::Options& options, return false; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/minimizer.h b/extern/ceres/internal/ceres/minimizer.h index c2c1f71df9f..be7290e4c4b 100644 --- a/extern/ceres/internal/ceres/minimizer.h +++ b/extern/ceres/internal/ceres/minimizer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,14 +40,14 @@ #include "ceres/iteration_callback.h" #include "ceres/solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Evaluator; class SparseMatrix; class TrustRegionStrategy; class CoordinateDescentMinimizer; class LinearSolver; +class ContextImpl; // Interface for non-linear least squares solvers. 
class CERES_NO_EXPORT Minimizer { @@ -114,6 +114,7 @@ class CERES_NO_EXPORT Minimizer { int max_num_iterations; double max_solver_time_in_seconds; int num_threads; + ContextImpl* context = nullptr; // Number of times the linear solver should be retried in case of // numerical failure. The retries are done by exponentially scaling up @@ -193,8 +194,7 @@ class CERES_NO_EXPORT Minimizer { Solver::Summary* summary) = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/normal_prior.cc b/extern/ceres/internal/ceres/normal_prior.cc index 17de40f2e77..c8a7a273854 100644 --- a/extern/ceres/internal/ceres/normal_prior.cc +++ b/extern/ceres/internal/ceres/normal_prior.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,6 +31,7 @@ #include "ceres/normal_prior.h" #include +#include #include #include "ceres/internal/eigen.h" @@ -39,7 +40,7 @@ namespace ceres { -NormalPrior::NormalPrior(const Matrix& A, const Vector& b) : A_(A), b_(b) { +NormalPrior::NormalPrior(const Matrix& A, Vector b) : A_(A), b_(std::move(b)) { CHECK_GT(b_.rows(), 0); CHECK_GT(A_.rows(), 0); CHECK_EQ(b_.rows(), A.cols()); @@ -54,7 +55,7 @@ bool NormalPrior::Evaluate(double const* const* parameters, VectorRef r(residuals, num_residuals()); // The following line should read // r = A_ * (p - b_); - // The extra eval is to get around a bug in the eigen library. + // The extra eval is to get around a bug in the Eigen library. 
r = A_ * (p - b_).eval(); if ((jacobians != nullptr) && (jacobians[0] != nullptr)) { MatrixRef(jacobians[0], num_residuals(), parameter_block_sizes()[0]) = A_; diff --git a/extern/ceres/internal/ceres/pair_hash.h b/extern/ceres/internal/ceres/pair_hash.h index 83ff2b46401..64882cd6fee 100644 --- a/extern/ceres/internal/ceres/pair_hash.h +++ b/extern/ceres/internal/ceres/pair_hash.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { #if defined(_WIN32) && !defined(__MINGW64__) && !defined(__MINGW32__) #define GG_LONGLONG(x) x##I64 @@ -112,7 +111,6 @@ struct pair_hash { } }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_PAIR_HASH_H_ diff --git a/extern/ceres/internal/ceres/parallel_for.h b/extern/ceres/internal/ceres/parallel_for.h index 9528c267d49..11db1fbc488 100644 --- a/extern/ceres/internal/ceres/parallel_for.h +++ b/extern/ceres/internal/ceres/parallel_for.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -26,48 +26,161 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
// -// Author: vitus@google.com (Michael Vitus) +// Authors: vitus@google.com (Michael Vitus), +// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) #ifndef CERES_INTERNAL_PARALLEL_FOR_H_ #define CERES_INTERNAL_PARALLEL_FOR_H_ -#include +#include +#include #include "ceres/context_impl.h" -#include "ceres/internal/disable_warnings.h" +#include "ceres/internal/eigen.h" #include "ceres/internal/export.h" +#include "ceres/parallel_invoke.h" +#include "ceres/partition_range_for_parallel_for.h" +#include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -// Returns the maximum number of threads supported by the threading backend -// Ceres was compiled with. -CERES_NO_EXPORT -int MaxNumThreadsAvailable(); +// Use a dummy mutex if num_threads = 1. +inline decltype(auto) MakeConditionalLock(const int num_threads, + std::mutex& m) { + return (num_threads == 1) ? std::unique_lock{} + : std::unique_lock{m}; +} // Execute the function for every element in the range [start, end) with at most // num_threads. It will execute all the work on the calling thread if -// num_threads is 1. -CERES_NO_EXPORT void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function); +// num_threads or (end - start) is equal to 1. +// Depending on function signature, it will be supplied with either loop index +// or a range of loop indices; function can also be supplied with thread_id. +// The following function signatures are supported: +// - Functions accepting a single loop index: +// - [](int index) { ... } +// - [](int thread_id, int index) { ... } +// - Functions accepting a range of loop indices: +// - [](std::tuple<int, int> index) { ... } +// - [](int thread_id, std::tuple<int, int> index) { ... } +// +// When distributing workload between threads, it is assumed that each loop +// iteration takes approximately equal time to complete.
+template +void ParallelFor(ContextImpl* context, + int start, + int end, + int num_threads, + F&& function, + int min_block_size = 1) { + CHECK_GT(num_threads, 0); + if (start >= end) { + return; + } -// Execute the function for every element in the range [start, end) with at most -// num_threads. It will execute all the work on the calling thread if -// num_threads is 1. Each invocation of function() will be passed a thread_id -// in [0, num_threads) that is guaranteed to be distinct from the value passed -// to any concurrent execution of function(). -CERES_NO_EXPORT void ParallelFor( - ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function); -} // namespace internal -} // namespace ceres + if (num_threads == 1 || end - start < min_block_size * 2) { + InvokeOnSegment(0, std::make_tuple(start, end), std::forward(function)); + return; + } -#include "ceres/internal/disable_warnings.h" + CHECK(context != nullptr); + ParallelInvoke(context, + start, + end, + num_threads, + std::forward(function), + min_block_size); +} + +// Execute function for every element in the range [start, end) with at most +// num_threads, using user-provided partitions array. +// When distributing workload between threads, it is assumed that each segment +// bounded by adjacent elements of partitions array takes approximately equal +// time to process. 
+template <typename F> +void ParallelFor(ContextImpl* context, + int start, + int end, + int num_threads, + F&& function, + const std::vector<int>& partitions) { + CHECK_GT(num_threads, 0); + if (start >= end) { + return; + } + CHECK_GT(partitions.size(), 1); + CHECK_EQ(partitions.front(), start); + CHECK_EQ(partitions.back(), end); + if (num_threads == 1 || end - start <= num_threads) { + ParallelFor(context, start, end, num_threads, std::forward<F>(function)); + return; + } + const int num_partitions = partitions.size() - 1; + ParallelFor(context, + 0, + num_partitions, + num_threads, + [&function, &partitions](int thread_id, + std::tuple<int, int> partition_ids) { + // partition_ids is a range of partition indices + const auto [partition_start, partition_end] = partition_ids; + // Execution over several adjacent segments is equivalent + // to execution over union of those segments (which is also a + // contiguous segment) + const int range_start = partitions[partition_start]; + const int range_end = partitions[partition_end]; + // Range of original loop indices + const auto range = std::make_tuple(range_start, range_end); + InvokeOnSegment(thread_id, range, function); + }); +} + +// Execute function for every element in the range [start, end) with at most +// num_threads, taking into account user-provided integer cumulative costs of +// iterations. Cumulative costs of iteration for indices in range [0, end) are +// stored in objects from cumulative_cost_data. User-provided +// cumulative_cost_fun returns non-decreasing integer values corresponding to +// inclusive cumulative cost of loop iterations, provided with a reference to +// user-defined object. Only indices from [start, end) will be referenced.
This +// routine assumes that cumulative_cost_fun is non-decreasing (in other words, +// all costs are non-negative); +// When distributing workload between threads, input range of loop indices will +// be partitioned into disjoint contiguous intervals, with the maximal cost +// being minimized. +// For example, with iteration costs of [1, 1, 5, 3, 1, 4] cumulative_cost_fun +// should return [1, 2, 7, 10, 11, 15], and with num_threads = 4 this range +// will be split into segments [0, 2) [2, 3) [3, 5) [5, 6) with costs +// [2, 5, 4, 4]. +template +void ParallelFor(ContextImpl* context, + int start, + int end, + int num_threads, + F&& function, + const CumulativeCostData* cumulative_cost_data, + CumulativeCostFun&& cumulative_cost_fun) { + CHECK_GT(num_threads, 0); + if (start >= end) { + return; + } + if (num_threads == 1 || end - start <= num_threads) { + ParallelFor(context, start, end, num_threads, std::forward(function)); + return; + } + // Creating several partitions allows us to tolerate imperfections of + // partitioning and user-supplied iteration costs up to a certain extent + constexpr int kNumPartitionsPerThread = 4; + const int kMaxPartitions = num_threads * kNumPartitionsPerThread; + const auto& partitions = PartitionRangeForParallelFor( + start, + end, + kMaxPartitions, + cumulative_cost_data, + std::forward(cumulative_cost_fun)); + CHECK_GT(partitions.size(), 1); + ParallelFor( + context, start, end, num_threads, std::forward(function), partitions); +} +} // namespace ceres::internal #endif // CERES_INTERNAL_PARALLEL_FOR_H_ diff --git a/extern/ceres/internal/ceres/parallel_for_cxx.cc b/extern/ceres/internal/ceres/parallel_for_cxx.cc deleted file mode 100644 index 5b78db19a44..00000000000 --- a/extern/ceres/internal/ceres/parallel_for_cxx.cc +++ /dev/null @@ -1,245 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. 
-// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: vitus@google.com (Michael Vitus) - -// This include must come before any #ifndef check on Ceres compile options. 
-#include "ceres/internal/config.h" - -#ifdef CERES_USE_CXX_THREADS - -#include -#include -#include -#include - -#include "ceres/concurrent_queue.h" -#include "ceres/parallel_for.h" -#include "ceres/scoped_thread_token.h" -#include "ceres/thread_token_provider.h" -#include "glog/logging.h" - -namespace ceres { -namespace internal { -namespace { -// This class creates a thread safe barrier which will block until a -// pre-specified number of threads call Finished. This allows us to block the -// main thread until all the parallel threads are finished processing all the -// work. -class BlockUntilFinished { - public: - explicit BlockUntilFinished(int num_total) - : num_finished_(0), num_total_(num_total) {} - - // Increment the number of jobs that have finished and signal the blocking - // thread if all jobs have finished. - void Finished() { - std::lock_guard lock(mutex_); - ++num_finished_; - CHECK_LE(num_finished_, num_total_); - if (num_finished_ == num_total_) { - condition_.notify_one(); - } - } - - // Block until all threads have signaled they are finished. - void Block() { - std::unique_lock lock(mutex_); - condition_.wait(lock, [&]() { return num_finished_ == num_total_; }); - } - - private: - std::mutex mutex_; - std::condition_variable condition_; - // The current number of jobs finished. - int num_finished_; - // The total number of jobs. - int num_total_; -}; - -// Shared state between the parallel tasks. Each thread will use this -// information to get the next block of work to be performed. -struct SharedState { - SharedState(int start, int end, int num_work_items) - : start(start), - end(end), - num_work_items(num_work_items), - i(0), - thread_token_provider(num_work_items), - block_until_finished(num_work_items) {} - - // The start and end index of the for loop. - const int start; - const int end; - // The number of blocks that need to be processed. - const int num_work_items; - - // The next block of work to be assigned to a worker. 
The parallel for loop - // range is split into num_work_items blocks of work, i.e. a single block of - // work is: - // for (int j = start + i; j < end; j += num_work_items) { ... }. - int i; - std::mutex mutex_i; - - // Provides a unique thread ID among all active threads working on the same - // group of tasks. Thread-safe. - ThreadTokenProvider thread_token_provider; - - // Used to signal when all the work has been completed. Thread safe. - BlockUntilFinished block_until_finished; -}; - -} // namespace - -int MaxNumThreadsAvailable() { return ThreadPool::MaxNumThreadsAvailable(); } - -// See ParallelFor (below) for more details. -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK_GT(num_threads, 0); - CHECK(context != nullptr); - if (end <= start) { - return; - } - - // Fast path for when it is single threaded. - if (num_threads == 1) { - for (int i = start; i < end; ++i) { - function(i); - } - return; - } - - ParallelFor( - context, start, end, num_threads, [&function](int /*thread_id*/, int i) { - function(i); - }); -} - -// This implementation uses a fixed size max worker pool with a shared task -// queue. The problem of executing the function for the interval of [start, end) -// is broken up into at most num_threads blocks and added to the thread pool. To -// avoid deadlocks, the calling thread is allowed to steal work from the worker -// pool. This is implemented via a shared state between the tasks. In order for -// the calling thread or thread pool to get a block of work, it will query the -// shared state for the next block of work to be done. If there is nothing left, -// it will return. We will exit the ParallelFor call when all of the work has -// been done, not when all of the tasks have been popped off the task queue. -// -// A unique thread ID among all active tasks will be acquired once for each -// block of work. 
This avoids the significant performance penalty for acquiring -// it on every iteration of the for loop. The thread ID is guaranteed to be in -// [0, num_threads). -// -// A performance analysis has shown this implementation is onpar with OpenMP and -// TBB. -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK_GT(num_threads, 0); - CHECK(context != nullptr); - if (end <= start) { - return; - } - - // Fast path for when it is single threaded. - if (num_threads == 1) { - // Even though we only have one thread, use the thread token provider to - // guarantee the exact same behavior when running with multiple threads. - ThreadTokenProvider thread_token_provider(num_threads); - const ScopedThreadToken scoped_thread_token(&thread_token_provider); - const int thread_id = scoped_thread_token.token(); - for (int i = start; i < end; ++i) { - function(thread_id, i); - } - return; - } - - // We use a std::shared_ptr because the main thread can finish all - // the work before the tasks have been popped off the queue. So the - // shared state needs to exist for the duration of all the tasks. - const int num_work_items = std::min((end - start), num_threads); - std::shared_ptr shared_state( - new SharedState(start, end, num_work_items)); - - // A function which tries to perform a chunk of work. This returns false if - // there is no work to be done. - auto task_function = [shared_state, &function]() { - int i = 0; - { - // Get the next available chunk of work to be performed. If there is no - // work, return false. - std::lock_guard lock(shared_state->mutex_i); - if (shared_state->i >= shared_state->num_work_items) { - return false; - } - i = shared_state->i; - ++shared_state->i; - } - - const ScopedThreadToken scoped_thread_token( - &shared_state->thread_token_provider); - const int thread_id = scoped_thread_token.token(); - - // Perform each task. 
- for (int j = shared_state->start + i; j < shared_state->end; - j += shared_state->num_work_items) { - function(thread_id, j); - } - shared_state->block_until_finished.Finished(); - return true; - }; - - // Add all the tasks to the thread pool. - for (int i = 0; i < num_work_items; ++i) { - // Note we are taking the task_function as value so the shared_state - // shared pointer is copied and the ref count is increased. This is to - // prevent it from being deleted when the main thread finishes all the - // work and exits before the threads finish. - context->thread_pool.AddTask([task_function]() { task_function(); }); - } - - // Try to do any available work on the main thread. This may steal work from - // the thread pool, but when there is no work left the thread pool tasks - // will be no-ops. - while (task_function()) { - } - - // Wait until all tasks have finished. - shared_state->block_until_finished.Block(); -} - -} // namespace internal -} // namespace ceres - -#endif // CERES_USE_CXX_THREADS diff --git a/extern/ceres/internal/ceres/parallel_for_openmp.cc b/extern/ceres/internal/ceres/parallel_invoke.cc similarity index 54% rename from extern/ceres/internal/ceres/parallel_for_openmp.cc rename to extern/ceres/internal/ceres/parallel_invoke.cc index 1d44bf9977a..0e387c5eaaa 100644 --- a/extern/ceres/internal/ceres/parallel_for_openmp.cc +++ b/extern/ceres/internal/ceres/parallel_invoke.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -28,58 +28,50 @@ // // Author: vitus@google.com (Michael Vitus) -// This include must come before any #ifndef check on Ceres compile options. 
+#include +#include +#include +#include +#include +#include +#include + #include "ceres/internal/config.h" - -#if defined(CERES_USE_OPENMP) - #include "ceres/parallel_for.h" -#include "ceres/scoped_thread_token.h" -#include "ceres/thread_token_provider.h" +#include "ceres/parallel_vector_ops.h" #include "glog/logging.h" -#include "omp.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -int MaxNumThreadsAvailable() { return omp_get_max_threads(); } +BlockUntilFinished::BlockUntilFinished(int num_total_jobs) + : num_total_jobs_finished_(0), num_total_jobs_(num_total_jobs) {} -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK_GT(num_threads, 0); - CHECK(context != nullptr); - if (end <= start) { - return; - } - -#ifdef CERES_USE_OPENMP -#pragma omp parallel for num_threads(num_threads) \ - schedule(dynamic) if (num_threads > 1) -#endif // CERES_USE_OPENMP - for (int i = start; i < end; ++i) { - function(i); +void BlockUntilFinished::Finished(int num_jobs_finished) { + if (num_jobs_finished == 0) return; + std::lock_guard lock(mutex_); + num_total_jobs_finished_ += num_jobs_finished; + CHECK_LE(num_total_jobs_finished_, num_total_jobs_); + if (num_total_jobs_finished_ == num_total_jobs_) { + condition_.notify_one(); } } -void ParallelFor(ContextImpl* context, - int start, - int end, - int num_threads, - const std::function& function) { - CHECK(context != nullptr); - - ThreadTokenProvider thread_token_provider(num_threads); - ParallelFor(context, start, end, num_threads, [&](int i) { - const ScopedThreadToken scoped_thread_token(&thread_token_provider); - const int thread_id = scoped_thread_token.token(); - function(thread_id, i); - }); +void BlockUntilFinished::Block() { + std::unique_lock lock(mutex_); + condition_.wait( + lock, [this]() { return num_total_jobs_finished_ == num_total_jobs_; }); } -} // namespace internal -} // namespace ceres 
+ParallelInvokeState::ParallelInvokeState(int start, + int end, + int num_work_blocks) + : start(start), + end(end), + num_work_blocks(num_work_blocks), + base_block_size((end - start) / num_work_blocks), + num_base_p1_sized_blocks((end - start) % num_work_blocks), + block_id(0), + thread_id(0), + block_until_finished(num_work_blocks) {} -#endif // defined(CERES_USE_OPENMP) +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/parallel_invoke.h b/extern/ceres/internal/ceres/parallel_invoke.h new file mode 100644 index 00000000000..398f8f28f3d --- /dev/null +++ b/extern/ceres/internal/ceres/parallel_invoke.h @@ -0,0 +1,272 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: vitus@google.com (Michael Vitus), +// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_PARALLEL_INVOKE_H_ +#define CERES_INTERNAL_PARALLEL_INVOKE_H_ + +#include +#include +#include +#include +#include +#include + +namespace ceres::internal { + +// InvokeWithThreadId handles passing thread_id to the function +template +void InvokeWithThreadId(int thread_id, F&& function, Args&&... args) { + constexpr bool kPassThreadId = std::is_invocable_v; + + if constexpr (kPassThreadId) { + function(thread_id, std::forward(args)...); + } else { + function(std::forward(args)...); + } +} + +// InvokeOnSegment either runs a loop over segment indices or passes it to the +// function +template +void InvokeOnSegment(int thread_id, std::tuple range, F&& function) { + constexpr bool kExplicitLoop = + std::is_invocable_v || std::is_invocable_v; + + if constexpr (kExplicitLoop) { + const auto [start, end] = range; + for (int i = start; i != end; ++i) { + InvokeWithThreadId(thread_id, std::forward(function), i); + } + } else { + InvokeWithThreadId(thread_id, std::forward(function), range); + } +} + +// This class creates a thread safe barrier which will block until a +// pre-specified number of threads call Finished. This allows us to block the +// main thread until all the parallel threads are finished processing all the +// work. 
+class BlockUntilFinished { + public: + explicit BlockUntilFinished(int num_total_jobs); + + // Increment the number of jobs that have been processed by the number of + // jobs processed by caller and signal the blocking thread if all jobs + // have finished. + void Finished(int num_jobs_finished); + + // Block until receiving confirmation of all jobs being finished. + void Block(); + + private: + std::mutex mutex_; + std::condition_variable condition_; + int num_total_jobs_finished_; + const int num_total_jobs_; +}; + +// Shared state between the parallel tasks. Each thread will use this +// information to get the next block of work to be performed. +struct ParallelInvokeState { + // The entire range [start, end) is split into num_work_blocks contiguous + // disjoint intervals (blocks), which are as equal as possible given + // total index count and requested number of blocks. + // + // Those num_work_blocks blocks are then processed in parallel. + // + // Total number of integer indices in interval [start, end) is + // end - start, and when splitting them into num_work_blocks blocks + // we can either + // - Split into equal blocks when (end - start) is divisible by + // num_work_blocks + // - Split into blocks with size difference at most 1: + // - Size of the smallest block(s) is (end - start) / num_work_blocks + // - (end - start) % num_work_blocks will need to be 1 index larger + // + // Note that this splitting is optimal in the sense of maximal difference + // between block sizes, since splitting into equal blocks is possible + // if and only if number of indices is divisible by number of blocks. + ParallelInvokeState(int start, int end, int num_work_blocks); + + // The start and end index of the for loop. + const int start; + const int end; + // The number of blocks that need to be processed. 
+ const int num_work_blocks; + // Size of the smallest block + const int base_block_size; + // Number of blocks of size base_block_size + 1 + const int num_base_p1_sized_blocks; + + // The next block of work to be assigned to a worker. The parallel for loop + // range is split into num_work_blocks blocks of work, with a single block of + // work being of size + // - base_block_size + 1 for the first num_base_p1_sized_blocks blocks + // - base_block_size for the rest of the blocks + // blocks of indices are contiguous and disjoint + std::atomic block_id; + + // Provides a unique thread ID among all active threads + // We do not schedule more than num_threads threads via thread pool + // and caller thread might steal one ID + std::atomic thread_id; + + // Used to signal when all the work has been completed. Thread safe. + BlockUntilFinished block_until_finished; +}; + +// This implementation uses a fixed size max worker pool with a shared task +// queue. The problem of executing the function for the interval of [start, end) +// is broken up into at most num_threads * kWorkBlocksPerThread blocks (each of +// size at least min_block_size) and added to the thread pool. To avoid +// deadlocks, the calling thread is allowed to steal work from the worker pool. +// This is implemented via a shared state between the tasks. In order for +// the calling thread or thread pool to get a block of work, it will query the +// shared state for the next block of work to be done. If there is nothing left, +// it will return. We will exit the ParallelFor call when all of the work has +// been done, not when all of the tasks have been popped off the task queue. +// +// A unique thread ID among all active tasks will be acquired once for each +// block of work. This avoids the significant performance penalty for acquiring +// it on every iteration of the for loop. The thread ID is guaranteed to be in +// [0, num_threads). 
+// +// A performance analysis has shown this implementation is on par with OpenMP +// and TBB. +template +void ParallelInvoke(ContextImpl* context, + int start, + int end, + int num_threads, + F&& function, + int min_block_size) { + CHECK(context != nullptr); + + // Maximal number of work items scheduled for a single thread + // - Lower number of work items results in larger runtimes on unequal tasks + // - Higher number of work items results in larger losses for synchronization + constexpr int kWorkBlocksPerThread = 4; + + // Interval [start, end) is being split into + // num_threads * kWorkBlocksPerThread contiguous disjoint blocks. + // + // In order to avoid creating empty blocks of work, we need to limit + // number of work blocks by a total number of indices. + const int num_work_blocks = std::min((end - start) / min_block_size, + num_threads * kWorkBlocksPerThread); + + // We use a std::shared_ptr because the main thread can finish all + // the work before the tasks have been popped off the queue. So the + // shared state needs to exist for the duration of all the tasks. + auto shared_state = + std::make_shared(start, end, num_work_blocks); + + // A function which tries to schedule another task in the thread pool and + // perform several chunks of work. Function expects itself as the argument in + // order to schedule next task in the thread pool. 
+ auto task = [context, shared_state, num_threads, &function](auto& task_copy) { + int num_jobs_finished = 0; + const int thread_id = shared_state->thread_id.fetch_add(1); + // In order to avoid dead-locks in nested parallel for loops, task() will be + // invoked num_threads + 1 times: + // - num_threads times via enqueueing task into thread pool + // - one more time in the main thread + // Tasks enqueued to thread pool might take some time before execution, and + // the last task being executed will be terminated here in order to avoid + // having more than num_threads active threads + if (thread_id >= num_threads) return; + const int num_work_blocks = shared_state->num_work_blocks; + if (thread_id + 1 < num_threads && + shared_state->block_id < num_work_blocks) { + // Add another thread to the thread pool. + // Note we are taking the task as value so the copy of shared_state shared + // pointer (captured by value at declaration of task lambda-function) is + // copied and the ref count is increased. This is to prevent it from being + // deleted when the main thread finishes all the work and exits before the + // threads finish. + context->thread_pool.AddTask([task_copy]() { task_copy(task_copy); }); + } + + const int start = shared_state->start; + const int base_block_size = shared_state->base_block_size; + const int num_base_p1_sized_blocks = shared_state->num_base_p1_sized_blocks; + + while (true) { + // Get the next available chunk of work to be performed. If there is no + // work, return. 
+ int block_id = shared_state->block_id.fetch_add(1); + if (block_id >= num_work_blocks) { + break; + } + ++num_jobs_finished; + + // For-loop interval [start, end) was split into num_work_blocks, + // with num_base_p1_sized_blocks of size base_block_size + 1 and remaining + // num_work_blocks - num_base_p1_sized_blocks of size base_block_size + // + // Then, start index of the block #block_id is given by a total + // length of preceding blocks: + // * Total length of preceding blocks of size base_block_size + 1: + // min(block_id, num_base_p1_sized_blocks) * (base_block_size + 1) + // + // * Total length of preceding blocks of size base_block_size: + // (block_id - min(block_id, num_base_p1_sized_blocks)) * + // base_block_size + // + // Simplifying sum of those quantities yields the following + // expression for start index of the block #block_id + const int curr_start = start + block_id * base_block_size + + std::min(block_id, num_base_p1_sized_blocks); + // First num_base_p1_sized_blocks have size base_block_size + 1 + // + // Note that it is guaranteed that all blocks are within + // [start, end) interval + const int curr_end = curr_start + base_block_size + + (block_id < num_base_p1_sized_blocks ? 1 : 0); + // Perform each task in current block + const auto range = std::make_tuple(curr_start, curr_end); + InvokeOnSegment(thread_id, range, function); + } + shared_state->block_until_finished.Finished(num_jobs_finished); + }; + + // Start scheduling threads and doing work. We might end up with fewer threads + // scheduled than expected, if scheduling overhead is larger than the amount + // of work to be done. + task(task); + + // Wait until all tasks have finished.
+ shared_state->block_until_finished.Block(); +} + +} // namespace ceres::internal + +#endif diff --git a/extern/ceres/internal/ceres/parallel_utils.cc b/extern/ceres/internal/ceres/parallel_utils.cc index e1cb5f979ec..2e6ee134882 100644 --- a/extern/ceres/internal/ceres/parallel_utils.cc +++ b/extern/ceres/internal/ceres/parallel_utils.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,8 +30,7 @@ #include "ceres/parallel_utils.h" -namespace ceres { -namespace internal { +namespace ceres::internal { void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) { // This works by unfolding a rectangle into a triangle. @@ -86,5 +85,4 @@ void LinearIndexToUpperTriangularIndex(int k, int n, int* i, int* j) { } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/parallel_utils.h b/extern/ceres/internal/ceres/parallel_utils.h index b2d9e0da765..2a7925f8b27 100644 --- a/extern/ceres/internal/ceres/parallel_utils.h +++ b/extern/ceres/internal/ceres/parallel_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Converts a linear iteration order into a triangular iteration order. 
// Suppose you have nested loops that look like @@ -66,7 +65,6 @@ CERES_NO_EXPORT void LinearIndexToUpperTriangularIndex(int k, int* i, int* j); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_PARALLEL_UTILS_H_ diff --git a/extern/ceres/internal/ceres/float_cxsparse.h b/extern/ceres/internal/ceres/parallel_vector_ops.cc similarity index 65% rename from extern/ceres/internal/ceres/float_cxsparse.h rename to extern/ceres/internal/ceres/parallel_vector_ops.cc index 8b4514acb18..9ebce299fac 100644 --- a/extern/ceres/internal/ceres/float_cxsparse.h +++ b/extern/ceres/internal/ceres/parallel_vector_ops.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -25,35 +25,30 @@ // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. -// -// Author: sameeragarwal@google.com (Sameer Agarwal) -#ifndef CERES_INTERNAL_FLOAT_CXSPARSE_H_ -#define CERES_INTERNAL_FLOAT_CXSPARSE_H_ +#include "ceres/parallel_vector_ops.h" -// This include must come before any #ifndef check on Ceres compile options. 
-#include "ceres/internal/config.h" +#include +#include -#if !defined(CERES_NO_CXSPARSE) +#include "ceres/context_impl.h" +#include "ceres/parallel_for.h" -#include +namespace ceres::internal { +void ParallelSetZero(ContextImpl* context, + int num_threads, + double* values, + int num_values) { + ParallelFor( + context, + 0, + num_values, + num_threads, + [values](std::tuple range) { + auto [start, end] = range; + std::fill(values + start, values + end, 0.); + }, + kMinBlockSizeParallelVectorOps); +} -#include "ceres/internal/export.h" -#include "ceres/sparse_cholesky.h" - -namespace ceres { -namespace internal { - -// Fake implementation of a single precision Sparse Cholesky using -// CXSparse. -class CERES_NO_EXPORT FloatCXSparseCholesky : public SparseCholesky { - public: - static std::unique_ptr Create(OrderingType ordering_type); -}; - -} // namespace internal -} // namespace ceres - -#endif // !defined(CERES_NO_CXSPARSE) - -#endif // CERES_INTERNAL_FLOAT_CXSPARSE_H_ +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/parallel_vector_ops.h b/extern/ceres/internal/ceres/parallel_vector_ops.h new file mode 100644 index 00000000000..812950a6db9 --- /dev/null +++ b/extern/ceres/internal/ceres/parallel_vector_ops.h @@ -0,0 +1,90 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. 
nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: vitus@google.com (Michael Vitus), +// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_ +#define CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_ + +#include +#include + +#include "ceres/context_impl.h" +#include "ceres/internal/eigen.h" +#include "ceres/internal/export.h" +#include "ceres/parallel_for.h" + +namespace ceres::internal { + +// Lower bound on block size for parallel vector operations. +// Operations with vectors of less than kMinBlockSizeParallelVectorOps elements +// will be executed in a single thread. +constexpr int kMinBlockSizeParallelVectorOps = 1 << 16; +// Evaluate vector expression in parallel +// Assuming LhsExpression and RhsExpression are some sort of column-vector +// expression, assignment lhs = rhs is evaluated over a set of contiguous blocks +// in parallel. This is expected to work well in the case of vector-based +// expressions (since they typically do not result in temporaries).
This +// method expects lhs to be size-compatible with rhs +template +void ParallelAssign(ContextImpl* context, + int num_threads, + LhsExpression& lhs, + const RhsExpression& rhs) { + static_assert(LhsExpression::ColsAtCompileTime == 1); + static_assert(RhsExpression::ColsAtCompileTime == 1); + CHECK_EQ(lhs.rows(), rhs.rows()); + const int num_rows = lhs.rows(); + ParallelFor( + context, + 0, + num_rows, + num_threads, + [&lhs, &rhs](const std::tuple& range) { + auto [start, end] = range; + lhs.segment(start, end - start) = rhs.segment(start, end - start); + }, + kMinBlockSizeParallelVectorOps); +} + +// Set vector to zero using num_threads +template +void ParallelSetZero(ContextImpl* context, + int num_threads, + VectorType& vector) { + ParallelSetZero(context, num_threads, vector.data(), vector.rows()); +} +void ParallelSetZero(ContextImpl* context, + int num_threads, + double* values, + int num_values); + +} // namespace ceres::internal + +#endif // CERES_INTERNAL_PARALLEL_VECTOR_OPS_H_ diff --git a/extern/ceres/internal/ceres/parameter_block.h b/extern/ceres/internal/ceres/parameter_block.h index a9845a3a9e3..925d1c42cd2 100644 --- a/extern/ceres/internal/ceres/parameter_block.h +++ b/extern/ceres/internal/ceres/parameter_block.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2021 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ #include "ceres/stringprintf.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class ProblemImpl; class ResidualBlock; @@ -382,8 +381,7 @@ class CERES_NO_EXPORT ParameterBlock { friend class ProblemImpl; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/parameter_block_ordering.cc b/extern/ceres/internal/ceres/parameter_block_ordering.cc index 570a09c60ba..2b8bf6e10f2 100644 --- a/extern/ceres/internal/ceres/parameter_block_ordering.cc +++ b/extern/ceres/internal/ceres/parameter_block_ordering.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,8 +30,11 @@ #include "ceres/parameter_block_ordering.h" +#include #include +#include #include +#include #include "ceres/graph.h" #include "ceres/graph_algorithms.h" @@ -42,22 +45,18 @@ #include "ceres/wall_time.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::map; -using std::set; -using std::vector; +namespace ceres::internal { int ComputeStableSchurOrdering(const Program& program, - vector* ordering) { + std::vector* ordering) { CHECK(ordering != nullptr); ordering->clear(); EventLogger event_logger("ComputeStableSchurOrdering"); auto graph = CreateHessianGraph(program); event_logger.AddEvent("CreateHessianGraph"); - const vector& parameter_blocks = program.parameter_blocks(); + const std::vector& parameter_blocks = + program.parameter_blocks(); const std::unordered_set& vertices = graph->vertices(); for (auto* parameter_block : parameter_blocks) { if 
(vertices.count(parameter_block) > 0) { @@ -81,13 +80,14 @@ int ComputeStableSchurOrdering(const Program& program, } int ComputeSchurOrdering(const Program& program, - vector* ordering) { + std::vector* ordering) { CHECK(ordering != nullptr); ordering->clear(); auto graph = CreateHessianGraph(program); int independent_set_size = IndependentSetOrdering(*graph, ordering); - const vector& parameter_blocks = program.parameter_blocks(); + const std::vector& parameter_blocks = + program.parameter_blocks(); // Add the excluded blocks to back of the ordering vector. for (auto* parameter_block : parameter_blocks) { @@ -103,13 +103,14 @@ void ComputeRecursiveIndependentSetOrdering(const Program& program, ParameterBlockOrdering* ordering) { CHECK(ordering != nullptr); ordering->Clear(); - const vector parameter_blocks = program.parameter_blocks(); + const std::vector parameter_blocks = + program.parameter_blocks(); auto graph = CreateHessianGraph(program); int num_covered = 0; int round = 0; while (num_covered < parameter_blocks.size()) { - vector independent_set_ordering; + std::vector independent_set_ordering; const int independent_set_size = IndependentSetOrdering(*graph, &independent_set_ordering); for (int i = 0; i < independent_set_size; ++i) { @@ -126,14 +127,16 @@ std::unique_ptr> CreateHessianGraph( const Program& program) { auto graph = std::make_unique>(); CHECK(graph != nullptr); - const vector& parameter_blocks = program.parameter_blocks(); + const std::vector& parameter_blocks = + program.parameter_blocks(); for (auto* parameter_block : parameter_blocks) { if (!parameter_block->IsConstant()) { graph->AddVertex(parameter_block); } } - const vector& residual_blocks = program.residual_blocks(); + const std::vector& residual_blocks = + program.residual_blocks(); for (auto* residual_block : residual_blocks) { const int num_parameter_blocks = residual_block->NumParameterBlocks(); ParameterBlock* const* parameter_blocks = @@ -157,19 +160,20 @@ std::unique_ptr> 
CreateHessianGraph( } void OrderingToGroupSizes(const ParameterBlockOrdering* ordering, - vector* group_sizes) { + std::vector* group_sizes) { CHECK(group_sizes != nullptr); group_sizes->clear(); if (ordering == nullptr) { return; } - const map>& group_to_elements = + // TODO(sameeragarwal): Investigate if this should be a set or an + // unordered_set. + const std::map>& group_to_elements = ordering->group_to_elements(); for (const auto& g_t_e : group_to_elements) { group_sizes->push_back(g_t_e.second.size()); } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/parameter_block_ordering.h b/extern/ceres/internal/ceres/parameter_block_ordering.h index f9a447adf87..2ec3db72203 100644 --- a/extern/ceres/internal/ceres/parameter_block_ordering.h +++ b/extern/ceres/internal/ceres/parameter_block_ordering.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,15 +40,14 @@ #include "ceres/ordered_groups.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; class ParameterBlock; // Uses an approximate independent set ordering to order the parameter -// blocks of a problem so that it is suitable for use with Schur -// complement based solvers. The output variable ordering contains an +// blocks of a problem so that it is suitable for use with Schur- +// complement-based solvers. The output variable ordering contains an // ordering of the parameter blocks and the return value is size of // the independent set or the number of e_blocks (see // schur_complement_solver.h for an explanation). 
Constant parameters @@ -88,8 +87,7 @@ CERES_NO_EXPORT std::unique_ptr> CreateHessianGraph( CERES_NO_EXPORT void OrderingToGroupSizes( const ParameterBlockOrdering* ordering, std::vector* group_sizes); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/partition_range_for_parallel_for.h b/extern/ceres/internal/ceres/partition_range_for_parallel_for.h new file mode 100644 index 00000000000..309d7a89d4d --- /dev/null +++ b/extern/ceres/internal/ceres/partition_range_for_parallel_for.h @@ -0,0 +1,150 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Authors: vitus@google.com (Michael Vitus), +// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin) + +#ifndef CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_ +#define CERES_INTERNAL_PARTITION_RANGE_FOR_PARALLEL_FOR_H_ + +#include +#include + +namespace ceres::internal { +// Check if it is possible to split range [start; end) into at most +// max_num_partitions contiguous partitions of cost not greater than +// max_partition_cost. Inclusive integer cumulative costs are provided by +// cumulative_cost_data objects, with cumulative_cost_offset being a total cost +// of all indices (starting from zero) preceding start element. Cumulative costs +// are returned by cumulative_cost_fun called with a reference to +// cumulative_cost_data element with index from range[start; end), and should be +// non-decreasing. 
Partition of the range is returned via partition argument +template +bool MaxPartitionCostIsFeasible(int start, + int end, + int max_num_partitions, + int max_partition_cost, + int cumulative_cost_offset, + const CumulativeCostData* cumulative_cost_data, + CumulativeCostFun&& cumulative_cost_fun, + std::vector* partition) { + partition->clear(); + partition->push_back(start); + int partition_start = start; + int cost_offset = cumulative_cost_offset; + + while (partition_start < end) { + // Already have max_num_partitions + if (partition->size() > max_num_partitions) { + return false; + } + const int target = max_partition_cost + cost_offset; + const int partition_end = + std::partition_point( + cumulative_cost_data + partition_start, + cumulative_cost_data + end, + [&cumulative_cost_fun, target](const CumulativeCostData& item) { + return cumulative_cost_fun(item) <= target; + }) - + cumulative_cost_data; + // Unable to make a partition from a single element + if (partition_end == partition_start) { + return false; + } + + const int cost_last = + cumulative_cost_fun(cumulative_cost_data[partition_end - 1]); + partition->push_back(partition_end); + partition_start = partition_end; + cost_offset = cost_last; + } + return true; +} + +// Split integer interval [start, end) into at most max_num_partitions +// contiguous intervals, minimizing maximal total cost of a single interval. +// Inclusive integer cumulative costs for each (zero-based) index are provided +// by cumulative_cost_data objects, and are returned by cumulative_cost_fun call +// with a reference to one of the objects from range [start, end) +template +std::vector PartitionRangeForParallelFor( + int start, + int end, + int max_num_partitions, + const CumulativeCostData* cumulative_cost_data, + CumulativeCostFun&& cumulative_cost_fun) { + // Given maximal partition cost, it is possible to verify if it is admissible + // and obtain corresponding partition using MaxPartitionCostIsFeasible + // function. 
In order to find the lowest admissible value, a binary search + // over all potentially optimal cost values is being performed + const int cumulative_cost_last = + cumulative_cost_fun(cumulative_cost_data[end - 1]); + const int cumulative_cost_offset = + start ? cumulative_cost_fun(cumulative_cost_data[start - 1]) : 0; + const int total_cost = cumulative_cost_last - cumulative_cost_offset; + + // Minimal maximal partition cost is not smaller than the average + // We will use non-inclusive lower bound + int partition_cost_lower_bound = total_cost / max_num_partitions - 1; + // Minimal maximal partition cost is not larger than the total cost + // Upper bound is inclusive + int partition_cost_upper_bound = total_cost; + + std::vector partition; + // Range partition corresponding to the latest evaluated upper bound. + // A single segment covering the whole input interval [start, end) corresponds + // to minimal maximal partition cost of total_cost. + std::vector partition_upper_bound = {start, end}; + // Binary search over partition cost, returning the lowest admissible cost + while (partition_cost_upper_bound - partition_cost_lower_bound > 1) { + partition.reserve(max_num_partitions + 1); + const int partition_cost = + partition_cost_lower_bound + + (partition_cost_upper_bound - partition_cost_lower_bound) / 2; + bool admissible = MaxPartitionCostIsFeasible( + start, + end, + max_num_partitions, + partition_cost, + cumulative_cost_offset, + cumulative_cost_data, + std::forward(cumulative_cost_fun), + &partition); + if (admissible) { + partition_cost_upper_bound = partition_cost; + std::swap(partition, partition_upper_bound); + } else { + partition_cost_lower_bound = partition_cost; + } + } + + return partition_upper_bound; +} +} // namespace ceres::internal + +#endif diff --git a/extern/ceres/internal/ceres/partitioned_matrix_view.cc b/extern/ceres/internal/ceres/partitioned_matrix_view.cc index d38f30a09d7..cffdbc5610f 100644 --- 
a/extern/ceres/internal/ceres/partitioned_matrix_view.cc +++ b/extern/ceres/internal/ceres/partitioned_matrix_view.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,8 +44,7 @@ #include "ceres/linear_solver.h" #include "ceres/partitioned_matrix_view.h" -namespace ceres { -namespace internal { +namespace ceres::internal { PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default; @@ -56,121 +55,121 @@ std::unique_ptr PartitionedMatrixViewBase::Create( (options.e_block_size == 2) && (options.f_block_size == 2)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2) && (options.f_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2) && (options.f_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 2)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3) && (options.f_block_size == 6)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && 
(options.e_block_size == 3) && (options.f_block_size == 9)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 6)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 8)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4) && (options.f_block_size == 9)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 2) && (options.e_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if (options.row_block_size == 2) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 3) && (options.e_block_size == 3) && (options.f_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && (options.f_block_size == 2)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && 
(options.f_block_size == 3)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4) && (options.f_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } if ((options.row_block_size == 4) && (options.e_block_size == 4)) { return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); } #endif @@ -180,8 +179,7 @@ std::unique_ptr PartitionedMatrixViewBase::Create( return std::make_unique>( - matrix, options.elimination_groups[0]); + options, matrix); }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/partitioned_matrix_view.h b/extern/ceres/internal/ceres/partitioned_matrix_view.h index 5623d3b6bca..8589a3b6a68 100644 --- a/extern/ceres/internal/ceres/partitioned_matrix_view.h +++ b/extern/ceres/internal/ceres/partitioned_matrix_view.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -50,12 +50,13 @@ #include "ceres/small_blas.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { + +class ContextImpl; // Given generalized bi-partite matrix A = [E F], with the same block // structure as required by the Schur complement based solver, found -// in explicit_schur_complement_solver.h, provide access to the +// in schur_complement_solver.h, provide access to the // matrices E and F and their outer products E'E and F'F with // themselves. 
// @@ -68,16 +69,26 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase { virtual ~PartitionedMatrixViewBase(); // y += E'x - virtual void LeftMultiplyE(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulateE(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x, + double* y) const = 0; + virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x, + double* y) const = 0; // y += F'x - virtual void LeftMultiplyF(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulateF(const double* x, double* y) const = 0; + virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x, + double* y) const = 0; + virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x, + double* y) const = 0; // y += Ex - virtual void RightMultiplyE(const double* x, double* y) const = 0; + virtual void RightMultiplyAndAccumulateE(const double* x, + double* y) const = 0; // y += Fx - virtual void RightMultiplyF(const double* x, double* y) const = 0; + virtual void RightMultiplyAndAccumulateF(const double* x, + double* y) const = 0; // Create and return the block diagonal of the matrix E'E. virtual std::unique_ptr CreateBlockDiagonalEtE() const = 0; @@ -109,6 +120,8 @@ class CERES_NO_EXPORT PartitionedMatrixViewBase { virtual int num_cols_f() const = 0; virtual int num_rows() const = 0; virtual int num_cols() const = 0; + virtual const std::vector& e_cols_partition() const = 0; + virtual const std::vector& f_cols_partition() const = 0; // clang-format on static std::unique_ptr Create( @@ -122,17 +135,46 @@ class CERES_NO_EXPORT PartitionedMatrixView final : public PartitionedMatrixViewBase { public: // matrix = [E F], where the matrix E contains the first - // num_col_blocks_a column blocks. - PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e); + // options.elimination_groups[0] column blocks. 
+ PartitionedMatrixView(const LinearSolver::Options& options, + const BlockSparseMatrix& matrix); + + // y += E'x + virtual void LeftMultiplyAndAccumulateE(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateESingleThreaded(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateEMultiThreaded(const double* x, + double* y) const final; + + // y += F'x + virtual void LeftMultiplyAndAccumulateF(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateFSingleThreaded(const double* x, + double* y) const final; + virtual void LeftMultiplyAndAccumulateFMultiThreaded(const double* x, + double* y) const final; + + // y += Ex + virtual void RightMultiplyAndAccumulateE(const double* x, + double* y) const final; + + // y += Fx + virtual void RightMultiplyAndAccumulateF(const double* x, + double* y) const final; - void LeftMultiplyE(const double* x, double* y) const final; - void LeftMultiplyF(const double* x, double* y) const final; - void RightMultiplyE(const double* x, double* y) const final; - void RightMultiplyF(const double* x, double* y) const final; std::unique_ptr CreateBlockDiagonalEtE() const final; std::unique_ptr CreateBlockDiagonalFtF() const final; void UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const final; + void UpdateBlockDiagonalEtESingleThreaded( + BlockSparseMatrix* block_diagonal) const; + void UpdateBlockDiagonalEtEMultiThreaded( + BlockSparseMatrix* block_diagonal) const; void UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const final; + void UpdateBlockDiagonalFtFSingleThreaded( + BlockSparseMatrix* block_diagonal) const; + void UpdateBlockDiagonalFtFMultiThreaded( + BlockSparseMatrix* block_diagonal) const; // clang-format off int num_col_blocks_e() const final { return num_col_blocks_e_; } int num_col_blocks_f() const final { return num_col_blocks_f_; } @@ -141,21 +183,29 @@ class CERES_NO_EXPORT PartitionedMatrixView final int num_rows() 
const final { return matrix_.num_rows(); } int num_cols() const final { return matrix_.num_cols(); } // clang-format on + const std::vector& e_cols_partition() const final { + return e_cols_partition_; + } + const std::vector& f_cols_partition() const final { + return f_cols_partition_; + } private: std::unique_ptr CreateBlockDiagonalMatrixLayout( int start_col_block, int end_col_block) const; + const LinearSolver::Options options_; const BlockSparseMatrix& matrix_; int num_row_blocks_e_; int num_col_blocks_e_; int num_col_blocks_f_; int num_cols_e_; int num_cols_f_; + std::vector e_cols_partition_; + std::vector f_cols_partition_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/partitioned_matrix_view_impl.h b/extern/ceres/internal/ceres/partitioned_matrix_view_impl.h index 2e818caa6ef..bd02439b500 100644 --- a/extern/ceres/internal/ceres/partitioned_matrix_view_impl.h +++ b/extern/ceres/internal/ceres/partitioned_matrix_view_impl.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,27 +36,31 @@ #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/parallel_for.h" +#include "ceres/partition_range_for_parallel_for.h" #include "ceres/partitioned_matrix_view.h" #include "ceres/small_blas.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template PartitionedMatrixView:: - PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e) - : matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) { + PartitionedMatrixView(const LinearSolver::Options& options, + const BlockSparseMatrix& matrix) + + : options_(options), matrix_(matrix) { const CompressedRowBlockStructure* bs = matrix_.block_structure(); CHECK(bs != nullptr); + num_col_blocks_e_ = options_.elimination_groups[0]; num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_; // Compute the number of row blocks in E. The number of row blocks // in E maybe less than the number of row blocks in the input matrix // as some of the row blocks at the bottom may not have any // e_blocks. 
For a definition of what an e_block is, please see - // explicit_schur_complement_solver.h + // schur_complement_solver.h num_row_blocks_e_ = 0; for (const auto& row : bs->rows) { const std::vector& cells = row.cells; @@ -79,6 +83,25 @@ PartitionedMatrixView:: } CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols()); + + auto transpose_bs = matrix_.transpose_block_structure(); + const int num_threads = options_.num_threads; + if (transpose_bs != nullptr && num_threads > 1) { + int kMaxPartitions = num_threads * 4; + e_cols_partition_ = PartitionRangeForParallelFor( + 0, + num_col_blocks_e_, + kMaxPartitions, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); + + f_cols_partition_ = PartitionRangeForParallelFor( + num_col_blocks_e_, + num_col_blocks_e_ + num_col_blocks_f_, + kMaxPartitions, + transpose_bs->rows.data(), + [](const CompressedRow& row) { return row.cumulative_nnz; }); + } } // The next four methods don't seem to be particularly cache @@ -88,77 +111,101 @@ PartitionedMatrixView:: template void PartitionedMatrixView:: - RightMultiplyE(const double* x, double* y) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - + RightMultiplyAndAccumulateE(const double* x, double* y) const { // Iterate over the first num_row_blocks_e_ row blocks, and multiply // by the first cell in each row block. 
+ auto bs = matrix_.block_structure(); const double* values = matrix_.values(); - for (int r = 0; r < num_row_blocks_e_; ++r) { - const Cell& cell = bs->rows[r].cells[0]; - const int row_block_pos = bs->rows[r].block.position; - const int row_block_size = bs->rows[r].block.size; - const int col_block_id = cell.block_id; - const int col_block_pos = bs->cols[col_block_id].position; - const int col_block_size = bs->cols[col_block_id].size; - // clang-format off - MatrixVectorMultiply( - values + cell.position, row_block_size, col_block_size, - x + col_block_pos, - y + row_block_pos); - // clang-format on - } + ParallelFor(options_.context, + 0, + num_row_blocks_e_, + options_.num_threads, + [values, bs, x, y](int row_block_id) { + const Cell& cell = bs->rows[row_block_id].cells[0]; + const int row_block_pos = bs->rows[row_block_id].block.position; + const int row_block_size = bs->rows[row_block_id].block.size; + const int col_block_id = cell.block_id; + const int col_block_pos = bs->cols[col_block_id].position; + const int col_block_size = bs->cols[col_block_id].size; + // clang-format off + MatrixVectorMultiply( + values + cell.position, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); + // clang-format on + }); } template void PartitionedMatrixView:: - RightMultiplyF(const double* x, double* y) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - + RightMultiplyAndAccumulateF(const double* x, double* y) const { // Iterate over row blocks, and if the row block is in E, then // multiply by all the cells except the first one which is of type // E. If the row block is not in E (i.e its in the bottom // num_row_blocks - num_row_blocks_e row blocks), then all the cells // are of type F and multiply by them all. 
+ const CompressedRowBlockStructure* bs = matrix_.block_structure(); + const int num_row_blocks = bs->rows.size(); + const int num_cols_e = num_cols_e_; const double* values = matrix_.values(); - for (int r = 0; r < num_row_blocks_e_; ++r) { - const int row_block_pos = bs->rows[r].block.position; - const int row_block_size = bs->rows[r].block.size; - const std::vector& cells = bs->rows[r].cells; - for (int c = 1; c < cells.size(); ++c) { - const int col_block_id = cells[c].block_id; - const int col_block_pos = bs->cols[col_block_id].position; - const int col_block_size = bs->cols[col_block_id].size; - // clang-format off - MatrixVectorMultiply( - values + cells[c].position, row_block_size, col_block_size, - x + col_block_pos - num_cols_e_, - y + row_block_pos); - // clang-format on - } - } + ParallelFor(options_.context, + 0, + num_row_blocks_e_, + options_.num_threads, + [values, bs, num_cols_e, x, y](int row_block_id) { + const int row_block_pos = bs->rows[row_block_id].block.position; + const int row_block_size = bs->rows[row_block_id].block.size; + const auto& cells = bs->rows[row_block_id].cells; + for (int c = 1; c < cells.size(); ++c) { + const int col_block_id = cells[c].block_id; + const int col_block_pos = bs->cols[col_block_id].position; + const int col_block_size = bs->cols[col_block_id].size; + // clang-format off + MatrixVectorMultiply( + values + cells[c].position, row_block_size, col_block_size, + x + col_block_pos - num_cols_e, + y + row_block_pos); + // clang-format on + } + }); + ParallelFor(options_.context, + num_row_blocks_e_, + num_row_blocks, + options_.num_threads, + [values, bs, num_cols_e, x, y](int row_block_id) { + const int row_block_pos = bs->rows[row_block_id].block.position; + const int row_block_size = bs->rows[row_block_id].block.size; + const auto& cells = bs->rows[row_block_id].cells; + for (const auto& cell : cells) { + const int col_block_id = cell.block_id; + const int col_block_pos = bs->cols[col_block_id].position; + const 
int col_block_size = bs->cols[col_block_id].size; + // clang-format off + MatrixVectorMultiply( + values + cell.position, row_block_size, col_block_size, + x + col_block_pos - num_cols_e, + y + row_block_pos); + // clang-format on + } + }); +} - for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { - const int row_block_pos = bs->rows[r].block.position; - const int row_block_size = bs->rows[r].block.size; - const std::vector& cells = bs->rows[r].cells; - for (const auto& cell : cells) { - const int col_block_id = cell.block_id; - const int col_block_pos = bs->cols[col_block_id].position; - const int col_block_size = bs->cols[col_block_id].size; - // clang-format off - MatrixVectorMultiply( - values + cell.position, row_block_size, col_block_size, - x + col_block_pos - num_cols_e_, - y + row_block_pos); - // clang-format on - } +template +void PartitionedMatrixView:: + LeftMultiplyAndAccumulateE(const double* x, double* y) const { + if (!num_col_blocks_e_) return; + if (!num_row_blocks_e_) return; + if (options_.num_threads == 1) { + LeftMultiplyAndAccumulateESingleThreaded(x, y); + } else { + CHECK(options_.context != nullptr); + LeftMultiplyAndAccumulateEMultiThreaded(x, y); } } template void PartitionedMatrixView:: - LeftMultiplyE(const double* x, double* y) const { + LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const { const CompressedRowBlockStructure* bs = matrix_.block_structure(); // Iterate over the first num_row_blocks_e_ row blocks, and multiply @@ -182,7 +229,55 @@ void PartitionedMatrixView:: template void PartitionedMatrixView:: - LeftMultiplyF(const double* x, double* y) const { + LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const { + auto transpose_bs = matrix_.transpose_block_structure(); + CHECK(transpose_bs != nullptr); + + // Local copies of class members in order to avoid capturing pointer to the + // whole object in lambda function + auto values = matrix_.values(); + const int 
num_row_blocks_e = num_row_blocks_e_; + ParallelFor( + options_.context, + 0, + num_col_blocks_e_, + options_.num_threads, + [values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) { + int row_block_pos = transpose_bs->rows[row_block_id].block.position; + int row_block_size = transpose_bs->rows[row_block_id].block.size; + auto& cells = transpose_bs->rows[row_block_id].cells; + + for (auto& cell : cells) { + const int col_block_id = cell.block_id; + const int col_block_size = transpose_bs->cols[col_block_id].size; + const int col_block_pos = transpose_bs->cols[col_block_id].position; + if (col_block_id >= num_row_blocks_e) break; + MatrixTransposeVectorMultiply( + values + cell.position, + col_block_size, + row_block_size, + x + col_block_pos, + y + row_block_pos); + } + }, + e_cols_partition()); +} + +template +void PartitionedMatrixView:: + LeftMultiplyAndAccumulateF(const double* x, double* y) const { + if (!num_col_blocks_f_) return; + if (options_.num_threads == 1) { + LeftMultiplyAndAccumulateFSingleThreaded(x, y); + } else { + CHECK(options_.context != nullptr); + LeftMultiplyAndAccumulateFMultiThreaded(x, y); + } +} + +template +void PartitionedMatrixView:: + LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const { const CompressedRowBlockStructure* bs = matrix_.block_structure(); // Iterate over row blocks, and if the row block is in E, then @@ -226,10 +321,63 @@ void PartitionedMatrixView:: } } +template +void PartitionedMatrixView:: + LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const { + auto transpose_bs = matrix_.transpose_block_structure(); + CHECK(transpose_bs != nullptr); + // Local copies of class members in order to avoid capturing pointer to the + // whole object in lambda function + auto values = matrix_.values(); + const int num_row_blocks_e = num_row_blocks_e_; + const int num_cols_e = num_cols_e_; + ParallelFor( + options_.context, + num_col_blocks_e_, + num_col_blocks_e_ + 
num_col_blocks_f_, + options_.num_threads, + [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y]( + int row_block_id) { + int row_block_pos = transpose_bs->rows[row_block_id].block.position; + int row_block_size = transpose_bs->rows[row_block_id].block.size; + auto& cells = transpose_bs->rows[row_block_id].cells; + + const int num_cells = cells.size(); + int cell_idx = 0; + for (; cell_idx < num_cells; ++cell_idx) { + auto& cell = cells[cell_idx]; + const int col_block_id = cell.block_id; + const int col_block_size = transpose_bs->cols[col_block_id].size; + const int col_block_pos = transpose_bs->cols[col_block_id].position; + if (col_block_id >= num_row_blocks_e) break; + + MatrixTransposeVectorMultiply( + values + cell.position, + col_block_size, + row_block_size, + x + col_block_pos, + y + row_block_pos - num_cols_e); + } + for (; cell_idx < num_cells; ++cell_idx) { + auto& cell = cells[cell_idx]; + const int col_block_id = cell.block_id; + const int col_block_size = transpose_bs->cols[col_block_id].size; + const int col_block_pos = transpose_bs->cols[col_block_id].position; + MatrixTransposeVectorMultiply( + values + cell.position, + col_block_size, + row_block_size, + x + col_block_pos, + y + row_block_pos - num_cols_e); + } + }, + f_cols_partition()); +} + // Given a range of columns blocks of a matrix m, compute the block // structure of the block diagonal of the matrix m(:, // start_col_block:end_col_block)'m(:, start_col_block:end_col_block) -// and return a BlockSparseMatrix with the this block structure. The +// and return a BlockSparseMatrix with this block structure. The // caller owns the result. template std::unique_ptr @@ -290,17 +438,17 @@ PartitionedMatrixView:: return block_diagonal; } -// Similar to the code in RightMultiplyE, except instead of the matrix -// vector multiply its an outer product. +// Similar to the code in RightMultiplyAndAccumulateE, except instead of the +// matrix vector multiply its an outer product. 
// // block_diagonal = block_diagonal(E'E) // template void PartitionedMatrixView:: - UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - const CompressedRowBlockStructure* block_diagonal_structure = - block_diagonal->block_structure(); + UpdateBlockDiagonalEtESingleThreaded( + BlockSparseMatrix* block_diagonal) const { + auto bs = matrix_.block_structure(); + auto block_diagonal_structure = block_diagonal->block_structure(); block_diagonal->SetZero(); const double* values = matrix_.values(); @@ -323,17 +471,68 @@ void PartitionedMatrixView:: } } -// Similar to the code in RightMultiplyF, except instead of the matrix -// vector multiply its an outer product. +template +void PartitionedMatrixView:: + UpdateBlockDiagonalEtEMultiThreaded( + BlockSparseMatrix* block_diagonal) const { + auto transpose_block_structure = matrix_.transpose_block_structure(); + CHECK(transpose_block_structure != nullptr); + auto block_diagonal_structure = block_diagonal->block_structure(); + + const double* values = matrix_.values(); + double* values_diagonal = block_diagonal->mutable_values(); + ParallelFor( + options_.context, + 0, + num_col_blocks_e_, + options_.num_threads, + [values, + transpose_block_structure, + values_diagonal, + block_diagonal_structure](int col_block_id) { + int cell_position = + block_diagonal_structure->rows[col_block_id].cells[0].position; + double* cell_values = values_diagonal + cell_position; + int col_block_size = + transpose_block_structure->rows[col_block_id].block.size; + auto& cells = transpose_block_structure->rows[col_block_id].cells; + MatrixRef(cell_values, col_block_size, col_block_size).setZero(); + + for (auto& c : cells) { + int row_block_size = transpose_block_structure->cols[c.block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply( + values + c.position, row_block_size, col_block_size, + values + c.position, row_block_size, col_block_size, + 
cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + }, + e_cols_partition_); +} + +template +void PartitionedMatrixView:: + UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { + if (options_.num_threads == 1) { + UpdateBlockDiagonalEtESingleThreaded(block_diagonal); + } else { + CHECK(options_.context != nullptr); + UpdateBlockDiagonalEtEMultiThreaded(block_diagonal); + } +} + +// Similar to the code in RightMultiplyAndAccumulateF, except instead of the +// matrix vector multiply its an outer product. // // block_diagonal = block_diagonal(F'F) // template void PartitionedMatrixView:: - UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { - const CompressedRowBlockStructure* bs = matrix_.block_structure(); - const CompressedRowBlockStructure* block_diagonal_structure = - block_diagonal->block_structure(); + UpdateBlockDiagonalFtFSingleThreaded( + BlockSparseMatrix* block_diagonal) const { + auto bs = matrix_.block_structure(); + auto block_diagonal_structure = block_diagonal->block_structure(); block_diagonal->SetZero(); const double* values = matrix_.values(); @@ -380,5 +579,82 @@ void PartitionedMatrixView:: } } -} // namespace internal -} // namespace ceres +template +void PartitionedMatrixView:: + UpdateBlockDiagonalFtFMultiThreaded( + BlockSparseMatrix* block_diagonal) const { + auto transpose_block_structure = matrix_.transpose_block_structure(); + CHECK(transpose_block_structure != nullptr); + auto block_diagonal_structure = block_diagonal->block_structure(); + + const double* values = matrix_.values(); + double* values_diagonal = block_diagonal->mutable_values(); + + const int num_col_blocks_e = num_col_blocks_e_; + const int num_row_blocks_e = num_row_blocks_e_; + ParallelFor( + options_.context, + num_col_blocks_e_, + num_col_blocks_e + num_col_blocks_f_, + options_.num_threads, + [transpose_block_structure, + block_diagonal_structure, + num_col_blocks_e, + num_row_blocks_e, + values, + 
values_diagonal](int col_block_id) { + const int col_block_size = + transpose_block_structure->rows[col_block_id].block.size; + const int diagonal_block_id = col_block_id - num_col_blocks_e; + const int cell_position = + block_diagonal_structure->rows[diagonal_block_id].cells[0].position; + double* cell_values = values_diagonal + cell_position; + + MatrixRef(cell_values, col_block_size, col_block_size).setZero(); + + auto& cells = transpose_block_structure->rows[col_block_id].cells; + const int num_cells = cells.size(); + int i = 0; + for (; i < num_cells; ++i) { + auto& cell = cells[i]; + const int row_block_id = cell.block_id; + if (row_block_id >= num_row_blocks_e) break; + const int row_block_size = + transpose_block_structure->cols[row_block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply + ( + values + cell.position, row_block_size, col_block_size, + values + cell.position, row_block_size, col_block_size, + cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + for (; i < num_cells; ++i) { + auto& cell = cells[i]; + const int row_block_id = cell.block_id; + const int row_block_size = + transpose_block_structure->cols[row_block_id].size; + // clang-format off + MatrixTransposeMatrixMultiply + ( + values + cell.position, row_block_size, col_block_size, + values + cell.position, row_block_size, col_block_size, + cell_values, 0, 0, col_block_size, col_block_size); + // clang-format on + } + }, + f_cols_partition_); +} + +template +void PartitionedMatrixView:: + UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { + if (options_.num_threads == 1) { + UpdateBlockDiagonalFtFSingleThreaded(block_diagonal); + } else { + CHECK(options_.context != nullptr); + UpdateBlockDiagonalFtFMultiThreaded(block_diagonal); + } +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/partitioned_matrix_view_template.py b/extern/ceres/internal/ceres/partitioned_matrix_view_template.py new file mode 100644 index 
00000000000..9af4c0e522d --- /dev/null +++ b/extern/ceres/internal/ceres/partitioned_matrix_view_template.py @@ -0,0 +1,149 @@ +# Ceres Solver - A fast non-linear least squares minimizer +# Copyright 2023 Google Inc. All rights reserved. +# http://ceres-solver.org/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Google Inc. nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: sameeragarwal@google.com (Sameer Agarwal) +# +# Script for explicitly generating template specialization of the +# PartitionedMatrixView class. 
Explicitly generating these +# instantiations in separate .cc files breaks the compilation into +# separate compilation unit rather than one large cc file. +# +# This script creates two sets of files. +# +# 1. partitioned_matrix_view_x_x_x.cc +# where the x indicates the template parameters and +# +# 2. partitioned_matrix_view.cc +# +# that contains a factory function for instantiating these classes +# based on runtime parameters. +# +# The list of tuples, specializations indicates the set of +# specializations that is generated. + +HEADER = """// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Template specialization of PartitionedMatrixView. +// +// ======================================== +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +//========================================= +// +// This file is generated using generate_template_specializations.py. +""" + +DYNAMIC_FILE = """ +#include "ceres/partitioned_matrix_view_impl.h" + +namespace ceres::internal { + +template class PartitionedMatrixView<%s, + %s, + %s>; + +} // namespace ceres::internal +""" + +SPECIALIZATION_FILE = """ +// This include must come before any #ifndef check on Ceres compile options. 
+#include "ceres/internal/config.h" + +#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION + +#include "ceres/partitioned_matrix_view_impl.h" + +namespace ceres::internal { + +template class PartitionedMatrixView<%s, %s, %s>; + +} // namespace ceres::internal + +#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION +""" + +FACTORY_FILE_HEADER = """ +#include + +#include "ceres/linear_solver.h" +#include "ceres/partitioned_matrix_view.h" + +namespace ceres::internal { + +PartitionedMatrixViewBase::~PartitionedMatrixViewBase() = default; + +std::unique_ptr PartitionedMatrixViewBase::Create( + const LinearSolver::Options& options, const BlockSparseMatrix& matrix) { +#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION +""" +FACTORY = """ return std::make_unique>( + options, matrix);""" + +FACTORY_FOOTER = """ +#endif + VLOG(1) << "Template specializations not found for <" + << options.row_block_size << "," << options.e_block_size << "," + << options.f_block_size << ">"; + return std::make_unique>( + options, matrix); +}; + +} // namespace ceres::internal +""" diff --git a/extern/ceres/internal/ceres/polynomial.cc b/extern/ceres/internal/ceres/polynomial.cc index 96267aae97f..8e99e347886 100644 --- a/extern/ceres/internal/ceres/polynomial.cc +++ b/extern/ceres/internal/ceres/polynomial.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,10 +40,7 @@ #include "ceres/internal/export.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::vector; +namespace ceres::internal { namespace { @@ -326,7 +323,7 @@ void MinimizePolynomial(const Vector& polynomial, } } -Vector FindInterpolatingPolynomial(const vector& samples) { +Vector FindInterpolatingPolynomial(const std::vector& samples) { const int num_samples = samples.size(); int num_constraints = 0; for (int i = 0; i < num_samples; ++i) { @@ -369,7 +366,7 @@ Vector FindInterpolatingPolynomial(const vector& samples) { return lu.setThreshold(0.0).solve(rhs); } -void MinimizeInterpolatingPolynomial(const vector& samples, +void MinimizeInterpolatingPolynomial(const std::vector& samples, double x_min, double x_max, double* optimal_x, @@ -389,5 +386,4 @@ void MinimizeInterpolatingPolynomial(const vector& samples, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/polynomial.h b/extern/ceres/internal/ceres/polynomial.h index 3ca753c4618..8c40628b5c1 100644 --- a/extern/ceres/internal/ceres/polynomial.h +++ b/extern/ceres/internal/ceres/polynomial.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct FunctionSample; @@ -116,8 +115,7 @@ CERES_NO_EXPORT void MinimizeInterpolatingPolynomial( double* optimal_x, double* optimal_value); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/power_series_expansion_preconditioner.cc b/extern/ceres/internal/ceres/power_series_expansion_preconditioner.cc new file mode 100644 index 00000000000..af98646c4de --- /dev/null +++ b/extern/ceres/internal/ceres/power_series_expansion_preconditioner.cc @@ -0,0 +1,88 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: markshachkov@gmail.com (Mark Shachkov) + +#include "ceres/power_series_expansion_preconditioner.h" + +#include "ceres/eigen_vector_ops.h" +#include "ceres/parallel_vector_ops.h" +#include "ceres/preconditioner.h" + +namespace ceres::internal { + +PowerSeriesExpansionPreconditioner::PowerSeriesExpansionPreconditioner( + const ImplicitSchurComplement* isc, + const int max_num_spse_iterations, + const double spse_tolerance, + const Preconditioner::Options& options) + : isc_(isc), + max_num_spse_iterations_(max_num_spse_iterations), + spse_tolerance_(spse_tolerance), + options_(options) {} + +PowerSeriesExpansionPreconditioner::~PowerSeriesExpansionPreconditioner() = + default; + +bool PowerSeriesExpansionPreconditioner::Update(const LinearOperator& /*A*/, + const double* /*D*/) { + return true; +} + +void PowerSeriesExpansionPreconditioner::RightMultiplyAndAccumulate( + const double* x, double* y) const { + VectorRef yref(y, num_rows()); + Vector series_term(num_rows()); + Vector previous_series_term(num_rows()); + ParallelSetZero(options_.context, options_.num_threads, yref); + isc_->block_diagonal_FtF_inverse()->RightMultiplyAndAccumulate( + x, y, options_.context, options_.num_threads); + ParallelAssign( + options_.context, options_.num_threads, previous_series_term, yref); + + const double norm_threshold = + spse_tolerance_ * Norm(yref, options_.context, options_.num_threads); + + for (int i = 1;; i++) { + 
ParallelSetZero(options_.context, options_.num_threads, series_term); + isc_->InversePowerSeriesOperatorRightMultiplyAccumulate( + previous_series_term.data(), series_term.data()); + ParallelAssign( + options_.context, options_.num_threads, yref, yref + series_term); + if (i >= max_num_spse_iterations_ || series_term.norm() < norm_threshold) { + break; + } + std::swap(previous_series_term, series_term); + } +} + +int PowerSeriesExpansionPreconditioner::num_rows() const { + return isc_->num_rows(); +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/power_series_expansion_preconditioner.h b/extern/ceres/internal/ceres/power_series_expansion_preconditioner.h new file mode 100644 index 00000000000..9a993cf8738 --- /dev/null +++ b/extern/ceres/internal/ceres/power_series_expansion_preconditioner.h @@ -0,0 +1,71 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: markshachkov@gmail.com (Mark Shachkov) + +#ifndef CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_ +#define CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_ + +#include "ceres/implicit_schur_complement.h" +#include "ceres/internal/eigen.h" +#include "ceres/internal/export.h" +#include "ceres/preconditioner.h" + +namespace ceres::internal { + +// This is a preconditioner via power series expansion of Schur +// complement inverse based on "Weber et al, Power Bundle Adjustment for +// Large-Scale 3D Reconstruction". 
+class CERES_NO_EXPORT PowerSeriesExpansionPreconditioner + : public Preconditioner { + public: + // TODO: Consider moving max_num_spse_iterations and spse_tolerance to + // Preconditioner::Options + PowerSeriesExpansionPreconditioner(const ImplicitSchurComplement* isc, + const int max_num_spse_iterations, + const double spse_tolerance, + const Preconditioner::Options& options); + PowerSeriesExpansionPreconditioner( + const PowerSeriesExpansionPreconditioner&) = delete; + void operator=(const PowerSeriesExpansionPreconditioner&) = delete; + ~PowerSeriesExpansionPreconditioner() override; + + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + bool Update(const LinearOperator& A, const double* D) final; + int num_rows() const final; + + private: + const ImplicitSchurComplement* isc_; + const int max_num_spse_iterations_; + const double spse_tolerance_; + const Preconditioner::Options options_; +}; + +} // namespace ceres::internal + +#endif // CERES_INTERNAL_POWER_SERIES_EXPANSION_PRECONDITIONER_H_ diff --git a/extern/ceres/internal/ceres/preconditioner.cc b/extern/ceres/internal/ceres/preconditioner.cc index 17b9629cf94..0b9ce963575 100644 --- a/extern/ceres/internal/ceres/preconditioner.cc +++ b/extern/ceres/internal/ceres/preconditioner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,7 @@ #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { Preconditioner::~Preconditioner() = default; @@ -48,27 +47,27 @@ PreconditionerType Preconditioner::PreconditionerForZeroEBlocks( } SparseMatrixPreconditionerWrapper::SparseMatrixPreconditionerWrapper( - const SparseMatrix* matrix) - : matrix_(matrix) { + const SparseMatrix* matrix, const Preconditioner::Options& options) + : matrix_(matrix), options_(options) { CHECK(matrix != nullptr); } SparseMatrixPreconditionerWrapper::~SparseMatrixPreconditionerWrapper() = default; -bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& A, - const double* D) { +bool SparseMatrixPreconditionerWrapper::UpdateImpl(const SparseMatrix& /*A*/, + const double* /*D*/) { return true; } -void SparseMatrixPreconditionerWrapper::RightMultiply(const double* x, - double* y) const { - matrix_->RightMultiply(x, y); +void SparseMatrixPreconditionerWrapper::RightMultiplyAndAccumulate( + const double* x, double* y) const { + matrix_->RightMultiplyAndAccumulate( + x, y, options_.context, options_.num_threads); } int SparseMatrixPreconditionerWrapper::num_rows() const { return matrix_->num_rows(); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/preconditioner.h b/extern/ceres/internal/ceres/preconditioner.h index 6433cc7dd38..07a7b1fe313 100644 --- a/extern/ceres/internal/ceres/preconditioner.h +++ b/extern/ceres/internal/ceres/preconditioner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,11 +39,11 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" #include "ceres/linear_operator.h" +#include "ceres/linear_solver.h" #include "ceres/sparse_matrix.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; class SparseMatrix; @@ -51,10 +51,25 @@ class SparseMatrix; class CERES_NO_EXPORT Preconditioner : public LinearOperator { public: struct Options { + Options() = default; + Options(const LinearSolver::Options& linear_solver_options) + : type(linear_solver_options.preconditioner_type), + visibility_clustering_type( + linear_solver_options.visibility_clustering_type), + sparse_linear_algebra_library_type( + linear_solver_options.sparse_linear_algebra_library_type), + num_threads(linear_solver_options.num_threads), + row_block_size(linear_solver_options.row_block_size), + e_block_size(linear_solver_options.e_block_size), + f_block_size(linear_solver_options.f_block_size), + elimination_groups(linear_solver_options.elimination_groups), + context(linear_solver_options.context) {} + PreconditionerType type = JACOBI; VisibilityClusteringType visibility_clustering_type = CANONICAL_VIEWS; SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type = SUITE_SPARSE; + OrderingType ordering_type = OrderingType::NATURAL; // When using the subset preconditioner, all row blocks starting // from this row block are used to construct the preconditioner. @@ -68,9 +83,6 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator { // and the preconditioner is the inverse of the matrix Q'Q. int subset_preconditioner_start_row_block = -1; - // See solver.h for information about these flags. - bool use_postordering = false; - // If possible, how many threads the preconditioner can use. 
int num_threads = 1; @@ -132,18 +144,37 @@ class CERES_NO_EXPORT Preconditioner : public LinearOperator { virtual bool Update(const LinearOperator& A, const double* D) = 0; // LinearOperator interface. Since the operator is symmetric, - // LeftMultiply and num_cols are just calls to RightMultiply and - // num_rows respectively. Update() must be called before - // RightMultiply can be called. - void RightMultiply(const double* x, double* y) const override = 0; - void LeftMultiply(const double* x, double* y) const override { - return RightMultiply(x, y); + // LeftMultiplyAndAccumulate and num_cols are just calls to + // RightMultiplyAndAccumulate and num_rows respectively. Update() must be + // called before RightMultiplyAndAccumulate can be called. + void RightMultiplyAndAccumulate(const double* x, + double* y) const override = 0; + void LeftMultiplyAndAccumulate(const double* x, double* y) const override { + return RightMultiplyAndAccumulate(x, y); } int num_rows() const override = 0; int num_cols() const override { return num_rows(); } }; +class CERES_NO_EXPORT IdentityPreconditioner : public Preconditioner { + public: + IdentityPreconditioner(int num_rows) : num_rows_(num_rows) {} + + bool Update(const LinearOperator& /*A*/, const double* /*D*/) final { + return true; + } + + void RightMultiplyAndAccumulate(const double* x, double* y) const final { + VectorRef(y, num_rows_) += ConstVectorRef(x, num_rows_); + } + + int num_rows() const final { return num_rows_; } + + private: + int num_rows_ = -1; +}; + // This templated subclass of Preconditioner serves as a base class for // other preconditioners that depend on the particular matrix layout of // the underlying linear operator. @@ -171,20 +202,21 @@ class CERES_NO_EXPORT SparseMatrixPreconditionerWrapper final : public SparseMatrixPreconditioner { public: // Wrapper does NOT take ownership of the matrix pointer. 
- explicit SparseMatrixPreconditionerWrapper(const SparseMatrix* matrix); + explicit SparseMatrixPreconditionerWrapper( + const SparseMatrix* matrix, const Preconditioner::Options& options); ~SparseMatrixPreconditionerWrapper() override; // Preconditioner interface - void RightMultiply(const double* x, double* y) const override; + void RightMultiplyAndAccumulate(const double* x, double* y) const override; int num_rows() const override; private: bool UpdateImpl(const SparseMatrix& A, const double* D) override; const SparseMatrix* matrix_; + const Preconditioner::Options options_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/preprocessor.cc b/extern/ceres/internal/ceres/preprocessor.cc index 44f0974dc5a..83c05d40fd6 100644 --- a/extern/ceres/internal/ceres/preprocessor.cc +++ b/extern/ceres/internal/ceres/preprocessor.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,13 +35,12 @@ #include "ceres/callbacks.h" #include "ceres/gradient_checking_cost_function.h" #include "ceres/line_search_preprocessor.h" -#include "ceres/parallel_for.h" #include "ceres/problem_impl.h" #include "ceres/solver.h" +#include "ceres/thread_pool.h" #include "ceres/trust_region_preprocessor.h" -namespace ceres { -namespace internal { +namespace ceres::internal { std::unique_ptr Preprocessor::Create( MinimizerType minimizer_type) { @@ -63,7 +62,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options) { if (options->num_threads == 1) { return; } - const int num_threads_available = MaxNumThreadsAvailable(); + const int num_threads_available = ThreadPool::MaxNumThreadsAvailable(); if (options->num_threads > num_threads_available) { LOG(WARNING) << "Specified options.num_threads: " << options->num_threads << " exceeds maximum available from the threading model Ceres " @@ -83,9 +82,11 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) { double* reduced_parameters = pp->reduced_parameters.data(); program->ParameterBlocksToStateVector(reduced_parameters); + auto context = pp->problem->context(); Minimizer::Options& minimizer_options = pp->minimizer_options; minimizer_options = Minimizer::Options(options); minimizer_options.evaluator = pp->evaluator; + minimizer_options.context = context; if (options.logging_type != SILENT) { pp->logging_callback = std::make_unique( @@ -104,5 +105,4 @@ void SetupCommonMinimizerOptions(PreprocessedProblem* pp) { } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/preprocessor.h b/extern/ceres/internal/ceres/preprocessor.h index b5db80af7e6..ed031f6a6aa 100644 --- a/extern/ceres/internal/ceres/preprocessor.h +++ b/extern/ceres/internal/ceres/preprocessor.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// 
Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ #include "ceres/program.h" #include "ceres/solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct PreprocessedProblem; @@ -118,8 +117,7 @@ void ChangeNumThreadsIfNeeded(Solver::Options* options); CERES_NO_EXPORT void SetupCommonMinimizerOptions(PreprocessedProblem* pp); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/problem.cc b/extern/ceres/internal/ceres/problem.cc index 4269ca3ebc3..00c1786c3ea 100644 --- a/extern/ceres/internal/ceres/problem.cc +++ b/extern/ceres/internal/ceres/problem.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2021 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,6 @@ namespace ceres { -using std::vector; - Problem::Problem() : impl_(new internal::ProblemImpl) {} Problem::Problem(const Problem::Options& options) : impl_(new internal::ProblemImpl(options)) {} @@ -52,7 +50,7 @@ Problem::~Problem() = default; ResidualBlockId Problem::AddResidualBlock( CostFunction* cost_function, LossFunction* loss_function, - const vector& parameter_blocks) { + const std::vector& parameter_blocks) { return impl_->AddResidualBlock(cost_function, loss_function, parameter_blocks.data(), @@ -71,12 +69,6 @@ void Problem::AddParameterBlock(double* values, int size) { impl_->AddParameterBlock(values, size); } -void Problem::AddParameterBlock(double* values, - int size, - LocalParameterization* local_parameterization) { - impl_->AddParameterBlock(values, size, local_parameterization); -} - void Problem::AddParameterBlock(double* values, int size, Manifold* manifold) { impl_->AddParameterBlock(values, size, manifold); } @@ -101,20 +93,6 @@ bool Problem::IsParameterBlockConstant(const double* values) const { return impl_->IsParameterBlockConstant(values); } -void Problem::SetParameterization( - double* values, LocalParameterization* local_parameterization) { - impl_->SetParameterization(values, local_parameterization); -} - -const LocalParameterization* Problem::GetParameterization( - const double* values) const { - return impl_->GetParameterization(values); -} - -bool Problem::HasParameterization(const double* values) const { - return impl_->HasParameterization(values); -} - void Problem::SetManifold(double* values, Manifold* manifold) { impl_->SetManifold(values, manifold); } @@ -149,8 +127,8 @@ double Problem::GetParameterLowerBound(const double* values, int index) const { bool Problem::Evaluate(const EvaluateOptions& evaluate_options, double* cost, - vector* residuals, - vector* gradient, + std::vector* residuals, + std::vector* 
gradient, CRSMatrix* jacobian) { return impl_->Evaluate(evaluate_options, cost, residuals, gradient, jacobian); } @@ -194,10 +172,6 @@ int Problem::ParameterBlockSize(const double* values) const { return impl_->ParameterBlockSize(values); } -int Problem::ParameterBlockLocalSize(const double* values) const { - return impl_->ParameterBlockTangentSize(values); -} - int Problem::ParameterBlockTangentSize(const double* values) const { return impl_->ParameterBlockTangentSize(values); } @@ -206,18 +180,18 @@ bool Problem::HasParameterBlock(const double* values) const { return impl_->HasParameterBlock(values); } -void Problem::GetParameterBlocks(vector* parameter_blocks) const { +void Problem::GetParameterBlocks(std::vector* parameter_blocks) const { impl_->GetParameterBlocks(parameter_blocks); } void Problem::GetResidualBlocks( - vector* residual_blocks) const { + std::vector* residual_blocks) const { impl_->GetResidualBlocks(residual_blocks); } void Problem::GetParameterBlocksForResidualBlock( const ResidualBlockId residual_block, - vector* parameter_blocks) const { + std::vector* parameter_blocks) const { impl_->GetParameterBlocksForResidualBlock(residual_block, parameter_blocks); } @@ -232,8 +206,12 @@ const LossFunction* Problem::GetLossFunctionForResidualBlock( } void Problem::GetResidualBlocksForParameterBlock( - const double* values, vector* residual_blocks) const { + const double* values, std::vector* residual_blocks) const { impl_->GetResidualBlocksForParameterBlock(values, residual_blocks); } +const Problem::Options& Problem::options() const { return impl_->options(); } + +internal::ProblemImpl* Problem::mutable_impl() { return impl_.get(); } + } // namespace ceres diff --git a/extern/ceres/internal/ceres/problem_impl.cc b/extern/ceres/internal/ceres/problem_impl.cc index 01a22c128be..40458bf187b 100644 --- a/extern/ceres/internal/ceres/problem_impl.cc +++ b/extern/ceres/internal/ceres/problem_impl.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least 
squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -53,7 +53,6 @@ #include "ceres/internal/fixed_array.h" #include "ceres/loss_function.h" #include "ceres/manifold.h" -#include "ceres/manifold_adapter.h" #include "ceres/map_util.h" #include "ceres/parameter_block.h" #include "ceres/program.h" @@ -64,8 +63,7 @@ #include "ceres/stringprintf.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { namespace { // Returns true if two regions of memory, a and b, with sizes size_a and size_b // respectively, overlap. @@ -257,10 +255,6 @@ ProblemImpl::~ProblemImpl() { DeleteBlock(parameter_block); } - // Delete the owned parameterizations. - STLDeleteUniqueContainerPointers(local_parameterizations_to_delete_.begin(), - local_parameterizations_to_delete_.end()); - // Delete the owned manifolds. STLDeleteUniqueContainerPointers(manifolds_to_delete_.begin(), manifolds_to_delete_.end()); @@ -365,45 +359,15 @@ void ProblemImpl::AddParameterBlock(double* values, int size) { InternalAddParameterBlock(values, size); } -void ProblemImpl::InternalSetParameterization( - double* values, - ParameterBlock* parameter_block, - LocalParameterization* local_parameterization) { - parameter_block_to_local_param_[values] = local_parameterization; - Manifold* manifold = nullptr; - if (local_parameterization != nullptr) { - if (options_.local_parameterization_ownership == TAKE_OWNERSHIP) { - local_parameterizations_to_delete_.push_back(local_parameterization); - } - - manifold = new ManifoldAdapter(local_parameterization); - // Add the manifold to manifolds_to_delete_ unconditionally since - // we own it and it will need to be deleted. 
- manifolds_to_delete_.push_back(manifold); - } - - parameter_block->SetManifold(manifold); -} - -void ProblemImpl::InternalSetManifold(double* values, +void ProblemImpl::InternalSetManifold(double* /*values*/, ParameterBlock* parameter_block, Manifold* manifold) { - // Reset any association between this parameter block and a local - // parameterization. This only needs done while we are in the transition from - // LocalParameterization to Manifold. - parameter_block_to_local_param_[values] = nullptr; if (manifold != nullptr && options_.manifold_ownership == TAKE_OWNERSHIP) { manifolds_to_delete_.push_back(manifold); } parameter_block->SetManifold(manifold); } -void ProblemImpl::AddParameterBlock( - double* values, int size, LocalParameterization* local_parameterization) { - ParameterBlock* parameter_block = InternalAddParameterBlock(values, size); - InternalSetParameterization(values, parameter_block, local_parameterization); -} - void ProblemImpl::AddParameterBlock(double* values, int size, Manifold* manifold) { @@ -539,19 +503,6 @@ void ProblemImpl::SetParameterBlockVariable(double* values) { parameter_block->SetVarying(); } -void ProblemImpl::SetParameterization( - double* values, LocalParameterization* local_parameterization) { - ParameterBlock* parameter_block = - FindWithDefault(parameter_block_map_, values, nullptr); - if (parameter_block == nullptr) { - LOG(FATAL) << "Parameter block not found: " << values - << ". 
You must add the parameter block to the problem before " - << "you can set its local parameterization."; - } - - InternalSetParameterization(values, parameter_block, local_parameterization); -} - void ProblemImpl::SetManifold(double* values, Manifold* manifold) { ParameterBlock* parameter_block = FindWithDefault(parameter_block_map_, values, nullptr); @@ -564,22 +515,13 @@ void ProblemImpl::SetManifold(double* values, Manifold* manifold) { InternalSetManifold(values, parameter_block, manifold); } -const LocalParameterization* ProblemImpl::GetParameterization( - const double* values) const { - return FindWithDefault(parameter_block_to_local_param_, values, nullptr); -} - -bool ProblemImpl::HasParameterization(const double* values) const { - return GetParameterization(values) != nullptr; -} - const Manifold* ProblemImpl::GetManifold(const double* values) const { ParameterBlock* parameter_block = FindWithDefault( parameter_block_map_, const_cast(values), nullptr); if (parameter_block == nullptr) { LOG(FATAL) << "Parameter block not found: " << values << ". You must add the parameter block to the problem before " - << "you can get its local parameterization."; + << "you can get its manifold."; } return parameter_block->manifold(); @@ -730,17 +672,7 @@ bool ProblemImpl::Evaluate(const Problem::EvaluateOptions& evaluate_options, // the Evaluator decides the storage for the Jacobian based on the // type of linear solver being used. evaluator_options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; -#ifdef CERES_NO_THREADS - if (evaluate_options.num_threads > 1) { - LOG(WARNING) - << "No threading support is compiled into this binary; " - << "only evaluate_options.num_threads = 1 is supported. Switching " - << "to single threaded mode."; - } - evaluator_options.num_threads = 1; -#else evaluator_options.num_threads = evaluate_options.num_threads; -#endif // CERES_NO_THREADS // The main thread also does work so we only need to launch num_threads - 1. 
context_impl_->EnsureMinimumThreads(evaluator_options.num_threads - 1); @@ -968,5 +900,4 @@ void ProblemImpl::GetResidualBlocksForParameterBlock( } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/problem_impl.h b/extern/ceres/internal/ceres/problem_impl.h index 22073b674f1..733f26ed588 100644 --- a/extern/ceres/internal/ceres/problem_impl.h +++ b/extern/ceres/internal/ceres/problem_impl.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2021 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -59,7 +59,6 @@ namespace ceres { class CostFunction; class EvaluationCallback; class LossFunction; -class LocalParameterization; struct CRSMatrix; namespace internal { @@ -100,10 +99,6 @@ class CERES_NO_EXPORT ProblemImpl { } void AddParameterBlock(double* values, int size); - void AddParameterBlock(double* values, - int size, - LocalParameterization* local_parameterization); - void AddParameterBlock(double* values, int size, Manifold* manifold); void RemoveResidualBlock(ResidualBlock* residual_block); @@ -113,11 +108,6 @@ class CERES_NO_EXPORT ProblemImpl { void SetParameterBlockVariable(double* values); bool IsParameterBlockConstant(const double* values) const; - void SetParameterization(double* values, - LocalParameterization* local_parameterization); - const LocalParameterization* GetParameterization(const double* values) const; - bool HasParameterization(const double* values) const; - void SetManifold(double* values, Manifold* manifold); const Manifold* GetManifold(const double* values) const; bool HasManifold(const double* values) const; @@ -176,14 +166,12 @@ class CERES_NO_EXPORT ProblemImpl { return residual_block_set_; } + const Problem::Options& options() const { return options_; } + ContextImpl* context() { return 
context_impl_; } private: ParameterBlock* InternalAddParameterBlock(double* values, int size); - void InternalSetParameterization( - double* values, - ParameterBlock* parameter_block, - LocalParameterization* local_parameterization); void InternalSetManifold(double* values, ParameterBlock* parameter_block, Manifold* manifold); @@ -214,15 +202,8 @@ class CERES_NO_EXPORT ProblemImpl { std::unique_ptr program_; // TODO(sameeragarwal): Unify the shared object handling across object types. - // Right now we are using vectors for LocalParameterization and Manifold - // objects and reference counting for CostFunctions and LossFunctions. Ideally - // this should be done uniformly. - - // When removing parameter blocks, parameterizations have ambiguous - // ownership. Instead of scanning the entire problem to see if the - // parameterization is shared with other parameter blocks, buffer - // them until destruction. - std::vector local_parameterizations_to_delete_; + // Right now we are using vectors for Manifold objects and reference counting + // for CostFunctions and LossFunctions. Ideally this should be done uniformly. // When removing parameter blocks, manifolds have ambiguous // ownership. Instead of scanning the entire problem to see if the @@ -236,17 +217,6 @@ class CERES_NO_EXPORT ProblemImpl { // destroyed. CostFunctionRefCount cost_function_ref_count_; LossFunctionRefCount loss_function_ref_count_; - - // Because we wrap LocalParameterization objects using a ManifoldAdapter, when - // the user calls GetParameterization we cannot use the same logic as - // GetManifold as the ParameterBlock object only returns a Manifold object. So - // this map stores the association between parameter blocks and local - // parameterizations. - // - // This is a temporary object which will be removed once the - // LocalParameterization to Manifold transition is complete. 
- std::unordered_map - parameter_block_to_local_param_; }; } // namespace internal diff --git a/extern/ceres/internal/ceres/program.cc b/extern/ceres/internal/ceres/program.cc index 1cb9ebcbe73..a5a243df5b1 100644 --- a/extern/ceres/internal/ceres/program.cc +++ b/extern/ceres/internal/ceres/program.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -45,14 +45,14 @@ #include "ceres/loss_function.h" #include "ceres/manifold.h" #include "ceres/map_util.h" +#include "ceres/parallel_for.h" #include "ceres/parameter_block.h" #include "ceres/problem.h" #include "ceres/residual_block.h" #include "ceres/stl_util.h" #include "ceres/triplet_sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { const std::vector& Program::parameter_blocks() const { return parameter_blocks_; @@ -109,16 +109,32 @@ bool Program::SetParameterBlockStatePtrsToUserStatePtrs() { bool Program::Plus(const double* state, const double* delta, - double* state_plus_delta) const { - for (auto* parameter_block : parameter_blocks_) { - if (!parameter_block->Plus(state, delta, state_plus_delta)) { - return false; - } - state += parameter_block->Size(); - delta += parameter_block->TangentSize(); - state_plus_delta += parameter_block->Size(); - } - return true; + double* state_plus_delta, + ContextImpl* context, + int num_threads) const { + std::atomic abort(false); + auto* parameter_blocks = parameter_blocks_.data(); + ParallelFor( + context, + 0, + parameter_blocks_.size(), + num_threads, + [&abort, state, delta, state_plus_delta, parameter_blocks](int block_id) { + if (abort) { + return; + } + auto parameter_block = parameter_blocks[block_id]; + + auto block_state = state + parameter_block->state_offset(); + auto block_delta = delta + 
parameter_block->delta_offset(); + auto block_state_plus_delta = + state_plus_delta + parameter_block->state_offset(); + if (!parameter_block->Plus( + block_state, block_delta, block_state_plus_delta)) { + abort = true; + } + }); + return abort == false; } void Program::SetParameterOffsetsAndIndex() { @@ -545,5 +561,4 @@ std::string Program::ToString() const { return ret; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/program.h b/extern/ceres/internal/ceres/program.h index 4dbd1ba5ff1..e2b9bd743e5 100644 --- a/extern/ceres/internal/ceres/program.h +++ b/extern/ceres/internal/ceres/program.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,13 +40,13 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class ParameterBlock; class ProblemImpl; class ResidualBlock; class TripletSparseMatrix; +class ContextImpl; // A nonlinear least squares optimization problem. This is different from the // similarly-named "Problem" object, which offers a mutation interface for @@ -87,7 +87,9 @@ class CERES_NO_EXPORT Program { // Update a state vector for the program given a delta. bool Plus(const double* state, const double* delta, - double* state_plus_delta) const; + double* state_plus_delta, + ContextImpl* context, + int num_threads) const; // Set the parameter indices and offsets. This permits mapping backward // from a ParameterBlock* to an index in the parameter_blocks() vector. 
For @@ -192,8 +194,7 @@ class CERES_NO_EXPORT Program { friend class ProblemImpl; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/program_evaluator.h b/extern/ceres/internal/ceres/program_evaluator.h index 826a73a9af1..5d549a7b6dc 100644 --- a/extern/ceres/internal/ceres/program_evaluator.h +++ b/extern/ceres/internal/ceres/program_evaluator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ // residual jacobians are written directly into their final position in the // block sparse matrix by the user's CostFunction; there is no copying. // -// The evaluation is threaded with OpenMP or C++ threads. +// The evaluation is threaded with C++ threads. // // The EvaluatePreparer and JacobianWriter interfaces are as follows: // @@ -96,6 +96,7 @@ #include "ceres/execution_summary.h" #include "ceres/internal/eigen.h" #include "ceres/parallel_for.h" +#include "ceres/parallel_vector_ops.h" #include "ceres/parameter_block.h" #include "ceres/program.h" #include "ceres/residual_block.h" @@ -105,7 +106,7 @@ namespace ceres { namespace internal { struct NullJacobianFinalizer { - void operator()(SparseMatrix* jacobian, int num_parameters) {} + void operator()(SparseMatrix* /*jacobian*/, int /*num_parameters*/) {} }; template 1) { - LOG(WARNING) << "No threading support is compiled into this binary; " - << "only options.num_threads = 1 is supported. 
Switching " - << "to single threaded mode."; - options_.num_threads = 1; - } -#endif // CERES_NO_THREADS - + jacobian_writer_.CreateEvaluatePreparers(options.num_threads))), + num_parameters_(program->NumEffectiveParameters()) { BuildResidualLayout(*program, &residual_layout_); - evaluate_scratch_ = - std::move(CreateEvaluatorScratch(*program, options.num_threads)); + evaluate_scratch_ = std::move(CreateEvaluatorScratch( + *program, static_cast(options.num_threads))); } // Implementation of Evaluator interface. @@ -164,20 +157,24 @@ class ProgramEvaluator final : public Evaluator { } if (residuals != nullptr) { - VectorRef(residuals, program_->NumResiduals()).setZero(); + ParallelSetZero(options_.context, + options_.num_threads, + residuals, + program_->NumResiduals()); } if (jacobian != nullptr) { - jacobian->SetZero(); + jacobian->SetZero(options_.context, options_.num_threads); } // Each thread gets it's own cost and evaluate scratch space. for (int i = 0; i < options_.num_threads; ++i) { evaluate_scratch_[i].cost = 0.0; if (gradient != nullptr) { - VectorRef(evaluate_scratch_[i].gradient.get(), - program_->NumEffectiveParameters()) - .setZero(); + ParallelSetZero(options_.context, + options_.num_threads, + evaluate_scratch_[i].gradient.get(), + num_parameters_); } } @@ -259,38 +256,55 @@ class ProgramEvaluator final : public Evaluator { } }); - if (!abort) { - const int num_parameters = program_->NumEffectiveParameters(); + if (abort) { + return false; + } - // Sum the cost and gradient (if requested) from each thread. - (*cost) = 0.0; + // Sum the cost and gradient (if requested) from each thread. 
+ (*cost) = 0.0; + if (gradient != nullptr) { + auto gradient_vector = VectorRef(gradient, num_parameters_); + ParallelSetZero(options_.context, options_.num_threads, gradient_vector); + } + + for (int i = 0; i < options_.num_threads; ++i) { + (*cost) += evaluate_scratch_[i].cost; if (gradient != nullptr) { - VectorRef(gradient, num_parameters).setZero(); - } - for (int i = 0; i < options_.num_threads; ++i) { - (*cost) += evaluate_scratch_[i].cost; - if (gradient != nullptr) { - VectorRef(gradient, num_parameters) += - VectorRef(evaluate_scratch_[i].gradient.get(), num_parameters); - } - } - - // Finalize the Jacobian if it is available. - // `num_parameters` is passed to the finalizer so that additional - // storage can be reserved for additional diagonal elements if - // necessary. - if (jacobian != nullptr) { - JacobianFinalizer f; - f(jacobian, num_parameters); + auto gradient_vector = VectorRef(gradient, num_parameters_); + ParallelAssign( + options_.context, + options_.num_threads, + gradient_vector, + gradient_vector + VectorRef(evaluate_scratch_[i].gradient.get(), + num_parameters_)); } } - return !abort; + + // It is possible that after accumulation that the cost has become infinite + // or a nan. + if (!std::isfinite(*cost)) { + LOG(ERROR) << "Accumulated cost = " << *cost + << " is not a finite number. Evaluation failed."; + return false; + } + + // Finalize the Jacobian if it is available. + // `num_parameters` is passed to the finalizer so that additional + // storage can be reserved for additional diagonal elements if + // necessary. 
+ if (jacobian != nullptr) { + JacobianFinalizer f; + f(jacobian, num_parameters_); + } + + return true; } bool Plus(const double* state, const double* delta, double* state_plus_delta) const final { - return program_->Plus(state, delta, state_plus_delta); + return program_->Plus( + state, delta, state_plus_delta, options_.context, options_.num_threads); } int NumParameters() const final { return program_->NumParameters(); } @@ -345,7 +359,7 @@ class ProgramEvaluator final : public Evaluator { // Create scratch space for each thread evaluating the program. static std::unique_ptr CreateEvaluatorScratch( - const Program& program, int num_threads) { + const Program& program, unsigned num_threads) { int max_parameters_per_residual_block = program.MaxParametersPerResidualBlock(); int max_scratch_doubles_needed_for_evaluate = @@ -370,6 +384,7 @@ class ProgramEvaluator final : public Evaluator { std::unique_ptr evaluate_preparers_; std::unique_ptr evaluate_scratch_; std::vector residual_layout_; + int num_parameters_; ::ceres::internal::ExecutionSummary execution_summary_; }; diff --git a/extern/ceres/internal/ceres/random.h b/extern/ceres/internal/ceres/random.h deleted file mode 100644 index 0495d67581d..00000000000 --- a/extern/ceres/internal/ceres/random.h +++ /dev/null @@ -1,73 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. -// http://ceres-solver.org/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. 
nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: keir@google.com (Keir Mierle) -// sameeragarwal@google.com (Sameer Agarwal) - -#ifndef CERES_INTERNAL_RANDOM_H_ -#define CERES_INTERNAL_RANDOM_H_ - -#include -#include - -#include "ceres/internal/export.h" - -namespace ceres { - -inline void SetRandomState(int state) { srand(state); } - -inline int Uniform(int n) { - if (n) { - return rand() % n; - } else { - return 0; - } -} - -inline double RandDouble() { - auto r = static_cast(rand()); - return r / RAND_MAX; -} - -// Box-Muller algorithm for normal random number generation. 
-// http://en.wikipedia.org/wiki/Box-Muller_transform -inline double RandNormal() { - double x1, x2, w; - do { - x1 = 2.0 * RandDouble() - 1.0; - x2 = 2.0 * RandDouble() - 1.0; - w = x1 * x1 + x2 * x2; - } while (w >= 1.0 || w == 0.0); - - w = sqrt((-2.0 * log(w)) / w); - return x1 * w; -} - -} // namespace ceres - -#endif // CERES_INTERNAL_RANDOM_H_ diff --git a/extern/ceres/internal/ceres/reorder_program.cc b/extern/ceres/internal/ceres/reorder_program.cc index d552ebf3de3..b9ff7a31f91 100644 --- a/extern/ceres/internal/ceres/reorder_program.cc +++ b/extern/ceres/internal/ceres/reorder_program.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,12 +31,14 @@ #include "ceres/reorder_program.h" #include +#include #include #include +#include +#include #include #include "Eigen/SparseCore" -#include "ceres/cxsparse.h" #include "ceres/internal/config.h" #include "ceres/internal/export.h" #include "ceres/ordered_groups.h" @@ -51,18 +53,19 @@ #include "ceres/types.h" #ifdef CERES_USE_EIGEN_SPARSE + +#ifndef CERES_NO_EIGEN_METIS +#include // Need this because MetisSupport refers to std::cerr. 
+ +#include "Eigen/MetisSupport" +#endif + #include "Eigen/OrderingMethods" #endif #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::map; -using std::set; -using std::string; -using std::vector; +namespace ceres::internal { namespace { @@ -86,7 +89,6 @@ static int MinParameterBlock(const ResidualBlock* residual_block, return min_parameter_block_position; } -#if defined(CERES_USE_EIGEN_SPARSE) Eigen::SparseMatrix CreateBlockJacobian( const TripletSparseMatrix& block_jacobian_transpose) { using SparseMatrix = Eigen::SparseMatrix; @@ -95,7 +97,7 @@ Eigen::SparseMatrix CreateBlockJacobian( const int* rows = block_jacobian_transpose.rows(); const int* cols = block_jacobian_transpose.cols(); int num_nonzeros = block_jacobian_transpose.num_nonzeros(); - vector triplets; + std::vector triplets; triplets.reserve(num_nonzeros); for (int i = 0; i < num_nonzeros; ++i) { triplets.emplace_back(cols[i], rows[i], 1); @@ -106,14 +108,20 @@ Eigen::SparseMatrix CreateBlockJacobian( block_jacobian.setFromTriplets(triplets.begin(), triplets.end()); return block_jacobian; } -#endif void OrderingForSparseNormalCholeskyUsingSuiteSparse( + const LinearSolverOrderingType linear_solver_ordering_type, const TripletSparseMatrix& tsm_block_jacobian_transpose, - const vector& parameter_blocks, + const std::vector& parameter_blocks, const ParameterBlockOrdering& parameter_block_ordering, int* ordering) { #ifdef CERES_NO_SUITESPARSE + // "Void"ing values to avoid compiler warnings about unused parameters + (void)linear_solver_ordering_type; + (void)tsm_block_jacobian_transpose; + (void)parameter_blocks; + (void)parameter_block_ordering; + (void)ordering; LOG(FATAL) << "Congratulations, you found a Ceres bug! 
" << "Please report this error to the developers."; #else @@ -121,61 +129,47 @@ void OrderingForSparseNormalCholeskyUsingSuiteSparse( cholmod_sparse* block_jacobian_transpose = ss.CreateSparseMatrix( const_cast(&tsm_block_jacobian_transpose)); - // No CAMD or the user did not supply a useful ordering, then just - // use regular AMD. - if (parameter_block_ordering.NumGroups() <= 1 || - !SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) { - ss.ApproximateMinimumDegreeOrdering(block_jacobian_transpose, &ordering[0]); - } else { - vector constraints; - for (auto* parameter_block : parameter_blocks) { - constraints.push_back(parameter_block_ordering.GroupId( - parameter_block->mutable_user_state())); + if (linear_solver_ordering_type == ceres::AMD) { + if (parameter_block_ordering.NumGroups() <= 1) { + // The user did not supply a useful ordering so just go ahead + // and use AMD. + ss.Ordering(block_jacobian_transpose, OrderingType::AMD, ordering); + } else { + // The user supplied an ordering, so use CAMD. + std::vector constraints; + constraints.reserve(parameter_blocks.size()); + for (auto* parameter_block : parameter_blocks) { + constraints.push_back(parameter_block_ordering.GroupId( + parameter_block->mutable_user_state())); + } + + // Renumber the entries of constraints to be contiguous integers + // as CAMD requires that the group ids be in the range [0, + // parameter_blocks.size() - 1]. + MapValuesToContiguousRange(constraints.size(), constraints.data()); + ss.ConstrainedApproximateMinimumDegreeOrdering( + block_jacobian_transpose, constraints.data(), ordering); } - - // Renumber the entries of constraints to be contiguous integers - // as CAMD requires that the group ids be in the range [0, - // parameter_blocks.size() - 1]. 
- MapValuesToContiguousRange(constraints.size(), &constraints[0]); - ss.ConstrainedApproximateMinimumDegreeOrdering( - block_jacobian_transpose, &constraints[0], ordering); + } else if (linear_solver_ordering_type == ceres::NESDIS) { + // If nested dissection is chosen as an ordering algorithm, then + // ignore any user provided linear_solver_ordering. + CHECK(SuiteSparse::IsNestedDissectionAvailable()) + << "Congratulations, you found a Ceres bug! " + << "Please report this error to the developers."; + ss.Ordering(block_jacobian_transpose, OrderingType::NESDIS, ordering); + } else { + LOG(FATAL) << "Congratulations, you found a Ceres bug! " + << "Please report this error to the developers."; } - VLOG(2) << "Block ordering stats: " - << " flops: " << ss.mutable_cc()->fl - << " lnz : " << ss.mutable_cc()->lnz - << " anz : " << ss.mutable_cc()->anz; - ss.Free(block_jacobian_transpose); #endif // CERES_NO_SUITESPARSE } -void OrderingForSparseNormalCholeskyUsingCXSparse( - const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) { -#ifdef CERES_NO_CXSPARSE - LOG(FATAL) << "Congratulations, you found a Ceres bug! " - << "Please report this error to the developers."; -#else - // CXSparse works with J'J instead of J'. So compute the block - // sparsity for J'J and compute an approximate minimum degree - // ordering. 
- CXSparse cxsparse; - cs_di* block_jacobian_transpose; - block_jacobian_transpose = cxsparse.CreateSparseMatrix( - const_cast(&tsm_block_jacobian_transpose)); - cs_di* block_jacobian = cxsparse.TransposeMatrix(block_jacobian_transpose); - cs_di* block_hessian = - cxsparse.MatrixMatrixMultiply(block_jacobian_transpose, block_jacobian); - cxsparse.Free(block_jacobian); - cxsparse.Free(block_jacobian_transpose); - - cxsparse.ApproximateMinimumDegreeOrdering(block_hessian, ordering); - cxsparse.Free(block_hessian); -#endif // CERES_NO_CXSPARSE -} - void OrderingForSparseNormalCholeskyUsingEigenSparse( - const TripletSparseMatrix& tsm_block_jacobian_transpose, int* ordering) { + const LinearSolverOrderingType linear_solver_ordering_type, + const TripletSparseMatrix& tsm_block_jacobian_transpose, + int* ordering) { #ifndef CERES_USE_EIGEN_SPARSE LOG(FATAL) << "SPARSE_NORMAL_CHOLESKY cannot be used with EIGEN_SPARSE " "because Ceres was not built with support for " @@ -183,12 +177,12 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse( "This requires enabling building with -DEIGENSPARSE=ON."; #else - // This conversion from a TripletSparseMatrix to a Eigen::Triplet - // matrix is unfortunate, but unavoidable for now. It is not a - // significant performance penalty in the grand scheme of - // things. The right thing to do here would be to get a compressed - // row sparse matrix representation of the jacobian and go from - // there. But that is a project for another day. + // TODO(sameeragarwal): This conversion from a TripletSparseMatrix + // to a Eigen::Triplet matrix is unfortunate, but unavoidable for + // now. It is not a significant performance penalty in the grand + // scheme of things. The right thing to do here would be to get a + // compressed row sparse matrix representation of the jacobian and + // go from there. But that is a project for another day. 
using SparseMatrix = Eigen::SparseMatrix; const SparseMatrix block_jacobian = @@ -196,9 +190,19 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse( const SparseMatrix block_hessian = block_jacobian.transpose() * block_jacobian; - Eigen::AMDOrdering amd_ordering; Eigen::PermutationMatrix perm; - amd_ordering(block_hessian, perm); + if (linear_solver_ordering_type == ceres::AMD) { + Eigen::AMDOrdering amd_ordering; + amd_ordering(block_hessian, perm); + } else { +#ifndef CERES_NO_EIGEN_METIS + Eigen::MetisOrdering metis_ordering; + metis_ordering(block_hessian, perm); +#else + perm.setIdentity(block_hessian.rows()); +#endif + } + for (int i = 0; i < block_hessian.rows(); ++i) { ordering[i] = perm.indices()[i]; } @@ -210,7 +214,7 @@ void OrderingForSparseNormalCholeskyUsingEigenSparse( bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map, const ParameterBlockOrdering& ordering, Program* program, - string* error) { + std::string* error) { const int num_parameter_blocks = program->NumParameterBlocks(); if (ordering.NumElements() != num_parameter_blocks) { *error = StringPrintf( @@ -222,13 +226,15 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map, return false; } - vector* parameter_blocks = + std::vector* parameter_blocks = program->mutable_parameter_blocks(); parameter_blocks->clear(); - const map>& groups = ordering.group_to_elements(); + // TODO(sameeragarwal): Investigate whether this should be a set or an + // unordered_set. 
+ const std::map>& groups = ordering.group_to_elements(); for (const auto& p : groups) { - const set& group = p.second; + const std::set& group = p.second; for (double* parameter_block_ptr : group) { auto it = parameter_map.find(parameter_block_ptr); if (it == parameter_map.end()) { @@ -248,16 +254,18 @@ bool ApplyOrdering(const ProblemImpl::ParameterMap& parameter_map, bool LexicographicallyOrderResidualBlocks( const int size_of_first_elimination_group, Program* program, - string* error) { + std::string* /*error*/) { CHECK_GE(size_of_first_elimination_group, 1) << "Congratulations, you found a Ceres bug! Please report this error " << "to the developers."; // Create a histogram of the number of residuals for each E block. There is an // extra bucket at the end to catch all non-eliminated F blocks. - vector residual_blocks_per_e_block(size_of_first_elimination_group + 1); - vector* residual_blocks = program->mutable_residual_blocks(); - vector min_position_per_residual(residual_blocks->size()); + std::vector residual_blocks_per_e_block(size_of_first_elimination_group + + 1); + std::vector* residual_blocks = + program->mutable_residual_blocks(); + std::vector min_position_per_residual(residual_blocks->size()); for (int i = 0; i < residual_blocks->size(); ++i) { ResidualBlock* residual_block = (*residual_blocks)[i]; int position = @@ -270,7 +278,7 @@ bool LexicographicallyOrderResidualBlocks( // Run a cumulative sum on the histogram, to obtain offsets to the start of // each histogram bucket (where each bucket is for the residuals for that // E-block). - vector offsets(size_of_first_elimination_group + 1); + std::vector offsets(size_of_first_elimination_group + 1); std::partial_sum(residual_blocks_per_e_block.begin(), residual_blocks_per_e_block.end(), offsets.begin()); @@ -289,9 +297,9 @@ bool LexicographicallyOrderResidualBlocks( // of the bucket. The filling order among the buckets is dictated by the // residual blocks. 
This loop uses the offsets as counters; subtracting one // from each offset as a residual block is placed in the bucket. When the - // filling is finished, the offset pointerts should have shifted down one + // filling is finished, the offset pointers should have shifted down one // entry (this is verified below). - vector reordered_residual_blocks( + std::vector reordered_residual_blocks( (*residual_blocks).size(), static_cast(nullptr)); for (int i = 0; i < residual_blocks->size(); ++i) { int bucket = min_position_per_residual[i]; @@ -326,18 +334,18 @@ bool LexicographicallyOrderResidualBlocks( return true; } -// Pre-order the columns corresponding to the schur complement if +// Pre-order the columns corresponding to the Schur complement if // possible. -static void MaybeReorderSchurComplementColumnsUsingSuiteSparse( +static void ReorderSchurComplementColumnsUsingSuiteSparse( const ParameterBlockOrdering& parameter_block_ordering, Program* program) { -#ifndef CERES_NO_SUITESPARSE +#ifdef CERES_NO_SUITESPARSE + // "Void"ing values to avoid compiler warnings about unused parameters + (void)parameter_block_ordering; + (void)program; +#else SuiteSparse ss; - if (!SuiteSparse::IsConstrainedApproximateMinimumDegreeOrderingAvailable()) { - return; - } - - vector constraints; - vector& parameter_blocks = + std::vector constraints; + std::vector& parameter_blocks = *(program->mutable_parameter_blocks()); for (auto* parameter_block : parameter_blocks) { @@ -348,7 +356,7 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse( // Renumber the entries of constraints to be contiguous integers as // CAMD requires that the group ids be in the range [0, // parameter_blocks.size() - 1]. - MapValuesToContiguousRange(constraints.size(), &constraints[0]); + MapValuesToContiguousRange(constraints.size(), constraints.data()); // Compute a block sparse presentation of J'. 
std::unique_ptr tsm_block_jacobian_transpose( @@ -357,12 +365,12 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse( cholmod_sparse* block_jacobian_transpose = ss.CreateSparseMatrix(tsm_block_jacobian_transpose.get()); - vector ordering(parameter_blocks.size(), 0); + std::vector ordering(parameter_blocks.size(), 0); ss.ConstrainedApproximateMinimumDegreeOrdering( - block_jacobian_transpose, &constraints[0], &ordering[0]); + block_jacobian_transpose, constraints.data(), ordering.data()); ss.Free(block_jacobian_transpose); - const vector parameter_blocks_copy(parameter_blocks); + const std::vector parameter_blocks_copy(parameter_blocks); for (int i = 0; i < program->NumParameterBlocks(); ++i) { parameter_blocks[i] = parameter_blocks_copy[ordering[i]]; } @@ -371,14 +379,14 @@ static void MaybeReorderSchurComplementColumnsUsingSuiteSparse( #endif } -static void MaybeReorderSchurComplementColumnsUsingEigen( +static void ReorderSchurComplementColumnsUsingEigen( + LinearSolverOrderingType ordering_type, const int size_of_first_elimination_group, - const ProblemImpl::ParameterMap& parameter_map, + const ProblemImpl::ParameterMap& /*parameter_map*/, Program* program) { #if defined(CERES_USE_EIGEN_SPARSE) std::unique_ptr tsm_block_jacobian_transpose( program->CreateJacobianBlockSparsityTranspose()); - using SparseMatrix = Eigen::SparseMatrix; const SparseMatrix block_jacobian = CreateBlockJacobian(*tsm_block_jacobian_transpose); @@ -399,12 +407,22 @@ static void MaybeReorderSchurComplementColumnsUsingEigen( const SparseMatrix block_schur_complement = F.transpose() * F - F.transpose() * E * E.transpose() * F; - Eigen::AMDOrdering amd_ordering; Eigen::PermutationMatrix perm; - amd_ordering(block_schur_complement, perm); + if (ordering_type == ceres::AMD) { + Eigen::AMDOrdering amd_ordering; + amd_ordering(block_schur_complement, perm); + } else { +#ifndef CERES_NO_EIGEN_METIS + Eigen::MetisOrdering metis_ordering; + metis_ordering(block_schur_complement, perm); 
+#else + perm.setIdentity(block_schur_complement.rows()); +#endif + } - const vector& parameter_blocks = program->parameter_blocks(); - vector ordering(num_cols); + const std::vector& parameter_blocks = + program->parameter_blocks(); + std::vector ordering(num_cols); // The ordering of the first size_of_first_elimination_group does // not matter, so we preserve the existing ordering. @@ -426,10 +444,11 @@ static void MaybeReorderSchurComplementColumnsUsingEigen( bool ReorderProgramForSchurTypeLinearSolver( const LinearSolverType linear_solver_type, const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + const LinearSolverOrderingType linear_solver_ordering_type, const ProblemImpl::ParameterMap& parameter_map, ParameterBlockOrdering* parameter_block_ordering, Program* program, - string* error) { + std::string* error) { if (parameter_block_ordering->NumElements() != program->NumParameterBlocks()) { *error = StringPrintf( @@ -447,7 +466,7 @@ bool ReorderProgramForSchurTypeLinearSolver( // parameter block ordering as it sees fit. For Schur type solvers, // this means that the user wishes for Ceres to identify the // e_blocks, which we do by computing a maximal independent set. - vector schur_ordering; + std::vector schur_ordering; const int size_of_first_elimination_group = ComputeStableSchurOrdering(*program, &schur_ordering); @@ -470,7 +489,10 @@ bool ReorderProgramForSchurTypeLinearSolver( // group. // Verify that the first elimination group is an independent set. - const set& first_elimination_group = + + // TODO(sameeragarwal): Investigate if this should be a set or an + // unordered_set. 
+ const std::set& first_elimination_group = parameter_block_ordering->group_to_elements().begin()->second; if (!program->IsParameterBlockSetIndependent(first_elimination_group)) { *error = StringPrintf( @@ -492,12 +514,20 @@ bool ReorderProgramForSchurTypeLinearSolver( parameter_block_ordering->group_to_elements().begin()->second.size(); if (linear_solver_type == SPARSE_SCHUR) { - if (sparse_linear_algebra_library_type == SUITE_SPARSE) { - MaybeReorderSchurComplementColumnsUsingSuiteSparse( - *parameter_block_ordering, program); + if (sparse_linear_algebra_library_type == SUITE_SPARSE && + linear_solver_ordering_type == ceres::AMD) { + // Preordering support for schur complement only works with AMD + // for now, since we are using CAMD. + // + // TODO(sameeragarwal): It maybe worth adding pre-ordering support for + // nested dissection too. + ReorderSchurComplementColumnsUsingSuiteSparse(*parameter_block_ordering, + program); } else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) { - MaybeReorderSchurComplementColumnsUsingEigen( - size_of_first_elimination_group, parameter_map, program); + ReorderSchurComplementColumnsUsingEigen(linear_solver_ordering_type, + size_of_first_elimination_group, + parameter_map, + program); } } @@ -509,10 +539,11 @@ bool ReorderProgramForSchurTypeLinearSolver( bool ReorderProgramForSparseCholesky( const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + const LinearSolverOrderingType linear_solver_ordering_type, const ParameterBlockOrdering& parameter_block_ordering, int start_row_block, Program* program, - string* error) { + std::string* error) { if (parameter_block_ordering.NumElements() != program->NumParameterBlocks()) { *error = StringPrintf( "The program has %d parameter blocks, but the parameter block " @@ -526,19 +557,17 @@ bool ReorderProgramForSparseCholesky( std::unique_ptr tsm_block_jacobian_transpose( program->CreateJacobianBlockSparsityTranspose(start_row_block)); - vector 
ordering(program->NumParameterBlocks(), 0); - vector& parameter_blocks = + std::vector ordering(program->NumParameterBlocks(), 0); + std::vector& parameter_blocks = *(program->mutable_parameter_blocks()); if (sparse_linear_algebra_library_type == SUITE_SPARSE) { OrderingForSparseNormalCholeskyUsingSuiteSparse( + linear_solver_ordering_type, *tsm_block_jacobian_transpose, parameter_blocks, parameter_block_ordering, - &ordering[0]); - } else if (sparse_linear_algebra_library_type == CX_SPARSE) { - OrderingForSparseNormalCholeskyUsingCXSparse(*tsm_block_jacobian_transpose, - &ordering[0]); + ordering.data()); } else if (sparse_linear_algebra_library_type == ACCELERATE_SPARSE) { // Accelerate does not provide a function to perform reordering without // performing a full symbolic factorisation. As such, we have nothing @@ -550,11 +579,13 @@ bool ReorderProgramForSparseCholesky( } else if (sparse_linear_algebra_library_type == EIGEN_SPARSE) { OrderingForSparseNormalCholeskyUsingEigenSparse( - *tsm_block_jacobian_transpose, &ordering[0]); + linear_solver_ordering_type, + *tsm_block_jacobian_transpose, + ordering.data()); } // Apply ordering. 
- const vector parameter_blocks_copy(parameter_blocks); + const std::vector parameter_blocks_copy(parameter_blocks); for (int i = 0; i < program->NumParameterBlocks(); ++i) { parameter_blocks[i] = parameter_blocks_copy[ordering[i]]; } @@ -575,5 +606,39 @@ int ReorderResidualBlocksByPartition( return it - residual_blocks->begin(); } -} // namespace internal -} // namespace ceres +bool AreJacobianColumnsOrdered( + const LinearSolverType linear_solver_type, + const PreconditionerType preconditioner_type, + const SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + const LinearSolverOrderingType linear_solver_ordering_type) { + if (sparse_linear_algebra_library_type == SUITE_SPARSE) { + if (linear_solver_type == SPARSE_NORMAL_CHOLESKY || + (linear_solver_type == CGNR && preconditioner_type == SUBSET)) { + return true; + } + if (linear_solver_type == SPARSE_SCHUR && + linear_solver_ordering_type == ceres::AMD) { + return true; + } + return false; + } + + if (sparse_linear_algebra_library_type == ceres::EIGEN_SPARSE) { + if (linear_solver_type == SPARSE_NORMAL_CHOLESKY || + linear_solver_type == SPARSE_SCHUR || + (linear_solver_type == CGNR && preconditioner_type == SUBSET)) { + return true; + } + return false; + } + + if (sparse_linear_algebra_library_type == ceres::ACCELERATE_SPARSE) { + // Apple's accelerate framework does not allow direct access to + // ordering algorithms, so jacobian columns are never pre-ordered. + return false; + } + + return false; +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/reorder_program.h b/extern/ceres/internal/ceres/reorder_program.h index fbc49231c33..368a6edd27e 100644 --- a/extern/ceres/internal/ceres/reorder_program.h +++ b/extern/ceres/internal/ceres/reorder_program.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,12 +35,12 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" +#include "ceres/linear_solver.h" #include "ceres/parameter_block_ordering.h" #include "ceres/problem_impl.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; @@ -76,6 +76,7 @@ CERES_NO_EXPORT bool LexicographicallyOrderResidualBlocks( CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver( LinearSolverType linear_solver_type, SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + LinearSolverOrderingType linear_solver_ordering_type, const ProblemImpl::ParameterMap& parameter_map, ParameterBlockOrdering* parameter_block_ordering, Program* program, @@ -93,6 +94,7 @@ CERES_NO_EXPORT bool ReorderProgramForSchurTypeLinearSolver( // ordering will take it into account, otherwise it will be ignored. CERES_NO_EXPORT bool ReorderProgramForSparseCholesky( SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + LinearSolverOrderingType linear_solver_ordering_type, const ParameterBlockOrdering& parameter_block_ordering, int start_row_block, Program* program, @@ -112,8 +114,15 @@ CERES_NO_EXPORT int ReorderResidualBlocksByPartition( const std::unordered_set& bottom_residual_blocks, Program* program); -} // namespace internal -} // namespace ceres +// The return value of this function indicates whether the columns of +// the Jacobian can be reordered using a fill reducing ordering. 
+CERES_NO_EXPORT bool AreJacobianColumnsOrdered( + LinearSolverType linear_solver_type, + PreconditionerType preconditioner_type, + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, + LinearSolverOrderingType linear_solver_ordering_type); + +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/residual_block.cc b/extern/ceres/internal/ceres/residual_block.cc index cd408f2f98e..f5ad1256e80 100644 --- a/extern/ceres/internal/ceres/residual_block.cc +++ b/extern/ceres/internal/ceres/residual_block.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ using Eigen::Dynamic; -namespace ceres { -namespace internal { +namespace ceres::internal { ResidualBlock::ResidualBlock( const CostFunction* cost_function, @@ -114,8 +113,7 @@ bool ResidualBlock::Evaluate(const bool apply_loss_function, return false; } - if (!IsEvaluationValid( - *this, parameters.data(), cost, residuals, eval_jacobians)) { + if (!IsEvaluationValid(*this, parameters.data(), residuals, eval_jacobians)) { // clang-format off std::string message = "\n\n" @@ -216,5 +214,4 @@ int ResidualBlock::NumScratchDoublesForEvaluate() const { return scratch_doubles; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/residual_block.h b/extern/ceres/internal/ceres/residual_block.h index 978b94640fe..62460c70954 100644 --- a/extern/ceres/internal/ceres/residual_block.h +++ b/extern/ceres/internal/ceres/residual_block.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/residual_block_utils.cc b/extern/ceres/internal/ceres/residual_block_utils.cc index 11c7623ce22..91370d8c23d 100644 --- a/extern/ceres/internal/ceres/residual_block_utils.cc +++ b/extern/ceres/internal/ceres/residual_block_utils.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ #include #include #include +#include #include "ceres/array_utils.h" #include "ceres/internal/eigen.h" @@ -42,10 +43,7 @@ #include "ceres/stringprintf.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::string; +namespace ceres::internal { void InvalidateEvaluation(const ResidualBlock& block, double* cost, @@ -64,17 +62,17 @@ void InvalidateEvaluation(const ResidualBlock& block, } } -string EvaluationToString(const ResidualBlock& block, - double const* const* parameters, - double* cost, - double* residuals, - double** jacobians) { +std::string EvaluationToString(const ResidualBlock& block, + double const* const* parameters, + double* cost, + double* residuals, + double** jacobians) { CHECK(cost != nullptr); CHECK(residuals != nullptr); const int num_parameter_blocks = block.NumParameterBlocks(); const int num_residuals = block.NumResiduals(); - string result = ""; + std::string result = ""; // clang-format off StringAppendF(&result, @@ -89,7 +87,7 @@ string EvaluationToString(const ResidualBlock& block, "to Inf or NaN is also an error. 
\n\n"; // NOLINT // clang-format on - string space = "Residuals: "; + std::string space = "Residuals: "; result += space; AppendArrayToString(num_residuals, residuals, &result); StringAppendF(&result, "\n\n"); @@ -117,9 +115,11 @@ string EvaluationToString(const ResidualBlock& block, return result; } +// TODO(sameeragarwal) Check cost value validness here +// Cost value is a part of evaluation but not checked here since according to +// residual_block.cc cost is not valid at the time this method is called bool IsEvaluationValid(const ResidualBlock& block, - double const* const* parameters, - double* cost, + double const* const* /*parameters*/, double* residuals, double** jacobians) { const int num_parameter_blocks = block.NumParameterBlocks(); @@ -141,5 +141,4 @@ bool IsEvaluationValid(const ResidualBlock& block, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/residual_block_utils.h b/extern/ceres/internal/ceres/residual_block_utils.h index f75b6aecce9..1bf1ca17703 100644 --- a/extern/ceres/internal/ceres/residual_block_utils.h +++ b/extern/ceres/internal/ceres/residual_block_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class ResidualBlock; @@ -64,7 +63,6 @@ void InvalidateEvaluation(const ResidualBlock& block, CERES_NO_EXPORT bool IsEvaluationValid(const ResidualBlock& block, double const* const* parameters, - double* cost, double* residuals, double** jacobians); @@ -78,7 +76,6 @@ std::string EvaluationToString(const ResidualBlock& block, double* residuals, double** jacobians); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_RESIDUAL_BLOCK_UTILS_H_ diff --git a/extern/ceres/internal/ceres/schur_complement_solver.cc b/extern/ceres/internal/ceres/schur_complement_solver.cc index bb442b4280b..7c4b234e554 100644 --- a/extern/ceres/internal/ceres/schur_complement_solver.cc +++ b/extern/ceres/internal/ceres/schur_complement_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "Eigen/Dense" @@ -52,58 +53,36 @@ #include "ceres/types.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { - -using std::make_pair; -using std::pair; -using std::set; -using std::vector; - +namespace ceres::internal { namespace { -class BlockRandomAccessSparseMatrixAdapter final : public LinearOperator { +class BlockRandomAccessSparseMatrixAdapter final + : public ConjugateGradientsLinearOperator { public: explicit BlockRandomAccessSparseMatrixAdapter( const BlockRandomAccessSparseMatrix& m) : m_(m) {} - // y = y + Ax; - void RightMultiply(const double* x, double* y) const final { - m_.SymmetricRightMultiply(x, y); + void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { + m_.SymmetricRightMultiplyAndAccumulate(x.data(), y.data()); } - // y = y + A'x; - void LeftMultiply(const double* x, double* y) const final { - m_.SymmetricRightMultiply(x, y); - } - - int num_rows() const final { return m_.num_rows(); } - int num_cols() const final { return m_.num_rows(); } - private: const BlockRandomAccessSparseMatrix& m_; }; -class BlockRandomAccessDiagonalMatrixAdapter final : public LinearOperator { +class BlockRandomAccessDiagonalMatrixAdapter final + : public ConjugateGradientsLinearOperator { public: explicit BlockRandomAccessDiagonalMatrixAdapter( const BlockRandomAccessDiagonalMatrix& m) : m_(m) {} // y = y + Ax; - void RightMultiply(const double* x, double* y) const final { - m_.RightMultiply(x, y); + void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final { + m_.RightMultiplyAndAccumulate(x.data(), y.data()); } - // y = y + A'x; - void LeftMultiply(const double* x, double* y) const final { - m_.RightMultiply(x, y); - } - - int num_rows() const final { return m_.num_rows(); } - int num_cols() const final { return m_.num_rows(); } - private: const 
BlockRandomAccessDiagonalMatrix& m_; }; @@ -126,7 +105,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl( EventLogger event_logger("SchurComplementSolver::Solve"); const CompressedRowBlockStructure* bs = A->block_structure(); - if (eliminator_.get() == nullptr) { + if (eliminator_ == nullptr) { const int num_eliminate_blocks = options_.elimination_groups[0]; const int num_f_blocks = bs->cols.size() - num_eliminate_blocks; @@ -161,7 +140,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl( b, per_solve_options.D, lhs_.get(), - rhs_.get()); + rhs_.data()); event_logger.AddEvent("Eliminate"); double* reduced_solution = x + A->num_cols() - lhs_->num_cols(); @@ -169,7 +148,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl( SolveReducedLinearSystem(per_solve_options, reduced_solution); event_logger.AddEvent("ReducedSolve"); - if (summary.termination_type == LINEAR_SOLVER_SUCCESS) { + if (summary.termination_type == LinearSolverTerminationType::SUCCESS) { eliminator_->BackSubstitute( BlockSparseMatrixData(*A), b, per_solve_options.D, reduced_solution, x); event_logger.AddEvent("BackSubstitute"); @@ -190,24 +169,21 @@ void DenseSchurComplementSolver::InitStorage( const CompressedRowBlockStructure* bs) { const int num_eliminate_blocks = options().elimination_groups[0]; const int num_col_blocks = bs->cols.size(); - - vector blocks(num_col_blocks - num_eliminate_blocks, 0); - for (int i = num_eliminate_blocks, j = 0; i < num_col_blocks; ++i, ++j) { - blocks[j] = bs->cols[i].size; - } - - set_lhs(std::make_unique(blocks)); - set_rhs(std::make_unique(lhs()->num_rows())); + auto blocks = Tail(bs->cols, num_col_blocks - num_eliminate_blocks); + set_lhs(std::make_unique( + blocks, options().context, options().num_threads)); + ResizeRhs(lhs()->num_rows()); } // Solve the system Sx = r, assuming that the matrix S is stored in a // BlockRandomAccessDenseMatrix. The linear system is solved using // Eigen's Cholesky factorization. 
LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem( - const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { + const LinearSolver::PerSolveOptions& /*per_solve_options*/, + double* solution) { LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; auto* m = down_cast(mutable_lhs()); @@ -221,7 +197,7 @@ LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem( summary.num_iterations = 1; summary.termination_type = cholesky_->FactorAndSolve( - num_rows, m->mutable_values(), rhs(), solution, &summary.message); + num_rows, m->mutable_values(), rhs().data(), solution, &summary.message); return summary; } @@ -233,7 +209,14 @@ SparseSchurComplementSolver::SparseSchurComplementSolver( } } -SparseSchurComplementSolver::~SparseSchurComplementSolver() = default; +SparseSchurComplementSolver::~SparseSchurComplementSolver() { + for (int i = 0; i < 4; ++i) { + if (scratch_[i]) { + delete scratch_[i]; + scratch_[i] = nullptr; + } + } +} // Determine the non-zero blocks in the Schur Complement matrix, and // initialize a BlockRandomAccessSparseMatrix object. 
@@ -243,14 +226,11 @@ void SparseSchurComplementSolver::InitStorage( const int num_col_blocks = bs->cols.size(); const int num_row_blocks = bs->rows.size(); - blocks_.resize(num_col_blocks - num_eliminate_blocks, 0); - for (int i = num_eliminate_blocks; i < num_col_blocks; ++i) { - blocks_[i - num_eliminate_blocks] = bs->cols[i].size; - } + blocks_ = Tail(bs->cols, num_col_blocks - num_eliminate_blocks); - set> block_pairs; + std::set> block_pairs; for (int i = 0; i < blocks_.size(); ++i) { - block_pairs.insert(make_pair(i, i)); + block_pairs.emplace(i, i); } int r = 0; @@ -259,7 +239,7 @@ void SparseSchurComplementSolver::InitStorage( if (e_block_id >= num_eliminate_blocks) { break; } - vector f_blocks; + std::vector f_blocks; // Add to the chunk until the first block in the row is // different than the one in the first row for the chunk. @@ -281,7 +261,7 @@ void SparseSchurComplementSolver::InitStorage( f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end()); for (int i = 0; i < f_blocks.size(); ++i) { for (int j = i + 1; j < f_blocks.size(); ++j) { - block_pairs.insert(make_pair(f_blocks[i], f_blocks[j])); + block_pairs.emplace(f_blocks[i], f_blocks[j]); } } } @@ -296,15 +276,15 @@ void SparseSchurComplementSolver::InitStorage( for (const auto& cell : row.cells) { int r_block2_id = cell.block_id - num_eliminate_blocks; if (r_block1_id <= r_block2_id) { - block_pairs.insert(make_pair(r_block1_id, r_block2_id)); + block_pairs.emplace(r_block1_id, r_block2_id); } } } } - set_lhs( - std::make_unique(blocks_, block_pairs)); - set_rhs(std::make_unique(lhs()->num_rows())); + set_lhs(std::make_unique( + blocks_, block_pairs, options().context, options().num_threads)); + ResizeRhs(lhs()->num_rows()); } LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem( @@ -316,32 +296,39 @@ LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem( LinearSolver::Summary summary; summary.num_iterations = 0; - 
summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; - const TripletSparseMatrix* tsm = + const BlockSparseMatrix* bsm = down_cast(lhs())->matrix(); - if (tsm->num_rows() == 0) { + if (bsm->num_rows() == 0) { return summary; } - std::unique_ptr lhs; const CompressedRowSparseMatrix::StorageType storage_type = sparse_cholesky_->StorageType(); - if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm); - lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR); + if (storage_type == + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) { + if (!crs_lhs_) { + crs_lhs_ = bsm->ToCompressedRowSparseMatrix(); + crs_lhs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get()); + } } else { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm); - lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR); + if (!crs_lhs_) { + crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose(); + crs_lhs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get()); + } } - *lhs->mutable_col_blocks() = blocks_; - *lhs->mutable_row_blocks() = blocks_; - summary.num_iterations = 1; summary.termination_type = sparse_cholesky_->FactorAndSolve( - lhs.get(), rhs(), solution, &summary.message); + crs_lhs_.get(), rhs().data(), solution, &summary.message); return summary; } @@ -355,7 +342,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( if (num_rows == 0) { LinearSolver::Summary summary; summary.num_iterations = 0; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; return 
summary; } @@ -363,9 +350,9 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( // Only SCHUR_JACOBI is supported over here right now. CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI); - if (preconditioner_.get() == nullptr) { - preconditioner_ = - std::make_unique(blocks_); + if (preconditioner_ == nullptr) { + preconditioner_ = std::make_unique( + blocks_, options().context, options().num_threads); } auto* sc = down_cast(mutable_lhs()); @@ -373,7 +360,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( // Extract block diagonal from the Schur complement to construct the // schur_jacobi preconditioner. for (int i = 0; i < blocks_.size(); ++i) { - const int block_size = blocks_[i]; + const int block_size = blocks_[i].size; int sc_r, sc_c, sc_row_stride, sc_col_stride; CellInfo* sc_cell_info = @@ -394,25 +381,28 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( VectorRef(solution, num_rows).setZero(); - std::unique_ptr lhs_adapter = - std::make_unique(*sc); - std::unique_ptr preconditioner_adapter = + auto lhs = std::make_unique(*sc); + auto preconditioner = std::make_unique( *preconditioner_); - LinearSolver::Options cg_options; + ConjugateGradientsSolverOptions cg_options; cg_options.min_num_iterations = options().min_num_iterations; cg_options.max_num_iterations = options().max_num_iterations; - ConjugateGradientsSolver cg_solver(cg_options); + cg_options.residual_reset_period = options().residual_reset_period; + cg_options.q_tolerance = per_solve_options.q_tolerance; + cg_options.r_tolerance = per_solve_options.r_tolerance; - LinearSolver::PerSolveOptions cg_per_solve_options; - cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance; - cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance; - cg_per_solve_options.preconditioner = preconditioner_adapter.get(); - - return cg_solver.Solve( - lhs_adapter.get(), rhs(), cg_per_solve_options, solution); + 
cg_solution_ = Vector::Zero(sc->num_rows()); + for (int i = 0; i < 4; ++i) { + if (scratch_[i] == nullptr) { + scratch_[i] = new Vector(sc->num_rows()); + } + } + auto summary = ConjugateGradientsSolver( + cg_options, *lhs, rhs(), *preconditioner, scratch_, cg_solution_); + VectorRef(solution, sc->num_rows()) = cg_solution_; + return summary; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/schur_complement_solver.h b/extern/ceres/internal/ceres/schur_complement_solver.h index 859a086cdf4..5e11b9488bb 100644 --- a/extern/ceres/internal/ceres/schur_complement_solver.h +++ b/extern/ceres/internal/ceres/schur_complement_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -54,8 +54,7 @@ #include "ceres/internal/disable_warnings.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; class SparseCholesky; @@ -66,7 +65,7 @@ class SparseCholesky; // // E y + F z = b // -// Where x = [y;z] is a partition of the variables. The paritioning +// Where x = [y;z] is a partition of the variables. The partitioning // of the variables is such that, E'E is a block diagonal // matrix. 
Further, the rows of A are ordered so that for every // variable block in y, all the rows containing that variable block @@ -131,9 +130,8 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver { } const BlockRandomAccessMatrix* lhs() const { return lhs_.get(); } BlockRandomAccessMatrix* mutable_lhs() { return lhs_.get(); } - - void set_rhs(std::unique_ptr rhs) { rhs_ = std::move(rhs); } - const double* rhs() const { return rhs_.get(); } + void ResizeRhs(int n) { rhs_.resize(n); } + const Vector& rhs() const { return rhs_; } private: virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0; @@ -145,7 +143,7 @@ class CERES_NO_EXPORT SchurComplementSolver : public BlockSparseMatrixSolver { std::unique_ptr eliminator_; std::unique_ptr lhs_; - std::unique_ptr rhs_; + Vector rhs_; }; // Dense Cholesky factorization based solver. @@ -185,14 +183,15 @@ class CERES_NO_EXPORT SparseSchurComplementSolver final LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients( const LinearSolver::PerSolveOptions& per_solve_options, double* solution); - // Size of the blocks in the Schur complement. - std::vector blocks_; + std::vector blocks_; std::unique_ptr sparse_cholesky_; std::unique_ptr preconditioner_; + std::unique_ptr crs_lhs_; + Vector cg_solution_; + Vector* scratch_[4] = {nullptr, nullptr, nullptr, nullptr}; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/schur_eliminator.cc b/extern/ceres/internal/ceres/schur_eliminator.cc index 22e7358070f..cb079b5ce3d 100644 --- a/extern/ceres/internal/ceres/schur_eliminator.cc +++ b/extern/ceres/internal/ceres/schur_eliminator.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,8 +44,7 @@ #include "ceres/linear_solver.h" #include "ceres/schur_eliminator.h" -namespace ceres { -namespace internal { +namespace ceres::internal { SchurEliminatorBase::~SchurEliminatorBase() = default; @@ -161,5 +160,4 @@ std::unique_ptr SchurEliminatorBase::Create( Eigen::Dynamic>>(options); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/schur_eliminator.h b/extern/ceres/internal/ceres/schur_eliminator.h index 91831dceb5a..3832fe63e50 100644 --- a/extern/ceres/internal/ceres/schur_eliminator.h +++ b/extern/ceres/internal/ceres/schur_eliminator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2019 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -46,8 +46,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Classes implementing the SchurEliminatorBase interface implement // variable elimination for linear least squares problems. Assuming @@ -169,9 +168,9 @@ class CERES_NO_EXPORT SchurEliminatorBase { public: virtual ~SchurEliminatorBase(); - // Initialize the eliminator. It is the user's responsibilty to call + // Initialize the eliminator. It is the user's responsibility to call // this function before calling Eliminate or BackSubstitute. It is - // also the caller's responsibilty to ensure that the + // also the caller's responsibility to ensure that the // CompressedRowBlockStructure object passed to this method is the // same one (or is equivalent to) the one associated with the // BlockSparseMatrix objects below. 
@@ -383,8 +382,9 @@ template ::ConstVectorRef diag( D + bs->cols[num_eliminate_blocks_].position, kFBlockSize); @@ -479,7 +479,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final const Chunk& chunk = chunks_[i]; const int e_block_id = bs->rows[chunk.start].cells.front().block_id; - // Naming covention, e_t_e = e_block.transpose() * e_block; + // Naming convention, e_t_e = e_block.transpose() * e_block; Eigen::Matrix e_t_e; Eigen::Matrix e_t_f; Eigen::Matrix e_t_b; @@ -570,7 +570,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final // y_i = e_t_e_inverse * sum_i e_i^T * (b_i - f_i * z); void BackSubstitute(const BlockSparseMatrixData& A, const double* b, - const double* D, + const double* /*D*/, const double* z_ptr, double* y) override { typename EigenTypes::ConstVectorRef z(z_ptr, kFBlockSize); @@ -623,8 +623,7 @@ class CERES_NO_EXPORT SchurEliminatorForOneFBlock final std::vector e_t_e_inverse_matrices_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/schur_eliminator_impl.h b/extern/ceres/internal/ceres/schur_eliminator_impl.h index de3ba3e5dcb..ef5ce66946d 100644 --- a/extern/ceres/internal/ceres/schur_eliminator_impl.h +++ b/extern/ceres/internal/ceres/schur_eliminator_impl.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -69,8 +69,7 @@ #include "ceres/thread_token_provider.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template SchurEliminator::~SchurEliminator() { @@ -107,7 +106,7 @@ void SchurEliminator::Init( } // TODO(sameeragarwal): Now that we may have subset block structure, - // we need to make sure that we account for the fact that somep + // we need to make sure that we account for the fact that some // point blocks only have a "diagonal" row and nothing more. // // This likely requires a slightly different algorithm, which works @@ -206,8 +205,6 @@ void SchurEliminator::Eliminate( const int block_size = bs->cols[i].size; typename EigenTypes::ConstVectorRef diag( D + bs->cols[i].position, block_size); - - std::lock_guard l(cell_info->m); MatrixRef m(cell_info->values, row_stride, col_stride); m.block(r, c, block_size, block_size).diagonal() += diag.array().square().matrix(); @@ -301,7 +298,7 @@ void SchurEliminator::Eliminate( thread_id, bs, inverse_ete, buffer, chunk.buffer_layout, lhs); }); - // For rows with no e_blocks, the schur complement update reduces to + // For rows with no e_blocks, the Schur complement update reduces to // S += F'F. NoEBlockRowsUpdate(A, b, uneliminated_row_begins_, lhs, rhs); } @@ -410,7 +407,7 @@ void SchurEliminator::UpdateRhs( const int block_id = row.cells[c].block_id; const int block_size = bs->cols[block_id].size; const int block = block_id - num_eliminate_blocks_; - std::lock_guard l(*rhs_locks_[block]); + auto lock = MakeConditionalLock(num_threads_, *rhs_locks_[block]); // clang-format off MatrixTransposeVectorMultiply( values + row.cells[c].position, @@ -433,7 +430,7 @@ void SchurEliminator::UpdateRhs( // // ete = y11 * y11' + y12 * y12' // -// and the off diagonal blocks in the Guass Newton Hessian. +// and the off diagonal blocks in the Gauss Newton Hessian. 
// // buffer = [y11'(z11 + z12), y12' * z22, y11' * z51] // @@ -550,7 +547,7 @@ void SchurEliminator:: lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); if (cell_info != nullptr) { const int block2_size = bs->cols[it2->first].size; - std::lock_guard l(cell_info->m); + auto lock = MakeConditionalLock(num_threads_, cell_info->m); // clang-format off MatrixMatrixMultiply ( @@ -563,7 +560,7 @@ void SchurEliminator:: } } -// For rows with no e_blocks, the schur complement update reduces to S +// For rows with no e_blocks, the Schur complement update reduces to S // += F'F. This function iterates over the rows of A with no e_block, // and calls NoEBlockRowOuterProduct on each row. template @@ -596,7 +593,7 @@ void SchurEliminator:: } // A row r of A, which has no e_blocks gets added to the Schur -// Complement as S += r r'. This function is responsible for computing +// complement as S += r r'. This function is responsible for computing // the contribution of a single row r to the Schur complement. It is // very similar in structure to EBlockRowOuterProduct except for // one difference. It does not use any of the template @@ -627,7 +624,7 @@ void SchurEliminator:: CellInfo* cell_info = lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride); if (cell_info != nullptr) { - std::lock_guard l(cell_info->m); + auto lock = MakeConditionalLock(num_threads_, cell_info->m); // This multiply currently ignores the fact that this is a // symmetric outer product. 
// clang-format off @@ -648,7 +645,7 @@ void SchurEliminator:: lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); if (cell_info != nullptr) { const int block2_size = bs->cols[row.cells[j].block_id].size; - std::lock_guard l(cell_info->m); + auto lock = MakeConditionalLock(num_threads_, cell_info->m); // clang-format off MatrixTransposeMatrixMultiply ( @@ -682,7 +679,7 @@ void SchurEliminator:: CellInfo* cell_info = lhs->GetCell(block1, block1, &r, &c, &row_stride, &col_stride); if (cell_info != nullptr) { - std::lock_guard l(cell_info->m); + auto lock = MakeConditionalLock(num_threads_, cell_info->m); // block += b1.transpose() * b1; // clang-format off MatrixTransposeMatrixMultiply @@ -703,7 +700,7 @@ void SchurEliminator:: lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); if (cell_info != nullptr) { // block += b1.transpose() * b2; - std::lock_guard l(cell_info->m); + auto lock = MakeConditionalLock(num_threads_, cell_info->m); // clang-format off MatrixTransposeMatrixMultiply ( @@ -716,7 +713,6 @@ void SchurEliminator:: } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ diff --git a/extern/ceres/internal/ceres/schur_eliminator_template.py b/extern/ceres/internal/ceres/schur_eliminator_template.py new file mode 100644 index 00000000000..99e6f3eac37 --- /dev/null +++ b/extern/ceres/internal/ceres/schur_eliminator_template.py @@ -0,0 +1,150 @@ +# Ceres Solver - A fast non-linear least squares minimizer +# Copyright 2023 Google Inc. All rights reserved. +# http://ceres-solver.org/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of Google Inc. nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: sameeragarwal@google.com (Sameer Agarwal) +# +# Script for explicitly generating template specialization of the +# SchurEliminator class. It is a rather large class +# and the number of explicit instantiations is also large. Explicitly +# generating these instantiations in separate .cc files breaks the +# compilation into separate compilation unit rather than one large cc +# file which takes 2+GB of RAM to compile. +# +# This script creates two sets of files. +# +# 1. schur_eliminator_x_x_x.cc +# where, the x indicates the template parameters and +# +# 2. schur_eliminator.cc +# +# that contains a factory function for instantiating these classes +# based on runtime parameters. +# +# The list of tuples, specializations indicates the set of +# specializations that is generated. 
+ +# Set of template specializations to generate + +HEADER = """// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2023 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Template specialization of SchurEliminator. +// +// ======================================== +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. 
DO NOT EDIT. +// THIS FILE IS AUTOGENERATED. DO NOT EDIT. +//========================================= +// +// This file is generated using generate_template_specializations.py. +""" + +DYNAMIC_FILE = """ +#include "ceres/schur_eliminator_impl.h" + +namespace ceres::internal { + +template class SchurEliminator<%s, %s, %s>; + +} // namespace ceres::internal +""" + +SPECIALIZATION_FILE = """ +// This include must come before any #ifndef check on Ceres compile options. +#include "ceres/internal/config.h" + +#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION + +#include "ceres/schur_eliminator_impl.h" + +namespace ceres::internal { + +template class SchurEliminator<%s, %s, %s>; + +} // namespace ceres::internal + +#endif // CERES_RESTRICT_SCHUR_SPECIALIZATION +""" + +FACTORY_FILE_HEADER = """ +#include + +#include "ceres/linear_solver.h" +#include "ceres/schur_eliminator.h" + +namespace ceres::internal { + +SchurEliminatorBase::~SchurEliminatorBase() = default; + +std::unique_ptr SchurEliminatorBase::Create( + const LinearSolver::Options& options) { +#ifndef CERES_RESTRICT_SCHUR_SPECIALIZATION +""" + +FACTORY = """ return std::make_unique>(options);""" + +FACTORY_FOOTER = """ +#endif + VLOG(1) << "Template specializations not found for <" + << options.row_block_size << "," << options.e_block_size << "," + << options.f_block_size << ">"; + return std::make_unique>(options); +} + +} // namespace ceres::internal +""" diff --git a/extern/ceres/internal/ceres/schur_jacobi_preconditioner.cc b/extern/ceres/internal/ceres/schur_jacobi_preconditioner.cc index 3ecec728262..fbe258d38d1 100644 --- a/extern/ceres/internal/ceres/schur_jacobi_preconditioner.cc +++ b/extern/ceres/internal/ceres/schur_jacobi_preconditioner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ #include "ceres/schur_jacobi_preconditioner.h" +#include #include #include @@ -39,8 +40,7 @@ #include "ceres/schur_eliminator.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { SchurJacobiPreconditioner::SchurJacobiPreconditioner( const CompressedRowBlockStructure& bs, Preconditioner::Options options) @@ -52,12 +52,16 @@ SchurJacobiPreconditioner::SchurJacobiPreconditioner( << "SCHUR_JACOBI preconditioner."; CHECK(options_.context != nullptr); - std::vector blocks(num_blocks); + std::vector blocks(num_blocks); + int position = 0; for (int i = 0; i < num_blocks; ++i) { - blocks[i] = bs.cols[i + options_.elimination_groups[0]].size; + blocks[i] = + Block(bs.cols[i + options_.elimination_groups[0]].size, position); + position += blocks[i].size; } - m_ = std::make_unique(blocks); + m_ = std::make_unique( + blocks, options_.context, options_.num_threads); InitEliminator(bs); } @@ -92,12 +96,11 @@ bool SchurJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, return true; } -void SchurJacobiPreconditioner::RightMultiply(const double* x, - double* y) const { - m_->RightMultiply(x, y); +void SchurJacobiPreconditioner::RightMultiplyAndAccumulate(const double* x, + double* y) const { + m_->RightMultiplyAndAccumulate(x, y); } int SchurJacobiPreconditioner::num_rows() const { return m_->num_rows(); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/schur_jacobi_preconditioner.h b/extern/ceres/internal/ceres/schur_jacobi_preconditioner.h index a43bc3388a1..b540bc078cc 100644 --- a/extern/ceres/internal/ceres/schur_jacobi_preconditioner.h +++ b/extern/ceres/internal/ceres/schur_jacobi_preconditioner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,7 @@ #include "ceres/internal/export.h" #include "ceres/preconditioner.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockRandomAccessDiagonalMatrix; class BlockSparseMatrix; @@ -72,8 +71,10 @@ class SchurEliminatorBase; // SchurJacobiPreconditioner preconditioner( // *A.block_structure(), options); // preconditioner.Update(A, nullptr); -// preconditioner.RightMultiply(x, y); +// preconditioner.RightMultiplyAndAccumulate(x, y); // +// TODO(https://github.com/ceres-solver/ceres-solver/issues/935): +// SchurJacobiPreconditioner::RightMultiply will benefit from multithreading class CERES_NO_EXPORT SchurJacobiPreconditioner : public BlockSparseMatrixPreconditioner { public: @@ -91,7 +92,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner ~SchurJacobiPreconditioner() override; // Preconditioner interface. - void RightMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; int num_rows() const final; private: @@ -104,8 +105,7 @@ class CERES_NO_EXPORT SchurJacobiPreconditioner std::unique_ptr m_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/schur_templates.cc b/extern/ceres/internal/ceres/schur_templates.cc index bcf0d14902d..95df671357d 100644 --- a/extern/ceres/internal/ceres/schur_templates.cc +++ b/extern/ceres/internal/ceres/schur_templates.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/schur_templates.h b/extern/ceres/internal/ceres/schur_templates.h index cacee20c412..218fb51105e 100644 --- a/extern/ceres/internal/ceres/schur_templates.h +++ b/extern/ceres/internal/ceres/schur_templates.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,14 +36,12 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { CERES_NO_EXPORT void GetBestSchurTemplateSpecialization(int* row_block_size, int* e_block_size, int* f_block_size); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SCHUR_TEMPLATES_H_ diff --git a/extern/ceres/internal/ceres/scoped_thread_token.h b/extern/ceres/internal/ceres/scoped_thread_token.h index 533bfd5a387..76da95ba54a 100644 --- a/extern/ceres/internal/ceres/scoped_thread_token.h +++ b/extern/ceres/internal/ceres/scoped_thread_token.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,7 @@ #include "ceres/internal/export.h" #include "ceres/thread_token_provider.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Helper class for ThreadTokenProvider. This object acquires a token in its // constructor and puts that token back with destruction. 
@@ -55,7 +54,6 @@ class CERES_NO_EXPORT ScopedThreadToken { int token_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SCOPED_THREAD_TOKEN_H_ diff --git a/extern/ceres/internal/ceres/scratch_evaluate_preparer.cc b/extern/ceres/internal/ceres/scratch_evaluate_preparer.cc index 0a1b0f3e7d1..86cad9307af 100644 --- a/extern/ceres/internal/ceres/scratch_evaluate_preparer.cc +++ b/extern/ceres/internal/ceres/scratch_evaluate_preparer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,23 +36,22 @@ #include "ceres/program.h" #include "ceres/residual_block.h" -namespace ceres { -namespace internal { +namespace ceres::internal { std::unique_ptr ScratchEvaluatePreparer::Create( - const Program& program, int num_threads) { + const Program& program, unsigned num_threads) { auto preparers = std::make_unique(num_threads); int max_derivatives_per_residual_block = program.MaxDerivativesPerResidualBlock(); - for (int i = 0; i < num_threads; i++) { + for (unsigned i = 0; i < num_threads; i++) { preparers[i].Init(max_derivatives_per_residual_block); } return preparers; } void ScratchEvaluatePreparer::Init(int max_derivatives_per_residual_block) { - jacobian_scratch_ = - std::make_unique(max_derivatives_per_residual_block); + jacobian_scratch_ = std::make_unique( + static_cast(max_derivatives_per_residual_block)); } // Point the jacobian blocks into the scratch area of this evaluate preparer. 
@@ -75,5 +74,4 @@ void ScratchEvaluatePreparer::Prepare(const ResidualBlock* residual_block, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/scratch_evaluate_preparer.h b/extern/ceres/internal/ceres/scratch_evaluate_preparer.h index 3f4e7df8de0..a7fd8a8db91 100644 --- a/extern/ceres/internal/ceres/scratch_evaluate_preparer.h +++ b/extern/ceres/internal/ceres/scratch_evaluate_preparer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class Program; class ResidualBlock; @@ -51,7 +50,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer { public: // Create num_threads ScratchEvaluatePreparers. static std::unique_ptr Create( - const Program& program, int num_threads); + const Program& program, unsigned num_threads); // EvaluatePreparer interface void Init(int max_derivatives_per_residual_block); @@ -66,8 +65,7 @@ class CERES_NO_EXPORT ScratchEvaluatePreparer { std::unique_ptr jacobian_scratch_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/single_linkage_clustering.cc b/extern/ceres/internal/ceres/single_linkage_clustering.cc index 0e7813140da..06e76dfd5b1 100644 --- a/extern/ceres/internal/ceres/single_linkage_clustering.cc +++ b/extern/ceres/internal/ceres/single_linkage_clustering.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,7 @@ #include "ceres/graph.h" #include "ceres/graph_algorithms.h" -namespace ceres { -namespace internal { +namespace ceres::internal { int ComputeSingleLinkageClustering( const SingleLinkageClusteringOptions& options, @@ -91,5 +90,4 @@ int ComputeSingleLinkageClustering( return num_clusters; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/single_linkage_clustering.h b/extern/ceres/internal/ceres/single_linkage_clustering.h index b4a7e077619..3f49540bdc2 100644 --- a/extern/ceres/internal/ceres/single_linkage_clustering.h +++ b/extern/ceres/internal/ceres/single_linkage_clustering.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct SingleLinkageClusteringOptions { // Graph edges with edge weight less than min_similarity are ignored @@ -61,8 +60,7 @@ CERES_NO_EXPORT int ComputeSingleLinkageClustering( const WeightedGraph& graph, std::unordered_map* membership); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/small_blas.h b/extern/ceres/internal/ceres/small_blas.h index 1cf41a5f1c2..fb8d7fa5817 100644 --- a/extern/ceres/internal/ceres/small_blas.h +++ b/extern/ceres/internal/ceres/small_blas.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. 
All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "glog/logging.h" #include "small_blas_generic.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // The following three macros are used to share code and reduce // template junk across the various GEMM variants. @@ -561,7 +560,6 @@ inline void MatrixTransposeVectorMultiply(const double* A, #undef CERES_GEMM_STORE_SINGLE #undef CERES_GEMM_STORE_PAIR -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SMALL_BLAS_H_ diff --git a/extern/ceres/internal/ceres/small_blas_generic.h b/extern/ceres/internal/ceres/small_blas_generic.h index f5aa909a8a3..93ee338813a 100644 --- a/extern/ceres/internal/ceres/small_blas_generic.h +++ b/extern/ceres/internal/ceres/small_blas_generic.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,38 +35,35 @@ #ifndef CERES_INTERNAL_SMALL_BLAS_GENERIC_H_ #define CERES_INTERNAL_SMALL_BLAS_GENERIC_H_ -namespace ceres { -namespace internal { +namespace ceres::internal { // The following macros are used to share code -#define CERES_GEMM_OPT_NAIVE_HEADER \ - double c0 = 0.0; \ - double c1 = 0.0; \ - double c2 = 0.0; \ - double c3 = 0.0; \ - const double* pa = a; \ - const double* pb = b; \ - const int span = 4; \ - int col_r = col_a & (span - 1); \ +#define CERES_GEMM_OPT_NAIVE_HEADER \ + double cvec4[4] = {0.0, 0.0, 0.0, 0.0}; \ + const double* pa = a; \ + const double* pb = b; \ + const int span = 4; \ + int col_r = col_a & (span - 1); \ int col_m = col_a - col_r; #define CERES_GEMM_OPT_STORE_MAT1X4 \ if (kOperation > 0) { \ - *c++ += c0; \ - *c++ += c1; \ - *c++ += c2; \ - *c++ += c3; \ + c[0] += cvec4[0]; \ + c[1] += cvec4[1]; \ + c[2] += cvec4[2]; \ + c[3] += cvec4[3]; \ } else if (kOperation < 0) { \ - *c++ -= c0; \ - *c++ -= c1; \ - *c++ -= c2; \ - *c++ -= c3; \ + c[0] -= cvec4[0]; \ + c[1] -= cvec4[1]; \ + c[2] -= cvec4[2]; \ + c[3] -= cvec4[3]; \ } else { \ - *c++ = c0; \ - *c++ = c1; \ - *c++ = c2; \ - *c++ = c3; \ - } + c[0] = cvec4[0]; \ + c[1] = cvec4[1]; \ + c[2] = cvec4[2]; \ + c[3] = cvec4[3]; \ + } \ + c += 4; // Matrix-Matrix Multiplication // Figure out 1x4 of Matrix C in one batch @@ -100,10 +97,10 @@ static inline void MMM_mat1x4(const int col_a, #define CERES_GEMM_OPT_MMM_MAT1X4_MUL \ av = pa[k]; \ pb = b + bi; \ - c0 += av * pb[0]; \ - c1 += av * pb[1]; \ - c2 += av * pb[2]; \ - c3 += av * pb[3]; \ + cvec4[0] += av * pb[0]; \ + cvec4[1] += av * pb[1]; \ + cvec4[2] += av * pb[2]; \ + cvec4[3] += av * pb[3]; \ pb += 4; \ bi += col_stride_b; \ k++; @@ -168,10 +165,10 @@ static inline void MTM_mat1x4(const int col_a, #define CERES_GEMM_OPT_MTM_MAT1X4_MUL \ av = pa[ai]; \ pb = b + bi; \ - c0 += av * pb[0]; \ - c1 += av * pb[1]; \ - c2 += 
av * pb[2]; \ - c3 += av * pb[3]; \ + cvec4[0] += av * pb[0]; \ + cvec4[1] += av * pb[1]; \ + cvec4[2] += av * pb[2]; \ + cvec4[3] += av * pb[3]; \ pb += 4; \ ai += col_stride_a; \ bi += col_stride_b; @@ -221,13 +218,13 @@ static inline void MVM_mat4x1(const int col_a, double bv = 0.0; // clang-format off -#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \ - bv = *pb; \ - c0 += *(pa ) * bv; \ - c1 += *(pa + col_stride_a ) * bv; \ - c2 += *(pa + col_stride_a * 2) * bv; \ - c3 += *(pa + col_stride_a * 3) * bv; \ - pa++; \ +#define CERES_GEMM_OPT_MVM_MAT4X1_MUL \ + bv = *pb; \ + cvec4[0] += *(pa ) * bv; \ + cvec4[1] += *(pa + col_stride_a ) * bv; \ + cvec4[2] += *(pa + col_stride_a * 2) * bv; \ + cvec4[3] += *(pa + col_stride_a * 3) * bv; \ + pa++; \ pb++; // clang-format on @@ -285,16 +282,14 @@ static inline void MTV_mat4x1(const int col_a, CERES_GEMM_OPT_NAIVE_HEADER double bv = 0.0; - // clang-format off #define CERES_GEMM_OPT_MTV_MAT4X1_MUL \ bv = *pb; \ - c0 += *(pa ) * bv; \ - c1 += *(pa + 1) * bv; \ - c2 += *(pa + 2) * bv; \ - c3 += *(pa + 3) * bv; \ + cvec4[0] += pa[0] * bv; \ + cvec4[1] += pa[1] * bv; \ + cvec4[2] += pa[2] * bv; \ + cvec4[3] += pa[3] * bv; \ pa += col_stride_a; \ pb++; - // clang-format on for (int k = 0; k < col_m; k += span) { CERES_GEMM_OPT_MTV_MAT4X1_MUL @@ -315,7 +310,6 @@ static inline void MTV_mat4x1(const int col_a, #undef CERES_GEMM_OPT_NAIVE_HEADER #undef CERES_GEMM_OPT_STORE_MAT1X4 -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SMALL_BLAS_GENERIC_H_ diff --git a/extern/ceres/internal/ceres/solver.cc b/extern/ceres/internal/ceres/solver.cc index 150c5550fc9..b72661cb96e 100644 --- a/extern/ceres/internal/ceres/solver.cc +++ b/extern/ceres/internal/ceres/solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,14 +32,17 @@ #include "ceres/solver.h" #include +#include #include #include // NOLINT +#include #include #include "ceres/casts.h" #include "ceres/context.h" #include "ceres/context_impl.h" #include "ceres/detect_structure.h" +#include "ceres/eigensparse.h" #include "ceres/gradient_checking_cost_function.h" #include "ceres/internal/export.h" #include "ceres/parameter_block_ordering.h" @@ -50,6 +53,7 @@ #include "ceres/schur_templates.h" #include "ceres/solver_utils.h" #include "ceres/stringprintf.h" +#include "ceres/suitesparse.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -58,32 +62,29 @@ namespace { using internal::StringAppendF; using internal::StringPrintf; -using std::map; -using std::string; -using std::vector; -#define OPTION_OP(x, y, OP) \ - if (!(options.x OP y)) { \ - std::stringstream ss; \ - ss << "Invalid configuration. "; \ - ss << string("Solver::Options::" #x " = ") << options.x << ". "; \ - ss << "Violated constraint: "; \ - ss << string("Solver::Options::" #x " " #OP " " #y); \ - *error = ss.str(); \ - return false; \ +#define OPTION_OP(x, y, OP) \ + if (!(options.x OP y)) { \ + std::stringstream ss; \ + ss << "Invalid configuration. "; \ + ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \ + ss << "Violated constraint: "; \ + ss << std::string("Solver::Options::" #x " " #OP " " #y); \ + *error = ss.str(); \ + return false; \ } -#define OPTION_OP_OPTION(x, y, OP) \ - if (!(options.x OP options.y)) { \ - std::stringstream ss; \ - ss << "Invalid configuration. "; \ - ss << string("Solver::Options::" #x " = ") << options.x << ". "; \ - ss << string("Solver::Options::" #y " = ") << options.y << ". 
"; \ - ss << "Violated constraint: "; \ - ss << string("Solver::Options::" #x); \ - ss << string(#OP " Solver::Options::" #y "."); \ - *error = ss.str(); \ - return false; \ +#define OPTION_OP_OPTION(x, y, OP) \ + if (!(options.x OP options.y)) { \ + std::stringstream ss; \ + ss << "Invalid configuration. "; \ + ss << std::string("Solver::Options::" #x " = ") << options.x << ". "; \ + ss << std::string("Solver::Options::" #y " = ") << options.y << ". "; \ + ss << "Violated constraint: "; \ + ss << std::string("Solver::Options::" #x); \ + ss << std::string(#OP " Solver::Options::" #y "."); \ + *error = ss.str(); \ + return false; \ } #define OPTION_GE(x, y) OPTION_OP(x, y, >=); @@ -93,7 +94,7 @@ using std::vector; #define OPTION_LE_OPTION(x, y) OPTION_OP_OPTION(x, y, <=) #define OPTION_LT_OPTION(x, y) OPTION_OP_OPTION(x, y, <) -bool CommonOptionsAreValid(const Solver::Options& options, string* error) { +bool CommonOptionsAreValid(const Solver::Options& options, std::string* error) { OPTION_GE(max_num_iterations, 0); OPTION_GE(max_solver_time_in_seconds, 0.0); OPTION_GE(function_tolerance, 0.0); @@ -107,7 +108,286 @@ bool CommonOptionsAreValid(const Solver::Options& options, string* error) { return true; } -bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) { +bool IsNestedDissectionAvailable(SparseLinearAlgebraLibraryType type) { + return (((type == SUITE_SPARSE) && + internal::SuiteSparse::IsNestedDissectionAvailable()) || + (type == ACCELERATE_SPARSE) || + ((type == EIGEN_SPARSE) && + internal::EigenSparse::IsNestedDissectionAvailable())); +} + +bool IsIterativeSolver(LinearSolverType type) { + return (type == CGNR || type == ITERATIVE_SCHUR); +} + +bool OptionsAreValidForDenseSolver(const Solver::Options& options, + std::string* error) { + const char* library_name = DenseLinearAlgebraLibraryTypeToString( + options.dense_linear_algebra_library_type); + const char* solver_name = + LinearSolverTypeToString(options.linear_solver_type); + 
constexpr char kFormat[] = + "Can't use %s with dense_linear_algebra_library_type = %s " + "because support not enabled when Ceres was built."; + + if (!IsDenseLinearAlgebraLibraryTypeAvailable( + options.dense_linear_algebra_library_type)) { + *error = StringPrintf(kFormat, solver_name, library_name); + return false; + } + return true; +} + +bool OptionsAreValidForSparseCholeskyBasedSolver(const Solver::Options& options, + std::string* error) { + const char* library_name = SparseLinearAlgebraLibraryTypeToString( + options.sparse_linear_algebra_library_type); + // Sparse factorization based solvers and some preconditioners require a + // sparse Cholesky factorization. + const char* solver_name = + IsIterativeSolver(options.linear_solver_type) + ? PreconditionerTypeToString(options.preconditioner_type) + : LinearSolverTypeToString(options.linear_solver_type); + + constexpr char kNoSparseFormat[] = + "Can't use %s with sparse_linear_algebra_library_type = %s."; + constexpr char kNoLibraryFormat[] = + "Can't use %s sparse_linear_algebra_library_type = %s, because support " + "was not enabled when Ceres Solver was built."; + constexpr char kNoNesdisFormat[] = + "NESDIS is not available with sparse_linear_algebra_library_type = %s."; + constexpr char kMixedFormat[] = + "use_mixed_precision_solves with %s is not supported with " + "sparse_linear_algebra_library_type = %s"; + constexpr char kDynamicSparsityFormat[] = + "dynamic sparsity is not supported with " + "sparse_linear_algebra_library_type = %s"; + + if (options.sparse_linear_algebra_library_type == NO_SPARSE) { + *error = StringPrintf(kNoSparseFormat, solver_name, library_name); + return false; + } + + if (!IsSparseLinearAlgebraLibraryTypeAvailable( + options.sparse_linear_algebra_library_type)) { + *error = StringPrintf(kNoLibraryFormat, solver_name, library_name); + return false; + } + + if (options.linear_solver_ordering_type == ceres::NESDIS && + !IsNestedDissectionAvailable( + 
options.sparse_linear_algebra_library_type)) { + *error = StringPrintf(kNoNesdisFormat, library_name); + return false; + } + + if (options.use_mixed_precision_solves && + options.sparse_linear_algebra_library_type == SUITE_SPARSE) { + *error = StringPrintf(kMixedFormat, solver_name, library_name); + return false; + } + + if (options.dynamic_sparsity && + options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) { + *error = StringPrintf(kDynamicSparsityFormat, library_name); + return false; + } + + return true; +} + +bool OptionsAreValidForDenseNormalCholesky(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, DENSE_NORMAL_CHOLESKY); + return OptionsAreValidForDenseSolver(options, error); +} + +bool OptionsAreValidForDenseQr(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, DENSE_QR); + + if (!OptionsAreValidForDenseSolver(options, error)) { + return false; + } + + if (options.use_mixed_precision_solves) { + *error = "Can't use use_mixed_precision_solves with DENSE_QR."; + return false; + } + + return true; +} + +bool OptionsAreValidForSparseNormalCholesky(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY); + return OptionsAreValidForSparseCholeskyBasedSolver(options, error); +} + +bool OptionsAreValidForDenseSchur(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, DENSE_SCHUR); + + if (options.dynamic_sparsity) { + *error = "dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY"; + return false; + } + + if (!OptionsAreValidForDenseSolver(options, error)) { + return false; + } + + return true; +} + +bool OptionsAreValidForSparseSchur(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, SPARSE_SCHUR); + if (options.dynamic_sparsity) { + *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY."; + 
return false; + } + return OptionsAreValidForSparseCholeskyBasedSolver(options, error); +} + +bool OptionsAreValidForIterativeSchur(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, ITERATIVE_SCHUR); + if (options.dynamic_sparsity) { + *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY."; + return false; + } + + if (options.use_explicit_schur_complement) { + if (options.preconditioner_type != SCHUR_JACOBI) { + *error = + "use_explicit_schur_complement only supports " + "SCHUR_JACOBI as the preconditioner."; + return false; + } + if (options.use_spse_initialization) { + *error = + "use_explicit_schur_complement does not support " + "use_spse_initialization."; + return false; + } + } + + if (options.use_spse_initialization || + options.preconditioner_type == SCHUR_POWER_SERIES_EXPANSION) { + OPTION_GE(max_num_spse_iterations, 1) + OPTION_GE(spse_tolerance, 0.0) + } + + if (options.use_mixed_precision_solves) { + *error = "Can't use use_mixed_precision_solves with ITERATIVE_SCHUR"; + return false; + } + + if (options.dynamic_sparsity) { + *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY."; + return false; + } + + if (options.preconditioner_type == SUBSET) { + *error = "Can't use SUBSET preconditioner with ITERATIVE_SCHUR"; + return false; + } + + // CLUSTER_JACOBI and CLUSTER_TRIDIAGONAL require sparse Cholesky + // factorization. 
+ if (options.preconditioner_type == CLUSTER_JACOBI || + options.preconditioner_type == CLUSTER_TRIDIAGONAL) { + return OptionsAreValidForSparseCholeskyBasedSolver(options, error); + } + + return true; +} + +bool OptionsAreValidForCgnr(const Solver::Options& options, + std::string* error) { + CHECK_EQ(options.linear_solver_type, CGNR); + + if (options.preconditioner_type != IDENTITY && + options.preconditioner_type != JACOBI && + options.preconditioner_type != SUBSET) { + *error = + StringPrintf("Can't use CGNR with preconditioner_type = %s.", + PreconditionerTypeToString(options.preconditioner_type)); + return false; + } + + if (options.use_mixed_precision_solves) { + *error = "use_mixed_precision_solves cannot be used with CGNR"; + return false; + } + + if (options.dynamic_sparsity) { + *error = "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY."; + return false; + } + + if (options.preconditioner_type == SUBSET) { + if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) { + *error = + "Can't use CGNR with preconditioner_type = SUBSET when " + "sparse_linear_algebra_library_type = CUDA_SPARSE."; + return false; + } + + if (options.residual_blocks_for_subset_preconditioner.empty()) { + *error = + "When using SUBSET preconditioner, " + "residual_blocks_for_subset_preconditioner cannot be empty"; + return false; + } + + // SUBSET preconditioner requires sparse Cholesky factorization. + if (!OptionsAreValidForSparseCholeskyBasedSolver(options, error)) { + return false; + } + } + + // Check options for CGNR with CUDA_SPARSE. 
+ if (options.sparse_linear_algebra_library_type == CUDA_SPARSE) { + if (!IsSparseLinearAlgebraLibraryTypeAvailable(CUDA_SPARSE)) { + *error = + "Can't use CGNR with sparse_linear_algebra_library_type = " + "CUDA_SPARSE because support was not enabled when Ceres was built."; + return false; + } + } + return true; +} + +bool OptionsAreValidForLinearSolver(const Solver::Options& options, + std::string* error) { + switch (options.linear_solver_type) { + case DENSE_NORMAL_CHOLESKY: + return OptionsAreValidForDenseNormalCholesky(options, error); + case DENSE_QR: + return OptionsAreValidForDenseQr(options, error); + case SPARSE_NORMAL_CHOLESKY: + return OptionsAreValidForSparseNormalCholesky(options, error); + case DENSE_SCHUR: + return OptionsAreValidForDenseSchur(options, error); + case SPARSE_SCHUR: + return OptionsAreValidForSparseSchur(options, error); + case ITERATIVE_SCHUR: + return OptionsAreValidForIterativeSchur(options, error); + case CGNR: + return OptionsAreValidForCgnr(options, error); + default: + LOG(FATAL) << "Congratulations you have found a bug. Please report " + "this to the " + "Ceres Solver developers. 
Unknown linear solver type: " + << LinearSolverTypeToString(options.linear_solver_type); + } + return false; +} + +bool TrustRegionOptionsAreValid(const Solver::Options& options, + std::string* error) { OPTION_GT(initial_trust_region_radius, 0.0); OPTION_GT(min_trust_region_radius, 0.0); OPTION_GT(max_trust_region_radius, 0.0); @@ -121,7 +401,7 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) { OPTION_GE(max_num_consecutive_invalid_steps, 0); OPTION_GT(eta, 0.0); OPTION_GE(min_linear_solver_iterations, 0); - OPTION_GE(max_linear_solver_iterations, 1); + OPTION_GE(max_linear_solver_iterations, 0); OPTION_LE_OPTION(min_linear_solver_iterations, max_linear_solver_iterations); if (options.use_inner_iterations) { @@ -132,80 +412,19 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) { OPTION_GT(max_consecutive_nonmonotonic_steps, 0); } - if (options.linear_solver_type == ITERATIVE_SCHUR && - options.use_explicit_schur_complement && - options.preconditioner_type != SCHUR_JACOBI) { + if ((options.trust_region_strategy_type == DOGLEG) && + IsIterativeSolver(options.linear_solver_type)) { *error = - "use_explicit_schur_complement only supports " - "SCHUR_JACOBI as the preconditioner."; + "DOGLEG only supports exact factorization based linear " + "solvers. 
If you want to use an iterative solver please " + "use LEVENBERG_MARQUARDT as the trust_region_strategy_type"; return false; } - if (!IsDenseLinearAlgebraLibraryTypeAvailable( - options.dense_linear_algebra_library_type) && - (options.linear_solver_type == DENSE_NORMAL_CHOLESKY || - options.linear_solver_type == DENSE_QR || - options.linear_solver_type == DENSE_SCHUR)) { - *error = StringPrintf( - "Can't use %s with " - "Solver::Options::dense_linear_algebra_library_type = %s " - "because %s was not enabled when Ceres was built.", - LinearSolverTypeToString(options.linear_solver_type), - DenseLinearAlgebraLibraryTypeToString( - options.dense_linear_algebra_library_type), - DenseLinearAlgebraLibraryTypeToString( - options.dense_linear_algebra_library_type)); + if (!OptionsAreValidForLinearSolver(options, error)) { return false; } - { - const char* sparse_linear_algebra_library_name = - SparseLinearAlgebraLibraryTypeToString( - options.sparse_linear_algebra_library_type); - const char* name = nullptr; - if (options.linear_solver_type == SPARSE_NORMAL_CHOLESKY || - options.linear_solver_type == SPARSE_SCHUR) { - name = LinearSolverTypeToString(options.linear_solver_type); - } else if ((options.linear_solver_type == ITERATIVE_SCHUR && - (options.preconditioner_type == CLUSTER_JACOBI || - options.preconditioner_type == CLUSTER_TRIDIAGONAL)) || - (options.linear_solver_type == CGNR && - options.preconditioner_type == SUBSET)) { - name = PreconditionerTypeToString(options.preconditioner_type); - } - - if (name) { - if (options.sparse_linear_algebra_library_type == NO_SPARSE) { - *error = StringPrintf( - "Can't use %s with " - "Solver::Options::sparse_linear_algebra_library_type = %s.", - name, - sparse_linear_algebra_library_name); - return false; - } else if (!IsSparseLinearAlgebraLibraryTypeAvailable( - options.sparse_linear_algebra_library_type)) { - *error = StringPrintf( - "Can't use %s with " - "Solver::Options::sparse_linear_algebra_library_type = %s, " - "because 
support was not enabled when Ceres Solver was built.", - name, - sparse_linear_algebra_library_name); - return false; - } - } - } - - if (options.trust_region_strategy_type == DOGLEG) { - if (options.linear_solver_type == ITERATIVE_SCHUR || - options.linear_solver_type == CGNR) { - *error = - "DOGLEG only supports exact factorization based linear " - "solvers. If you want to use an iterative solver please " - "use LEVENBERG_MARQUARDT as the trust_region_strategy_type"; - return false; - } - } - if (!options.trust_region_minimizer_iterations_to_dump.empty() && options.trust_region_problem_dump_format_type != CONSOLE && options.trust_region_problem_dump_directory.empty()) { @@ -213,33 +432,11 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) { return false; } - if (options.dynamic_sparsity) { - if (options.linear_solver_type != SPARSE_NORMAL_CHOLESKY) { - *error = - "Dynamic sparsity is only supported with SPARSE_NORMAL_CHOLESKY."; - return false; - } - if (options.sparse_linear_algebra_library_type == ACCELERATE_SPARSE) { - *error = - "ACCELERATE_SPARSE is not currently supported with dynamic sparsity."; - return false; - } - } - - if (options.linear_solver_type == CGNR && - options.preconditioner_type == SUBSET && - options.residual_blocks_for_subset_preconditioner.empty()) { - *error = - "When using SUBSET preconditioner, " - "Solver::Options::residual_blocks_for_subset_preconditioner cannot be " - "empty"; - return false; - } - return true; } -bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) { +bool LineSearchOptionsAreValid(const Solver::Options& options, + std::string* error) { OPTION_GT(max_lbfgs_rank, 0); OPTION_GT(min_line_search_step_size, 0.0); OPTION_GT(max_line_search_step_contraction, 0.0); @@ -259,9 +456,10 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) { options.line_search_direction_type == ceres::LBFGS) && options.line_search_type != ceres::WOLFE) { *error 
= - string("Invalid configuration: Solver::Options::line_search_type = ") + - string(LineSearchTypeToString(options.line_search_type)) + - string( + std::string( + "Invalid configuration: Solver::Options::line_search_type = ") + + std::string(LineSearchTypeToString(options.line_search_type)) + + std::string( ". When using (L)BFGS, " "Solver::Options::line_search_type must be set to WOLFE."); return false; @@ -269,8 +467,8 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) { // Warn user if they have requested BISECTION interpolation, but constraints // on max/min step size change during line search prevent bisection scaling - // from occurring. Warn only, as this is likely a user mistake, but one which - // does not prevent us from continuing. + // from occurring. Warn only, as this is likely a user mistake, but one + // which does not prevent us from continuing. if (options.line_search_interpolation_type == ceres::BISECTION && (options.max_line_search_step_contraction > 0.5 || options.min_line_search_step_contraction < 0.5)) { @@ -295,7 +493,7 @@ bool LineSearchOptionsAreValid(const Solver::Options& options, string* error) { #undef OPTION_LE_OPTION #undef OPTION_LT_OPTION -void StringifyOrdering(const vector& ordering, string* report) { +void StringifyOrdering(const std::vector& ordering, std::string* report) { if (ordering.empty()) { internal::StringAppendF(report, "AUTOMATIC"); return; @@ -339,7 +537,7 @@ void PreSolveSummarize(const Solver::Options& options, &(summary->inner_iteration_ordering_given)); // clang-format off - summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; // NOLINT + summary->dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; summary->dogleg_type = options.dogleg_type; summary->inner_iteration_time_in_seconds = 0.0; summary->num_line_search_steps = 0; @@ -348,18 +546,19 @@ void PreSolveSummarize(const Solver::Options& options, 
summary->line_search_polynomial_minimization_time_in_seconds = 0.0; summary->line_search_total_time_in_seconds = 0.0; summary->inner_iterations_given = options.use_inner_iterations; - summary->line_search_direction_type = options.line_search_direction_type; // NOLINT - summary->line_search_interpolation_type = options.line_search_interpolation_type; // NOLINT + summary->line_search_direction_type = options.line_search_direction_type; + summary->line_search_interpolation_type = options.line_search_interpolation_type; summary->line_search_type = options.line_search_type; summary->linear_solver_type_given = options.linear_solver_type; summary->max_lbfgs_rank = options.max_lbfgs_rank; summary->minimizer_type = options.minimizer_type; - summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type; // NOLINT + summary->nonlinear_conjugate_gradient_type = options.nonlinear_conjugate_gradient_type; summary->num_threads_given = options.num_threads; summary->preconditioner_type_given = options.preconditioner_type; - summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; // NOLINT - summary->trust_region_strategy_type = options.trust_region_strategy_type; // NOLINT - summary->visibility_clustering_type = options.visibility_clustering_type; // NOLINT + summary->sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; + summary->linear_solver_ordering_type = options.linear_solver_ordering_type; + summary->trust_region_strategy_type = options.trust_region_strategy_type; + summary->visibility_clustering_type = options.visibility_clustering_type; // clang-format on } @@ -367,19 +566,23 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp, Solver::Summary* summary) { internal::OrderingToGroupSizes(pp.options.linear_solver_ordering.get(), &(summary->linear_solver_ordering_used)); + // TODO(sameeragarwal): Update the preprocessor to collapse the + // second and higher groups into one 
group when nested dissection is + // used. internal::OrderingToGroupSizes(pp.options.inner_iteration_ordering.get(), &(summary->inner_iteration_ordering_used)); // clang-format off - summary->inner_iterations_used = pp.inner_iteration_minimizer.get() != nullptr; // NOLINT + summary->inner_iterations_used = pp.inner_iteration_minimizer != nullptr; summary->linear_solver_type_used = pp.linear_solver_options.type; + summary->mixed_precision_solves_used = pp.options.use_mixed_precision_solves; summary->num_threads_used = pp.options.num_threads; summary->preconditioner_type_used = pp.options.preconditioner_type; // clang-format on internal::SetSummaryFinalCost(summary); - if (pp.reduced_program.get() != nullptr) { + if (pp.reduced_program != nullptr) { SummarizeReducedProgram(*pp.reduced_program, summary); } @@ -389,8 +592,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp, // case if the preprocessor failed, or if the reduced problem did // not contain any parameter blocks. Thus, only extract the // evaluator statistics if one exists. - if (pp.evaluator.get() != nullptr) { - const map& evaluator_statistics = + if (pp.evaluator != nullptr) { + const std::map& evaluator_statistics = pp.evaluator->Statistics(); { const CallStatistics& call_stats = FindWithDefault( @@ -411,8 +614,8 @@ void PostSolveSummarize(const internal::PreprocessedProblem& pp, // Again, like the evaluator, there may or may not be a linear // solver from which we can extract run time statistics. In // particular the line search solver does not use a linear solver. 
- if (pp.linear_solver.get() != nullptr) { - const map& linear_solver_statistics = + if (pp.linear_solver != nullptr) { + const std::map& linear_solver_statistics = pp.linear_solver->Statistics(); const CallStatistics& call_stats = FindWithDefault( linear_solver_statistics, "LinearSolver::Solve", CallStatistics()); @@ -468,9 +671,23 @@ std::string SchurStructureToString(const int row_block_size, return internal::StringPrintf("%s,%s,%s", row.c_str(), e.c_str(), f.c_str()); } +#ifndef CERES_NO_CUDA +bool IsCudaRequired(const Solver::Options& options) { + if (options.linear_solver_type == DENSE_NORMAL_CHOLESKY || + options.linear_solver_type == DENSE_SCHUR || + options.linear_solver_type == DENSE_QR) { + return (options.dense_linear_algebra_library_type == CUDA); + } + if (options.linear_solver_type == CGNR) { + return (options.sparse_linear_algebra_library_type == CUDA_SPARSE); + } + return false; +} +#endif + } // namespace -bool Solver::Options::IsValid(string* error) const { +bool Solver::Options::IsValid(std::string* error) const { if (!CommonOptionsAreValid(*this, error)) { return false; } @@ -509,10 +726,19 @@ void Solver::Solve(const Solver::Options& options, return; } - ProblemImpl* problem_impl = problem->impl_.get(); + ProblemImpl* problem_impl = problem->mutable_impl(); Program* program = problem_impl->mutable_program(); PreSolveSummarize(options, problem_impl, summary); +#ifndef CERES_NO_CUDA + if (IsCudaRequired(options)) { + if (!problem_impl->context()->InitCuda(&summary->message)) { + LOG(ERROR) << "Terminating: " << summary->message; + return; + } + } +#endif // CERES_NO_CUDA + // If gradient_checking is enabled, wrap all cost functions in a // gradient checker and install a callback that terminates if any gradient // error is detected. 
@@ -582,7 +808,7 @@ void Solver::Solve(const Solver::Options& options, } const double postprocessor_start_time = WallTimeInSeconds(); - problem_impl = problem->impl_.get(); + problem_impl = problem->mutable_impl(); program = problem_impl->mutable_program(); // On exit, ensure that the parameter blocks again point at the user // provided values and the parameter blocks are numbered according @@ -610,7 +836,7 @@ void Solve(const Solver::Options& options, solver.Solve(options, problem, summary); } -string Solver::Summary::BriefReport() const { +std::string Solver::Summary::BriefReport() const { return StringPrintf( "Ceres Solver Report: " "Iterations: %d, " @@ -623,10 +849,12 @@ string Solver::Summary::BriefReport() const { TerminationTypeToString(termination_type)); } -string Solver::Summary::FullReport() const { +std::string Solver::Summary::FullReport() const { using internal::VersionString; - string report = string("\nSolver Summary (v " + VersionString() + ")\n\n"); + // NOTE operator+ is not usable for concatenating a string and a string_view. + std::string report = + std::string{"\nSolver Summary (v "}.append(VersionString()) + ")\n\n"; StringAppendF(&report, "%45s %21s\n", "Original", "Reduced"); StringAppendF(&report, @@ -660,21 +888,13 @@ string Solver::Summary::FullReport() const { if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY || linear_solver_type_used == DENSE_SCHUR || linear_solver_type_used == DENSE_QR) { + const char* mixed_precision_suffix = + (mixed_precision_solves_used ? 
"(Mixed Precision)" : ""); StringAppendF(&report, - "\nDense linear algebra library %15s\n", + "\nDense linear algebra library %15s %s\n", DenseLinearAlgebraLibraryTypeToString( - dense_linear_algebra_library_type)); - } - - if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY || - linear_solver_type_used == SPARSE_SCHUR || - (linear_solver_type_used == ITERATIVE_SCHUR && - (preconditioner_type_used == CLUSTER_JACOBI || - preconditioner_type_used == CLUSTER_TRIDIAGONAL))) { - StringAppendF(&report, - "\nSparse linear algebra library %15s\n", - SparseLinearAlgebraLibraryTypeToString( - sparse_linear_algebra_library_type)); + dense_linear_algebra_library_type), + mixed_precision_suffix); } StringAppendF(&report, @@ -687,17 +907,50 @@ string Solver::Summary::FullReport() const { StringAppendF(&report, " (SUBSPACE)"); } } - StringAppendF(&report, "\n"); - StringAppendF(&report, "\n"); + const bool used_sparse_linear_algebra_library = + linear_solver_type_used == SPARSE_NORMAL_CHOLESKY || + linear_solver_type_used == SPARSE_SCHUR || + linear_solver_type_used == CGNR || + (linear_solver_type_used == ITERATIVE_SCHUR && + (preconditioner_type_used == CLUSTER_JACOBI || + preconditioner_type_used == CLUSTER_TRIDIAGONAL)); + + const bool linear_solver_ordering_required = + linear_solver_type_used == SPARSE_SCHUR || + (linear_solver_type_used == ITERATIVE_SCHUR && + (preconditioner_type_used == CLUSTER_JACOBI || + preconditioner_type_used == CLUSTER_TRIDIAGONAL)) || + (linear_solver_type_used == CGNR && preconditioner_type_used == SUBSET); + + if (used_sparse_linear_algebra_library) { + const char* mixed_precision_suffix = + (mixed_precision_solves_used ? 
"(Mixed Precision)" : ""); + if (linear_solver_ordering_required) { + StringAppendF( + &report, + "\nSparse linear algebra library %15s + %s %s\n", + SparseLinearAlgebraLibraryTypeToString( + sparse_linear_algebra_library_type), + LinearSolverOrderingTypeToString(linear_solver_ordering_type), + mixed_precision_suffix); + } else { + StringAppendF(&report, + "\nSparse linear algebra library %15s %s\n", + SparseLinearAlgebraLibraryTypeToString( + sparse_linear_algebra_library_type), + mixed_precision_suffix); + } + } + + StringAppendF(&report, "\n"); StringAppendF(&report, "%45s %21s\n", "Given", "Used"); StringAppendF(&report, "Linear solver %25s%25s\n", LinearSolverTypeToString(linear_solver_type_given), LinearSolverTypeToString(linear_solver_type_used)); - if (linear_solver_type_given == CGNR || - linear_solver_type_given == ITERATIVE_SCHUR) { + if (IsIterativeSolver(linear_solver_type_given)) { StringAppendF(&report, "Preconditioner %25s%25s\n", PreconditionerTypeToString(preconditioner_type_given), @@ -717,9 +970,9 @@ string Solver::Summary::FullReport() const { num_threads_given, num_threads_used); - string given; + std::string given; StringifyOrdering(linear_solver_ordering_given, &given); - string used; + std::string used; StringifyOrdering(linear_solver_ordering_used, &used); StringAppendF(&report, "Linear solver ordering %22s %24s\n", @@ -740,9 +993,9 @@ string Solver::Summary::FullReport() const { } if (inner_iterations_used) { - string given; + std::string given; StringifyOrdering(inner_iteration_ordering_given, &given); - string used; + std::string used; StringifyOrdering(inner_iteration_ordering_used, &used); StringAppendF(&report, "Inner iteration ordering %20s %24s\n", @@ -753,7 +1006,7 @@ string Solver::Summary::FullReport() const { // LINE_SEARCH HEADER StringAppendF(&report, "\nMinimizer %19s\n", "LINE_SEARCH"); - string line_search_direction_string; + std::string line_search_direction_string; if (line_search_direction_type == LBFGS) { 
line_search_direction_string = StringPrintf("LBFGS (%d)", max_lbfgs_rank); } else if (line_search_direction_type == NONLINEAR_CONJUGATE_GRADIENT) { @@ -768,7 +1021,7 @@ string Solver::Summary::FullReport() const { "Line search direction %19s\n", line_search_direction_string.c_str()); - const string line_search_type_string = StringPrintf( + const std::string line_search_type_string = StringPrintf( "%s %s", LineSearchInterpolationTypeToString(line_search_interpolation_type), LineSearchTypeToString(line_search_type)); diff --git a/extern/ceres/internal/ceres/solver_utils.cc b/extern/ceres/internal/ceres/solver_utils.cc index 22fa137055d..3356f3b485d 100644 --- a/extern/ceres/internal/ceres/solver_utils.cc +++ b/extern/ceres/internal/ceres/solver_utils.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,8 +30,6 @@ #include "ceres/solver_utils.h" -#include - #include "Eigen/Core" #include "ceres/internal/config.h" #include "ceres/internal/export.h" @@ -40,8 +38,7 @@ #include "cuda_runtime.h" #endif // CERES_NO_CUDA -namespace ceres { -namespace internal { +namespace ceres::internal { // clang-format off #define CERES_EIGEN_VERSION \ @@ -50,52 +47,47 @@ namespace internal { CERES_TO_STRING(EIGEN_MINOR_VERSION) // clang-format on -std::string VersionString() { - std::string value = std::string(CERES_VERSION_STRING); - value += "-eigen-(" + std::string(CERES_EIGEN_VERSION) + ")"; +constexpr char kVersion[] = + // clang-format off + CERES_VERSION_STRING + "-eigen-(" CERES_EIGEN_VERSION ")" #ifdef CERES_NO_LAPACK - value += "-no_lapack"; + "-no_lapack" #else - value += "-lapack"; + "-lapack" #endif #ifndef CERES_NO_SUITESPARSE - value += "-suitesparse-(" + std::string(CERES_SUITESPARSE_VERSION) + ")"; + "-suitesparse-(" 
CERES_SUITESPARSE_VERSION ")" #endif -#ifndef CERES_NO_CXSPARSE - value += "-cxsparse-(" + std::string(CERES_CXSPARSE_VERSION) + ")"; +#if !defined(CERES_NO_EIGEN_METIS) || !defined(CERES_NO_CHOLMOD_PARTITION) + "-metis-(" CERES_METIS_VERSION ")" #endif #ifndef CERES_NO_ACCELERATE_SPARSE - value += "-acceleratesparse"; + "-acceleratesparse" #endif #ifdef CERES_USE_EIGEN_SPARSE - value += "-eigensparse"; + "-eigensparse" #endif #ifdef CERES_RESTRUCT_SCHUR_SPECIALIZATIONS - value += "-no_schur_specializations"; -#endif - -#ifdef CERES_USE_OPENMP - value += "-openmp"; -#else - value += "-no_openmp"; + "-no_schur_specializations" #endif #ifdef CERES_NO_CUSTOM_BLAS - value += "-no_custom_blas"; + "-no_custom_blas" #endif #ifndef CERES_NO_CUDA - value += "-cuda-(" + std::to_string(CUDART_VERSION) + ")"; + "-cuda-(" CERES_TO_STRING(CUDART_VERSION) ")" #endif + ; +// clang-format on - return value; -} +std::string_view VersionString() noexcept { return kVersion; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/solver_utils.h b/extern/ceres/internal/ceres/solver_utils.h index 298564a897d..ff5e280aa24 100644 --- a/extern/ceres/internal/ceres/solver_utils.h +++ b/extern/ceres/internal/ceres/solver_utils.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,15 +32,14 @@ #define CERES_INTERNAL_SOLVER_UTILS_H_ #include -#include +#include #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" #include "ceres/iteration_callback.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { template bool IsSolutionUsable(const SummaryType& summary) { @@ -61,10 +60,9 @@ void SetSummaryFinalCost(SummaryType* summary) { } CERES_NO_EXPORT -std::string VersionString(); +std::string_view VersionString() noexcept; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/sparse_cholesky.cc b/extern/ceres/internal/ceres/sparse_cholesky.cc index 4a80470ffb7..4f1bf876690 100644 --- a/extern/ceres/internal/ceres/sparse_cholesky.cc +++ b/extern/ceres/internal/ceres/sparse_cholesky.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,30 +31,28 @@ #include "ceres/sparse_cholesky.h" #include +#include #include "ceres/accelerate_sparse.h" -#include "ceres/cxsparse.h" #include "ceres/eigensparse.h" -#include "ceres/float_cxsparse.h" #include "ceres/float_suitesparse.h" #include "ceres/iterative_refiner.h" #include "ceres/suitesparse.h" -namespace ceres { -namespace internal { +namespace ceres::internal { std::unique_ptr SparseCholesky::Create( const LinearSolver::Options& options) { - const OrderingType ordering_type = options.use_postordering ? 
AMD : NATURAL; std::unique_ptr sparse_cholesky; switch (options.sparse_linear_algebra_library_type) { case SUITE_SPARSE: #ifndef CERES_NO_SUITESPARSE if (options.use_mixed_precision_solves) { - sparse_cholesky = FloatSuiteSparseCholesky::Create(ordering_type); + sparse_cholesky = + FloatSuiteSparseCholesky::Create(options.ordering_type); } else { - sparse_cholesky = SuiteSparseCholesky::Create(ordering_type); + sparse_cholesky = SuiteSparseCholesky::Create(options.ordering_type); } break; #else @@ -64,9 +62,10 @@ std::unique_ptr SparseCholesky::Create( case EIGEN_SPARSE: #ifdef CERES_USE_EIGEN_SPARSE if (options.use_mixed_precision_solves) { - sparse_cholesky = FloatEigenSparseCholesky::Create(ordering_type); + sparse_cholesky = + FloatEigenSparseCholesky::Create(options.ordering_type); } else { - sparse_cholesky = EigenSparseCholesky::Create(ordering_type); + sparse_cholesky = EigenSparseCholesky::Create(options.ordering_type); } break; #else @@ -74,25 +73,14 @@ std::unique_ptr SparseCholesky::Create( << "Eigen's sparse Cholesky factorization routines."; #endif - case CX_SPARSE: -#ifndef CERES_NO_CXSPARSE - if (options.use_mixed_precision_solves) { - sparse_cholesky = FloatCXSparseCholesky::Create(ordering_type); - } else { - sparse_cholesky = CXSparseCholesky::Create(ordering_type); - } - break; -#else - LOG(FATAL) << "Ceres was compiled without support for CXSparse."; -#endif - case ACCELERATE_SPARSE: #ifndef CERES_NO_ACCELERATE_SPARSE if (options.use_mixed_precision_solves) { - sparse_cholesky = AppleAccelerateCholesky::Create(ordering_type); + sparse_cholesky = + AppleAccelerateCholesky::Create(options.ordering_type); } else { sparse_cholesky = - AppleAccelerateCholesky::Create(ordering_type); + AppleAccelerateCholesky::Create(options.ordering_type); } break; #else @@ -107,10 +95,10 @@ std::unique_ptr SparseCholesky::Create( } if (options.max_num_refinement_iterations > 0) { - std::unique_ptr refiner( - new 
IterativeRefiner(options.max_num_refinement_iterations)); - sparse_cholesky = std::unique_ptr(new RefinedSparseCholesky( - std::move(sparse_cholesky), std::move(refiner))); + auto refiner = std::make_unique( + options.max_num_refinement_iterations); + sparse_cholesky = std::make_unique( + std::move(sparse_cholesky), std::move(refiner)); } return sparse_cholesky; } @@ -123,7 +111,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve( double* solution, std::string* message) { LinearSolverTerminationType termination_type = Factorize(lhs, message); - if (termination_type == LINEAR_SOLVER_SUCCESS) { + if (termination_type == LinearSolverTerminationType::SUCCESS) { termination_type = Solve(rhs, solution, message); } return termination_type; @@ -131,7 +119,7 @@ LinearSolverTerminationType SparseCholesky::FactorAndSolve( RefinedSparseCholesky::RefinedSparseCholesky( std::unique_ptr sparse_cholesky, - std::unique_ptr iterative_refiner) + std::unique_ptr iterative_refiner) : sparse_cholesky_(std::move(sparse_cholesky)), iterative_refiner_(std::move(iterative_refiner)) {} @@ -153,13 +141,12 @@ LinearSolverTerminationType RefinedSparseCholesky::Solve(const double* rhs, std::string* message) { CHECK(lhs_ != nullptr); auto termination_type = sparse_cholesky_->Solve(rhs, solution, message); - if (termination_type != LINEAR_SOLVER_SUCCESS) { + if (termination_type != LinearSolverTerminationType::SUCCESS) { return termination_type; } iterative_refiner_->Refine(*lhs_, rhs, sparse_cholesky_.get(), solution); - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/sparse_cholesky.h b/extern/ceres/internal/ceres/sparse_cholesky.h index 80c5cb2b83b..53f475a7481 100644 --- a/extern/ceres/internal/ceres/sparse_cholesky.h +++ b/extern/ceres/internal/ceres/sparse_cholesky.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares 
minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/linear_solver.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // An interface that abstracts away the internal details of various // sparse linear algebra libraries and offers a simple API for solving @@ -63,11 +62,12 @@ namespace internal { // // CompressedRowSparseMatrix lhs = ...; // std::string message; -// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message), LINEAR_SOLVER_SUCCESS); +// CHECK_EQ(sparse_cholesky->Factorize(&lhs, &message), +// LinearSolverTerminationType::SUCCESS); // Vector rhs = ...; // Vector solution = ...; // CHECK_EQ(sparse_cholesky->Solve(rhs.data(), solution.data(), &message), -// LINEAR_SOLVER_SUCCESS); +// LinearSolverTerminationType::SUCCESS); class CERES_NO_EXPORT SparseCholesky { public: @@ -105,21 +105,22 @@ class CERES_NO_EXPORT SparseCholesky { // Convenience method which combines a call to Factorize and // Solve. Solve is only called if Factorize returns - // LINEAR_SOLVER_SUCCESS. + // LinearSolverTerminationType::SUCCESS. LinearSolverTerminationType FactorAndSolve(CompressedRowSparseMatrix* lhs, const double* rhs, double* solution, std::string* message); }; -class IterativeRefiner; +class SparseIterativeRefiner; // Computes an initial solution using the given instance of -// SparseCholesky, and then refines it using the IterativeRefiner. +// SparseCholesky, and then refines it using the SparseIterativeRefiner. 
class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky { public: - RefinedSparseCholesky(std::unique_ptr sparse_cholesky, - std::unique_ptr iterative_refiner); + RefinedSparseCholesky( + std::unique_ptr sparse_cholesky, + std::unique_ptr iterative_refiner); ~RefinedSparseCholesky() override; CompressedRowSparseMatrix::StorageType StorageType() const override; @@ -131,12 +132,11 @@ class CERES_NO_EXPORT RefinedSparseCholesky final : public SparseCholesky { private: std::unique_ptr sparse_cholesky_; - std::unique_ptr iterative_refiner_; + std::unique_ptr iterative_refiner_; CompressedRowSparseMatrix* lhs_ = nullptr; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/sparse_matrix.cc b/extern/ceres/internal/ceres/sparse_matrix.cc index bc757ead361..cdc77fcdc75 100644 --- a/extern/ceres/internal/ceres/sparse_matrix.cc +++ b/extern/ceres/internal/ceres/sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,10 +30,24 @@ #include "ceres/sparse_matrix.h" -namespace ceres { -namespace internal { +namespace ceres::internal { SparseMatrix::~SparseMatrix() = default; -} // namespace internal -} // namespace ceres +void SparseMatrix::SquaredColumnNorm(double* x, + ContextImpl* context, + int num_threads) const { + (void)context; + (void)num_threads; + SquaredColumnNorm(x); +} + +void SparseMatrix::ScaleColumns(const double* scale, + ContextImpl* context, + int num_threads) { + (void)context; + (void)num_threads; + ScaleColumns(scale); +} + +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/sparse_matrix.h b/extern/ceres/internal/ceres/sparse_matrix.h index 1dbb96e6070..9c79417a7f4 100644 --- a/extern/ceres/internal/ceres/sparse_matrix.h +++ b/extern/ceres/internal/ceres/sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,8 @@ #include "ceres/linear_operator.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { +class ContextImpl; // This class defines the interface for storing and manipulating // sparse matrices. 
The key property that differentiates different @@ -69,18 +69,30 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator { ~SparseMatrix() override; // y += Ax; - void RightMultiply(const double* x, double* y) const override = 0; + using LinearOperator::RightMultiplyAndAccumulate; + void RightMultiplyAndAccumulate(const double* x, + double* y) const override = 0; + // y += A'x; - void LeftMultiply(const double* x, double* y) const override = 0; + void LeftMultiplyAndAccumulate(const double* x, double* y) const override = 0; // In MATLAB notation sum(A.*A, 1) virtual void SquaredColumnNorm(double* x) const = 0; + virtual void SquaredColumnNorm(double* x, + ContextImpl* context, + int num_threads) const; // A = A * diag(scale) virtual void ScaleColumns(const double* scale) = 0; + virtual void ScaleColumns(const double* scale, + ContextImpl* context, + int num_threads); // A = 0. A->num_nonzeros() == 0 is true after this call. The // sparsity pattern is preserved. virtual void SetZero() = 0; + virtual void SetZero(ContextImpl* /*context*/, int /*num_threads*/) { + SetZero(); + } // Resize and populate dense_matrix with a dense version of the // sparse matrix. @@ -103,7 +115,6 @@ class CERES_NO_EXPORT SparseMatrix : public LinearOperator { virtual int num_nonzeros() const = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SPARSE_MATRIX_H_ diff --git a/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.cc b/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.cc index 2e52ae6d908..57465098e29 100644 --- a/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -45,8 +45,7 @@ #include "ceres/types.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { +namespace ceres::internal { SparseNormalCholeskySolver::SparseNormalCholeskySolver( const LinearSolver::Options& options) @@ -64,7 +63,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl( EventLogger event_logger("SparseNormalCholeskySolver::Solve"); LinearSolver::Summary summary; summary.num_iterations = 1; - summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.termination_type = LinearSolverTerminationType::SUCCESS; summary.message = "Success."; const int num_cols = A->num_cols(); @@ -72,7 +71,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl( xref.setZero(); rhs_.resize(num_cols); rhs_.setZero(); - A->LeftMultiply(b, rhs_.data()); + A->LeftMultiplyAndAccumulate(b, rhs_.data()); event_logger.AddEvent("Compute RHS"); if (per_solve_options.D != nullptr) { @@ -110,5 +109,4 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl( return summary; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.h b/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.h index caec566612e..585d1c1aed6 100644 --- a/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.h +++ b/extern/ceres/internal/ceres/sparse_normal_cholesky_solver.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -45,8 +45,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CompressedRowSparseMatrix; class InnerProductComputer; @@ -75,7 +74,6 @@ class CERES_NO_EXPORT SparseNormalCholeskySolver std::unique_ptr inner_product_computer_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_SPARSE_NORMAL_CHOLESKY_SOLVER_H_ diff --git a/extern/ceres/internal/ceres/stl_util.h b/extern/ceres/internal/ceres/stl_util.h index 2af2518f837..c326be1d371 100644 --- a/extern/ceres/internal/ceres/stl_util.h +++ b/extern/ceres/internal/ceres/stl_util.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without diff --git a/extern/ceres/internal/ceres/stringprintf.cc b/extern/ceres/internal/ceres/stringprintf.cc index e45b4301eef..100bbff9e8a 100644 --- a/extern/ceres/internal/ceres/stringprintf.cc +++ b/extern/ceres/internal/ceres/stringprintf.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,12 +38,9 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -using std::string; - -void StringAppendV(string* dst, const char* format, va_list ap) { +void StringAppendV(std::string* dst, const char* format, va_list ap) { // First try with a small fixed size buffer char space[1024]; @@ -93,16 +90,16 @@ void StringAppendV(string* dst, const char* format, va_list ap) { delete[] buf; } -string StringPrintf(const char* format, ...) { +std::string StringPrintf(const char* format, ...) { va_list ap; va_start(ap, format); - string result; + std::string result; StringAppendV(&result, format, ap); va_end(ap); return result; } -const string& SStringPrintf(string* dst, const char* format, ...) { +const std::string& SStringPrintf(std::string* dst, const char* format, ...) { va_list ap; va_start(ap, format); dst->clear(); @@ -111,12 +108,11 @@ const string& SStringPrintf(string* dst, const char* format, ...) { return *dst; } -void StringAppendF(string* dst, const char* format, ...) { +void StringAppendF(std::string* dst, const char* format, ...) { va_list ap; va_start(ap, format); StringAppendV(dst, format, ap); va_end(ap); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/stringprintf.h b/extern/ceres/internal/ceres/stringprintf.h index e24325fbd35..f7617705343 100644 --- a/extern/ceres/internal/ceres/stringprintf.h +++ b/extern/ceres/internal/ceres/stringprintf.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,8 +44,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { #if (defined(__GNUC__) || defined(__clang__)) // Tell the compiler to do printf format string checking if the compiler @@ -90,8 +89,7 @@ CERES_NO_EXPORT extern void StringAppendV(std::string* dst, #undef CERES_PRINTF_ATTRIBUTE -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/subset_preconditioner.cc b/extern/ceres/internal/ceres/subset_preconditioner.cc index 221530c0dd5..068f6cec386 100644 --- a/extern/ceres/internal/ceres/subset_preconditioner.cc +++ b/extern/ceres/internal/ceres/subset_preconditioner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,7 @@ #include "ceres/sparse_cholesky.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options, const BlockSparseMatrix& A) @@ -52,13 +51,14 @@ SubsetPreconditioner::SubsetPreconditioner(Preconditioner::Options options, LinearSolver::Options sparse_cholesky_options; sparse_cholesky_options.sparse_linear_algebra_library_type = options_.sparse_linear_algebra_library_type; - sparse_cholesky_options.use_postordering = options_.use_postordering; + sparse_cholesky_options.ordering_type = options_.ordering_type; sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options); } SubsetPreconditioner::~SubsetPreconditioner() = default; -void SubsetPreconditioner::RightMultiply(const double* x, double* y) const { +void SubsetPreconditioner::RightMultiplyAndAccumulate(const double* x, + double* y) const { CHECK(x != nullptr); CHECK(y != nullptr); std::string message; @@ -106,7 +106,7 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A, const LinearSolverTerminationType termination_type = sparse_cholesky_->Factorize(inner_product_computer_->mutable_result(), &message); - if (termination_type != LINEAR_SOLVER_SUCCESS) { + if (termination_type != LinearSolverTerminationType::SUCCESS) { LOG(ERROR) << "Preconditioner factorization failed: " << message; return false; } @@ -114,5 +114,4 @@ bool SubsetPreconditioner::UpdateImpl(const BlockSparseMatrix& A, return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/subset_preconditioner.h b/extern/ceres/internal/ceres/subset_preconditioner.h index 6d07995a136..e179e99e1b7 100644 --- a/extern/ceres/internal/ceres/subset_preconditioner.h +++ b/extern/ceres/internal/ceres/subset_preconditioner.h @@ -1,5 +1,5 @@ // Ceres 
Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/internal/export.h" #include "ceres/preconditioner.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockSparseMatrix; class SparseCholesky; @@ -76,7 +75,7 @@ class CERES_NO_EXPORT SubsetPreconditioner ~SubsetPreconditioner() override; // Preconditioner interface - void RightMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; int num_rows() const final { return num_cols_; } int num_cols() const final { return num_cols_; } @@ -89,8 +88,7 @@ class CERES_NO_EXPORT SubsetPreconditioner std::unique_ptr inner_product_computer_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/suitesparse.cc b/extern/ceres/internal/ceres/suitesparse.cc index 883dcc8f63e..d93dd8d9ed4 100644 --- a/extern/ceres/internal/ceres/suitesparse.cc +++ b/extern/ceres/internal/ceres/suitesparse.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,7 +32,9 @@ #include "ceres/internal/config.h" #ifndef CERES_NO_SUITESPARSE + #include +#include #include #include "ceres/compressed_col_sparse_matrix_utils.h" @@ -42,11 +44,24 @@ #include "ceres/triplet_sparse_matrix.h" #include "cholmod.h" -namespace ceres { -namespace internal { +namespace ceres::internal { +namespace { +int OrderingTypeToCHOLMODEnum(OrderingType ordering_type) { + if (ordering_type == OrderingType::AMD) { + return CHOLMOD_AMD; + } + if (ordering_type == OrderingType::NESDIS) { + return CHOLMOD_NESDIS; + } -using std::string; -using std::vector; + if (ordering_type == OrderingType::NATURAL) { + return CHOLMOD_NATURAL; + } + LOG(FATAL) << "Congratulations you have discovered a bug in Ceres Solver." + << "Please report it to the developers. " << ordering_type; + return -1; +} +} // namespace SuiteSparse::SuiteSparse() { cholmod_start(&cc_); } @@ -103,9 +118,11 @@ cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView( m.x = reinterpret_cast(A->mutable_values()); m.z = nullptr; - if (A->storage_type() == CompressedRowSparseMatrix::LOWER_TRIANGULAR) { + if (A->storage_type() == + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR) { m.stype = 1; - } else if (A->storage_type() == CompressedRowSparseMatrix::UPPER_TRIANGULAR) { + } else if (A->storage_type() == + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) { m.stype = -1; } else { m.stype = 0; @@ -144,19 +161,18 @@ cholmod_dense* SuiteSparse::CreateDenseVector(const double* x, } cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A, - string* message) { - // Cholmod can try multiple re-ordering strategies to find a fill - // reducing ordering. Here we just tell it use AMD with automatic - // matrix dependence choice of supernodal versus simplicial - // factorization. 
+ OrderingType ordering_type, + std::string* message) { cc_.nmethods = 1; - cc_.method[0].ordering = CHOLMOD_AMD; - cc_.supernodal = CHOLMOD_AUTO; + cc_.method[0].ordering = OrderingTypeToCHOLMODEnum(ordering_type); + + // postordering with a NATURAL ordering leads to a significant regression in + // performance. See https://github.com/ceres-solver/ceres-solver/issues/905 + if (ordering_type == OrderingType::NATURAL) { + cc_.postorder = 0; + } cholmod_factor* factor = cholmod_analyze(A, &cc_); - if (VLOG_IS_ON(2)) { - cholmod_print_common(const_cast("Symbolic Analysis"), &cc_); - } if (cc_.status != CHOLMOD_OK) { *message = @@ -165,32 +181,22 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A, } CHECK(factor != nullptr); + if (VLOG_IS_ON(2)) { + cholmod_print_common(const_cast("Symbolic Analysis"), &cc_); + } + return factor; } -cholmod_factor* SuiteSparse::BlockAnalyzeCholesky(cholmod_sparse* A, - const vector& row_blocks, - const vector& col_blocks, - string* message) { - vector ordering; - if (!BlockAMDOrdering(A, row_blocks, col_blocks, &ordering)) { - return nullptr; - } - return AnalyzeCholeskyWithUserOrdering(A, ordering, message); -} - -cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering( - cholmod_sparse* A, const vector& ordering, string* message) { +cholmod_factor* SuiteSparse::AnalyzeCholeskyWithGivenOrdering( + cholmod_sparse* A, const std::vector& ordering, std::string* message) { CHECK_EQ(ordering.size(), A->nrow); cc_.nmethods = 1; cc_.method[0].ordering = CHOLMOD_GIVEN; - cholmod_factor* factor = - cholmod_analyze_p(A, const_cast(&ordering[0]), nullptr, 0, &cc_); - if (VLOG_IS_ON(2)) { - cholmod_print_common(const_cast("Symbolic Analysis"), &cc_); - } + cholmod_analyze_p(A, const_cast(ordering.data()), nullptr, 0, &cc_); + if (cc_.status != CHOLMOD_OK) { *message = StringPrintf("cholmod_analyze failed. 
error code: %d", cc_.status); @@ -198,40 +204,33 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering( } CHECK(factor != nullptr); - return factor; -} - -cholmod_factor* SuiteSparse::AnalyzeCholeskyWithNaturalOrdering( - cholmod_sparse* A, string* message) { - cc_.nmethods = 1; - cc_.method[0].ordering = CHOLMOD_NATURAL; - cc_.postorder = 0; - - cholmod_factor* factor = cholmod_analyze(A, &cc_); if (VLOG_IS_ON(2)) { cholmod_print_common(const_cast("Symbolic Analysis"), &cc_); } - if (cc_.status != CHOLMOD_OK) { - *message = - StringPrintf("cholmod_analyze failed. error code: %d", cc_.status); - return nullptr; - } - CHECK(factor != nullptr); return factor; } -bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A, - const vector& row_blocks, - const vector& col_blocks, - vector* ordering) { +bool SuiteSparse::BlockOrdering(const cholmod_sparse* A, + OrderingType ordering_type, + const std::vector& row_blocks, + const std::vector& col_blocks, + std::vector* ordering) { + if (ordering_type == OrderingType::NATURAL) { + ordering->resize(A->nrow); + for (int i = 0; i < A->nrow; ++i) { + (*ordering)[i] = i; + } + return true; + } + const int num_row_blocks = row_blocks.size(); const int num_col_blocks = col_blocks.size(); // Arrays storing the compressed column structure of the matrix - // incoding the block sparsity of A. - vector block_cols; - vector block_rows; + // encoding the block sparsity of A. 
+ std::vector block_cols; + std::vector block_rows; CompressedColumnScalarMatrixToBlockMatrix(reinterpret_cast(A->i), reinterpret_cast(A->p), @@ -243,8 +242,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A, block_matrix.nrow = num_row_blocks; block_matrix.ncol = num_col_blocks; block_matrix.nzmax = block_rows.size(); - block_matrix.p = reinterpret_cast(&block_cols[0]); - block_matrix.i = reinterpret_cast(&block_rows[0]); + block_matrix.p = reinterpret_cast(block_cols.data()); + block_matrix.i = reinterpret_cast(block_rows.data()); block_matrix.x = nullptr; block_matrix.stype = A->stype; block_matrix.itype = CHOLMOD_INT; @@ -253,8 +252,8 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A, block_matrix.sorted = 1; block_matrix.packed = 1; - vector block_ordering(num_row_blocks); - if (!cholmod_amd(&block_matrix, nullptr, 0, &block_ordering[0], &cc_)) { + std::vector block_ordering(num_row_blocks); + if (!Ordering(&block_matrix, ordering_type, block_ordering.data())) { return false; } @@ -262,9 +261,22 @@ bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A, return true; } +cholmod_factor* SuiteSparse::BlockAnalyzeCholesky( + cholmod_sparse* A, + OrderingType ordering_type, + const std::vector& row_blocks, + const std::vector& col_blocks, + std::string* message) { + std::vector ordering; + if (!BlockOrdering(A, ordering_type, row_blocks, col_blocks, &ordering)) { + return nullptr; + } + return AnalyzeCholeskyWithGivenOrdering(A, ordering, message); +} + LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L, - string* message) { + std::string* message) { CHECK(A != nullptr); CHECK(L != nullptr); @@ -282,48 +294,48 @@ LinearSolverTerminationType SuiteSparse::Cholesky(cholmod_sparse* A, switch (cc_.status) { case CHOLMOD_NOT_INSTALLED: *message = "CHOLMOD failure: Method not installed."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; case CHOLMOD_OUT_OF_MEMORY: 
*message = "CHOLMOD failure: Out of memory."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; case CHOLMOD_TOO_LARGE: *message = "CHOLMOD failure: Integer overflow occurred."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; case CHOLMOD_INVALID: *message = "CHOLMOD failure: Invalid input."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; case CHOLMOD_NOT_POSDEF: *message = "CHOLMOD warning: Matrix not positive definite."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; case CHOLMOD_DSMALL: *message = "CHOLMOD warning: D for LDL' or diag(L) or " "LL' has tiny absolute value."; - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; case CHOLMOD_OK: if (cholmod_status != 0) { - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } *message = "CHOLMOD failure: cholmod_factorize returned false " "but cholmod_common::status is CHOLMOD_OK." "Please report this to ceres-solver@googlegroups.com."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; default: *message = StringPrintf( "Unknown cholmod return code: %d. " "Please report this to ceres-solver@googlegroups.com.", cc_.status); - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } cholmod_dense* SuiteSparse::Solve(cholmod_factor* L, cholmod_dense* b, - string* message) { + std::string* message) { if (cc_.status != CHOLMOD_OK) { *message = "cholmod_solve failed. 
CHOLMOD status is not CHOLMOD_OK"; return nullptr; @@ -332,22 +344,34 @@ cholmod_dense* SuiteSparse::Solve(cholmod_factor* L, return cholmod_solve(CHOLMOD_A, L, b, &cc_); } -bool SuiteSparse::ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, - int* ordering) { - return cholmod_amd(matrix, nullptr, 0, ordering, &cc_); +bool SuiteSparse::Ordering(cholmod_sparse* matrix, + OrderingType ordering_type, + int* ordering) { + CHECK_NE(ordering_type, OrderingType::NATURAL); + if (ordering_type == OrderingType::AMD) { + return cholmod_amd(matrix, nullptr, 0, ordering, &cc_); + } + +#ifdef CERES_NO_CHOLMOD_PARTITION + return false; +#else + std::vector CParent(matrix->nrow, 0); + std::vector CMember(matrix->nrow, 0); + return cholmod_nested_dissection( + matrix, nullptr, 0, ordering, CParent.data(), CMember.data(), &cc_); +#endif } bool SuiteSparse::ConstrainedApproximateMinimumDegreeOrdering( cholmod_sparse* matrix, int* constraints, int* ordering) { -#ifndef CERES_NO_CAMD return cholmod_camd(matrix, nullptr, 0, constraints, ordering, &cc_); -#else - LOG(FATAL) << "Congratulations you have found a bug in Ceres." - << "Ceres Solver was compiled with SuiteSparse " - << "version 4.1.0 or less. Calling this function " - << "in that case is a bug. 
Please contact the" - << "the Ceres Solver developers."; +} + +bool SuiteSparse::IsNestedDissectionAvailable() { +#ifdef CERES_NO_CHOLMOD_PARTITION return false; +#else + return true; #endif } @@ -367,48 +391,61 @@ SuiteSparseCholesky::~SuiteSparseCholesky() { } LinearSolverTerminationType SuiteSparseCholesky::Factorize( - CompressedRowSparseMatrix* lhs, string* message) { + CompressedRowSparseMatrix* lhs, std::string* message) { if (lhs == nullptr) { *message = "Failure: Input lhs is nullptr."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } cholmod_sparse cholmod_lhs = ss_.CreateSparseMatrixTransposeView(lhs); + // If a factorization does not exist, compute the symbolic + // factorization first. + // + // If the ordering type is NATURAL, then there is no fill reducing + // ordering to be computed, regardless of block structure, so we can + // just call the scalar version of symbolic factorization. For + // SuiteSparse this is the common case since we have already + // pre-ordered the columns of the Jacobian. + // + // Similarly regardless of ordering type, if there is no block + // structure in the matrix we call the scalar version of symbolic + // factorization. 
if (factor_ == nullptr) { - if (ordering_type_ == NATURAL) { - factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(&cholmod_lhs, message); + if (ordering_type_ == OrderingType::NATURAL || + (lhs->col_blocks().empty() || lhs->row_blocks().empty())) { + factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, ordering_type_, message); } else { - if (!lhs->col_blocks().empty() && !(lhs->row_blocks().empty())) { - factor_ = ss_.BlockAnalyzeCholesky( - &cholmod_lhs, lhs->col_blocks(), lhs->row_blocks(), message); - } else { - factor_ = ss_.AnalyzeCholesky(&cholmod_lhs, message); - } - } - - if (factor_ == nullptr) { - return LINEAR_SOLVER_FATAL_ERROR; + factor_ = ss_.BlockAnalyzeCholesky(&cholmod_lhs, + ordering_type_, + lhs->col_blocks(), + lhs->row_blocks(), + message); } } + if (factor_ == nullptr) { + return LinearSolverTerminationType::FATAL_ERROR; + } + + // Compute and return the numeric factorization. return ss_.Cholesky(&cholmod_lhs, factor_, message); } CompressedRowSparseMatrix::StorageType SuiteSparseCholesky::StorageType() const { - return ((ordering_type_ == NATURAL) - ? CompressedRowSparseMatrix::UPPER_TRIANGULAR - : CompressedRowSparseMatrix::LOWER_TRIANGULAR); + return ((ordering_type_ == OrderingType::NATURAL) + ? 
CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR + : CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); } LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs, double* solution, - string* message) { + std::string* message) { // Error checking if (factor_ == nullptr) { *message = "Solve called without a call to Factorize first."; - return LINEAR_SOLVER_FATAL_ERROR; + return LinearSolverTerminationType::FATAL_ERROR; } const int num_cols = factor_->n; @@ -417,15 +454,14 @@ LinearSolverTerminationType SuiteSparseCholesky::Solve(const double* rhs, ss_.Solve(factor_, &cholmod_rhs, message); if (cholmod_dense_solution == nullptr) { - return LINEAR_SOLVER_FAILURE; + return LinearSolverTerminationType::FAILURE; } memcpy(solution, cholmod_dense_solution->x, num_cols * sizeof(*solution)); ss_.Free(cholmod_dense_solution); - return LINEAR_SOLVER_SUCCESS; + return LinearSolverTerminationType::SUCCESS; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_NO_SUITESPARSE diff --git a/extern/ceres/internal/ceres/suitesparse.h b/extern/ceres/internal/ceres/suitesparse.h index 3f62e7c7b7d..703ee87f5df 100644 --- a/extern/ceres/internal/ceres/suitesparse.h +++ b/extern/ceres/internal/ceres/suitesparse.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,37 +44,14 @@ #include #include "SuiteSparseQR.hpp" +#include "ceres/block_structure.h" +#include "ceres/internal/disable_warnings.h" #include "ceres/linear_solver.h" #include "ceres/sparse_cholesky.h" #include "cholmod.h" #include "glog/logging.h" -// Before SuiteSparse version 4.2.0, cholmod_camd was only enabled -// if SuiteSparse was compiled with Metis support. 
This makes -// calling and linking into cholmod_camd problematic even though it -// has nothing to do with Metis. This has been fixed reliably in -// 4.2.0. -// -// The fix was actually committed in 4.1.0, but there is -// some confusion about a silent update to the tar ball, so we are -// being conservative and choosing the next minor version where -// things are stable. -#if (SUITESPARSE_VERSION < 4002) -#define CERES_NO_CAMD -#endif - -// UF_long is deprecated but SuiteSparse_long is only available in -// newer versions of SuiteSparse. So for older versions of -// SuiteSparse, we define SuiteSparse_long to be the same as UF_long, -// which is what recent versions of SuiteSparse do anyways. -#ifndef SuiteSparse_long -#define SuiteSparse_long UF_long -#endif - -#include "ceres/internal/disable_warnings.h" - -namespace ceres { -namespace internal { +namespace ceres::internal { class CompressedRowSparseMatrix; class TripletSparseMatrix; @@ -91,7 +68,7 @@ class CERES_NO_EXPORT SuiteSparse { // Functions for building cholmod_sparse objects from sparse // matrices stored in triplet form. The matrix A is not - // modifed. Called owns the result. + // modified. Called owns the result. cholmod_sparse* CreateSparseMatrix(TripletSparseMatrix* A); // This function works like CreateSparseMatrix, except that the @@ -142,12 +119,11 @@ class CERES_NO_EXPORT SuiteSparse { cholmod_sdmult(A, 0, alpha_, beta_, x, y, &cc_); } - // Find an ordering of A or AA' (if A is unsymmetric) that minimizes - // the fill-in in the Cholesky factorization of the corresponding - // matrix. This is done by using the AMD algorithm. - // - // Using this ordering, the symbolic Cholesky factorization of A (or - // AA') is computed and returned. + // Compute a symbolic factorization for A or AA' (if A is + // unsymmetric). 
If ordering_type is NATURAL, then no fill reducing + // ordering is computed, otherwise depending on the value of + // ordering_type AMD or Nested Dissection is used to compute a fill + // reducing ordering before the symbolic factorization is computed. // // A is not modified, only the pattern of non-zeros of A is used, // the actual numerical values in A are of no consequence. @@ -155,11 +131,15 @@ class CERES_NO_EXPORT SuiteSparse { // message contains an explanation of the failures if any. // // Caller owns the result. - cholmod_factor* AnalyzeCholesky(cholmod_sparse* A, std::string* message); + cholmod_factor* AnalyzeCholesky(cholmod_sparse* A, + OrderingType ordering_type, + std::string* message); + // Block oriented version of AnalyzeCholesky. cholmod_factor* BlockAnalyzeCholesky(cholmod_sparse* A, - const std::vector& row_blocks, - const std::vector& col_blocks, + OrderingType ordering_type, + const std::vector& row_blocks, + const std::vector& col_blocks, std::string* message); // If A is symmetric, then compute the symbolic Cholesky @@ -173,20 +153,11 @@ class CERES_NO_EXPORT SuiteSparse { // message contains an explanation of the failures if any. // // Caller owns the result. - cholmod_factor* AnalyzeCholeskyWithUserOrdering( + cholmod_factor* AnalyzeCholeskyWithGivenOrdering( cholmod_sparse* A, const std::vector& ordering, std::string* message); - // Perform a symbolic factorization of A without re-ordering A. No - // postordering of the elimination tree is performed. This ensures - // that the symbolic factor does not introduce an extra permutation - // on the matrix. See the documentation for CHOLMOD for more details. - // - // message contains an explanation of the failures if any. - cholmod_factor* AnalyzeCholeskyWithNaturalOrdering(cholmod_sparse* A, - std::string* message); - // Use the symbolic factorization in L, to find the numerical // factorization for the matrix A or AA^T. Return true if // successful, false otherwise. 
L contains the numeric factorization @@ -206,51 +177,39 @@ class CERES_NO_EXPORT SuiteSparse { cholmod_dense* b, std::string* message); + // Find a fill reducing ordering. ordering is expected to be large + // enough to hold the ordering. ordering_type must be AMD or NESDIS. + bool Ordering(cholmod_sparse* matrix, + OrderingType ordering_type, + int* ordering); + + // Find the block oriented fill reducing ordering of a matrix A, + // whose row and column blocks are given by row_blocks, and + // col_blocks respectively. The matrix may or may not be + // symmetric. The entries of col_blocks do not need to sum to the + // number of columns in A. If this is the case, only the first + // sum(col_blocks) are used to compute the ordering. + // // By virtue of the modeling layer in Ceres being block oriented, // all the matrices used by Ceres are also block oriented. When // doing sparse direct factorization of these matrices the - // fill-reducing ordering algorithms (in particular AMD) can either - // be run on the block or the scalar form of these matrices. The two - // SuiteSparse::AnalyzeCholesky methods allows the client to - // compute the symbolic factorization of a matrix by either using - // AMD on the matrix or a user provided ordering of the rows. - // - // But since the underlying matrices are block oriented, it is worth - // running AMD on just the block structure of these matrices and then - // lifting these block orderings to a full scalar ordering. This - // preserves the block structure of the permuted matrix, and exposes - // more of the super-nodal structure of the matrix to the numerical - // factorization routines. - // - // Find the block oriented AMD ordering of a matrix A, whose row and - // column blocks are given by row_blocks, and col_blocks - // respectively. The matrix may or may not be symmetric. The entries - // of col_blocks do not need to sum to the number of columns in - // A. 
If this is the case, only the first sum(col_blocks) are used - // to compute the ordering. - bool BlockAMDOrdering(const cholmod_sparse* A, - const std::vector& row_blocks, - const std::vector& col_blocks, - std::vector* ordering); + // fill-reducing ordering algorithms can either be run on the block + // or the scalar form of these matrices. But since the underlying + // matrices are block oriented, it is worth running the fill + // reducing ordering on just the block structure of these matrices + // and then lifting these block orderings to a full scalar + // ordering. This preserves the block structure of the permuted + // matrix, and exposes more of the super-nodal structure of the + // matrix to the numerical factorization routines. + bool BlockOrdering(const cholmod_sparse* A, + OrderingType ordering_type, + const std::vector& row_blocks, + const std::vector& col_blocks, + std::vector* ordering); - // Find a fill reducing approximate minimum degree - // ordering. ordering is expected to be large enough to hold the - // ordering. - bool ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, int* ordering); - - // Before SuiteSparse version 4.2.0, cholmod_camd was only enabled - // if SuiteSparse was compiled with Metis support. This makes - // calling and linking into cholmod_camd problematic even though it - // has nothing to do with Metis. This has been fixed reliably in - // 4.2.0. - // - // The fix was actually committed in 4.1.0, but there is - // some confusion about a silent update to the tar ball, so we are - // being conservative and choosing the next minor version where - // things are stable. - static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() { - return (SUITESPARSE_VERSION > 4001); - } + // Nested dissection is only available if SuiteSparse is compiled + // with Metis support. + static bool IsNestedDissectionAvailable(); // Find a fill reducing approximate minimum degree // ordering. 
constraints is an array which associates with each @@ -262,9 +221,6 @@ class CERES_NO_EXPORT SuiteSparse { // Calling ApproximateMinimumDegreeOrdering is equivalent to calling // ConstrainedApproximateMinimumDegreeOrdering with a constraint // array that puts all columns in the same elimination group. - // - // If CERES_NO_CAMD is defined then calling this function will - // result in a crash. bool ConstrainedApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, int* constraints, int* ordering); @@ -312,14 +268,13 @@ class CERES_NO_EXPORT SuiteSparseCholesky final : public SparseCholesky { cholmod_factor* factor_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" #else // CERES_NO_SUITESPARSE -typedef void cholmod_factor; +using cholmod_factor = void; #include "ceres/internal/disable_warnings.h" @@ -328,17 +283,9 @@ namespace internal { class CERES_NO_EXPORT SuiteSparse { public: - // Defining this static function even when SuiteSparse is not - // available, allows client code to check for the presence of CAMD - // without checking for the absence of the CERES_NO_CAMD symbol. - // - // This is safer because the symbol maybe missing due to a user - // accidentally not including suitesparse.h in their code when - // checking for the symbol. - static bool IsConstrainedApproximateMinimumDegreeOrderingAvailable() { - return false; - } - + // Nested dissection is only available if SuiteSparse is compiled + // with Metis support. + static bool IsNestedDissectionAvailable() { return false; } void Free(void* /*arg*/) {} }; diff --git a/extern/ceres/internal/ceres/thread_pool.cc b/extern/ceres/internal/ceres/thread_pool.cc index 57f01af5476..1ce9ac8ba0b 100644 --- a/extern/ceres/internal/ceres/thread_pool.cc +++ b/extern/ceres/internal/ceres/thread_pool.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. 
+// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -28,18 +28,14 @@ // // Author: vitus@google.com (Michael Vitus) -// This include must come before any #ifndef check on Ceres compile options. -#include "ceres/internal/config.h" - -#ifdef CERES_USE_CXX_THREADS +#include "ceres/thread_pool.h" #include #include -#include "ceres/thread_pool.h" +#include "ceres/internal/config.h" -namespace ceres { -namespace internal { +namespace ceres::internal { namespace { // Constrain the total number of threads to the amount the hardware can support. @@ -105,7 +101,4 @@ void ThreadPool::ThreadMainLoop() { void ThreadPool::Stop() { task_queue_.StopWaiters(); } -} // namespace internal -} // namespace ceres - -#endif // CERES_USE_CXX_THREADS +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/thread_pool.h b/extern/ceres/internal/ceres/thread_pool.h index 94ab1e66bd4..8c8f06f3104 100644 --- a/extern/ceres/internal/ceres/thread_pool.h +++ b/extern/ceres/internal/ceres/thread_pool.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2018 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,7 @@ #include "ceres/concurrent_queue.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // A thread-safe thread pool with an unbounded task queue and a resizable number // of workers. 
The size of the thread pool can be increased but never decreased @@ -115,7 +114,6 @@ class CERES_NO_EXPORT ThreadPool { std::mutex thread_pool_mutex_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_THREAD_POOL_H_ diff --git a/extern/ceres/internal/ceres/thread_token_provider.cc b/extern/ceres/internal/ceres/thread_token_provider.cc index c7ec67f31aa..6217e2bb851 100644 --- a/extern/ceres/internal/ceres/thread_token_provider.cc +++ b/extern/ceres/internal/ceres/thread_token_provider.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,44 +30,20 @@ #include "ceres/thread_token_provider.h" -#ifdef CERES_USE_OPENMP -#include -#endif - -namespace ceres { -namespace internal { +namespace ceres::internal { ThreadTokenProvider::ThreadTokenProvider(int num_threads) { - (void)num_threads; -#ifdef CERES_USE_CXX_THREADS for (int i = 0; i < num_threads; i++) { pool_.Push(i); } -#endif } int ThreadTokenProvider::Acquire() { -#ifdef CERES_USE_OPENMP - return omp_get_thread_num(); -#endif - -#ifdef CERES_NO_THREADS - return 0; -#endif - -#ifdef CERES_USE_CXX_THREADS int thread_id; CHECK(pool_.Wait(&thread_id)); return thread_id; -#endif } -void ThreadTokenProvider::Release(int thread_id) { - (void)thread_id; -#ifdef CERES_USE_CXX_THREADS - pool_.Push(thread_id); -#endif -} +void ThreadTokenProvider::Release(int thread_id) { pool_.Push(thread_id); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/thread_token_provider.h b/extern/ceres/internal/ceres/thread_token_provider.h index 918c687eb24..5d375d1e9b6 100644 --- a/extern/ceres/internal/ceres/thread_token_provider.h +++ 
b/extern/ceres/internal/ceres/thread_token_provider.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -31,15 +31,11 @@ #ifndef CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_ #define CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_ +#include "ceres/concurrent_queue.h" #include "ceres/internal/config.h" #include "ceres/internal/export.h" -#ifdef CERES_USE_CXX_THREADS -#include "ceres/concurrent_queue.h" -#endif - -namespace ceres { -namespace internal { +namespace ceres::internal { // Helper for C++ thread number identification that is similar to // omp_get_thread_num() behaviour. This is necessary to support C++ @@ -48,12 +44,6 @@ namespace internal { // 0 to num_threads - 1 that can be acquired to identify the thread in a thread // pool. // -// If CERES_NO_THREADS is defined, Acquire() always returns 0 and Release() -// takes no action. -// -// If CERES_USE_OPENMP, omp_get_thread_num() is used to Acquire() with no action -// in Release() -// // // Example usage pseudocode: // @@ -78,20 +68,16 @@ class CERES_NO_EXPORT ThreadTokenProvider { void Release(int thread_id); private: -#ifdef CERES_USE_CXX_THREADS // This queue initially holds a sequence from 0..num_threads-1. Every // Acquire() call the first number is removed from here. When the token is not // needed anymore it shall be given back with corresponding Release() // call. This concurrent queue is more expensive than TBB's version, so you // should not acquire the thread ID on every for loop iteration. 
ConcurrentQueue pool_; -#endif - ThreadTokenProvider(ThreadTokenProvider&) = delete; ThreadTokenProvider& operator=(ThreadTokenProvider&) = delete; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_THREAD_TOKEN_PROVIDER_H_ diff --git a/extern/ceres/internal/ceres/triplet_sparse_matrix.cc b/extern/ceres/internal/ceres/triplet_sparse_matrix.cc index bbb5f676a5d..4bb6685aa69 100644 --- a/extern/ceres/internal/ceres/triplet_sparse_matrix.cc +++ b/extern/ceres/internal/ceres/triplet_sparse_matrix.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,15 +32,16 @@ #include #include +#include +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/crs_matrix.h" #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" -#include "ceres/random.h" #include "ceres/types.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { TripletSparseMatrix::TripletSparseMatrix() : num_rows_(0), num_cols_(0), max_num_nonzeros_(0), num_nonzeros_(0) {} @@ -168,13 +169,15 @@ void TripletSparseMatrix::CopyData(const TripletSparseMatrix& orig) { } } -void TripletSparseMatrix::RightMultiply(const double* x, double* y) const { +void TripletSparseMatrix::RightMultiplyAndAccumulate(const double* x, + double* y) const { for (int i = 0; i < num_nonzeros_; ++i) { y[rows_[i]] += values_[i] * x[cols_[i]]; } } -void TripletSparseMatrix::LeftMultiply(const double* x, double* y) const { +void TripletSparseMatrix::LeftMultiplyAndAccumulate(const double* x, + double* y) const { for (int i = 0; i < num_nonzeros_; ++i) { y[cols_[i]] += values_[i] * x[rows_[i]]; } @@ -195,6 +198,11 @@ void TripletSparseMatrix::ScaleColumns(const double* scale) { } } +void 
TripletSparseMatrix::ToCRSMatrix(CRSMatrix* crs_matrix) const { + CompressedRowSparseMatrix::FromTripletSparseMatrix(*this)->ToCRSMatrix( + crs_matrix); +} + void TripletSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const { dense_matrix->resize(num_rows_, num_cols_); dense_matrix->setZero(); @@ -276,8 +284,34 @@ void TripletSparseMatrix::ToTextFile(FILE* file) const { } } +std::unique_ptr TripletSparseMatrix::CreateFromTextFile( + FILE* file) { + CHECK(file != nullptr); + int num_rows = 0; + int num_cols = 0; + std::vector rows; + std::vector cols; + std::vector values; + while (true) { + int row, col; + double value; + if (fscanf(file, "%d %d %lf", &row, &col, &value) != 3) { + break; + } + rows.push_back(row); + cols.push_back(col); + values.push_back(value); + num_rows = std::max(num_rows, row + 1); + num_cols = std::max(num_cols, col + 1); + } + VLOG(1) << "Read " << rows.size() << " nonzeros from file."; + return std::make_unique( + num_rows, num_cols, rows, cols, values); +} + std::unique_ptr TripletSparseMatrix::CreateRandomMatrix( - const TripletSparseMatrix::RandomMatrixOptions& options) { + const TripletSparseMatrix::RandomMatrixOptions& options, + std::mt19937& prng) { CHECK_GT(options.num_rows, 0); CHECK_GT(options.num_cols, 0); CHECK_GT(options.density, 0.0); @@ -286,16 +320,18 @@ std::unique_ptr TripletSparseMatrix::CreateRandomMatrix( std::vector rows; std::vector cols; std::vector values; + std::uniform_real_distribution uniform01(0.0, 1.0); + std::normal_distribution standard_normal; while (rows.empty()) { rows.clear(); cols.clear(); values.clear(); for (int r = 0; r < options.num_rows; ++r) { for (int c = 0; c < options.num_cols; ++c) { - if (RandDouble() <= options.density) { + if (uniform01(prng) <= options.density) { rows.push_back(r); cols.push_back(c); - values.push_back(RandNormal()); + values.push_back(standard_normal(prng)); } } } @@ -305,5 +341,4 @@ std::unique_ptr TripletSparseMatrix::CreateRandomMatrix( options.num_rows, 
options.num_cols, rows, cols, values); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/triplet_sparse_matrix.h b/extern/ceres/internal/ceres/triplet_sparse_matrix.h index 065c690dba3..bcb3d2bbf7e 100644 --- a/extern/ceres/internal/ceres/triplet_sparse_matrix.h +++ b/extern/ceres/internal/ceres/triplet_sparse_matrix.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,16 +32,17 @@ #define CERES_INTERNAL_TRIPLET_SPARSE_MATRIX_H_ #include +#include #include +#include "ceres/crs_matrix.h" #include "ceres/internal/disable_warnings.h" #include "ceres/internal/eigen.h" #include "ceres/internal/export.h" #include "ceres/sparse_matrix.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // An implementation of the SparseMatrix interface to store and // manipulate sparse matrices in triplet (i,j,s) form. This object is @@ -65,10 +66,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix { // Implementation of the SparseMatrix interface. 
void SetZero() final; - void RightMultiply(const double* x, double* y) const final; - void LeftMultiply(const double* x, double* y) const final; + void RightMultiplyAndAccumulate(const double* x, double* y) const final; + void LeftMultiplyAndAccumulate(const double* x, double* y) const final; void SquaredColumnNorm(double* x) const final; void ScaleColumns(const double* scale) final; + void ToCRSMatrix(CRSMatrix* matrix) const; void ToDenseMatrix(Matrix* dense_matrix) const final; void ToTextFile(FILE* file) const final; // clang-format off @@ -134,7 +136,11 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix { // normally distributed and whose structure is determined by // RandomMatrixOptions. static std::unique_ptr CreateRandomMatrix( - const TripletSparseMatrix::RandomMatrixOptions& options); + const TripletSparseMatrix::RandomMatrixOptions& options, + std::mt19937& prng); + + // Load a triplet sparse matrix from a text file. + static std::unique_ptr CreateFromTextFile(FILE* file); private: void AllocateMemory(); @@ -154,8 +160,7 @@ class CERES_NO_EXPORT TripletSparseMatrix final : public SparseMatrix { std::unique_ptr values_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/trust_region_minimizer.cc b/extern/ceres/internal/ceres/trust_region_minimizer.cc index 9ef5167ba6c..2e2b74cc2f7 100644 --- a/extern/ceres/internal/ceres/trust_region_minimizer.cc +++ b/extern/ceres/internal/ceres/trust_region_minimizer.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -42,9 +42,11 @@ #include "Eigen/Core" #include "ceres/array_utils.h" #include "ceres/coordinate_descent_minimizer.h" +#include "ceres/eigen_vector_ops.h" #include "ceres/evaluator.h" #include "ceres/file.h" #include "ceres/line_search.h" +#include "ceres/parallel_for.h" #include "ceres/stringprintf.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -59,8 +61,7 @@ } \ } while (0) -namespace ceres { -namespace internal { +namespace ceres::internal { void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, double* parameters, @@ -79,6 +80,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, ? options_.max_consecutive_nonmonotonic_steps : 0); + bool atleast_one_successful_step = false; while (FinalizeIterationAndCheckIfMinimizerCanContinue()) { iteration_start_time_in_secs_ = WallTimeInSeconds(); @@ -106,7 +108,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, ComputeCandidatePointAndEvaluateCost(); DoInnerIterationsIfNeeded(); - if (ParameterToleranceReached()) { + if (atleast_one_successful_step && ParameterToleranceReached()) { return; } @@ -115,6 +117,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, } if (IsStepSuccessful()) { + atleast_one_successful_step = true; RETURN_IF_ERROR_AND_LOG(HandleSuccessfulStep()); } else { // Declare the step unsuccessful and inform the trust region strategy. 
@@ -137,8 +140,8 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options, double* parameters, Solver::Summary* solver_summary) { options_ = options; - sort(options_.trust_region_minimizer_iterations_to_dump.begin(), - options_.trust_region_minimizer_iterations_to_dump.end()); + std::sort(options_.trust_region_minimizer_iterations_to_dump.begin(), + options_.trust_region_minimizer_iterations_to_dump.end()); parameters_ = parameters; @@ -166,7 +169,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options, num_consecutive_invalid_steps_ = 0; x_ = ConstVectorRef(parameters_, num_parameters_); - x_norm_ = x_.norm(); residuals_.resize(num_residuals_); trust_region_step_.resize(num_effective_parameters_); delta_.resize(num_effective_parameters_); @@ -180,7 +182,6 @@ void TrustRegionMinimizer::Init(const Minimizer::Options& options, // the Jacobian, we will compute and overwrite this vector. jacobian_scaling_ = Vector::Ones(num_effective_parameters_); - x_norm_ = -1; // Invalid value x_cost_ = std::numeric_limits::max(); minimum_cost_ = x_cost_; model_cost_change_ = 0.0; @@ -214,10 +215,11 @@ bool TrustRegionMinimizer::IterationZero() { } x_ = candidate_x_; - x_norm_ = x_.norm(); } if (!EvaluateGradientAndJacobian(/*new_evaluation_point=*/true)) { + solver_summary_->message = + "Initial residual and Jacobian evaluation failed."; return false; } @@ -270,7 +272,8 @@ bool TrustRegionMinimizer::EvaluateGradientAndJacobian( } // jacobian = jacobian * diag(J'J) ^{-1} - jacobian_->ScaleColumns(jacobian_scaling_.data()); + jacobian_->ScaleColumns( + jacobian_scaling_.data(), options_.context, options_.num_threads); } // The gradient exists in the local tangent space. To account for @@ -357,13 +360,13 @@ bool TrustRegionMinimizer::FinalizeIterationAndCheckIfMinimizerCanContinue() { // Compute the trust region step using the TrustRegionStrategy chosen // by the user. 
// -// If the strategy returns with LINEAR_SOLVER_FATAL_ERROR, which +// If the strategy returns with LinearSolverTerminationType::FATAL_ERROR, which // indicates an unrecoverable error, return false. This is the only // condition that returns false. // -// If the strategy returns with LINEAR_SOLVER_FAILURE, which indicates -// a numerical failure that could be recovered from by retrying -// (e.g. by increasing the strength of the regularization), we set +// If the strategy returns with LinearSolverTerminationType::FAILURE, which +// indicates a numerical failure that could be recovered from by retrying (e.g. +// by increasing the strength of the regularization), we set // iteration_summary_.step_is_valid to false and return true. // // In all other cases, we compute the decrease in the trust region @@ -395,7 +398,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() { residuals_.data(), trust_region_step_.data()); - if (strategy_summary.termination_type == LINEAR_SOLVER_FATAL_ERROR) { + if (strategy_summary.termination_type == + LinearSolverTerminationType::FATAL_ERROR) { solver_summary_->message = "Linear solver failed due to unrecoverable " "non-numeric causes. Please see the error log for clues. 
"; @@ -407,7 +411,8 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() { WallTimeInSeconds() - strategy_start_time; iteration_summary_.linear_solver_iterations = strategy_summary.num_iterations; - if (strategy_summary.termination_type == LINEAR_SOLVER_FAILURE) { + if (strategy_summary.termination_type == + LinearSolverTerminationType::FAILURE) { return true; } @@ -419,10 +424,15 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() { // = f'f/2 - 1/2 [ f'f + 2f'J * step + step' * J' * J * step] // = -f'J * step - step' * J' * J * step / 2 // = -(J * step)'(f + J * step / 2) - model_residuals_.setZero(); - jacobian_->RightMultiply(trust_region_step_.data(), model_residuals_.data()); - model_cost_change_ = - -model_residuals_.dot(residuals_ + model_residuals_ / 2.0); + ParallelSetZero(options_.context, options_.num_threads, model_residuals_); + jacobian_->RightMultiplyAndAccumulate(trust_region_step_.data(), + model_residuals_.data(), + options_.context, + options_.num_threads); + model_cost_change_ = -Dot(model_residuals_, + residuals_ + model_residuals_ / 2.0, + options_.context, + options_.num_threads); // TODO(sameeragarwal) // @@ -432,7 +442,10 @@ bool TrustRegionMinimizer::ComputeTrustRegionStep() { iteration_summary_.step_is_valid = (model_cost_change_ > 0.0); if (iteration_summary_.step_is_valid) { // Undo the Jacobian column scaling. - delta_ = (trust_region_step_.array() * jacobian_scaling_.array()).matrix(); + ParallelAssign(options_.context, + options_.num_threads, + delta_, + (trust_region_step_.array() * jacobian_scaling_.array())); num_consecutive_invalid_steps_ = 0; } @@ -702,10 +715,12 @@ bool TrustRegionMinimizer::MinTrustRegionRadiusReached() { // Solver::Options::parameter_tolerance based convergence check. bool TrustRegionMinimizer::ParameterToleranceReached() { + const double x_norm = x_.norm(); + // Compute the norm of the step in the ambient space. 
iteration_summary_.step_norm = (x_ - candidate_x_).norm(); const double step_size_tolerance = - options_.parameter_tolerance * (x_norm_ + options_.parameter_tolerance); + options_.parameter_tolerance * (x_norm + options_.parameter_tolerance); if (iteration_summary_.step_norm > step_size_tolerance) { return false; @@ -714,7 +729,7 @@ bool TrustRegionMinimizer::ParameterToleranceReached() { solver_summary_->message = StringPrintf( "Parameter tolerance reached. " "Relative step_norm: %e <= %e.", - (iteration_summary_.step_norm / (x_norm_ + options_.parameter_tolerance)), + (iteration_summary_.step_norm / (x_norm + options_.parameter_tolerance)), options_.parameter_tolerance); solver_summary_->termination_type = CONVERGENCE; if (is_not_silent_) { @@ -807,7 +822,6 @@ bool TrustRegionMinimizer::IsStepSuccessful() { // evaluator know that the step has been accepted. bool TrustRegionMinimizer::HandleSuccessfulStep() { x_ = candidate_x_; - x_norm_ = x_.norm(); // Since the step was successful, this point has already had the residual // evaluated (but not the jacobian). So indicate that to the evaluator. @@ -821,5 +835,4 @@ bool TrustRegionMinimizer::HandleSuccessfulStep() { return true; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/trust_region_minimizer.h b/extern/ceres/internal/ceres/trust_region_minimizer.h index c6fc542a063..c9cdac77850 100644 --- a/extern/ceres/internal/ceres/trust_region_minimizer.h +++ b/extern/ceres/internal/ceres/trust_region_minimizer.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/trust_region_strategy.h" #include "ceres/types.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // Generic trust region minimization algorithm. // @@ -139,8 +138,6 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer { // Scaling vector to scale the columns of the Jacobian. Vector jacobian_scaling_; - // Euclidean norm of x_. - double x_norm_; // Cost at x_. double x_cost_; // Minimum cost encountered up till now. @@ -160,8 +157,7 @@ class CERES_NO_EXPORT TrustRegionMinimizer final : public Minimizer { int num_consecutive_invalid_steps_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/trust_region_preprocessor.cc b/extern/ceres/internal/ceres/trust_region_preprocessor.cc index edba47d88a5..e07e369a97d 100644 --- a/extern/ceres/internal/ceres/trust_region_preprocessor.cc +++ b/extern/ceres/internal/ceres/trust_region_preprocessor.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -32,6 +32,7 @@ #include #include +#include #include "ceres/callbacks.h" #include "ceres/context_impl.h" @@ -48,10 +49,7 @@ #include "ceres/trust_region_strategy.h" #include "ceres/wall_time.h" -namespace ceres { -namespace internal { - -using std::vector; +namespace ceres::internal { namespace { @@ -59,7 +57,8 @@ std::shared_ptr CreateDefaultLinearSolverOrdering( const Program& program) { std::shared_ptr ordering = std::make_shared(); - const vector& parameter_blocks = program.parameter_blocks(); + const std::vector& parameter_blocks = + program.parameter_blocks(); for (auto* parameter_block : parameter_blocks) { ordering->AddElementToGroup( const_cast(parameter_block->user_state()), 0); @@ -114,6 +113,7 @@ bool ReorderProgram(PreprocessedProblem* pp) { return ReorderProgramForSchurTypeLinearSolver( options.linear_solver_type, options.sparse_linear_algebra_library_type, + options.linear_solver_ordering_type, pp->problem->parameter_map(), options.linear_solver_ordering.get(), pp->reduced_program.get(), @@ -124,6 +124,7 @@ bool ReorderProgram(PreprocessedProblem* pp) { !options.dynamic_sparsity) { return ReorderProgramForSparseCholesky( options.sparse_linear_algebra_library_type, + options.linear_solver_ordering_type, *options.linear_solver_ordering, 0, /* use all the rows of the jacobian */ pp->reduced_program.get(), @@ -139,6 +140,7 @@ bool ReorderProgram(PreprocessedProblem* pp) { return ReorderProgramForSparseCholesky( options.sparse_linear_algebra_library_type, + options.linear_solver_ordering_type, *options.linear_solver_ordering, pp->linear_solver_options.subset_preconditioner_start_row_block, pp->reduced_program.get(), @@ -197,10 +199,16 @@ bool SetupLinearSolver(PreprocessedProblem* pp) { options.max_linear_solver_iterations; pp->linear_solver_options.type = options.linear_solver_type; pp->linear_solver_options.preconditioner_type = 
options.preconditioner_type; + pp->linear_solver_options.use_spse_initialization = + options.use_spse_initialization; + pp->linear_solver_options.spse_tolerance = options.spse_tolerance; + pp->linear_solver_options.max_num_spse_iterations = + options.max_num_spse_iterations; pp->linear_solver_options.visibility_clustering_type = options.visibility_clustering_type; pp->linear_solver_options.sparse_linear_algebra_library_type = options.sparse_linear_algebra_library_type; + pp->linear_solver_options.dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; pp->linear_solver_options.use_explicit_schur_complement = @@ -211,7 +219,6 @@ bool SetupLinearSolver(PreprocessedProblem* pp) { pp->linear_solver_options.max_num_refinement_iterations = options.max_num_refinement_iterations; pp->linear_solver_options.num_threads = options.num_threads; - pp->linear_solver_options.use_postordering = options.use_postordering; pp->linear_solver_options.context = pp->problem->context(); if (IsSchurType(pp->linear_solver_options.type)) { @@ -225,26 +232,23 @@ bool SetupLinearSolver(PreprocessedProblem* pp) { if (pp->linear_solver_options.elimination_groups.size() == 1) { pp->linear_solver_options.elimination_groups.push_back(0); } + } - if (options.linear_solver_type == SPARSE_SCHUR) { - // When using SPARSE_SCHUR, we ignore the user's postordering - // preferences in certain cases. - // - // 1. SUITE_SPARSE is the sparse linear algebra library requested - // but cholmod_camd is not available. - // 2. CX_SPARSE is the sparse linear algebra library requested. - // - // This ensures that the linear solver does not assume that a - // fill-reducing pre-ordering has been done. - // - // TODO(sameeragarwal): Implement the reordering of parameter - // blocks for CX_SPARSE. 
- if ((options.sparse_linear_algebra_library_type == SUITE_SPARSE && - !SuiteSparse:: - IsConstrainedApproximateMinimumDegreeOrderingAvailable()) || - (options.sparse_linear_algebra_library_type == CX_SPARSE)) { - pp->linear_solver_options.use_postordering = true; - } + if (!options.dynamic_sparsity && + AreJacobianColumnsOrdered(options.linear_solver_type, + options.preconditioner_type, + options.sparse_linear_algebra_library_type, + options.linear_solver_ordering_type)) { + pp->linear_solver_options.ordering_type = OrderingType::NATURAL; + } else { + if (options.linear_solver_ordering_type == ceres::AMD) { + pp->linear_solver_options.ordering_type = OrderingType::AMD; + } else if (options.linear_solver_ordering_type == ceres::NESDIS) { + pp->linear_solver_options.ordering_type = OrderingType::NESDIS; + } else { + LOG(FATAL) << "Congratulations you have found a bug in Ceres Solver." + << " Please report this to the maintainers. : " + << options.linear_solver_ordering_type; } } @@ -257,6 +261,8 @@ bool SetupEvaluator(PreprocessedProblem* pp) { const Solver::Options& options = pp->options; pp->evaluator_options = Evaluator::Options(); pp->evaluator_options.linear_solver_type = options.linear_solver_type; + pp->evaluator_options.sparse_linear_algebra_library_type = + options.sparse_linear_algebra_library_type; pp->evaluator_options.num_eliminate_blocks = 0; if (IsSchurType(options.linear_solver_type)) { pp->evaluator_options.num_eliminate_blocks = @@ -330,13 +336,19 @@ bool SetupInnerIterationMinimizer(PreprocessedProblem* pp) { } // Configure and create a TrustRegionMinimizer object. 
-void SetupMinimizerOptions(PreprocessedProblem* pp) { +bool SetupMinimizerOptions(PreprocessedProblem* pp) { const Solver::Options& options = pp->options; SetupCommonMinimizerOptions(pp); pp->minimizer_options.is_constrained = pp->reduced_program->IsBoundsConstrained(); pp->minimizer_options.jacobian = pp->evaluator->CreateJacobian(); + if (pp->minimizer_options.jacobian == nullptr) { + pp->error = + "Unable to create Jacobian matrix. Likely because it is too large."; + return false; + } + pp->minimizer_options.inner_iteration_minimizer = pp->inner_iteration_minimizer; @@ -349,9 +361,12 @@ void SetupMinimizerOptions(PreprocessedProblem* pp) { strategy_options.trust_region_strategy_type = options.trust_region_strategy_type; strategy_options.dogleg_type = options.dogleg_type; + strategy_options.context = pp->problem->context(); + strategy_options.num_threads = options.num_threads; pp->minimizer_options.trust_region_strategy = TrustRegionStrategy::Create(strategy_options); CHECK(pp->minimizer_options.trust_region_strategy != nullptr); + return true; } } // namespace @@ -387,9 +402,7 @@ bool TrustRegionPreprocessor::Preprocess(const Solver::Options& options, return false; } - SetupMinimizerOptions(pp); - return true; + return SetupMinimizerOptions(pp); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/trust_region_preprocessor.h b/extern/ceres/internal/ceres/trust_region_preprocessor.h index 26ef8fad37d..14febda010d 100644 --- a/extern/ceres/internal/ceres/trust_region_preprocessor.h +++ b/extern/ceres/internal/ceres/trust_region_preprocessor.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "ceres/internal/export.h" #include "ceres/preprocessor.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor { public: @@ -45,8 +44,7 @@ class CERES_NO_EXPORT TrustRegionPreprocessor final : public Preprocessor { PreprocessedProblem* preprocessed_problem) override; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/trust_region_step_evaluator.cc b/extern/ceres/internal/ceres/trust_region_step_evaluator.cc index 19045ae0070..a2333a01379 100644 --- a/extern/ceres/internal/ceres/trust_region_step_evaluator.cc +++ b/extern/ceres/internal/ceres/trust_region_step_evaluator.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,7 @@ #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { TrustRegionStepEvaluator::TrustRegionStepEvaluator( const double initial_cost, const int max_consecutive_nonmonotonic_steps) @@ -111,5 +110,4 @@ void TrustRegionStepEvaluator::StepAccepted(const double cost, } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/trust_region_step_evaluator.h b/extern/ceres/internal/ceres/trust_region_step_evaluator.h index 8e0c4e91f49..6df04274c30 100644 --- a/extern/ceres/internal/ceres/trust_region_step_evaluator.h +++ b/extern/ceres/internal/ceres/trust_region_step_evaluator.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2016 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,7 @@ #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { // The job of the TrustRegionStepEvaluator is to evaluate the quality // of a step, i.e., how the cost of a step compares with the reduction @@ -118,7 +117,6 @@ class CERES_NO_EXPORT TrustRegionStepEvaluator { int num_consecutive_nonmonotonic_steps_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_TRUST_REGION_STEP_EVALUATOR_H_ diff --git a/extern/ceres/internal/ceres/trust_region_strategy.cc b/extern/ceres/internal/ceres/trust_region_strategy.cc index 1096cd3c8aa..da5a33724d0 100644 --- a/extern/ceres/internal/ceres/trust_region_strategy.cc +++ b/extern/ceres/internal/ceres/trust_region_strategy.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. 
All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ #include "ceres/dogleg_strategy.h" #include "ceres/levenberg_marquardt_strategy.h" -namespace ceres { -namespace internal { +namespace ceres::internal { TrustRegionStrategy::~TrustRegionStrategy() = default; @@ -59,5 +58,4 @@ std::unique_ptr TrustRegionStrategy::Create( return nullptr; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/trust_region_strategy.h b/extern/ceres/internal/ceres/trust_region_strategy.h index 33086cafb52..0e0a301bbf4 100644 --- a/extern/ceres/internal/ceres/trust_region_strategy.h +++ b/extern/ceres/internal/ceres/trust_region_strategy.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,7 @@ #include "ceres/internal/export.h" #include "ceres/linear_solver.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class LinearSolver; class SparseMatrix; @@ -74,6 +73,9 @@ class CERES_NO_EXPORT TrustRegionStrategy { // Further specify which dogleg method to use DoglegType dogleg_type = TRADITIONAL_DOGLEG; + + ContextImpl* context = nullptr; + int num_threads = 1; }; // Factory. @@ -112,7 +114,8 @@ class CERES_NO_EXPORT TrustRegionStrategy { int num_iterations = -1; // Status of the linear solver used to solve the Newton system. - LinearSolverTerminationType termination_type = LINEAR_SOLVER_FAILURE; + LinearSolverTerminationType termination_type = + LinearSolverTerminationType::FAILURE; }; // Use the current radius to solve for the trust region step. 
@@ -141,8 +144,7 @@ class CERES_NO_EXPORT TrustRegionStrategy { virtual double Radius() const = 0; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/types.cc b/extern/ceres/internal/ceres/types.cc index 48242678b46..e0005600933 100644 --- a/extern/ceres/internal/ceres/types.cc +++ b/extern/ceres/internal/ceres/types.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,14 +39,12 @@ namespace ceres { -using std::string; - // clang-format off #define CASESTR(x) case x: return #x #define STRENUM(x) if (value == #x) { *type = x; return true; } // clang-format on -static void UpperCase(string* input) { +static void UpperCase(std::string* input) { std::transform(input->begin(), input->end(), input->begin(), ::toupper); } @@ -64,7 +62,7 @@ const char* LinearSolverTypeToString(LinearSolverType type) { } } -bool StringToLinearSolverType(string value, LinearSolverType* type) { +bool StringToLinearSolverType(std::string value, LinearSolverType* type) { UpperCase(&value); STRENUM(DENSE_NORMAL_CHOLESKY); STRENUM(DENSE_QR); @@ -81,6 +79,7 @@ const char* PreconditionerTypeToString(PreconditionerType type) { CASESTR(IDENTITY); CASESTR(JACOBI); CASESTR(SCHUR_JACOBI); + CASESTR(SCHUR_POWER_SERIES_EXPANSION); CASESTR(CLUSTER_JACOBI); CASESTR(CLUSTER_TRIDIAGONAL); CASESTR(SUBSET); @@ -89,11 +88,12 @@ const char* PreconditionerTypeToString(PreconditionerType type) { } } -bool StringToPreconditionerType(string value, PreconditionerType* type) { +bool StringToPreconditionerType(std::string value, PreconditionerType* type) { UpperCase(&value); STRENUM(IDENTITY); STRENUM(JACOBI); STRENUM(SCHUR_JACOBI); + 
STRENUM(SCHUR_POWER_SERIES_EXPANSION); STRENUM(CLUSTER_JACOBI); STRENUM(CLUSTER_TRIDIAGONAL); STRENUM(SUBSET); @@ -104,9 +104,9 @@ const char* SparseLinearAlgebraLibraryTypeToString( SparseLinearAlgebraLibraryType type) { switch (type) { CASESTR(SUITE_SPARSE); - CASESTR(CX_SPARSE); CASESTR(EIGEN_SPARSE); CASESTR(ACCELERATE_SPARSE); + CASESTR(CUDA_SPARSE); CASESTR(NO_SPARSE); default: return "UNKNOWN"; @@ -114,16 +114,33 @@ const char* SparseLinearAlgebraLibraryTypeToString( } bool StringToSparseLinearAlgebraLibraryType( - string value, SparseLinearAlgebraLibraryType* type) { + std::string value, SparseLinearAlgebraLibraryType* type) { UpperCase(&value); STRENUM(SUITE_SPARSE); - STRENUM(CX_SPARSE); STRENUM(EIGEN_SPARSE); STRENUM(ACCELERATE_SPARSE); + STRENUM(CUDA_SPARSE); STRENUM(NO_SPARSE); return false; } +const char* LinearSolverOrderingTypeToString(LinearSolverOrderingType type) { + switch (type) { + CASESTR(AMD); + CASESTR(NESDIS); + default: + return "UNKNOWN"; + } +} + +bool StringToLinearSolverOrderingType(std::string value, + LinearSolverOrderingType* type) { + UpperCase(&value); + STRENUM(AMD); + STRENUM(NESDIS); + return false; +} + const char* DenseLinearAlgebraLibraryTypeToString( DenseLinearAlgebraLibraryType type) { switch (type) { @@ -136,7 +153,7 @@ const char* DenseLinearAlgebraLibraryTypeToString( } bool StringToDenseLinearAlgebraLibraryType( - string value, DenseLinearAlgebraLibraryType* type) { + std::string value, DenseLinearAlgebraLibraryType* type) { UpperCase(&value); STRENUM(EIGEN); STRENUM(LAPACK); @@ -153,7 +170,7 @@ const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) { } } -bool StringToTrustRegionStrategyType(string value, +bool StringToTrustRegionStrategyType(std::string value, TrustRegionStrategyType* type) { UpperCase(&value); STRENUM(LEVENBERG_MARQUARDT); @@ -170,7 +187,7 @@ const char* DoglegTypeToString(DoglegType type) { } } -bool StringToDoglegType(string value, DoglegType* type) { +bool 
StringToDoglegType(std::string value, DoglegType* type) { UpperCase(&value); STRENUM(TRADITIONAL_DOGLEG); STRENUM(SUBSPACE_DOGLEG); @@ -186,7 +203,7 @@ const char* MinimizerTypeToString(MinimizerType type) { } } -bool StringToMinimizerType(string value, MinimizerType* type) { +bool StringToMinimizerType(std::string value, MinimizerType* type) { UpperCase(&value); STRENUM(TRUST_REGION); STRENUM(LINE_SEARCH); @@ -204,7 +221,7 @@ const char* LineSearchDirectionTypeToString(LineSearchDirectionType type) { } } -bool StringToLineSearchDirectionType(string value, +bool StringToLineSearchDirectionType(std::string value, LineSearchDirectionType* type) { UpperCase(&value); STRENUM(STEEPEST_DESCENT); @@ -223,7 +240,7 @@ const char* LineSearchTypeToString(LineSearchType type) { } } -bool StringToLineSearchType(string value, LineSearchType* type) { +bool StringToLineSearchType(std::string value, LineSearchType* type) { UpperCase(&value); STRENUM(ARMIJO); STRENUM(WOLFE); @@ -241,7 +258,7 @@ const char* LineSearchInterpolationTypeToString( } } -bool StringToLineSearchInterpolationType(string value, +bool StringToLineSearchInterpolationType(std::string value, LineSearchInterpolationType* type) { UpperCase(&value); STRENUM(BISECTION); @@ -262,7 +279,7 @@ const char* NonlinearConjugateGradientTypeToString( } bool StringToNonlinearConjugateGradientType( - string value, NonlinearConjugateGradientType* type) { + std::string value, NonlinearConjugateGradientType* type) { UpperCase(&value); STRENUM(FLETCHER_REEVES); STRENUM(POLAK_RIBIERE); @@ -279,7 +296,7 @@ const char* CovarianceAlgorithmTypeToString(CovarianceAlgorithmType type) { } } -bool StringToCovarianceAlgorithmType(string value, +bool StringToCovarianceAlgorithmType(std::string value, CovarianceAlgorithmType* type) { UpperCase(&value); STRENUM(DENSE_SVD); @@ -297,7 +314,8 @@ const char* NumericDiffMethodTypeToString(NumericDiffMethodType type) { } } -bool StringToNumericDiffMethodType(string value, NumericDiffMethodType* type) 
{ +bool StringToNumericDiffMethodType(std::string value, + NumericDiffMethodType* type) { UpperCase(&value); STRENUM(CENTRAL); STRENUM(FORWARD); @@ -314,7 +332,7 @@ const char* VisibilityClusteringTypeToString(VisibilityClusteringType type) { } } -bool StringToVisibilityClusteringType(string value, +bool StringToVisibilityClusteringType(std::string value, VisibilityClusteringType* type) { UpperCase(&value); STRENUM(CANONICAL_VIEWS); @@ -387,14 +405,6 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable( #endif } - if (type == CX_SPARSE) { -#ifdef CERES_NO_CXSPARSE - return false; -#else - return true; -#endif - } - if (type == ACCELERATE_SPARSE) { #ifdef CERES_NO_ACCELERATE_SPARSE return false; @@ -411,6 +421,18 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable( #endif } + if (type == CUDA_SPARSE) { +#ifdef CERES_NO_CUDA + return false; +#else + return true; +#endif + } + + if (type == NO_SPARSE) { + return true; + } + LOG(WARNING) << "Unknown sparse linear algebra library " << type; return false; } diff --git a/extern/ceres/internal/ceres/visibility.cc b/extern/ceres/internal/ceres/visibility.cc index f666ce0c4bb..6c10fb250b0 100644 --- a/extern/ceres/internal/ceres/visibility.cc +++ b/extern/ceres/internal/ceres/visibility.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -44,18 +44,11 @@ #include "ceres/pair_hash.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::make_pair; -using std::max; -using std::pair; -using std::set; -using std::vector; +namespace ceres::internal { void ComputeVisibility(const CompressedRowBlockStructure& block_structure, const int num_eliminate_blocks, - vector>* visibility) { + std::vector>* visibility) { CHECK(visibility != nullptr); // Clear the visibility vector and resize it to hold a @@ -64,7 +57,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure, visibility->resize(block_structure.cols.size() - num_eliminate_blocks); for (const auto& row : block_structure.rows) { - const vector& cells = row.cells; + const std::vector& cells = row.cells; int block_id = cells[0].block_id; // If the first block is not an e_block, then skip this row block. if (block_id >= num_eliminate_blocks) { @@ -81,7 +74,7 @@ void ComputeVisibility(const CompressedRowBlockStructure& block_structure, } std::unique_ptr> CreateSchurComplementGraph( - const vector>& visibility) { + const std::vector>& visibility) { const time_t start_time = time(nullptr); // Compute the number of e_blocks/point blocks. Since the visibility // set for each e_block/camera contains the set of e_blocks/points @@ -89,7 +82,7 @@ std::unique_ptr> CreateSchurComplementGraph( int num_points = 0; for (const auto& visible : visibility) { if (!visible.empty()) { - num_points = max(num_points, (*visible.rbegin()) + 1); + num_points = std::max(num_points, (*visible.rbegin()) + 1); } } @@ -98,9 +91,9 @@ std::unique_ptr> CreateSchurComplementGraph( // cameras. However, to compute the sparsity structure of the Schur // Complement efficiently, its better to have the point->camera // mapping. 
- vector> inverse_visibility(num_points); + std::vector> inverse_visibility(num_points); for (int i = 0; i < visibility.size(); i++) { - const set& visibility_set = visibility[i]; + const std::set& visibility_set = visibility[i]; for (int v : visibility_set) { inverse_visibility[v].insert(i); } @@ -108,7 +101,7 @@ std::unique_ptr> CreateSchurComplementGraph( // Map from camera pairs to number of points visible to both cameras // in the pair. - std::unordered_map, int, pair_hash> camera_pairs; + std::unordered_map, int, pair_hash> camera_pairs; // Count the number of points visible to each camera/f_block pair. for (const auto& inverse_visibility_set : inverse_visibility) { @@ -117,7 +110,7 @@ std::unique_ptr> CreateSchurComplementGraph( ++camera1) { auto camera2 = camera1; for (++camera2; camera2 != inverse_visibility_set.end(); ++camera2) { - ++(camera_pairs[make_pair(*camera1, *camera2)]); + ++(camera_pairs[std::make_pair(*camera1, *camera2)]); } } } @@ -151,5 +144,4 @@ std::unique_ptr> CreateSchurComplementGraph( return graph; } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/visibility.h b/extern/ceres/internal/ceres/visibility.h index d8f6968d98f..2e5f4fc33c0 100644 --- a/extern/ceres/internal/ceres/visibility.h +++ b/extern/ceres/internal/ceres/visibility.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,7 @@ #include "ceres/internal/disable_warnings.h" #include "ceres/internal/export.h" -namespace ceres { -namespace internal { +namespace ceres::internal { struct CompressedRowBlockStructure; @@ -77,8 +76,7 @@ CERES_NO_EXPORT void ComputeVisibility( CERES_NO_EXPORT std::unique_ptr> CreateSchurComplementGraph( const std::vector>& visibility); -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h" diff --git a/extern/ceres/internal/ceres/visibility_based_preconditioner.cc b/extern/ceres/internal/ceres/visibility_based_preconditioner.cc index 831a8663027..42e8a6ed67d 100644 --- a/extern/ceres/internal/ceres/visibility_based_preconditioner.cc +++ b/extern/ceres/internal/ceres/visibility_based_preconditioner.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2022 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include @@ -50,14 +52,7 @@ #include "ceres/visibility.h" #include "glog/logging.h" -namespace ceres { -namespace internal { - -using std::make_pair; -using std::pair; -using std::set; -using std::swap; -using std::vector; +namespace ceres::internal { // TODO(sameeragarwal): Currently these are magic weights for the // preconditioner construction. 
Move these higher up into the Options @@ -82,10 +77,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner( CHECK(options_.context != nullptr); // Vector of camera block sizes - block_size_.resize(num_blocks_); - for (int i = 0; i < num_blocks_; ++i) { - block_size_[i] = bs.cols[i + options_.elimination_groups[0]].size; - } + blocks_ = Tail(bs.cols, bs.cols.size() - options_.elimination_groups[0]); const time_t start_time = time(nullptr); switch (options_.type) { @@ -107,14 +99,7 @@ VisibilityBasedPreconditioner::VisibilityBasedPreconditioner( LinearSolver::Options sparse_cholesky_options; sparse_cholesky_options.sparse_linear_algebra_library_type = options_.sparse_linear_algebra_library_type; - - // The preconditioner's sparsity is not available in the - // preprocessor, so the columns of the Jacobian have not been - // reordered to minimize fill in when computing its sparse Cholesky - // factorization. So we must tell the SparseCholesky object to - // perform approximate minimum-degree reordering, which is done by - // setting use_postordering to true. - sparse_cholesky_options.use_postordering = true; + sparse_cholesky_options.ordering_type = options_.ordering_type; sparse_cholesky_ = SparseCholesky::Create(sparse_cholesky_options); const time_t init_time = time(nullptr); @@ -132,13 +117,13 @@ VisibilityBasedPreconditioner::~VisibilityBasedPreconditioner() = default; // preconditioner matrix. 
void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity( const CompressedRowBlockStructure& bs) { - vector> visibility; + std::vector> visibility; ComputeVisibility(bs, options_.elimination_groups[0], &visibility); CHECK_EQ(num_blocks_, visibility.size()); ClusterCameras(visibility); cluster_pairs_.clear(); for (int i = 0; i < num_clusters_; ++i) { - cluster_pairs_.insert(make_pair(i, i)); + cluster_pairs_.insert(std::make_pair(i, i)); } } @@ -150,7 +135,7 @@ void VisibilityBasedPreconditioner::ComputeClusterJacobiSparsity( // of edges in this forest are the cluster pairs. void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity( const CompressedRowBlockStructure& bs) { - vector> visibility; + std::vector> visibility; ComputeVisibility(bs, options_.elimination_groups[0], &visibility); CHECK_EQ(num_blocks_, visibility.size()); ClusterCameras(visibility); @@ -159,7 +144,7 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity( // edges are the number of 3D points/e_blocks visible in both the // clusters at the ends of the edge. Return an approximate degree-2 // maximum spanning forest of this graph. - vector> cluster_visibility; + std::vector> cluster_visibility; ComputeClusterVisibility(visibility, &cluster_visibility); auto cluster_graph = CreateClusterGraph(cluster_visibility); CHECK(cluster_graph != nullptr); @@ -172,8 +157,8 @@ void VisibilityBasedPreconditioner::ComputeClusterTridiagonalSparsity( void VisibilityBasedPreconditioner::InitStorage( const CompressedRowBlockStructure& bs) { ComputeBlockPairsInPreconditioner(bs); - m_ = std::make_unique(block_size_, - block_pairs_); + m_ = std::make_unique( + blocks_, block_pairs_, options_.context, options_.num_threads); } // Call the canonical views algorithm and cluster the cameras based on @@ -183,14 +168,14 @@ void VisibilityBasedPreconditioner::InitStorage( // The cluster_membership_ vector is updated to indicate cluster // memberships for each camera block. 
void VisibilityBasedPreconditioner::ClusterCameras( - const vector>& visibility) { + const std::vector>& visibility) { auto schur_complement_graph = CreateSchurComplementGraph(visibility); CHECK(schur_complement_graph != nullptr); std::unordered_map membership; if (options_.visibility_clustering_type == CANONICAL_VIEWS) { - vector centers; + std::vector centers; CanonicalViewsClusteringOptions clustering_options; clustering_options.size_penalty_weight = kCanonicalViewsSizePenaltyWeight; clustering_options.similarity_penalty_weight = @@ -236,7 +221,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner( const CompressedRowBlockStructure& bs) { block_pairs_.clear(); for (int i = 0; i < num_blocks_; ++i) { - block_pairs_.insert(make_pair(i, i)); + block_pairs_.insert(std::make_pair(i, i)); } int r = 0; @@ -264,7 +249,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner( break; } - set f_blocks; + std::set f_blocks; for (; r < num_row_blocks; ++r) { const CompressedRow& row = bs.rows[r]; if (row.cells.front().block_id != e_block_id) { @@ -303,7 +288,7 @@ void VisibilityBasedPreconditioner::ComputeBlockPairsInPreconditioner( const int block2 = cell.block_id - num_eliminate_blocks; if (block1 <= block2) { if (IsBlockPairInPreconditioner(block1, block2)) { - block_pairs_.insert(make_pair(block1, block2)); + block_pairs_.insert(std::make_pair(block1, block2)); } } } @@ -354,7 +339,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A, // scaling is not needed, which is quite often in our experience. LinearSolverTerminationType status = Factorize(); - if (status == LINEAR_SOLVER_FATAL_ERROR) { + if (status == LinearSolverTerminationType::FATAL_ERROR) { return false; } @@ -363,7 +348,8 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A, // belong to the edges of the degree-2 forest. In the CLUSTER_JACOBI // case, the preconditioner is guaranteed to be positive // semidefinite. 
- if (status == LINEAR_SOLVER_FAILURE && options_.type == CLUSTER_TRIDIAGONAL) { + if (status == LinearSolverTerminationType::FAILURE && + options_.type == CLUSTER_TRIDIAGONAL) { VLOG(1) << "Unscaled factorization failed. Retrying with off-diagonal " << "scaling"; ScaleOffDiagonalCells(); @@ -371,7 +357,7 @@ bool VisibilityBasedPreconditioner::UpdateImpl(const BlockSparseMatrix& A, } VLOG(2) << "Compute time: " << time(nullptr) - start_time; - return (status == LINEAR_SOLVER_SUCCESS); + return (status == LinearSolverTerminationType::SUCCESS); } // Consider the preconditioner matrix as meta-block matrix, whose @@ -399,35 +385,44 @@ void VisibilityBasedPreconditioner::ScaleOffDiagonalCells() { // dominance. See Lemma 1 in "Visibility Based Preconditioning // For Bundle Adjustment". MatrixRef m(cell_info->values, row_stride, col_stride); - m.block(r, c, block_size_[block1], block_size_[block2]) *= 0.5; + m.block(r, c, blocks_[block1].size, blocks_[block2].size) *= 0.5; } } // Compute the sparse Cholesky factorization of the preconditioner // matrix. LinearSolverTerminationType VisibilityBasedPreconditioner::Factorize() { - // Extract the TripletSparseMatrix that is used for actually storing + // Extract the BlockSparseMatrix that is used for actually storing // S and convert it into a CompressedRowSparseMatrix. 
- const TripletSparseMatrix* tsm = - down_cast(m_.get())->mutable_matrix(); - - std::unique_ptr lhs; + const BlockSparseMatrix* bsm = + down_cast(m_.get())->matrix(); const CompressedRowSparseMatrix::StorageType storage_type = sparse_cholesky_->StorageType(); - if (storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR) { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrix(*tsm); - lhs->set_storage_type(CompressedRowSparseMatrix::UPPER_TRIANGULAR); + if (storage_type == + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) { + if (!m_crs_) { + m_crs_ = bsm->ToCompressedRowSparseMatrix(); + m_crs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrix(m_crs_.get()); + } } else { - lhs = CompressedRowSparseMatrix::FromTripletSparseMatrixTransposed(*tsm); - lhs->set_storage_type(CompressedRowSparseMatrix::LOWER_TRIANGULAR); + if (!m_crs_) { + m_crs_ = bsm->ToCompressedRowSparseMatrixTranspose(); + m_crs_->set_storage_type( + CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR); + } else { + bsm->UpdateCompressedRowSparseMatrixTranspose(m_crs_.get()); + } } std::string message; - return sparse_cholesky_->Factorize(lhs.get(), &message); + return sparse_cholesky_->Factorize(m_crs_.get(), &message); } -void VisibilityBasedPreconditioner::RightMultiply(const double* x, - double* y) const { +void VisibilityBasedPreconditioner::RightMultiplyAndAccumulate( + const double* x, double* y) const { CHECK(x != nullptr); CHECK(y != nullptr); CHECK(sparse_cholesky_ != nullptr); @@ -445,9 +440,9 @@ bool VisibilityBasedPreconditioner::IsBlockPairInPreconditioner( int cluster1 = cluster_membership_[block1]; int cluster2 = cluster_membership_[block2]; if (cluster1 > cluster2) { - swap(cluster1, cluster2); + std::swap(cluster1, cluster2); } - return (cluster_pairs_.count(make_pair(cluster1, cluster2)) > 0); + return (cluster_pairs_.count(std::make_pair(cluster1, cluster2)) > 0); } bool 
VisibilityBasedPreconditioner::IsBlockPairOffDiagonal( @@ -459,7 +454,7 @@ bool VisibilityBasedPreconditioner::IsBlockPairOffDiagonal( // each vertex. void VisibilityBasedPreconditioner::ForestToClusterPairs( const WeightedGraph& forest, - std::unordered_set, pair_hash>* cluster_pairs) const { + std::unordered_set, pair_hash>* cluster_pairs) const { CHECK(cluster_pairs != nullptr); cluster_pairs->clear(); const std::unordered_set& vertices = forest.vertices(); @@ -468,11 +463,11 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs( // Add all the cluster pairs corresponding to the edges in the // forest. for (const int cluster1 : vertices) { - cluster_pairs->insert(make_pair(cluster1, cluster1)); + cluster_pairs->insert(std::make_pair(cluster1, cluster1)); const std::unordered_set& neighbors = forest.Neighbors(cluster1); for (const int cluster2 : neighbors) { if (cluster1 < cluster2) { - cluster_pairs->insert(make_pair(cluster1, cluster2)); + cluster_pairs->insert(std::make_pair(cluster1, cluster2)); } } } @@ -482,8 +477,8 @@ void VisibilityBasedPreconditioner::ForestToClusterPairs( // of all its cameras. In other words, the set of points visible to // any camera in the cluster. void VisibilityBasedPreconditioner::ComputeClusterVisibility( - const vector>& visibility, - vector>* cluster_visibility) const { + const std::vector>& visibility, + std::vector>* cluster_visibility) const { CHECK(cluster_visibility != nullptr); cluster_visibility->resize(0); cluster_visibility->resize(num_clusters_); @@ -499,7 +494,7 @@ void VisibilityBasedPreconditioner::ComputeClusterVisibility( // vertices. 
std::unique_ptr> VisibilityBasedPreconditioner::CreateClusterGraph( - const vector>& cluster_visibility) const { + const std::vector>& cluster_visibility) const { auto cluster_graph = std::make_unique>(); for (int i = 0; i < num_clusters_; ++i) { @@ -507,15 +502,15 @@ VisibilityBasedPreconditioner::CreateClusterGraph( } for (int i = 0; i < num_clusters_; ++i) { - const set& cluster_i = cluster_visibility[i]; + const std::set& cluster_i = cluster_visibility[i]; for (int j = i + 1; j < num_clusters_; ++j) { - vector intersection; - const set& cluster_j = cluster_visibility[j]; - set_intersection(cluster_i.begin(), - cluster_i.end(), - cluster_j.begin(), - cluster_j.end(), - back_inserter(intersection)); + std::vector intersection; + const std::set& cluster_j = cluster_visibility[j]; + std::set_intersection(cluster_i.begin(), + cluster_i.end(), + cluster_j.begin(), + cluster_j.end(), + std::back_inserter(intersection)); if (intersection.size() > 0) { // Clusters interact strongly when they share a large number @@ -540,7 +535,7 @@ VisibilityBasedPreconditioner::CreateClusterGraph( // of integers so that the cluster ids are in [0, num_clusters_). 
void VisibilityBasedPreconditioner::FlattenMembershipMap( const std::unordered_map& membership_map, - vector* membership_vector) const { + std::vector* membership_vector) const { CHECK(membership_vector != nullptr); membership_vector->resize(0); membership_vector->resize(num_blocks_, -1); @@ -576,5 +571,4 @@ void VisibilityBasedPreconditioner::FlattenMembershipMap( } } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/visibility_based_preconditioner.h b/extern/ceres/internal/ceres/visibility_based_preconditioner.h index 8079dc3f3ce..d2d4aada0b5 100644 --- a/extern/ceres/internal/ceres/visibility_based_preconditioner.h +++ b/extern/ceres/internal/ceres/visibility_based_preconditioner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2017 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -55,14 +55,14 @@ #include #include +#include "ceres/block_structure.h" #include "ceres/graph.h" #include "ceres/linear_solver.h" #include "ceres/pair_hash.h" #include "ceres/preconditioner.h" #include "ceres/sparse_cholesky.h" -namespace ceres { -namespace internal { +namespace ceres::internal { class BlockRandomAccessSparseMatrix; class BlockSparseMatrix; @@ -123,7 +123,7 @@ class SchurEliminatorBase; // VisibilityBasedPreconditioner preconditioner( // *A.block_structure(), options); // preconditioner.Update(A, nullptr); -// preconditioner.RightMultiply(x, y); +// preconditioner.RightMultiplyAndAccumulate(x, y); class CERES_NO_EXPORT VisibilityBasedPreconditioner : public BlockSparseMatrixPreconditioner { public: @@ -141,7 +141,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner ~VisibilityBasedPreconditioner() override; // Preconditioner interface - void RightMultiply(const double* x, double* y) const final; + void 
RightMultiplyAndAccumulate(const double* x, double* y) const final; int num_rows() const final; friend class VisibilityBasedPreconditionerTest; @@ -177,7 +177,7 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner int num_clusters_; // Sizes of the blocks in the schur complement. - std::vector block_size_; + std::vector blocks_; // Mapping from cameras to clusters. std::vector cluster_membership_; @@ -194,10 +194,10 @@ class CERES_NO_EXPORT VisibilityBasedPreconditioner // Preconditioner matrix. std::unique_ptr m_; + std::unique_ptr m_crs_; std::unique_ptr sparse_cholesky_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #endif // CERES_INTERNAL_VISIBILITY_BASED_PRECONDITIONER_H_ diff --git a/extern/ceres/internal/ceres/wall_time.cc b/extern/ceres/internal/ceres/wall_time.cc index a54ab640b3e..2f4cf28f288 100644 --- a/extern/ceres/internal/ceres/wall_time.cc +++ b/extern/ceres/internal/ceres/wall_time.cc @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. 
// http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -30,13 +30,9 @@ #include "ceres/wall_time.h" -#include "ceres/internal/config.h" - -#ifdef CERES_USE_OPENMP -#include -#else #include -#endif + +#include "ceres/internal/config.h" #ifdef _WIN32 #include @@ -44,13 +40,9 @@ #include #endif -namespace ceres { -namespace internal { +namespace ceres::internal { double WallTimeInSeconds() { -#ifdef CERES_USE_OPENMP - return omp_get_wtime(); -#else #ifdef _WIN32 LARGE_INTEGER count; LARGE_INTEGER frequency; @@ -63,7 +55,6 @@ double WallTimeInSeconds() { gettimeofday(&time_val, nullptr); return (time_val.tv_sec + time_val.tv_usec * 1e-6); #endif -#endif } EventLogger::EventLogger(const std::string& logger_name) { @@ -74,7 +65,7 @@ EventLogger::EventLogger(const std::string& logger_name) { start_time_ = WallTimeInSeconds(); last_event_time_ = start_time_; events_ = StringPrintf( - "\n%s\n Delta Cumulative\n", + "\n%s\n Delta Cumulative\n", logger_name.c_str()); } @@ -103,5 +94,4 @@ void EventLogger::AddEvent(const std::string& event_name) { absolute_time_delta); } -} // namespace internal -} // namespace ceres +} // namespace ceres::internal diff --git a/extern/ceres/internal/ceres/wall_time.h b/extern/ceres/internal/ceres/wall_time.h index f093eed0418..f99052bb22c 100644 --- a/extern/ceres/internal/ceres/wall_time.h +++ b/extern/ceres/internal/ceres/wall_time.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2015 Google Inc. All rights reserved. +// Copyright 2023 Google Inc. All rights reserved. // http://ceres-solver.org/ // // Redistribution and use in source and binary forms, with or without @@ -39,13 +39,10 @@ #include "ceres/stringprintf.h" #include "glog/logging.h" -namespace ceres { -namespace internal { +namespace ceres::internal { -// Returns time, in seconds, from some arbitrary starting point. 
If -// OpenMP is available then the high precision openmp_get_wtime() -// function is used. Otherwise on unixes, gettimeofday is used. The -// granularity is in seconds on windows systems. +// Returns time, in seconds, from some arbitrary starting point. On unixes, +// gettimeofday is used. The granularity is microseconds. CERES_NO_EXPORT double WallTimeInSeconds(); // Log a series of events, recording for each event the time elapsed @@ -84,8 +81,7 @@ class CERES_NO_EXPORT EventLogger { std::string events_; }; -} // namespace internal -} // namespace ceres +} // namespace ceres::internal #include "ceres/internal/reenable_warnings.h"