From 25b4e41875bb7c062f14a49849aa43ca079f49ed Mon Sep 17 00:00:00 2001
From: Hans Goudey <h.goudey@me.com>
Date: Sun, 13 Mar 2022 14:42:56 -0500
Subject: [PATCH] Curve: Store NURBS basis cache as a single vector

Instead of allocating a vector of the basis weights cache for
each evaluated point, allocate a single vector for all of the
weights. This should reduce memory usage by avoiding the
overhead of storing many vectors. I noticed a small performance
improvement to evaluated position calculation with an order of 5,
which is larger than `Vector`'s default inline buffer capacity.

This change is possible because of previous commits that
made the basis cache for each evaluated point always have
the same "order" size.
---
 source/blender/blenkernel/BKE_spline.hh       | 11 +++--
 .../blender/blenkernel/intern/spline_nurbs.cc | 49 ++++++++++---------
 2 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/source/blender/blenkernel/BKE_spline.hh b/source/blender/blenkernel/BKE_spline.hh
index 1e3134020c6..817bbf60f54 100644
--- a/source/blender/blenkernel/BKE_spline.hh
+++ b/source/blender/blenkernel/BKE_spline.hh
@@ -453,12 +453,15 @@ class NURBSpline final : public Spline {
   KnotsMode knots_mode;
 
   struct BasisCache {
-    /** The influence at each control point `i + #start_index`. */
+    /**
+     * For each evaluated point, the weight for all control points that influence it.
+     * The vector's size is the evaluated point count multiplied by the spline's order.
+     */
     blender::Vector<float> weights;
     /**
      * An offset for the start of #weights: the first control point index with a non-zero weight.
      */
-    int start_index;
+    blender::Vector<int> start_indices;
   };
 
  private:
@@ -484,7 +487,7 @@ class NURBSpline final : public Spline {
   mutable bool knots_dirty_ = true;
 
   /** Cache of control point influences on each evaluated point. */
-  mutable blender::Vector<BasisCache> basis_cache_;
+  mutable BasisCache basis_cache_;
   mutable std::mutex basis_cache_mutex_;
   mutable bool basis_cache_dirty_ = true;
 
@@ -547,7 +550,7 @@ class NURBSpline final : public Spline {
   void reverse_impl() override;
 
   void calculate_knots() const;
-  blender::Span<BasisCache> calculate_basis_cache() const;
+  const BasisCache &calculate_basis_cache() const;
 };
 
 /**
diff --git a/source/blender/blenkernel/intern/spline_nurbs.cc b/source/blender/blenkernel/intern/spline_nurbs.cc
index e8a324ccb5e..1138250cffc 100644
--- a/source/blender/blenkernel/intern/spline_nurbs.cc
+++ b/source/blender/blenkernel/intern/spline_nurbs.cc
@@ -273,7 +273,7 @@ static void calculate_basis_for_point(const float parameter,
   r_start_index = start;
 }
 
-Span<NURBSpline::BasisCache> NURBSpline::calculate_basis_cache() const
+const NURBSpline::BasisCache &NURBSpline::calculate_basis_cache() const
 {
   if (!basis_cache_dirty_) {
     return basis_cache_;
@@ -286,18 +286,22 @@ Span<NURBSpline::BasisCache> NURBSpline::calculate_basis_cache() const
 
   const int size = this->size();
   const int eval_size = this->evaluated_points_size();
-  if (eval_size == 0) {
-    return {};
-  }
-
-  basis_cache_.resize(eval_size);
 
   const int order = this->order();
   const int degree = order - 1;
-  Span<float> control_weights = this->weights();
-  Span<float> knots = this->knots();
 
-  MutableSpan<BasisCache> basis_cache(basis_cache_);
+  basis_cache_.weights.resize(eval_size * order);
+  basis_cache_.start_indices.resize(eval_size);
+
+  if (eval_size == 0) {
+    return basis_cache_;
+  }
+
+  MutableSpan<float> basis_weights(basis_cache_.weights);
+  MutableSpan<int> basis_start_indices(basis_cache_.start_indices);
+
+  const Span<float> control_weights = this->weights();
+  const Span<float> knots = this->knots();
 
   const float start = knots[degree];
   const float end = is_cyclic_ ? knots[size + degree] : knots[size];
@@ -306,18 +310,18 @@ Span<NURBSpline::BasisCache> NURBSpline::calculate_basis_cache() const
     /* Clamp parameter due to floating point inaccuracy. */
     const float parameter = std::clamp(start + step * i, knots[0], knots[size + degree]);
 
-    BasisCache &basis = basis_cache[i];
-    basis.weights.resize(order);
+    MutableSpan<float> point_weights = basis_weights.slice(i * order, order);
+
     calculate_basis_for_point(parameter,
                               size + (is_cyclic_ ? degree : 0),
                               degree,
                               knots,
-                              basis.weights,
-                              basis.start_index);
+                              point_weights,
+                              basis_start_indices[i]);
 
-    for (const int j : basis.weights.index_range()) {
-      const int point_index = (basis.start_index + j) % size;
-      basis.weights[j] *= control_weights[point_index];
+    for (const int j : point_weights.index_range()) {
+      const int point_index = (basis_start_indices[i] + j) % size;
+      point_weights[j] *= control_weights[point_index];
     }
   }
 
@@ -326,17 +330,18 @@ Span<NURBSpline::BasisCache> NURBSpline::calculate_basis_cache() const
 }
 
 template<typename T>
-void interpolate_to_evaluated_impl(Span<NURBSpline::BasisCache> weights,
+void interpolate_to_evaluated_impl(const NURBSpline::BasisCache &basis_cache,
+                                   const int order,
                                    const blender::VArray<T> &src,
                                    MutableSpan<T> dst)
 {
   const int size = src.size();
-  BLI_assert(dst.size() == weights.size());
   blender::attribute_math::DefaultMixer<T> mixer(dst);
 
   for (const int i : dst.index_range()) {
-    Span<float> point_weights = weights[i].weights;
-    const int start_index = weights[i].start_index;
+    Span<float> point_weights = basis_cache.weights.as_span().slice(i * order, order);
+    const int start_index = basis_cache.start_indices[i];
+
     for (const int j : point_weights.index_range()) {
       const int point_index = (start_index + j) % size;
       mixer.mix_in(i, src[point_index], point_weights[j]);
@@ -354,14 +359,14 @@ GVArray NURBSpline::interpolate_to_evaluated(const GVArray &src) const
     return src;
   }
 
-  Span<BasisCache> basis_cache = this->calculate_basis_cache();
+  const BasisCache &basis_cache = this->calculate_basis_cache();
 
   GVArray new_varray;
   blender::attribute_math::convert_to_static_type(src.type(), [&](auto dummy) {
     using T = decltype(dummy);
     if constexpr (!std::is_void_v<blender::attribute_math::DefaultMixer<T>>) {
       Array<T> values(this->evaluated_points_size());
-      interpolate_to_evaluated_impl<T>(basis_cache, src.typed<T>(), values);
+      interpolate_to_evaluated_impl<T>(basis_cache, this->order(), src.typed<T>(), values);
       new_varray = VArray<T>::ForContainer(std::move(values));
     }
   });