From d6c9cd445cb41480b40fc7a7c29bbf982a2a6446 Mon Sep 17 00:00:00 2001
From: Hans Goudey <h.goudey@me.com>
Date: Thu, 26 Jan 2023 12:34:28 -0600
Subject: [PATCH] Geometry Nodes: Skip sorting in topology nodes if possible

When the sort weights are a single value, they have no effect,
so sorting the relevant indices for the element will be wasted work.
The sorting is expensive compared to the rest of the node. In my
simple test of the points of curve node, it became 6 times faster
when the weights are a single value.
---
 ...node_geo_curve_topology_points_of_curve.cc | 40 ++++++++-------
 .../node_geo_mesh_topology_corners_of_face.cc | 37 ++++++++------
 ...ode_geo_mesh_topology_corners_of_vertex.cc | 45 +++++++++--------
 .../node_geo_mesh_topology_edges_of_vertex.cc | 50 +++++++++++--------
 4 files changed, 97 insertions(+), 75 deletions(-)
diff --git a/source/blender/nodes/geometry/nodes/node_geo_curve_topology_points_of_curve.cc b/source/blender/nodes/geometry/nodes/node_geo_curve_topology_points_of_curve.cc
index 0f9c14b0fee..8d9e4a07a76 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_curve_topology_points_of_curve.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_curve_topology_points_of_curve.cc
@@ -49,6 +49,8 @@ class PointsOfCurveInput final : public bke::CurvesFieldInput {
                                  const eAttrDomain domain,
                                  const IndexMask mask) const final
   {
+    const OffsetIndices points_by_curve = curves.points_by_curve();
+
     const bke::CurvesFieldContext context{curves, domain};
     fn::FieldEvaluator evaluator{context, &mask};
     evaluator.add(curve_index_);
@@ -62,7 +64,7 @@ class PointsOfCurveInput final : public bke::CurvesFieldInput {
     point_evaluator.add(sort_weight_);
     point_evaluator.evaluate();
     const VArray<float> all_sort_weights = point_evaluator.get_evaluated<float>(0);
-    const OffsetIndices points_by_curve = curves.points_by_curve();
+    const bool use_sorting = !all_sort_weights.is_single();
 
     Array<int> point_of_curve(mask.min_array_size());
     threading::parallel_for(mask.index_range(), 256, [&](const IndexRange range) {
@@ -77,25 +79,29 @@ class PointsOfCurveInput final : public bke::CurvesFieldInput {
           point_of_curve[selection_i] = 0;
           continue;
         }
-
         const IndexRange points = points_by_curve[curve_i];
 
-        /* Retrieve the weights for each point. */
-        sort_weights.reinitialize(points.size());
-        all_sort_weights.materialize_compressed(IndexMask(points), sort_weights.as_mutable_span());
-
-        /* Sort a separate array of compressed indices corresponding to the compressed weights.
-         * This allows using `materialize_compressed` to avoid virtual function call overhead
-         * when accessing values in the sort weights. However, it means a separate array of
-         * indices within the compressed array is necessary for sorting. */
-        sort_indices.reinitialize(points.size());
-        std::iota(sort_indices.begin(), sort_indices.end(), 0);
-        std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
-          return sort_weights[a] < sort_weights[b];
-        });
-
         const int index_in_sort_wrapped = mod_i(index_in_sort, points.size());
-        point_of_curve[selection_i] = points[sort_indices[index_in_sort_wrapped]];
+        if (use_sorting) {
+          /* Retrieve the weights for each point. */
+          sort_weights.reinitialize(points.size());
+          all_sort_weights.materialize_compressed(IndexMask(points),
+                                                  sort_weights.as_mutable_span());
+
+          /* Sort a separate array of compressed indices corresponding to the compressed weights.
+           * This allows using `materialize_compressed` to avoid virtual function call overhead
+           * when accessing values in the sort weights. However, it means a separate array of
+           * indices within the compressed array is necessary for sorting. */
+          sort_indices.reinitialize(points.size());
+          std::iota(sort_indices.begin(), sort_indices.end(), 0);
+          std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
+            return sort_weights[a] < sort_weights[b];
+          });
+          point_of_curve[selection_i] = points[sort_indices[index_in_sort_wrapped]];
+        }
+        else {
+          point_of_curve[selection_i] = points[index_in_sort_wrapped];
+        }
       }
     });
 
diff --git a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_face.cc b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_face.cc
index d5b4b4869c1..55c70095236 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_face.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_face.cc
@@ -64,6 +64,7 @@ class CornersOfFaceInput final : public bke::MeshFieldInput {
     corner_evaluator.add(sort_weight_);
     corner_evaluator.evaluate();
     const VArray<float> all_sort_weights = corner_evaluator.get_evaluated<float>(0);
+    const bool use_sorting = !all_sort_weights.is_single();
 
     Array<int> corner_of_face(mask.min_array_size());
     threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
@@ -82,23 +83,27 @@ class CornersOfFaceInput final : public bke::MeshFieldInput {
         const MPoly &poly = polys[poly_i];
         const IndexRange corners(poly.loopstart, poly.totloop);
 
-        /* Retrieve the weights for each corner. */
-        sort_weights.reinitialize(corners.size());
-        all_sort_weights.materialize_compressed(IndexMask(corners),
-                                                sort_weights.as_mutable_span());
-
-        /* Sort a separate array of compressed indices corresponding to the compressed weights.
-         * This allows using `materialize_compressed` to avoid virtual function call overhead
-         * when accessing values in the sort weights. However, it means a separate array of
-         * indices within the compressed array is necessary for sorting. */
-        sort_indices.reinitialize(corners.size());
-        std::iota(sort_indices.begin(), sort_indices.end(), 0);
-        std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
-          return sort_weights[a] < sort_weights[b];
-        });
-
         const int index_in_sort_wrapped = mod_i(index_in_sort, corners.size());
-        corner_of_face[selection_i] = corners[sort_indices[index_in_sort_wrapped]];
+        if (use_sorting) {
+          /* Retrieve the weights for each corner. */
+          sort_weights.reinitialize(corners.size());
+          all_sort_weights.materialize_compressed(IndexMask(corners),
+                                                  sort_weights.as_mutable_span());
+
+          /* Sort a separate array of compressed indices corresponding to the compressed weights.
+           * This allows using `materialize_compressed` to avoid virtual function call overhead
+           * when accessing values in the sort weights. However, it means a separate array of
+           * indices within the compressed array is necessary for sorting. */
+          sort_indices.reinitialize(corners.size());
+          std::iota(sort_indices.begin(), sort_indices.end(), 0);
+          std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
+            return sort_weights[a] < sort_weights[b];
+          });
+          corner_of_face[selection_i] = corners[sort_indices[index_in_sort_wrapped]];
+        }
+        else {
+          corner_of_face[selection_i] = corners[index_in_sort_wrapped];
+        }
       }
     });
 
diff --git a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_vertex.cc b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_vertex.cc
index 12310d6df15..8d7ebb4e105 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_vertex.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_corners_of_vertex.cc
@@ -77,6 +77,7 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
     corner_evaluator.add(sort_weight_);
     corner_evaluator.evaluate();
     const VArray<float> all_sort_weights = corner_evaluator.get_evaluated<float>(0);
+    const bool use_sorting = !all_sort_weights.is_single();
 
     Array<int> corner_of_vertex(mask.min_array_size());
     threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
@@ -99,27 +100,31 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
           continue;
         }
 
-        /* Retrieve the connected edge indices as 64 bit integers for #materialize_compressed. */
-        corner_indices.reinitialize(corners.size());
-        convert_span(corners, corner_indices);
-
-        /* Retrieve a compressed array of weights for each edge. */
-        sort_weights.reinitialize(corners.size());
-        all_sort_weights.materialize_compressed(IndexMask(corner_indices),
-                                                sort_weights.as_mutable_span());
-
-        /* Sort a separate array of compressed indices corresponding to the compressed weights.
-         * This allows using `materialize_compressed` to avoid virtual function call overhead
-         * when accessing values in the sort weights. However, it means a separate array of
-         * indices within the compressed array is necessary for sorting. */
-        sort_indices.reinitialize(corners.size());
-        std::iota(sort_indices.begin(), sort_indices.end(), 0);
-        std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
-          return sort_weights[a] < sort_weights[b];
-        });
-
         const int index_in_sort_wrapped = mod_i(index_in_sort, corners.size());
-        corner_of_vertex[selection_i] = corner_indices[sort_indices[index_in_sort_wrapped]];
+        if (use_sorting) {
+          /* Retrieve the connected edge indices as 64 bit integers for #materialize_compressed. */
+          corner_indices.reinitialize(corners.size());
+          convert_span(corners, corner_indices);
+
+          /* Retrieve a compressed array of weights for each edge. */
+          sort_weights.reinitialize(corners.size());
+          all_sort_weights.materialize_compressed(IndexMask(corner_indices),
+                                                  sort_weights.as_mutable_span());
+
+          /* Sort a separate array of compressed indices corresponding to the compressed weights.
+           * This allows using `materialize_compressed` to avoid virtual function call overhead
+           * when accessing values in the sort weights. However, it means a separate array of
+           * indices within the compressed array is necessary for sorting. */
+          sort_indices.reinitialize(corners.size());
+          std::iota(sort_indices.begin(), sort_indices.end(), 0);
+          std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
+            return sort_weights[a] < sort_weights[b];
+          });
+          corner_of_vertex[selection_i] = corner_indices[sort_indices[index_in_sort_wrapped]];
+        }
+        else {
+          corner_of_vertex[selection_i] = corner_indices[index_in_sort_wrapped];
+        }
       }
     });
 
diff --git a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_edges_of_vertex.cc b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_edges_of_vertex.cc
index b16bbabfe0b..7c27a4b783d 100644
--- a/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_edges_of_vertex.cc
+++ b/source/blender/nodes/geometry/nodes/node_geo_mesh_topology_edges_of_vertex.cc
@@ -61,8 +61,8 @@ class EdgesOfVertInput final : public bke::MeshFieldInput {
   {
     const IndexRange vert_range(mesh.totvert);
     const Span<MEdge> edges = mesh.edges();
-    Array<Vector<int>> vert_to_edge_map = bke::mesh_topology::build_vert_to_edge_map(edges,
-                                                                                     mesh.totvert);
+    const Array<Vector<int>> vert_to_edge_map = bke::mesh_topology::build_vert_to_edge_map(
+        edges, mesh.totvert);
 
     const bke::MeshFieldContext context{mesh, domain};
     fn::FieldEvaluator evaluator{context, &mask};
@@ -77,6 +77,7 @@ class EdgesOfVertInput final : public bke::MeshFieldInput {
     edge_evaluator.add(sort_weight_);
     edge_evaluator.evaluate();
     const VArray<float> all_sort_weights = edge_evaluator.get_evaluated<float>(0);
+    const bool use_sorting = !all_sort_weights.is_single();
 
     Array<int> edge_of_vertex(mask.min_array_size());
     threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
@@ -99,27 +100,32 @@ class EdgesOfVertInput final : public bke::MeshFieldInput {
           continue;
         }
 
-        /* Retrieve the connected edge indices as 64 bit integers for #materialize_compressed. */
-        edge_indices.reinitialize(edges.size());
-        convert_span(edges, edge_indices);
-
-        /* Retrieve a compressed array of weights for each edge. */
-        sort_weights.reinitialize(edges.size());
-        all_sort_weights.materialize_compressed(IndexMask(edge_indices),
-                                                sort_weights.as_mutable_span());
-
-        /* Sort a separate array of compressed indices corresponding to the compressed weights.
-         * This allows using `materialize_compressed` to avoid virtual function call overhead
-         * when accessing values in the sort weights. However, it means a separate array of
-         * indices within the compressed array is necessary for sorting. */
-        sort_indices.reinitialize(edges.size());
-        std::iota(sort_indices.begin(), sort_indices.end(), 0);
-        std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
-          return sort_weights[a] < sort_weights[b];
-        });
-
         const int index_in_sort_wrapped = mod_i(index_in_sort, edges.size());
-        edge_of_vertex[selection_i] = edge_indices[sort_indices[index_in_sort_wrapped]];
+        if (use_sorting) {
+          /* Retrieve the connected edge indices as 64 bit integers for #materialize_compressed. */
+          edge_indices.reinitialize(edges.size());
+          convert_span(edges, edge_indices);
+
+          /* Retrieve a compressed array of weights for each edge. */
+          sort_weights.reinitialize(edges.size());
+          all_sort_weights.materialize_compressed(IndexMask(edge_indices),
+                                                  sort_weights.as_mutable_span());
+
+          /* Sort a separate array of compressed indices corresponding to the compressed weights.
+           * This allows using `materialize_compressed` to avoid virtual function call overhead
+           * when accessing values in the sort weights. However, it means a separate array of
+           * indices within the compressed array is necessary for sorting. */
+          sort_indices.reinitialize(edges.size());
+          std::iota(sort_indices.begin(), sort_indices.end(), 0);
+          std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
+            return sort_weights[a] < sort_weights[b];
+          });
+
+          edge_of_vertex[selection_i] = edge_indices[sort_indices[index_in_sort_wrapped]];
+        }
+        else {
+          edge_of_vertex[selection_i] = edge_indices[index_in_sort_wrapped];
+        }
       }
     });