Sculpt: Compress position undo step data

Stored undo step data for position changes in sculpt mode is now
automatically compressed. Compression runs in background threads,
reducing memory consumption during sculpting sessions while adding
little performance overhead.

For testing and benchmarking, undo memory usage is now available through
`bpy.app.undo_memory_info()` and is tracked by the existing automated
benchmark tests. The web benchmark visualization has also been adjusted
to present the data more clearly.
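
For example, the new query can be used from the Python console or a
benchmark script roughly like this (illustrative usage only; the function
returns the total undo memory in bytes):

    import bpy

    # Total memory used by all undo steps, in bytes.
    total_bytes = bpy.app.undo_memory_info()
    print(f"Undo memory: {total_bytes / (1024 * 1024):.2f} MiB")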

ZSTD compression runs asynchronously in a background task pool. It only
blocks if the data is requested immediately for undo/redo.
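
Conceptually this is a submit-now, wait-only-when-needed pattern: the
compression task is pushed to a low-priority background pool right after
the step is recorded, and undo/redo performs a blocking wait only if the
compressed data is not ready yet. A generic Python sketch of the same idea
(an analogy using the standard library, not Blender code):

    import zlib
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=1)

    def compress_step(raw: bytes) -> bytes:
        # Runs in a background thread while sculpting continues.
        return zlib.compress(raw, 6)

    future = executor.submit(compress_step, b"...position data...")

    # Block only when undo/redo actually needs the data back.
    compressed = future.result()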

Co-authored-by: Hans Goudey <hans@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/141310

Author: Namit Bhutani
Date: 2025-09-03 19:15:05 +02:00
Committed by: Hans Goudey
Parent: f3c5119d7d
Commit: 8536fd1223
8 changed files with 294 additions and 47 deletions

View File

@@ -8,6 +8,8 @@
#pragma once
#include <cstddef>
struct Depsgraph;
struct Main;
struct Mesh;
@@ -16,6 +18,7 @@ struct RegionView3D;
struct ReportList;
struct Scene;
struct UndoType;
struct UndoStep;
struct bContext;
struct wmKeyConfig;
struct wmOperator;
@@ -74,6 +77,8 @@ void geometry_end(Object &ob);
void push_multires_mesh_begin(bContext *C, const char *str);
void push_multires_mesh_end(bContext *C, const char *str);
size_t step_memory_size_get(UndoStep *step);
} // namespace undo
namespace face_set {

View File

@@ -148,3 +148,13 @@ MemFile *ED_undosys_stack_memfile_get_if_active(UndoStack *ustack);
* (currently we only do that in #MemFileWriteData when writing a new step).
*/
void ED_undosys_stack_memfile_id_changed_tag(UndoStack *ustack, ID *id);
/**
* Get the total memory usage of all undo steps in the current undo stack.
*
* This function iterates through all undo steps and calculates their memory consumption.
* For sculpt undo steps, it uses the specialized sculpt memory calculation function.
* For other undo step types, it uses the generic `data_size` field.
*
* \return Total memory usage in bytes, or 0 if no undo stack is available.
*/
size_t ED_get_total_undo_memory();

View File

@@ -16,6 +16,7 @@ set(INC
)
set(INC_SYS
${ZSTD_INCLUDE_DIRS}
)
set(SRC

View File

@@ -25,6 +25,7 @@
#include "sculpt_undo.hh"
#include <mutex>
#include <zstd.h>
#include "CLG_log.h"
@@ -34,6 +35,7 @@
#include "BLI_map.hh"
#include "BLI_memory_counter.hh"
#include "BLI_string_utf8.h"
#include "BLI_task.h"
#include "BLI_utildefines.h"
#include "BLI_vector.hh"
@@ -81,6 +83,12 @@
#include "sculpt_face_set.hh"
#include "sculpt_intern.hh"
// #define DEBUG_TIME
#ifdef DEBUG_TIME
# include "BLI_timeit.hh"
#endif
static CLG_LogRef LOG = {"undo.sculpt"};
namespace blender::ed::sculpt_paint::undo {
@@ -180,6 +188,7 @@ struct NodeGeometry {
};
struct Node;
struct PositionUndoStorage;
struct StepData {
private:
@@ -255,7 +264,7 @@ struct StepData {
/** Storage of per-node undo data after creation of the undo step is finished. */
Vector<std::unique_ptr<Node>> nodes;
std::unique_ptr<PositionUndoStorage> position_step_storage;
size_t undo_size;
/** Whether processing code needs to handle the current data as an undo step. */
@@ -274,6 +283,121 @@ struct StepData {
applied_ = false;
}
};
namespace zstd {
template<typename T> Array<std::byte> compress(const Span<T> src)
{
Array<std::byte> dst(ZSTD_compressBound(src.size_in_bytes()), NoInitialization());
const size_t dst_size = ZSTD_compress(
dst.data(), dst.size(), src.data(), src.size_in_bytes(), 12);
if (ZSTD_isError(dst_size)) {
return Array<std::byte>(0, NoInitialization());
}
return dst.as_span().take_front(dst_size);
}
template<typename T> Array<T> decompress(const Span<std::byte> src)
{
const unsigned long long dst_size_in_bytes = ZSTD_getFrameContentSize(src.data(), src.size());
if (ELEM(dst_size_in_bytes, ZSTD_CONTENTSIZE_ERROR, ZSTD_CONTENTSIZE_UNKNOWN)) {
return Array<T>(0, NoInitialization());
}
const int64_t dst_size = dst_size_in_bytes / sizeof(T);
Array<T> dst(dst_size, NoInitialization());
const size_t result = ZSTD_decompress(
dst.data(), dst.as_span().size_in_bytes(), src.data(), src.size());
if (ZSTD_isError(result)) {
return Array<T>(0, NoInitialization());
}
return dst;
}
} // namespace zstd
struct PositionUndoStorage : NonMovable {
Vector<std::unique_ptr<Node>> nodes_to_compress;
Array<Array<std::byte>> compressed_indices;
Array<Array<std::byte>> compressed_positions;
Array<int> unique_verts_nums;
TaskPool *compression_task_pool;
std::atomic<bool> compression_ready = false;
std::atomic<bool> compression_started = false;
StepData *owner_step_data = nullptr;
explicit PositionUndoStorage(StepData &step_data)
: nodes_to_compress(std::move(step_data.nodes)), owner_step_data(&step_data)
{
unique_verts_nums.reinitialize(nodes_to_compress.size());
for (const int i : nodes_to_compress.index_range()) {
unique_verts_nums[i] = nodes_to_compress[i]->unique_verts_num;
}
compression_task_pool = BLI_task_pool_create_background(this, TASK_PRIORITY_LOW);
compression_started = true;
BLI_task_pool_push(compression_task_pool, compress_fn, this, false, nullptr);
}
~PositionUndoStorage()
{
if (compression_started.load() && compression_task_pool) {
BLI_task_pool_work_and_wait(compression_task_pool);
BLI_task_pool_free(compression_task_pool);
}
}
void ensure_compression_complete()
{
if (!compression_ready.load(std::memory_order_acquire)) {
BLI_task_pool_work_and_wait(compression_task_pool);
}
}
static void compress_fn(TaskPool * /*pool*/, void *task_data)
{
#ifdef DEBUG_TIME
SCOPED_TIMER(__func__);
#endif
auto *data = static_cast<PositionUndoStorage *>(task_data);
MutableSpan<std::unique_ptr<Node>> nodes = data->nodes_to_compress;
const int nodes_num = nodes.size();
Array<Array<std::byte>> compressed_indices(nodes.size(), NoInitialization());
Array<Array<std::byte>> compressed_data(nodes.size(), NoInitialization());
threading::isolate_task([&]() {
threading::parallel_for(IndexRange(nodes_num), 1, [&](const IndexRange range) {
for (const int i : range) {
Array<std::byte> verts = zstd::compress<int>(nodes[i]->vert_indices);
Array<std::byte> positions = zstd::compress<float3>(nodes[i]->position);
new (&compressed_indices[i]) Array<std::byte>(std::move(verts));
new (&compressed_data[i]) Array<std::byte>(std::move(positions));
nodes[i].reset();
}
});
});
data->nodes_to_compress.clear_and_shrink();
size_t memory_size = 0;
for (const int i : IndexRange(nodes_num)) {
memory_size += compressed_indices[i].as_span().size_in_bytes();
memory_size += compressed_data[i].as_span().size_in_bytes();
}
data->compressed_indices = std::move(compressed_indices);
data->compressed_positions = std::move(compressed_data);
data->owner_step_data->undo_size += memory_size;
data->compression_ready.store(true, std::memory_order_release);
}
};
struct SculptUndoStep {
UndoStep step;
@@ -287,6 +411,21 @@ struct SculptUndoStep {
SculptAttrRef active_color_end;
};
size_t step_memory_size_get(UndoStep *step)
{
if (step->type != BKE_UNDOSYS_TYPE_SCULPT) {
return 0;
}
SculptUndoStep *sculpt_step = reinterpret_cast<SculptUndoStep *>(step);
if (sculpt_step->data.position_step_storage) {
sculpt_step->data.position_step_storage->ensure_compression_complete();
}
return sculpt_step->data.undo_size;
}
static SculptUndoStep *get_active_step()
{
UndoStack *ustack = ED_undo_stack_get();
@@ -362,37 +501,48 @@ static void swap_indexed_data(MutableSpan<T> full, const Span<int> indices, Muta
}
static void restore_position_mesh(Object &object,
const Span<std::unique_ptr<Node>> unodes,
PositionUndoStorage &undo_data,
const MutableSpan<bool> modified_verts)
{
#ifdef DEBUG_TIME
SCOPED_TIMER(__func__);
#endif
SculptSession &ss = *object.sculpt;
Mesh &mesh = *static_cast<Mesh *>(object.data);
MutableSpan<float3> positions = mesh.vert_positions_for_write();
std::optional<ShapeKeyData> shape_key_data = ShapeKeyData::from_object(object);
threading::parallel_for(unodes.index_range(), 1, [&](const IndexRange range) {
for (const int node_i : range) {
Node &unode = *unodes[node_i];
const Span<int> verts = unode.vert_indices.as_span().take_front(unode.unique_verts_num);
undo_data.ensure_compression_complete();
if (unode.orig_position.is_empty()) {
const int nodes_num = undo_data.unique_verts_nums.size();
threading::parallel_for(IndexRange(nodes_num), 1, [&](const IndexRange range) {
for (const int i : range) {
Array<int> indices = zstd::decompress<int>(undo_data.compressed_indices[i]);
Array<float3> node_positions = zstd::decompress<float3>(undo_data.compressed_positions[i]);
const int unique_verts_num = undo_data.unique_verts_nums[i];
const Span<int> verts = indices.as_span().take_front(unique_verts_num);
if (!ss.deform_modifiers_active) {
/* When original positions aren't written separately in the undo step, there are no
* deform modifiers. Therefore the original and evaluated deform positions will be the
* same, and modifying the positions from the original mesh is enough. */
swap_indexed_data(
unode.position.as_mutable_span().take_front(unode.unique_verts_num), verts, positions);
node_positions.as_mutable_span().take_front(unique_verts_num), verts, positions);
}
else {
/* When original positions are stored in the undo step, undo/redo will cause a reevaluation
* of the object. The evaluation will recompute the evaluated positions, so dealing with
* them here is unnecessary. */
MutableSpan<float3> undo_positions = unode.orig_position;
MutableSpan<float3> undo_positions = node_positions;
if (shape_key_data) {
MutableSpan<float3> active_data = shape_key_data->active_key_data;
if (!shape_key_data->dependent_keys.is_empty()) {
Array<float3, 1024> translations(verts.size());
translations_from_new_positions(undo_positions, verts, active_data, translations);
translations_from_new_positions(
undo_positions.take_front(unique_verts_num), verts, active_data, translations);
for (MutableSpan<float3> data : shape_key_data->dependent_keys) {
apply_translations(translations, verts, data);
}
@@ -402,14 +552,18 @@ static void restore_position_mesh(Object &object,
/* The basis key positions and the mesh positions are always kept in sync. */
scatter_data_mesh(undo_positions.as_span(), verts, positions);
}
swap_indexed_data(undo_positions.take_front(unode.unique_verts_num), verts, active_data);
swap_indexed_data(undo_positions.take_front(unique_verts_num), verts, active_data);
}
else {
/* There is a deform modifier, but no shape keys. */
swap_indexed_data(undo_positions.take_front(unode.unique_verts_num), verts, positions);
swap_indexed_data(undo_positions.take_front(unique_verts_num), verts, positions);
}
}
modified_verts.fill_indices(verts, true);
undo_data.compressed_indices[i] = zstd::compress<int>(indices);
undo_data.compressed_positions[i] = zstd::compress<float3>(node_positions);
}
});
}
@@ -896,7 +1050,7 @@ static void restore_list(bContext *C, Depsgraph *depsgraph, StepData &step_data)
}
const Mesh &mesh = *static_cast<const Mesh *>(object.data);
Array<bool> modified_verts(mesh.verts_num, false);
restore_position_mesh(object, step_data.nodes, modified_verts);
restore_position_mesh(object, *step_data.position_step_storage, modified_verts);
const IndexMask changed_nodes = IndexMask::from_predicate(
node_mask, GrainSize(1), memory, [&](const int i) {
@@ -1810,17 +1964,22 @@ void push_end_ex(Object &ob, const bool use_nested_undo)
* just one positions array that has a different semantic meaning depending on whether there are
* deform modifiers. */
step_data->undo_size = threading::parallel_reduce(
step_data->nodes.index_range(),
16,
0,
[&](const IndexRange range, size_t size) {
for (const int i : range) {
size += node_size_in_bytes(*step_data->nodes[i]);
}
return size;
},
std::plus<size_t>());
if (step_data->type == Type::Position) {
step_data->position_step_storage = std::make_unique<PositionUndoStorage>(*step_data);
}
else {
step_data->undo_size = threading::parallel_reduce(
step_data->nodes.index_range(),
16,
0,
[&](const IndexRange range, size_t size) {
for (const int i : range) {
size += node_size_in_bytes(*step_data->nodes[i]);
}
return size;
},
std::plus<size_t>());
}
/* We could remove this and enforce all callers run in an operator using 'OPTYPE_UNDO'. */
wmWindowManager *wm = static_cast<wmWindowManager *>(G_MAIN->wm.first);

View File

@@ -37,6 +37,7 @@
#include "ED_outliner.hh"
#include "ED_render.hh"
#include "ED_screen.hh"
#include "ED_sculpt.hh"
#include "ED_undo.hh"
#include "WM_api.hh"
@@ -912,4 +913,25 @@ Vector<Base *> ED_undo_editmode_bases_from_view_layer(const Scene *scene, ViewLa
return bases;
}
size_t ED_get_total_undo_memory()
{
UndoStack *ustack = ED_undo_stack_get();
if (!ustack) {
return 0;
}
size_t total_memory = 0;
for (UndoStep *us = static_cast<UndoStep *>(ustack->steps.first); us != nullptr; us = us->next) {
if (us->type == BKE_UNDOSYS_TYPE_SCULPT) {
total_memory += blender::ed::sculpt_paint::undo::step_memory_size_get(us);
}
else if (us->data_size > 0) {
total_memory += us->data_size;
}
}
return total_memory;
}
/** \} */

View File

@@ -22,11 +22,13 @@
#include "bpy_app_opensubdiv.hh"
#include "bpy_app_openvdb.hh"
#include "bpy_app_sdl.hh"
#include "bpy_app_usd.hh"
#include "bpy_app_translations.hh"
#include "bpy_app_handlers.hh"
#include "bpy_capi_utils.hh"
#include "bpy_driver.hh"
#include "BPY_extern_python.hh" /* For #BPY_python_app_help_text_fn. */
@@ -46,6 +48,7 @@
#include "UI_interface_icons.hh"
#include "ED_undo.hh"
#include "MEM_guardedalloc.h"
#include "RNA_enum_types.hh" /* For `rna_enum_wm_job_type_items`. */
@@ -644,6 +647,23 @@ static PyObject *bpy_app_help_text(PyObject * /*self*/, PyObject *args, PyObject
# pragma GCC diagnostic ignored "-Wcast-function-type"
# endif
#endif
PyDoc_STRVAR(
/* Wrap. */
bpy_app_undo_memory_info_doc,
".. staticmethod:: undo_memory_info()\n"
"\n"
" Get undo memory usage information.\n"
"\n"
" :return: 'total_memory'.\n"
" :rtype: int\n");
static PyObject *bpy_app_undo_memory_info(PyObject * /*self*/, PyObject * /*args*/)
{
size_t total_memory = ED_get_total_undo_memory();
return PyLong_FromSize_t(total_memory);
}
static PyMethodDef bpy_app_methods[] = {
{"is_job_running",
@@ -654,6 +674,10 @@ static PyMethodDef bpy_app_methods[] = {
(PyCFunction)bpy_app_help_text,
METH_VARARGS | METH_KEYWORDS | METH_STATIC,
bpy_app_help_text_doc},
{"undo_memory_info",
(PyCFunction)bpy_app_undo_memory_info,
METH_NOARGS | METH_STATIC,
bpy_app_undo_memory_info_doc},
{nullptr, nullptr, 0, nullptr},
};

View File

@@ -39,6 +39,33 @@
return ndt;
}
function drawChart(chartsQueue, index) {
index = index || 0;
if (index === chartsQueue.length)
return;
var chartData = chartsQueue[index];
var chart;
if (chartData.chart_type == 'line') {
chart = new google.charts.Line(document.getElementById(chartData.id));
} else {
chart = new google.charts.Bar(document.getElementById(chartData.id));
}
google.visualization.events.addOneTimeListener(chart, 'ready', function() {
/* Auto scale chart elements to display full SVG. */
var allSvg = document.getElementsByTagName("svg");
for (var svgIndex = 0; svgIndex < allSvg.length; svgIndex++) {
allSvg[svgIndex].setAttribute('height', allSvg[svgIndex].getBBox().height);
}
drawChart(chartsQueue, index + 1);
});
chart.draw(chartData.data, chartData.options);
}
function draw_charts()
{
/* Load JSON data. */
@@ -54,7 +81,9 @@
charts_content_elem.removeChild(charts_content_elem.firstChild);
}
/* Draw charts for each device. */
var chartsQueue = [];
/* Prepare UI and charts queue for each device. */
for (var i = 0; i < json_data.length; i++)
{
benchmark = json_data[i];
@@ -104,6 +133,7 @@
/* Create chart div. */
chart_div = document.createElement('div');
chart_div.id = "chart-" + i;
tab_div.appendChild(chart_div)
/* Chart drawing options. */
@@ -113,25 +143,20 @@
height: 500,
};
/* Create chart. */
/* Prepare chart data for queue. */
var data = new google.visualization.DataTable(benchmark["data"]);
if (benchmark['chart_type'] == 'line') {
var chart = new google.charts.Line(chart_div);
chart.draw(data, options);
}
else {
var chart = new google.charts.Bar(chart_div);
chart.draw(transposeDataTable(data), options);
}
/* Auto scale chart elements to display full SVG. */
google.visualization.events.addListener(chart, 'ready', function () {
var allSvg = document.getElementsByTagName("svg");
for (var index = 0; index < allSvg.length; index++) {
allSvg[index].setAttribute('height', allSvg[index].getBBox().height);
}
});
var chartData = {
id: chart_div.id,
data: benchmark['chart_type'] == 'line' ? data : transposeDataTable(data),
options: options,
chart_type: benchmark['chart_type']
};
chartsQueue.push(chartData);
}
/* Start drawing charts sequentially. */
drawChart(chartsQueue, 0);
}
</script>
</head>

View File

@@ -169,7 +169,6 @@ def _run_brush_test(args: dict):
min_measurements = 5
max_measurements = 100
measurements = []
while True:
prepare_sculpt_scene(context, args['mode'])
@@ -178,16 +177,18 @@ def _run_brush_test(args: dict):
with context.temp_override(**context_override):
if args.get('spatial_reorder', False):
bpy.ops.mesh.reorder_vertices_spatial()
bpy.ops.ed.undo_push()
start = time.time()
bpy.ops.sculpt.brush_stroke(stroke=generate_stroke(context_override), override_location=True)
bpy.ops.ed.undo_push()
measurements.append(time.time() - start)
memory_info = bpy.app.undo_memory_info()
if len(measurements) >= min_measurements and (time.time() - total_time_start) > timeout:
break
if len(measurements) >= max_measurements:
break
return sum(measurements) / len(measurements)
return {'time': sum(measurements) / len(measurements), 'memory': memory_info}
def _run_bvh_test(args: dict):
@@ -277,7 +278,7 @@ class SculptBrushTest(api.Test):
result, _ = env.run_in_blender(_run_brush_test, args, [self.filepath])
return {'time': result}
return result
class SculptBrushAfterSpatialReorderingTest(api.Test):
@@ -301,7 +302,7 @@ class SculptBrushAfterSpatialReorderingTest(api.Test):
result, _ = env.run_in_blender(_run_brush_test, args, [self.filepath])
return {'time': result}
return result
class SculptRebuildBVHTest(api.Test):