Sculpt: Improve multires drawing performance by simplifying logic

Combined with previous logic, this roughly doubles the performance of sculpt-mode multires draw vertex buffer extraction in my simple test. That measurement was taken when initially entering sculpt mode, where VBO creation is single-threaded. Otherwise, in my testing, the copying was bound by memory bandwidth and the improvement was smaller.
2024-06-26 23:14:12 -04:00
parent 7cf9854938
commit 83a15be109
1 changed file with 108 additions and 108 deletions
--- a/source/blender/draw/intern/draw_pbvh.cc
+++ b/source/blender/draw/intern/draw_pbvh.cc
@@ -477,11 +477,6 @@ static void fill_vbo_normal_faces(const PBVH_GPU_Args &args, gpu::VertBuf &vert_
  }
 }

-static void foreach_grids(
-    const PBVH_GPU_Args &args,
-    const bool use_flat_layout,
-    const FunctionRef<void(int x, int y, int grid_index, CCGElem *elems[4], int i)> func);
-
 static void fill_vbo_grids(PBVHVbo &vbo, const PBVH_GPU_Args &args, const bool use_flat_layout)
 {
  uint vert_per_grid = square_i(args.ccg_key.grid_size - 1) * 4;
@@ -494,16 +489,41 @@ static void fill_vbo_grids(PBVHVbo &vbo, const PBVH_GPU_Args &args, const bool u
    GPU_vertbuf_data_alloc(*vbo.vert_buf, vert_count);
  }

+  const Span<int> grid_indices = args.grid_indices;
+  const Span<CCGElem *> grids = args.grids;
+  const CCGKey key = args.ccg_key;
+  const int gridsize = key.grid_size;
+
  if (const CustomRequest *request_type = std::get_if<CustomRequest>(&vbo.request)) {
    switch (*request_type) {
      case CustomRequest::Position: {
        float3 *data = vbo.vert_buf->data<float3>().data();
-        foreach_grids(args,
-                      use_flat_layout,
-                      [&](int /*x*/, int /*y*/, int /*grid_index*/, CCGElem *elems[4], int i) {
-                        *data = CCG_elem_co(args.ccg_key, elems[i]);
-                        data++;
-                      });
+        if (use_flat_layout) {
+          for (const int i : grid_indices.index_range()) {
+            CCGElem *grid = grids[grid_indices[i]];
+            for (int y = 0; y < gridsize - 1; y++) {
+              for (int x = 0; x < gridsize - 1; x++) {
+                *data = CCG_grid_elem_co(key, grid, x, y);
+                data++;
+                *data = CCG_grid_elem_co(key, grid, x + 1, y);
+                data++;
+                *data = CCG_grid_elem_co(key, grid, x + 1, y + 1);
+                data++;
+                *data = CCG_grid_elem_co(key, grid, x, y + 1);
+                data++;
+              }
+            }
+          }
+        }
+        else {
+          for (const int i : grid_indices.index_range()) {
+            CCGElem *grid = grids[grid_indices[i]];
+            for (const int offset : IndexRange(key.grid_area)) {
+              *data = CCG_elem_offset_co(key, grid, offset);
+              data++;
+            }
+          }
+        }
        break;
      }
      case CustomRequest::Normal: {
@@ -514,34 +534,75 @@ static void fill_vbo_grids(PBVHVbo &vbo, const PBVH_GPU_Args &args, const bool u

        short4 *data = vbo.vert_buf->data<short4>().data();

-        foreach_grids(args,
-                      use_flat_layout,
-                      [&](int /*x*/, int /*y*/, int grid_index, CCGElem *elems[4], int /*i*/) {
-                        if (!sharp_faces.is_empty() && sharp_faces[grid_to_face_map[grid_index]]) {
-                          float3 no;
-                          normal_quad_v3(no,
-                                         CCG_elem_co(args.ccg_key, elems[3]),
-                                         CCG_elem_co(args.ccg_key, elems[2]),
-                                         CCG_elem_co(args.ccg_key, elems[1]),
-                                         CCG_elem_co(args.ccg_key, elems[0]));
-                          *data = normal_float_to_short(no);
-                        }
-                        else {
-                          *data = normal_float_to_short(CCG_elem_no(args.ccg_key, elems[0]));
-                        }
-                        data++;
-                      });
+        if (use_flat_layout) {
+          for (const int i : grid_indices.index_range()) {
+            const int grid_index = grid_indices[i];
+            CCGElem *grid = grids[grid_index];
+            if (!sharp_faces.is_empty() && sharp_faces[grid_to_face_map[grid_index]]) {
+              for (int y = 0; y < gridsize - 1; y++) {
+                for (int x = 0; x < gridsize - 1; x++) {
+                  float3 no;
+                  normal_quad_v3(no,
+                                 CCG_grid_elem_co(key, grid, x, y + 1),
+                                 CCG_grid_elem_co(key, grid, x + 1, y + 1),
+                                 CCG_grid_elem_co(key, grid, x + 1, y),
+                                 CCG_grid_elem_co(key, grid, x, y));
+                  std::fill_n(data, 4, normal_float_to_short(no));
+                  data += 4;
+                }
+              }
+            }
+            else {
+              for (int y = 0; y < gridsize - 1; y++) {
+                for (int x = 0; x < gridsize - 1; x++) {
+                  std::fill_n(data, 4, normal_float_to_short(CCG_grid_elem_no(key, grid, x, y)));
+                  data += 4;
+                }
+              }
+            }
+          }
+        }
+        else {
+          /* The non-flat VBO layout does not support sharp faces. */
+          for (const int i : grid_indices.index_range()) {
+            CCGElem *grid = grids[grid_indices[i]];
+            for (const int offset : IndexRange(key.grid_area)) {
+              *data = normal_float_to_short(CCG_elem_offset_no(key, grid, offset));
+              data++;
+            }
+          }
+        }
        break;
      }
      case CustomRequest::Mask: {
        if (args.ccg_key.has_mask) {
          float *data = vbo.vert_buf->data<float>().data();
-          foreach_grids(args,
-                        use_flat_layout,
-                        [&](int /*x*/, int /*y*/, int /*grid_index*/, CCGElem *elems[4], int i) {
-                          *data = CCG_elem_mask(args.ccg_key, elems[i]);
-                          data++;
-                        });
+          if (use_flat_layout) {
+            for (const int i : grid_indices.index_range()) {
+              CCGElem *grid = grids[grid_indices[i]];
+              for (int y = 0; y < gridsize - 1; y++) {
+                for (int x = 0; x < gridsize - 1; x++) {
+                  *data = CCG_grid_elem_mask(key, grid, x, y);
+                  data++;
+                  *data = CCG_grid_elem_mask(key, grid, x + 1, y);
+                  data++;
+                  *data = CCG_grid_elem_mask(key, grid, x + 1, y + 1);
+                  data++;
+                  *data = CCG_grid_elem_mask(key, grid, x, y + 1);
+                  data++;
+                }
+              }
+            }
+          }
+          else {
+            for (const int i : grid_indices.index_range()) {
+              CCGElem *grid = grids[grid_indices[i]];
+              for (const int offset : IndexRange(key.grid_area)) {
+                *data = CCG_elem_offset_mask(key, grid, offset);
+                data++;
+              }
+            }
+          }
        }
        else {
          vbo.vert_buf->data<float>().fill(0.0f);
@@ -549,30 +610,26 @@ static void fill_vbo_grids(PBVHVbo &vbo, const PBVH_GPU_Args &args, const bool u
        break;
      }
      case CustomRequest::FaceSet: {
+        const Span<int> grid_to_face_map = args.subdiv_ccg->grid_to_face_map;
        const bke::AttributeAccessor attributes = args.mesh->attributes();
        if (const VArray<int> face_sets = *attributes.lookup<int>(".sculpt_face_set",
                                                                  bke::AttrDomain::Face))
        {
          const VArraySpan<int> face_sets_span(face_sets);
          uchar4 *data = vbo.vert_buf->data<uchar4>().data();
-          foreach_grids(
-              args,
-              use_flat_layout,
-              [&](int /*x*/, int /*y*/, int grid_index, CCGElem * /*elems*/[4], int /*i*/) {
-                uchar face_set_color[4] = {UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX};

-                const int face_index = BKE_subdiv_ccg_grid_to_face_index(*args.subdiv_ccg,
-                                                                         grid_index);
-                const int fset = face_sets_span[face_index];
+          const int verts_per_grid = use_flat_layout ? square_i(gridsize - 1) * 4 :
+                                                       square_i(gridsize);
+          for (const int i : grid_indices.index_range()) {
+            uchar4 color{UCHAR_MAX};
+            const int fset = face_sets_span[grid_to_face_map[grid_indices[i]]];
+            if (fset != args.face_sets_color_default) {
+              BKE_paint_face_set_overlay_color_get(fset, args.face_sets_color_seed, color);
+            }

-                /* Skip for the default color Face Set to render it white. */
-                if (fset != args.face_sets_color_default) {
-                  BKE_paint_face_set_overlay_color_get(
-                      fset, args.face_sets_color_seed, face_set_color);
-                }
-                *data = face_set_color;
-                data++;
-              });
+            std::fill_n(data, verts_per_grid, color);
+            data += verts_per_grid;
+          }
        }
        else {
          vbo.vert_buf->data<uchar4>().fill(uchar4{UCHAR_MAX});
@@ -588,69 +645,12 @@ static void fill_vbo_grids(PBVHVbo &vbo, const PBVH_GPU_Args &args, const bool u
      using Converter = AttributeConverter<T>;
      using VBOType = typename Converter::VBOType;
      if constexpr (!std::is_void_v<VBOType>) {
-        std::fill_n(vbo.vert_buf->data<VBOType>().data(),
-                    GPU_vertbuf_get_vertex_len(vbo.vert_buf),
-                    Converter::convert(fallback_value_for_fill<T>()));
+        vbo.vert_buf->data<VBOType>().fill(Converter::convert(fallback_value_for_fill<T>()));
      }
    });
  }
 }

-static void foreach_grids(
-    const PBVH_GPU_Args &args,
-    const bool use_flat_layout,
-    const FunctionRef<void(int x, int y, int grid_index, CCGElem *elems[4], int i)> func)
-{
-  int gridsize = args.ccg_key.grid_size;
-
-  uint totgrid = args.grid_indices.size();
-
-  if (use_flat_layout) {
-    for (int i = 0; i < totgrid; i++) {
-      const int grid_index = args.grid_indices[i];
-
-      CCGElem *grid = args.grids[grid_index];
-
-      for (int y = 0; y < gridsize - 1; y++) {
-        for (int x = 0; x < gridsize - 1; x++) {
-          CCGElem *elems[4] = {
-              CCG_grid_elem(args.ccg_key, grid, x, y),
-              CCG_grid_elem(args.ccg_key, grid, x + 1, y),
-              CCG_grid_elem(args.ccg_key, grid, x + 1, y + 1),
-              CCG_grid_elem(args.ccg_key, grid, x, y + 1),
-          };
-
-          func(x, y, grid_index, elems, 0);
-          func(x + 1, y, grid_index, elems, 1);
-          func(x + 1, y + 1, grid_index, elems, 2);
-          func(x, y + 1, grid_index, elems, 3);
-        }
-      }
-    }
-  }
-  else {
-    for (int i = 0; i < totgrid; i++) {
-      const int grid_index = args.grid_indices[i];
-
-      CCGElem *grid = args.grids[grid_index];
-
-      for (int y = 0; y < gridsize; y++) {
-        for (int x = 0; x < gridsize; x++) {
-          CCGElem *elems[4] = {
-              CCG_grid_elem(args.ccg_key, grid, x, y),
-              CCG_grid_elem(args.ccg_key, grid, min_ii(x + 1, gridsize - 1), y),
-              CCG_grid_elem(
-                  args.ccg_key, grid, min_ii(x + 1, gridsize - 1), min_ii(y + 1, gridsize - 1)),
-              CCG_grid_elem(args.ccg_key, grid, x, min_ii(y + 1, gridsize - 1)),
-          };
-
-          func(x, y, grid_index, elems, 0);
-        }
-      }
-    }
-  }
-}
-
 static void fill_vbo_faces(PBVHVbo &vbo, const PBVH_GPU_Args &args)
 {
  const int totvert = count_faces(args) * 3;