Sculpt: Improve smooth brush performance with small radius

When using a small brush size, if enough latency is encountered,
sculpting can become unresponsive. See #131334 for more in-depth
information on this issue.

In general, this is most apparent to users with the Smooth brush and
any brushes with auto-smoothing enabled. Much of the work for this
brush action is in retrieving and averaging the neighboring vertex
positions. This is wasted work for vertices outside of the brush radius,
as ultimately they will have no displacement applied to them.

To help mitigate this performance regression, this PR adds variants of
the functions that calculate neighboring vertices. The new variants take
in the precalculated `factor` values so that vertices with a factor of
zero can skip further processing. Additionally, some methods are
restructured to take advantage of this.

This change represents a speedup of 4x, from `0.40ms` without this
patch to `0.10ms` with it, on a cube with 400k vertices and a brush
radius of 10px. For most brush size to BVH Node / Mesh ratios, we see
improvements, as we can avoid most of the processing for nodes where
the brush only affects a small number of vertices. For cases where the
entire mesh is affected by a brush, this patch does introduce a small
but measurable slowdown, from `0.31ms` to `0.33ms`.

Ref: #136006

Pull Request: https://projects.blender.org/blender/blender/pulls/136274
This commit is contained in:
Sean Kim
2025-03-27 18:57:06 +01:00
committed by Sean Kim
parent 091df9ebcc
commit d3d776fd3c
5 changed files with 202 additions and 47 deletions

View File

@@ -56,15 +56,11 @@ struct LocalData {
Vector<float3> translations;
};
BLI_NOINLINE static void apply_positions_faces(const Depsgraph &depsgraph,
const Sculpt &sd,
const Brush &brush,
const MeshAttributeData &attribute_data,
const Span<float3> vert_normals,
BLI_NOINLINE static void apply_positions_faces(const Sculpt &sd,
const bke::pbvh::MeshNode &node,
const float strength,
Object &object,
LocalData &tls,
const Span<float> factors,
const Span<float3> new_positions,
const PositionDeformData &position_data)
{
@@ -72,22 +68,10 @@ BLI_NOINLINE static void apply_positions_faces(const Depsgraph &depsgraph,
const Span<int> verts = node.verts();
calc_factors_common_mesh_indexed(depsgraph,
brush,
object,
attribute_data,
position_data.eval,
vert_normals,
node,
tls.factors,
tls.distances);
scale_factors(tls.factors, strength);
tls.translations.resize(verts.size());
const MutableSpan<float3> translations = tls.translations;
translations_from_new_positions(new_positions, verts, position_data.eval, translations);
scale_translations(translations, tls.factors);
scale_translations(translations, factors);
clip_and_lock_translations(sd, ss, position_data.eval, verts, translations);
position_data.deform(translations, verts);
@@ -116,6 +100,8 @@ BLI_NOINLINE static void do_smooth_brush_mesh(const Depsgraph &depsgraph,
const OffsetIndices<int> node_vert_offsets = create_node_vert_offsets(
nodes, node_mask, node_offset_data);
Array<float3> new_positions(node_vert_offsets.total_size());
Array<float> all_factors(node_vert_offsets.total_size());
Array<float> all_distances(node_vert_offsets.total_size());
threading::EnumerableThreadSpecific<LocalData> all_tls;
@@ -126,12 +112,26 @@ BLI_NOINLINE static void do_smooth_brush_mesh(const Depsgraph &depsgraph,
node_mask.foreach_index(GrainSize(1), [&](const int i, const int pos) {
LocalData &tls = all_tls.local();
const Span<int> verts = nodes[i].verts();
const MutableSpan<float> node_factors = all_factors.as_mutable_span().slice(
node_vert_offsets[pos]);
calc_factors_common_mesh_indexed(
depsgraph,
brush,
object,
attribute_data,
position_data.eval,
vert_normals,
nodes[i],
node_factors,
all_distances.as_mutable_span().slice(node_vert_offsets[pos]));
scale_factors(node_factors, strength);
const GroupedSpan<int> neighbors = calc_vert_neighbors_interior(faces,
corner_verts,
vert_to_face_map,
ss.vertex_info.boundary,
attribute_data.hide_poly,
verts,
node_factors,
tls.neighbor_offsets,
tls.neighbor_data);
smooth::neighbor_data_average_mesh_check_loose(
@@ -143,15 +143,11 @@ BLI_NOINLINE static void do_smooth_brush_mesh(const Depsgraph &depsgraph,
node_mask.foreach_index(GrainSize(1), [&](const int i, const int pos) {
LocalData &tls = all_tls.local();
apply_positions_faces(depsgraph,
sd,
brush,
attribute_data,
vert_normals,
apply_positions_faces(sd,
nodes[i],
strength,
object,
tls,
all_factors.as_mutable_span().slice(node_vert_offsets[pos]),
new_positions.as_span().slice(node_vert_offsets[pos]),
position_data);
});
@@ -182,7 +178,7 @@ static void calc_grids(const Depsgraph &depsgraph,
tls.new_positions.resize(positions.size());
const MutableSpan<float3> new_positions = tls.new_positions;
smooth::neighbor_position_average_interior_grids(
faces, corner_verts, boundary_verts, subdiv_ccg, grids, new_positions);
faces, corner_verts, boundary_verts, subdiv_ccg, grids, tls.factors, new_positions);
tls.translations.resize(positions.size());
const MutableSpan<float3> translations = tls.translations;
@@ -212,7 +208,7 @@ static void calc_bmesh(const Depsgraph &depsgraph,
tls.new_positions.resize(verts.size());
const MutableSpan<float3> new_positions = tls.new_positions;
smooth::neighbor_position_average_interior_bmesh(verts, new_positions);
smooth::neighbor_position_average_interior_bmesh(verts, tls.factors, new_positions);
tls.translations.resize(verts.size());
const MutableSpan<float3> translations = tls.translations;

View File

@@ -189,6 +189,15 @@ void calc_factors_common_mesh_indexed(const Depsgraph &depsgraph,
const bke::pbvh::MeshNode &node,
Vector<float> &r_factors,
Vector<float> &r_distances);
/**
 * Overload of #calc_factors_common_mesh_indexed that writes into caller-provided spans instead of
 * resizable vectors.
 *
 * NOTE(review): the #Vector overload resizes both of its outputs to `node.verts().size()` before
 * forwarding to this one, so `factors` and `distances` presumably must already have that size
 * when calling this overload directly -- confirm against the definition.
 */
void calc_factors_common_mesh_indexed(const Depsgraph &depsgraph,
const Brush &brush,
const Object &object,
const MeshAttributeData &attribute_data,
Span<float3> vert_positions,
Span<float3> vert_normals,
const bke::pbvh::MeshNode &node,
MutableSpan<float> factors,
MutableSpan<float> distances);
void calc_factors_common_grids(const Depsgraph &depsgraph,
const Brush &brush,
const Object &object,
@@ -456,6 +465,15 @@ GroupedSpan<int> calc_vert_neighbors_interior(OffsetIndices<int> faces,
Span<int> verts,
Vector<int> &r_offset_data,
Vector<int> &r_data);
/**
 * Variant of #calc_vert_neighbors_interior that additionally takes per-vertex `factors`, with
 * one entry per element of `verts` (asserted in the implementation). Vertices whose factor is
 * exactly 0.0f are skipped before any neighbors are gathered for them.
 */
GroupedSpan<int> calc_vert_neighbors_interior(OffsetIndices<int> faces,
Span<int> corner_verts,
GroupedSpan<int> vert_to_face,
BitSpan boundary_verts,
Span<bool> hide_poly,
Span<int> verts,
Span<float> factors,
Vector<int> &r_offset_data,
Vector<int> &r_data);
void calc_vert_neighbors_interior(OffsetIndices<int> faces,
Span<int> corner_verts,
BitSpan boundary_verts,

View File

@@ -6596,22 +6596,42 @@ void calc_factors_common_mesh_indexed(const Depsgraph &depsgraph,
const bke::pbvh::MeshNode &node,
Vector<float> &r_factors,
Vector<float> &r_distances)
{
const Span<int> verts = node.verts();
r_factors.resize(verts.size());
r_distances.resize(verts.size());
calc_factors_common_mesh_indexed(depsgraph,
brush,
object,
attribute_data,
vert_positions,
vert_normals,
node,
r_factors.as_mutable_span(),
r_distances.as_mutable_span());
}
void calc_factors_common_mesh_indexed(const Depsgraph &depsgraph,
const Brush &brush,
const Object &object,
const MeshAttributeData &attribute_data,
const Span<float3> vert_positions,
const Span<float3> vert_normals,
const bke::pbvh::MeshNode &node,
const MutableSpan<float> factors,
const MutableSpan<float> distances)
{
const SculptSession &ss = *object.sculpt;
const StrokeCache &cache = *ss.cache;
const Span<int> verts = node.verts();
r_factors.resize(verts.size());
const MutableSpan<float> factors = r_factors;
fill_factor_from_hide_and_mask(attribute_data.hide_vert, attribute_data.mask, verts, factors);
filter_region_clip_factors(ss, vert_positions, verts, factors);
if (brush.flag & BRUSH_FRONTFACE) {
calc_front_face(cache.view_normal_symm, vert_normals, verts, factors);
}
r_distances.resize(verts.size());
const MutableSpan<float> distances = r_distances;
calc_brush_distances(
ss, vert_positions, verts, eBrushFalloffShape(brush.falloff_shape), distances);
filter_distances_with_radius(cache.radius, distances, factors);
@@ -7765,16 +7785,21 @@ GroupedSpan<BMVert *> calc_vert_neighbors(Set<BMVert *, 0> verts,
return GroupedSpan<BMVert *>(r_offset_data.as_span(), r_data.as_span());
}
GroupedSpan<int> calc_vert_neighbors_interior(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const GroupedSpan<int> vert_to_face,
const BitSpan boundary_verts,
const Span<bool> hide_poly,
const Span<int> verts,
Vector<int> &r_offset_data,
Vector<int> &r_data)
template<bool use_factors>
static GroupedSpan<int> calc_vert_neighbors_interior_impl(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const GroupedSpan<int> vert_to_face,
const BitSpan boundary_verts,
const Span<bool> hide_poly,
const Span<int> verts,
const Span<float> factors,
Vector<int> &r_offset_data,
Vector<int> &r_data)
{
BLI_assert(corner_verts.size() == faces.total_size());
if constexpr (use_factors) {
BLI_assert(verts.size() == factors.size());
}
r_offset_data.resize(verts.size() + 1);
r_data.clear();
@@ -7783,6 +7808,11 @@ GroupedSpan<int> calc_vert_neighbors_interior(const OffsetIndices<int> faces,
const int vert = verts[i];
const int vert_start = r_data.size();
r_offset_data[i] = vert_start;
if constexpr (use_factors) {
if (factors[i] == 0.0f) {
continue;
}
}
append_neighbors_to_vector(faces, corner_verts, vert_to_face, hide_poly, vert, r_data);
if (boundary_verts[vert]) {
@@ -7804,6 +7834,47 @@ GroupedSpan<int> calc_vert_neighbors_interior(const OffsetIndices<int> faces,
return GroupedSpan<int>(r_offset_data.as_span(), r_data.as_span());
}
/**
 * Factor-aware overload: forwards to the shared implementation with factor checking enabled, so
 * entries of `verts` whose matching factor is exactly 0.0f are skipped before any neighbors are
 * appended for them.
 */
GroupedSpan<int> calc_vert_neighbors_interior(const OffsetIndices<int> faces,
                                              const Span<int> corner_verts,
                                              const GroupedSpan<int> vert_to_face,
                                              const BitSpan boundary_verts,
                                              const Span<bool> hide_poly,
                                              const Span<int> verts,
                                              const Span<float> factors,
                                              Vector<int> &r_offset_data,
                                              Vector<int> &r_data)
{
  constexpr bool use_factors = true;
  const GroupedSpan<int> neighbors = calc_vert_neighbors_interior_impl<use_factors>(
      faces,
      corner_verts,
      vert_to_face,
      boundary_verts,
      hide_poly,
      verts,
      factors,
      r_offset_data,
      r_data);
  return neighbors;
}
/**
 * Unfiltered overload: forwards to the shared implementation with factor checking disabled, so
 * neighbors are gathered for every entry of `verts`.
 */
GroupedSpan<int> calc_vert_neighbors_interior(const OffsetIndices<int> faces,
                                              const Span<int> corner_verts,
                                              const GroupedSpan<int> vert_to_face,
                                              const BitSpan boundary_verts,
                                              const Span<bool> hide_poly,
                                              const Span<int> verts,
                                              Vector<int> &r_offset_data,
                                              Vector<int> &r_data)
{
  /* An empty span stands in for the factors; the factor checks in the implementation are
   * guarded by `if constexpr (use_factors)`. */
  constexpr bool use_factors = false;
  const Span<float> no_factors{};
  const GroupedSpan<int> neighbors = calc_vert_neighbors_interior_impl<use_factors>(
      faces,
      corner_verts,
      vert_to_face,
      boundary_verts,
      hide_poly,
      verts,
      no_factors,
      r_offset_data,
      r_data);
  return neighbors;
}
void calc_vert_neighbors_interior(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const BitSpan boundary_verts,

View File

@@ -103,17 +103,22 @@ static float3 average_positions(const CCGKey &key,
return result;
}
void neighbor_position_average_interior_grids(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const BitSpan boundary_verts,
const SubdivCCG &subdiv_ccg,
const Span<int> grids,
const MutableSpan<float3> new_positions)
template<bool use_factors>
static void neighbor_position_average_interior_grids_impl(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const BitSpan boundary_verts,
const SubdivCCG &subdiv_ccg,
const Span<int> grids,
const Span<float> factors,
const MutableSpan<float3> new_positions)
{
const CCGKey key = BKE_subdiv_ccg_key_top_level(subdiv_ccg);
const Span<float3> positions = subdiv_ccg.positions;
BLI_assert(grids.size() * key.grid_area == new_positions.size());
if constexpr (use_factors) {
BLI_assert(new_positions.size() == factors.size());
}
for (const int i : grids.index_range()) {
const int node_verts_start = i * key.grid_area;
@@ -128,6 +133,13 @@ void neighbor_position_average_interior_grids(const OffsetIndices<int> faces,
const int node_vert_index = node_verts_start + offset;
const int vert = grid_range[offset];
if constexpr (use_factors) {
if (factors[node_vert_index] == 0.0f) {
new_positions[node_vert_index] = positions[vert];
continue;
}
}
SubdivCCGCoord coord{};
coord.grid_index = grid;
coord.x = x;
@@ -163,6 +175,29 @@ void neighbor_position_average_interior_grids(const OffsetIndices<int> faces,
}
}
void neighbor_position_average_interior_grids(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const BitSpan boundary_verts,
const SubdivCCG &subdiv_ccg,
const Span<int> grids,
const MutableSpan<float3> new_positions)
{
neighbor_position_average_interior_grids_impl<false>(
faces, corner_verts, boundary_verts, subdiv_ccg, grids, {}, new_positions);
}
void neighbor_position_average_interior_grids(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const BitSpan boundary_verts,
const SubdivCCG &subdiv_ccg,
const Span<int> grids,
const Span<float> factors,
const MutableSpan<float3> new_positions)
{
neighbor_position_average_interior_grids_impl<true>(
faces, corner_verts, boundary_verts, subdiv_ccg, grids, factors, new_positions);
}
template<typename T>
void average_data_grids(const SubdivCCG &subdiv_ccg,
const Span<T> src,
@@ -260,14 +295,27 @@ void neighbor_position_average_bmesh(const Set<BMVert *, 0> &verts,
}
}
void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
const MutableSpan<float3> new_positions)
template<bool use_factors>
static void neighbor_position_average_interior_bmesh_impl(const Set<BMVert *, 0> &verts,
const Span<float> factors,
const MutableSpan<float3> new_positions)
{
BLI_assert(verts.size() == new_positions.size());
if constexpr (use_factors) {
BLI_assert(new_positions.size() == factors.size());
}
Vector<BMVert *, 64> neighbor_data;
int i = 0;
for (BMVert *vert : verts) {
if constexpr (use_factors) {
if (factors[i] == 0.0f) {
new_positions[i] = float3(vert->co);
i++;
continue;
}
}
const Span<BMVert *> neighbors = vert_neighbors_get_interior_bmesh(*vert, neighbor_data);
if (neighbors.is_empty()) {
new_positions[i] = float3(vert->co);
@@ -278,6 +326,18 @@ void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
i++;
}
}
void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
const Span<float> factors,
const MutableSpan<float3> new_positions)
{
neighbor_position_average_interior_bmesh_impl<true>(verts, factors, new_positions);
}
void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
const MutableSpan<float3> new_positions)
{
neighbor_position_average_interior_bmesh_impl<false>(verts, {}, new_positions);
}
void bmesh_four_neighbor_average(float avg[3], const float3 &direction, const BMVert *v)
{

View File

@@ -38,6 +38,13 @@ void neighbor_color_average(OffsetIndices<int> faces,
GroupedSpan<int> vert_neighbors,
MutableSpan<float4> smooth_colors);
/**
 * Variant that takes per-vertex `factors`, sized to match `new_positions` (asserted in the
 * implementation). Where a factor is exactly 0.0f, the vertex's current position is written to
 * `new_positions` instead of a neighbor average.
 */
void neighbor_position_average_interior_grids(OffsetIndices<int> faces,
Span<int> corner_verts,
BitSpan boundary_verts,
const SubdivCCG &subdiv_ccg,
Span<int> grids,
Span<float> factors,
MutableSpan<float3> new_positions);
void neighbor_position_average_interior_grids(OffsetIndices<int> faces,
Span<int> corner_verts,
BitSpan boundary_verts,
@@ -47,6 +54,9 @@ void neighbor_position_average_interior_grids(OffsetIndices<int> faces,
void neighbor_position_average_bmesh(const Set<BMVert *, 0> &verts,
MutableSpan<float3> new_positions);
/**
 * Variant that takes per-vertex `factors`, sized to match `new_positions` (asserted in the
 * implementation). Where a factor is exactly 0.0f, the vertex's own coordinate is written to
 * `new_positions` without looking up its neighbors.
 */
void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
Span<float> factors,
MutableSpan<float3> new_positions);
/**
 * Variant without factors: writes one entry of `new_positions` per vertex in `verts` (sizes are
 * asserted equal in the implementation), with no factor-based skipping.
 */
void neighbor_position_average_interior_bmesh(const Set<BMVert *, 0> &verts,
MutableSpan<float3> new_positions);