diff --git a/source/blender/blenkernel/BKE_mesh.hh b/source/blender/blenkernel/BKE_mesh.hh
index cd838d032ca..b55daeb7446 100644
--- a/source/blender/blenkernel/BKE_mesh.hh
+++ b/source/blender/blenkernel/BKE_mesh.hh
@@ -149,15 +149,10 @@ struct CornerNormalSpace {
 /**
  * Storage for corner fan coordinate spaces for an entire mesh.
+ * For performance reasons, the distribution of #spaces and their index mapping in
+ * #corner_space_indices are non-deterministic.
  */
 struct CornerNormalSpaceArray {
-  /**
-   * Results are added from multiple threads. The lock is an easy way to parallelize adding results
-   * for each corner fan. This method means the order of spaces in the `spaces` vector and
-   * `corners_by_face` is non-deterministic. That shouldn't affect the final output for the user
-   * though.
-   */
-  Mutex build_mutex;
   /**
    * The normal coordinate spaces, potentially shared between multiple face corners in a smooth fan
    * connected to a vertex (and not per face corner). Depending on the mesh (the amount of sharing
    * / number of sharp edges / size of each fan), there may be many fewer spaces than face corners.
diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc
index 6d68534a0dd..1207c465135 100644
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@@ -17,6 +17,7 @@
 
 #include "BLI_array_utils.hh"
 #include "BLI_bit_vector.hh"
+#include "BLI_enumerable_thread_specific.hh"
 #include "BLI_linklist.h"
 #include "BLI_math_base.hh"
 #include "BLI_math_vector.hh"
@@ -1177,6 +1178,12 @@ static float3 accumulate_fan_normal(const Span<VertCornerInfo> corner_infos,
   return math::normalize(fan_normal);
 }
 
+struct CornerSpaceGroup {
+  /* Possibly acyclic, unordered set of adjacent corners in one smooth fan around a vertex. */
+  Array<int> fan_corners;
+  CornerNormalSpace space;
+};
+
 /** Don't inline this function to simplify the code path without custom normals. */
 BLI_NOINLINE static void handle_fan_result_and_custom_normals(
     const Span<short2> custom_normals,
@@ -1184,7 +1191,8 @@ BLI_NOINLINE static void handle_fan_result_and_custom_normals(
     const Span<float3> edge_dirs,
     const Span<int> local_corners_in_fan,
     float3 &fan_normal,
-    CornerNormalSpaceArray *r_fan_spaces)
+    CornerNormalSpaceArray *r_fan_spaces,
+    Vector<CornerSpaceGroup> &r_local_space_groups)
 {
   const int local_edge_first = corner_infos[local_corners_in_fan.first()].local_edge_next;
   const int local_edge_last = corner_infos[local_corners_in_fan.last()].local_edge_prev;
@@ -1214,23 +1222,16 @@ BLI_NOINLINE static void handle_fan_result_and_custom_normals(
     fan_normal = corner_space_custom_data_to_normal(fan_space, short2(average_custom_normal));
   }
 
-  if (r_fan_spaces) {
-    std::lock_guard lock(r_fan_spaces->build_mutex);
-    r_fan_spaces->spaces.append(fan_space);
-    const int fan_space_index = r_fan_spaces->spaces.size() - 1;
-    for (const int local_corner : local_corners_in_fan) {
-      const VertCornerInfo &info = corner_infos[local_corner];
-      r_fan_spaces->corner_space_indices[info.corner] = fan_space_index;
-    }
-    if (r_fan_spaces->create_corners_by_space) {
-      Array<int> corners_in_space(local_corners_in_fan.size());
-      for (const int i : local_corners_in_fan.index_range()) {
-        const VertCornerInfo &info = corner_infos[local_corners_in_fan[i]];
-        corners_in_space[i] = info.corner;
-      }
-      r_fan_spaces->corners_by_space.append(std::move(corners_in_space));
-    }
+  if (!r_fan_spaces) {
+    return;
   }
+
+  Array<int> fan_corners(local_corners_in_fan.size());
+  for (const int i : local_corners_in_fan.index_range()) {
+    const VertCornerInfo &info = corner_infos[local_corners_in_fan[i]];
+    fan_corners[i] = info.corner;
+  }
+  r_local_space_groups.append({std::move(fan_corners), fan_space});
 }
 
 void normals_calc_corners(const Span<float3> vert_positions,
@@ -1245,28 +1246,19 @@ void normals_calc_corners(const Span<float3> vert_positions,
                           CornerNormalSpaceArray *r_fan_spaces,
                           MutableSpan<float3> r_corner_normals)
 {
-  if (r_fan_spaces) {
-    /* These are potentially-wasteful over-allocations. */
-    r_fan_spaces->spaces.reserve(corner_verts.size());
-    r_fan_spaces->corner_space_indices.reinitialize(corner_verts.size());
-    if (r_fan_spaces->create_corners_by_space) {
-      r_fan_spaces->corners_by_space.reserve(corner_verts.size());
-    }
-  }
+  threading::EnumerableThreadSpecific<Vector<CornerSpaceGroup>> space_groups;
 
-  int64_t grain_size = 256;
-  /* Decrease parallelism in case where lock is used to avoid contention. */
-  if (!custom_normals.is_empty() || r_fan_spaces) {
-    grain_size = std::max(int64_t(16384), vert_positions.size() / 2);
-  }
-
-  threading::parallel_for(vert_positions.index_range(), grain_size, [&](const IndexRange range) {
+  threading::parallel_for(vert_positions.index_range(), 256, [&](const IndexRange range) {
     Vector<VertCornerInfo> corner_infos;
     LocalEdgeVectorSet local_edge_by_vert;
     Vector<LocalEdgeInfo> edge_infos;
     Vector<float3> edge_dirs;
     Vector<bool> local_corner_visited;
     Vector<int> corners_in_fan;
+
+    Vector<CornerSpaceGroup> *local_space_groups = r_fan_spaces ? &space_groups.local() :
+                                                                  nullptr;
+
     for (const int vert : range) {
       const float3 vert_position = vert_positions[vert];
       const Span<int> vert_faces = vert_to_face_map[vert];
@@ -1308,8 +1300,13 @@ void normals_calc_corners(const Span<float3> vert_positions,
           corner_infos, edge_dirs, face_normals, corners_in_fan);
 
       if (!custom_normals.is_empty() || r_fan_spaces) {
-        handle_fan_result_and_custom_normals(
-            custom_normals, corner_infos, edge_dirs, corners_in_fan, fan_normal, r_fan_spaces);
+        handle_fan_result_and_custom_normals(custom_normals,
+                                             corner_infos,
+                                             edge_dirs,
+                                             corners_in_fan,
+                                             fan_normal,
+                                             r_fan_spaces,
+                                             *local_space_groups);
       }
 
       for (const int local_corner : corners_in_fan) {
@@ -1333,6 +1330,48 @@ void normals_calc_corners(const Span<float3> vert_positions,
       BLI_assert(visited_count == corner_infos.size());
     }
   });
+
+  if (!r_fan_spaces) {
+    return;
+  }
+
+  Vector<int> space_groups_count;
+  Vector<Vector<CornerSpaceGroup>> all_space_groups;
+  for (auto &groups : space_groups) {
+    space_groups_count.append(groups.size());
+    all_space_groups.append(std::move(groups));
+  }
+  space_groups_count.append(0);
+  const OffsetIndices<int> space_offsets = offset_indices::accumulate_counts_to_offsets(
+      space_groups_count);
+
+  r_fan_spaces->spaces.reinitialize(space_offsets.total_size());
+  r_fan_spaces->corner_space_indices.reinitialize(corner_verts.size());
+  if (r_fan_spaces->create_corners_by_space) {
+    r_fan_spaces->corners_by_space.reinitialize(space_offsets.total_size());
+  }
+
+  const int64_t mean_size = space_offsets.total_size() / space_offsets.size();
+  const int64_t grain_size = math::clamp<int64_t>((1024 * 512) / mean_size, 256, 1024 * 16);
+  threading::parallel_for(all_space_groups.index_range(), grain_size, [&](const IndexRange range) {
+    for (const int thread_i : range) {
+      Vector<CornerSpaceGroup> &local_space_groups = all_space_groups[thread_i];
+      for (const int group_i : local_space_groups.index_range()) {
+        const int space_index = space_offsets[thread_i][group_i];
+        r_fan_spaces->spaces[space_index] = local_space_groups[group_i].space;
+        r_fan_spaces->corner_space_indices.as_mutable_span().fill_indices(
+            local_space_groups[group_i].fan_corners.as_span(), space_index);
+      }
+      if (!r_fan_spaces->create_corners_by_space) {
+        continue;
+      }
+      for (const int group_i : local_space_groups.index_range()) {
+        const int space_index = space_offsets[thread_i][group_i];
+        r_fan_spaces->corners_by_space[space_index] = std::move(
+            local_space_groups[group_i].fan_corners);
+      }
+    }
+  });
 }
 
 #undef INDEX_UNSET
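
The pattern this patch switches to is a lock-free three-phase gather: each worker thread
appends its CornerSpaceGroup results to a thread-local Vector (threading::EnumerableThreadSpecific),
a scan over the per-thread counts (offset_indices::accumulate_counts_to_offsets) assigns each
buffer a disjoint range in the flat output arrays, and a second parallel pass scatters the
results into those ranges. The sketch below shows the same idea in standalone C++, using
std::thread and std::vector instead of Blender's BLI utilities; all names in it are
illustrative and are not taken from the Blender sources.

#include <algorithm>
#include <cstdio>
#include <thread>
#include <vector>

int main()
{
  const int threads_num = 4;
  std::vector<std::vector<int>> per_thread_results(threads_num);

  /* Phase 1: every worker appends only to its own vector, so no lock is needed. */
  std::vector<std::thread> workers;
  for (int t = 0; t < threads_num; t++) {
    workers.emplace_back([t, &per_thread_results]() {
      for (int i = 0; i < 3; i++) {
        per_thread_results[t].push_back(t * 100 + i);
      }
    });
  }
  for (std::thread &worker : workers) {
    worker.join();
  }

  /* Phase 2: an exclusive scan over the per-thread counts yields each buffer's start
   * offset in the flat output, mirroring accumulate_counts_to_offsets in the patch. */
  std::vector<int> offsets(threads_num + 1, 0);
  for (int t = 0; t < threads_num; t++) {
    offsets[t + 1] = offsets[t] + int(per_thread_results[t].size());
  }

  /* Phase 3: scatter. Each buffer owns the disjoint range [offsets[t], offsets[t + 1]),
   * so this loop is itself safe to parallelize, as the second parallel_for does above. */
  std::vector<int> flat(offsets.back());
  for (int t = 0; t < threads_num; t++) {
    std::copy(per_thread_results[t].begin(),
              per_thread_results[t].end(),
              flat.begin() + offsets[t]);
  }

  for (const int value : flat) {
    std::printf("%d ", value);
  }
  std::printf("\n");
  return 0;
}

As in the patch, the final order depends only on how the per-thread buffers happen to be
enumerated, which is why the new comment on CornerNormalSpaceArray warns that the
distribution of #spaces and the indices in #corner_space_indices are non-deterministic.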