From c8a4026984df4de1050eaa2a228629f93ec4a07b Mon Sep 17 00:00:00 2001 From: Jesse Yurkovich Date: Thu, 3 Jul 2025 20:43:43 +0200 Subject: [PATCH] Mesh: Tune the parallelism of normals_calc_corners Tune the grain size used for the parallel_for to alleviate excessive mutex contention inside `handle_fan_result_and_custom_normals`. I happened to notice that the 4004 Moore Lane USD scene[1] experienced a load time regression compared to the prior release. It appears to be due to the grain size used; here are some 3-run averages for the import: ``` Grain | Time in seconds 256 (main) | (14.6+14.6+14.8)/3 = 14.6667 1024 | (13+12.8+12.9)/3 = 12.9 4096 | (13.3+13.1+13.1)/3 = 13.1667 16384 | (12.2+12+12.5)/3 = 12.2333 65536 | (9.4+9.2+9.6)/3 = 9.4 131072 | (7.9+7.7+8)/3 = 7.8667 262144 | (7.3+7.1+7.2)/3 = 7.2 max(16384, #verts/2) (PR) | (7.1+6.9+6.8)/3 = 6.9333 ``` This PR gets the scenario loading in just under 7 seconds now compared to over 14 originally. [1] https://dpel.aswf.io/4004-moore-lane/ Pull Request: https://projects.blender.org/blender/blender/pulls/141249 --- source/blender/blenkernel/intern/mesh_normals.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc index 4e3b4693097..2bbc18e86f3 100644 --- a/source/blender/blenkernel/intern/mesh_normals.cc +++ b/source/blender/blenkernel/intern/mesh_normals.cc @@ -1243,7 +1243,14 @@ void normals_calc_corners(const Span vert_positions, r_fan_spaces->corners_by_space.reserve(corner_verts.size()); } } - threading::parallel_for(vert_positions.index_range(), 256, [&](const IndexRange range) { + + int64_t grain_size = 256; + /* Decrease parallelism in case where lock is used to avoid contention. 
*/ + if (!custom_normals.is_empty() || r_fan_spaces) { + grain_size = std::max(int64_t(16384), vert_positions.size() / 2); + } + + threading::parallel_for(vert_positions.index_range(), grain_size, [&](const IndexRange range) { Vector corner_infos; LocalEdgeVectorSet local_edge_by_vert; Vector edge_infos;