From 226359ec48c820df59eb948d6d32a5560bffd685 Mon Sep 17 00:00:00 2001
From: Iliya Katueshenock
Date: Mon, 28 Aug 2023 22:32:31 +0200
Subject: [PATCH] Mesh: Parallelize vertex and edge to corner topology map creation

Change the algorithm to make better use of multiple CPU cores.
First, offsets are created by counting the number of elements using
each vertex or edge. Those offsets are used during the next phase that
adds indices to each group in parallel. Atomic increments are used to
add elements to each group. Since the order in each group is
non-deterministic, each group is sorted in parallel afterwards.

The performance improvement depends on the number of cores, CPU caches,
memory bandwidth, single-threaded performance, and mesh topology.
In our tests, performance improved by 3-4.5x for large grid-like meshes.

See [1] for investigation of this algorithm and potential alternatives.

[1] https://hackmd.io/@s0TMIS4lTAGwHVO20ECwpw/build_edge_to_loop_map_tests

Pull Request: https://projects.blender.org/blender/blender/pulls/110707
---
 .../blender/blenkernel/intern/mesh_mapping.cc | 70 +++++++++++++------
 1 file changed, 50 insertions(+), 20 deletions(-)

diff --git a/source/blender/blenkernel/intern/mesh_mapping.cc b/source/blender/blenkernel/intern/mesh_mapping.cc
index 942b755975e..1b3220fe497 100644
--- a/source/blender/blenkernel/intern/mesh_mapping.cc
+++ b/source/blender/blenkernel/intern/mesh_mapping.cc
@@ -11,6 +11,8 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "atomic_ops.h"
+
 #include "DNA_meshdata_types.h"
 #include "DNA_vec_types.h"
 
@@ -306,6 +308,52 @@ static Array create_reverse_offsets(const Span indices, const int item
   return offsets;
 }
 
+static void sort_small_groups(const OffsetIndices groups,
+                              const int grain_size,
+                              MutableSpan indices)
+{
+  threading::parallel_for(groups.index_range(), grain_size, [&](const IndexRange range) {
+    for (const int64_t index : range) {
+      MutableSpan group = indices.slice(groups[index]);
+      std::sort(group.begin(), group.end());
+    }
+  });
+}
+
+static Array reverse_indices_in_groups(const Span group_indices,
+                                       const OffsetIndices offsets)
+{
+  BLI_assert(!group_indices.is_empty());
+  BLI_assert(*std::max_element(group_indices.begin(), group_indices.end()) < offsets.size());
+  BLI_assert(*std::min_element(group_indices.begin(), group_indices.end()) >= 0);
+  Array counts(offsets.size(), -1);
+  Array results(group_indices.size());
+  threading::parallel_for(group_indices.index_range(), 1024, [&](const IndexRange range) {
+    for (const int64_t i : range) {
+      const int group_index = group_indices[i];
+      const int index_in_group = atomic_add_and_fetch_int32(&counts[group_index], 1);
+      results[offsets[group_index][index_in_group]] = int(i);
+    }
+  });
+  sort_small_groups(offsets, 1024, results);
+  return results;
+}
+
+static GroupedSpan gather_groups(const Span group_indices,
+                                 const int groups_num,
+                                 Array &r_offsets,
+                                 Array &r_indices)
+{
+  if (group_indices.is_empty()) {
+    r_offsets.reinitialize(groups_num + 1);
+    r_offsets.as_mutable_span().fill(0);
+    return {OffsetIndices(r_offsets), {}};
+  }
+  r_offsets = create_reverse_offsets(group_indices, groups_num);
+  r_indices = reverse_indices_in_groups(group_indices, r_offsets.as_span());
+  return {OffsetIndices(r_offsets), r_indices};
+}
+
 Array build_loop_to_face_map(const OffsetIndices faces)
 {
   Array map(faces.total_size());
@@ -355,16 +403,7 @@ GroupedSpan build_vert_to_loop_map(const Span corner_verts,
                                    Array &r_offsets,
                                    Array &r_indices)
 {
-  r_offsets = create_reverse_offsets(corner_verts, verts_num);
-  r_indices.reinitialize(r_offsets.last());
-  Array counts(verts_num, 0);
-
-  for (const int64_t corner : corner_verts.index_range()) {
-    const int vert = corner_verts[corner];
-    r_indices[r_offsets[vert] + counts[vert]] = int(corner);
-    counts[vert]++;
-  }
-  return {OffsetIndices(r_offsets), r_indices};
+  return gather_groups(corner_verts, verts_num, r_offsets, r_indices);
 }
 
 GroupedSpan build_edge_to_loop_map(const Span corner_edges,
@@ -372,16 +411,7 @@ GroupedSpan build_edge_to_loop_map(const Span corner_edges,
                                    Array &r_offsets,
                                    Array &r_indices)
 {
-  r_offsets = create_reverse_offsets(corner_edges, edges_num);
-  r_indices.reinitialize(r_offsets.last());
-  Array counts(edges_num, 0);
-
-  for (const int64_t corner : corner_edges.index_range()) {
-    const int edge = corner_edges[corner];
-    r_indices[r_offsets[edge] + counts[edge]] = int(corner);
-    counts[edge]++;
-  }
-  return {OffsetIndices(r_offsets), r_indices};
+  return gather_groups(corner_edges, edges_num, r_offsets, r_indices);
 }
 
 GroupedSpan build_edge_to_face_map(const OffsetIndices faces,