Mesh: Parallelize vertex and edge to corner topology map creation

Change the algorithm to make better use of multiple CPU cores. First
offsets are created by counting the number of elements using each
vertex. Those offsets are used during the next phase that adds indices
to each group in parallel. Atomic increments are used to add elements
to each group. Since the order in each group is non-deterministic,
they are sorted in parallel afterwards.

The performance improvement depends on the number of cores, CPU caches,
memory bandwidth, single threaded performance, and mesh topology. In
our tests, performance improved by 3-4.5x for large grid-like meshes.

See [1] for investigation of this algorithm and potential alternatives.

1. https://hackmd.io/@s0TMIS4lTAGwHVO20ECwpw/build_edge_to_loop_map_tests.

Pull Request: https://projects.blender.org/blender/blender/pulls/110707
This commit is contained in:
Iliya Katueshenock
2023-08-28 22:32:31 +02:00
committed by Hans Goudey
parent eafe8332c7
commit 226359ec48

View File

@@ -11,6 +11,8 @@
#include "MEM_guardedalloc.h"
#include "atomic_ops.h"
#include "DNA_meshdata_types.h"
#include "DNA_vec_types.h"
@@ -306,6 +308,52 @@ static Array<int> create_reverse_offsets(const Span<int> indices, const int item
return offsets;
}
static void sort_small_groups(const OffsetIndices<int> groups,
const int grain_size,
MutableSpan<int> indices)
{
threading::parallel_for(groups.index_range(), grain_size, [&](const IndexRange range) {
for (const int64_t index : range) {
MutableSpan<int> group = indices.slice(groups[index]);
std::sort(group.begin(), group.end());
}
});
}
static Array<int> reverse_indices_in_groups(const Span<int> group_indices,
const OffsetIndices<int> offsets)
{
BLI_assert(!group_indices.is_empty());
BLI_assert(*std::max_element(group_indices.begin(), group_indices.end()) < offsets.size());
BLI_assert(*std::min_element(group_indices.begin(), group_indices.end()) >= 0);
Array<int> counts(offsets.size(), -1);
Array<int> results(group_indices.size());
threading::parallel_for(group_indices.index_range(), 1024, [&](const IndexRange range) {
for (const int64_t i : range) {
const int group_index = group_indices[i];
const int index_in_group = atomic_add_and_fetch_int32(&counts[group_index], 1);
results[offsets[group_index][index_in_group]] = int(i);
}
});
sort_small_groups(offsets, 1024, results);
return results;
}
static GroupedSpan<int> gather_groups(const Span<int> group_indices,
const int groups_num,
Array<int> &r_offsets,
Array<int> &r_indices)
{
if (group_indices.is_empty()) {
r_offsets.reinitialize(groups_num + 1);
r_offsets.as_mutable_span().fill(0);
return {OffsetIndices<int>(r_offsets), {}};
}
r_offsets = create_reverse_offsets(group_indices, groups_num);
r_indices = reverse_indices_in_groups(group_indices, r_offsets.as_span());
return {OffsetIndices<int>(r_offsets), r_indices};
}
Array<int> build_loop_to_face_map(const OffsetIndices<int> faces)
{
Array<int> map(faces.total_size());
@@ -355,16 +403,7 @@ GroupedSpan<int> build_vert_to_loop_map(const Span<int> corner_verts,
Array<int> &r_offsets,
Array<int> &r_indices)
{
r_offsets = create_reverse_offsets(corner_verts, verts_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(verts_num, 0);
for (const int64_t corner : corner_verts.index_range()) {
const int vert = corner_verts[corner];
r_indices[r_offsets[vert] + counts[vert]] = int(corner);
counts[vert]++;
}
return {OffsetIndices<int>(r_offsets), r_indices};
return gather_groups(corner_verts, verts_num, r_offsets, r_indices);
}
GroupedSpan<int> build_edge_to_loop_map(const Span<int> corner_edges,
@@ -372,16 +411,7 @@ GroupedSpan<int> build_edge_to_loop_map(const Span<int> corner_edges,
Array<int> &r_offsets,
Array<int> &r_indices)
{
r_offsets = create_reverse_offsets(corner_edges, edges_num);
r_indices.reinitialize(r_offsets.last());
Array<int> counts(edges_num, 0);
for (const int64_t corner : corner_edges.index_range()) {
const int edge = corner_edges[corner];
r_indices[r_offsets[edge] + counts[edge]] = int(corner);
counts[edge]++;
}
return {OffsetIndices<int>(r_offsets), r_indices};
return gather_groups(corner_edges, edges_num, r_offsets, r_indices);
}
GroupedSpan<int> build_edge_to_face_map(const OffsetIndices<int> faces,