Mesh: Calculate edges with VectorSet instead of Map

For legacy reasons (`MEdge`), edge calculation was written under the
assumption that edges cannot be copied cheaply. But today edges are
just `int2`, so storing an `edge *` instead of the `edge` itself
actually made things worse. And since `OrderedEdge` is essentially the
same thing as `int2`, there is no reason to use a `Map` for edges: the
edges now live in a hash set. `VectorSet` is used so that the index of
each edge can still be retrieved.
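For illustration only (a sketch assuming Blender's `BLI_vector_set.hh` and
`BLI_ordered_edge.hh` headers, not code from this commit), a single
`VectorSet` can both deduplicate edges and hand back a stable index per
edge, which previously needed the `Map` plus an `OrigEdgeOrIndex` value:

```cpp
/* Minimal sketch, not part of this commit: deduplicate edges and recover
 * their indices with a single VectorSet. */
#include "BLI_math_vector_types.hh"
#include "BLI_ordered_edge.hh"
#include "BLI_span.hh"
#include "BLI_vector_set.hh"

namespace blender {

static void dedup_edges_sketch(const Span<int2> raw_edges,
                               MutableSpan<int> index_by_raw_edge)
{
  VectorSet<OrderedEdge> edges;
  for (const int2 edge : raw_edges) {
    /* #add is a no-op for keys that already exist, so this deduplicates. */
    edges.add(OrderedEdge(edge));
  }
  /* Insertion order is kept, so every unique edge has a stable index. */
  for (const int i : raw_edges.index_range()) {
    index_by_raw_edge[i] = int(edges.index_of(OrderedEdge(raw_edges[i])));
  }
  /* The unique edges themselves are available as a contiguous span. */
  const Span<int2> unique_edges = edges.as_span().cast<int2>();
  (void)unique_edges;
}

}  // namespace blender
```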

The only functional change is that the original edges are now
reordered as well. This should be fine, just like any other
unintentional but stable change of indices.

For edge calculation on a 2'000 x 2'000 x 2'000 cube, the timing goes
from about `3703.47` ms to `2911.18` ms.

To reduce memory usage, a template parameter is added to the
`VectorSet` slots so that they can use a 32-bit instead of a 64-bit
index type. Without that, the performance improvement is not consistent
and might not hold on a computer with more memory bandwidth.
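As a rough sanity check of the memory argument (a sketch, not part of this
commit), the slot's only data member is its state/index, so requesting a
32-bit index halves each slot:

```cpp
/* Sketch only: the slot's sole member is its state/index, so the 32-bit
 * variant is half the size of the default 64-bit one. */
#include "BLI_ordered_edge.hh"
#include "BLI_vector_set.hh"

namespace blender {

static_assert(sizeof(SimpleVectorSetSlot<OrderedEdge, int>) == sizeof(int));
static_assert(sizeof(SimpleVectorSetSlot<OrderedEdge, int>) <
              sizeof(SimpleVectorSetSlot<OrderedEdge, int64_t>));

}  // namespace blender
```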

Co-authored-by: Hans Goudey <hans@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/120224
Author: Iliya Katueshenock
Date: 2024-04-11 04:33:25 +02:00
Committed by: Hans Goudey
Commit: 6bafe65d28 (parent: 7642a5452a)
3 changed files with 78 additions and 71 deletions

source/blender/blenkernel/intern/mesh_calc_edges.cc

@@ -6,10 +6,11 @@
* \ingroup bke
*/
#include "BLI_map.hh"
#include "BLI_array_utils.hh"
#include "BLI_ordered_edge.hh"
#include "BLI_task.hh"
#include "BLI_threads.h"
#include "BLI_vector_set.hh"
#include "BKE_attribute.hh"
#include "BKE_customdata.hh"
@@ -28,12 +29,12 @@ static uint64_t edge_hash_2(const OrderedEdge &edge)
return edge.v_low;
}
/* The map first contains an edge pointer and later an index. */
union OrigEdgeOrIndex {
const int2 *original_edge;
int index;
};
using EdgeMap = Map<OrderedEdge, OrigEdgeOrIndex>;
using EdgeMap = VectorSet<OrderedEdge,
DefaultProbingStrategy,
DefaultHash<OrderedEdge>,
DefaultEquality<OrderedEdge>,
SimpleVectorSetSlot<OrderedEdge, int>,
GuardedAllocator>;
static void reserve_hash_maps(const Mesh &mesh,
const bool keep_existing_edges,
@@ -52,11 +53,11 @@ static void add_existing_edges_to_hash_maps(const Mesh &mesh,
const Span<int2> edges = mesh.edges();
threading::parallel_for_each(edge_maps, [&](EdgeMap &edge_map) {
const int task_index = &edge_map - edge_maps.data();
for (const int2 &edge : edges) {
const OrderedEdge ordered_edge(edge[0], edge[1]);
for (const int2 edge : edges) {
const OrderedEdge ordered_edge(edge);
/* Only add the edge when it belongs into this map. */
if (task_index == (parallel_mask & edge_hash_2(ordered_edge))) {
edge_map.add_new(ordered_edge, {&edge});
edge_map.add(ordered_edge);
}
}
});
@@ -76,11 +77,11 @@ static void add_face_edges_to_hash_maps(const Mesh &mesh,
const int vert = corner_verts[corner];
const int vert_prev = corner_verts[bke::mesh::face_corner_prev(face, corner)];
/* Can only be the same when the mesh data is invalid. */
if (vert_prev != vert) {
if (LIKELY(vert_prev != vert)) {
const OrderedEdge ordered_edge(vert_prev, vert);
/* Only add the edge when it belongs into this map. */
if (task_index == (parallel_mask & edge_hash_2(ordered_edge))) {
edge_map.lookup_or_add(ordered_edge, {nullptr});
edge_map.add(ordered_edge);
}
}
}
@@ -89,38 +90,17 @@ static void add_face_edges_to_hash_maps(const Mesh &mesh,
}
static void serialize_and_initialize_deduplicated_edges(MutableSpan<EdgeMap> edge_maps,
const OffsetIndices<int> edge_offsets,
MutableSpan<int2> new_edges)
{
/* All edges are distributed in the hash tables now. They have to be serialized into a single
* array below. To be able to parallelize this, we have to compute edge index offsets for each
* map. */
Array<int> edge_sizes(edge_maps.size() + 1);
for (const int i : edge_maps.index_range()) {
edge_sizes[i] = edge_maps[i].size();
}
const OffsetIndices<int> edge_offsets = offset_indices::accumulate_counts_to_offsets(edge_sizes);
threading::parallel_for_each(edge_maps, [&](EdgeMap &edge_map) {
const int task_index = &edge_map - edge_maps.data();
if (edge_offsets[task_index].is_empty()) {
return;
}
int new_edge_index = edge_offsets[task_index].first();
for (EdgeMap::MutableItem item : edge_map.items()) {
int2 &new_edge = new_edges[new_edge_index];
const int2 *orig_edge = item.value.original_edge;
if (orig_edge != nullptr) {
/* Copy values from original edge. */
new_edge = *orig_edge;
}
else {
/* Initialize new edge. */
new_edge = int2(item.key.v_low, item.key.v_high);
}
item.value.index = new_edge_index;
new_edge_index++;
}
MutableSpan<int2> result_edges = new_edges.slice(edge_offsets[task_index]);
result_edges.copy_from(edge_map.as_span().cast<int2>());
});
}
@@ -128,6 +108,7 @@ static void update_edge_indices_in_face_loops(const OffsetIndices<int> faces,
const Span<int> corner_verts,
const Span<EdgeMap> edge_maps,
const uint32_t parallel_mask,
const OffsetIndices<int> edge_offsets,
MutableSpan<int> corner_edges)
{
threading::parallel_for(faces.index_range(), 100, [&](IndexRange range) {
@@ -136,20 +117,19 @@ static void update_edge_indices_in_face_loops(const OffsetIndices<int> faces,
for (const int corner : face) {
const int vert = corner_verts[corner];
const int vert_prev = corner_verts[bke::mesh::face_corner_next(face, corner)];
int edge_index;
if (vert_prev != vert) {
const OrderedEdge ordered_edge(vert_prev, vert);
/* Double lookup: First find the map that contains the edge, then lookup the edge. */
const EdgeMap &edge_map = edge_maps[parallel_mask & edge_hash_2(ordered_edge)];
edge_index = edge_map.lookup(ordered_edge).index;
}
else {
if (UNLIKELY(vert == vert_prev)) {
/* This is an invalid edge; normally this does not happen in Blender,
* but it can be part of an imported mesh with invalid geometry. See
* #76514. */
edge_index = 0;
corner_edges[corner] = 0;
continue;
}
const OrderedEdge ordered_edge(vert_prev, vert);
const int task_index = parallel_mask & edge_hash_2(ordered_edge);
const EdgeMap &edge_map = edge_maps[task_index];
const int edge_i = edge_map.index_of(ordered_edge);
const int edge_index = edge_offsets[task_index][edge_i];
corner_edges[corner] = edge_index;
}
}
@@ -173,6 +153,24 @@ static void clear_hash_tables(MutableSpan<EdgeMap> edge_maps)
threading::parallel_for_each(edge_maps, [](EdgeMap &edge_map) { edge_map.clear_and_shrink(); });
}
static void deselect_known_edges(const OffsetIndices<int> edge_offsets,
const Span<EdgeMap> edge_maps,
const uint32_t parallel_mask,
const Span<int2> known_edges,
MutableSpan<bool> selection)
{
threading::parallel_for(known_edges.index_range(), 2048, [&](const IndexRange range) {
for (const int2 original_edge : known_edges.slice(range)) {
const OrderedEdge ordered_edge(original_edge);
const int task_index = parallel_mask & edge_hash_2(ordered_edge);
const EdgeMap &edge_map = edge_maps[task_index];
const int edge_i = edge_map.index_of(ordered_edge);
const int edge_index = edge_offsets[task_index][edge_i];
selection[edge_index] = false;
}
});
}
} // namespace calc_edges
void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new_edges)
@@ -191,24 +189,35 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
}
calc_edges::add_face_edges_to_hash_maps(mesh, parallel_mask, edge_maps);
/* Compute total number of edges. */
int new_totedge = 0;
for (const calc_edges::EdgeMap &edge_map : edge_maps) {
new_totedge += edge_map.size();
Array<int> edge_sizes(edge_maps.size() + 1);
for (const int i : edge_maps.index_range()) {
edge_sizes[i] = edge_maps[i].size();
}
const OffsetIndices<int> edge_offsets = offset_indices::accumulate_counts_to_offsets(edge_sizes);
/* Create new edges. */
MutableAttributeAccessor attributes = mesh.attributes_for_write();
attributes.add<int>(".corner_edge", AttrDomain::Corner, AttributeInitConstruct());
MutableSpan<int2> new_edges(MEM_cnew_array<int2>(new_totedge, __func__), new_totedge);
calc_edges::serialize_and_initialize_deduplicated_edges(edge_maps, new_edges);
calc_edges::update_edge_indices_in_face_loops(
mesh.faces(), mesh.corner_verts(), edge_maps, parallel_mask, mesh.corner_edges_for_write());
MutableSpan<int2> new_edges(MEM_cnew_array<int2>(edge_offsets.total_size(), __func__),
edge_offsets.total_size());
calc_edges::serialize_and_initialize_deduplicated_edges(edge_maps, edge_offsets, new_edges);
calc_edges::update_edge_indices_in_face_loops(mesh.faces(),
mesh.corner_verts(),
edge_maps,
parallel_mask,
edge_offsets,
mesh.corner_edges_for_write());
Array<int2> original_edges;
if (keep_existing_edges && select_new_edges) {
original_edges.reinitialize(mesh.edges_num);
array_utils::copy(mesh.edges(), original_edges.as_mutable_span());
}
/* Free old CustomData and assign new one. */
CustomData_free(&mesh.edge_data, mesh.edges_num);
CustomData_reset(&mesh.edge_data);
mesh.edges_num = new_totedge;
mesh.edges_num = edge_offsets.total_size();
attributes.add<int2>(".edge_verts", AttrDomain::Edge, AttributeInitMoveArray(new_edges.data()));
if (select_new_edges) {
@@ -216,14 +225,10 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
SpanAttributeWriter<bool> select_edge = attributes.lookup_or_add_for_write_span<bool>(
".select_edge", AttrDomain::Edge);
if (select_edge) {
int new_edge_index = 0;
for (const calc_edges::EdgeMap &edge_map : edge_maps) {
for (const calc_edges::EdgeMap::Item item : edge_map.items()) {
if (item.value.original_edge == nullptr) {
select_edge.span[new_edge_index] = true;
}
new_edge_index++;
}
select_edge.span.fill(true);
if (!original_edges.is_empty()) {
calc_edges::deselect_known_edges(
edge_offsets, edge_maps, parallel_mask, original_edges, select_edge.span);
}
select_edge.finish();
}
@@ -235,7 +240,7 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
}
/* Explicitly clear edge maps, because that way it can be parallelized. */
clear_hash_tables(edge_maps);
calc_edges::clear_hash_tables(edge_maps);
}
} // namespace blender::bke

source/blender/blenlib/BLI_vector_set_slots.hh

@@ -32,7 +32,9 @@ namespace blender {
* The simplest possible vector set slot. It stores the index and state in a signed integer. If the
* value is negative, it represents empty or occupied state. Otherwise it represents the index.
*/
template<typename Key> class SimpleVectorSetSlot {
template<typename Key, typename IndexT = int64_t> class SimpleVectorSetSlot {
static_assert(std::is_integral_v<IndexT> && std::is_signed_v<IndexT>);
private:
#define s_is_empty -1
#define s_is_removed -2
@@ -40,7 +42,7 @@ template<typename Key> class SimpleVectorSetSlot {
/**
* After the default constructor has run, the slot has to be in the empty state.
*/
int64_t state_ = s_is_empty;
IndexT state_ = s_is_empty;
public:
/**
@@ -62,7 +64,7 @@ template<typename Key> class SimpleVectorSetSlot {
/**
* Return the stored index. It is assumed that the slot is occupied.
*/
int64_t index() const
IndexT index() const
{
BLI_assert(this->is_occupied());
return state_;
@@ -88,7 +90,7 @@ template<typename Key> class SimpleVectorSetSlot {
* Change the state of this slot from empty/removed to occupied. The hash can be used by other
* slot implementations.
*/
void occupy(int64_t index, uint64_t /*hash*/)
void occupy(IndexT index, uint64_t /*hash*/)
{
BLI_assert(!this->is_occupied());
state_ = index;
@@ -98,7 +100,7 @@ template<typename Key> class SimpleVectorSetSlot {
* The key has changed its position in the vector, so the index has to be updated. This method
* can assume that the slot is currently occupied.
*/
void update_index(int64_t index)
void update_index(IndexT index)
{
BLI_assert(this->is_occupied());
state_ = index;
@@ -116,7 +118,7 @@ template<typename Key> class SimpleVectorSetSlot {
/**
* Return true if this slot is currently occupied and its corresponding key has the given index.
*/
bool has_index(int64_t index) const
bool has_index(IndexT index) const
{
return state_ == index;
}
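
For reference, a hypothetical generic usage sketch (not from this commit):
existing `VectorSet` users keep the 64-bit default, while callers opt into
the 32-bit index by naming the slot type explicitly, as the `EdgeMap` alias
in the first file does:

```cpp
/* Sketch: the index type defaults to int64_t, so existing users are
 * unaffected; 32-bit slot indices are opt-in. */
#include "BLI_vector_set.hh"

namespace blender {

/* Default slot: unchanged behavior, 64-bit slot index. */
using DefaultIntSet = VectorSet<int>;

/* Opt-in: 32-bit slot index, mirroring the EdgeMap alias above. */
using SmallIndexIntSet = VectorSet<int,
                                   DefaultProbingStrategy,
                                   DefaultHash<int>,
                                   DefaultEquality<int>,
                                   SimpleVectorSetSlot<int, int>,
                                   GuardedAllocator>;

}  // namespace blender
```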