Mesh: Calculate edges with VectorSet instead of Map
Due to legacy reasons (`MEdge`), edge calculation was being done with idea that edges cannot be temporarily copied. But today, edges are just `int2`, so using `edge *` instead of `edge` actually made things worse. And since `OrderedEdge` itself is the same thing as `int2`, it does not make sense to use `Map` for edges. So, now edges are in a hash set. To be able to take index of edges, `VectorSet` is used. The only functional change now is that original edges will be reordered as well. This should be okay just like an unintentional but stable indices change. For 2'000 x 2'000 x 2'000 cube edges calculation, change is around `3703.47` -> `2911.18` ms. In order to reduce memory usage, a template parameter is added to `VectorSet` slots, so they can use a 32 instead of 64 bit index type. Without that, the performance change is not consistent and might not be better on a computer with more memory bandwidth. Co-authored-by: Hans Goudey <hans@blender.org> Pull Request: https://projects.blender.org/blender/blender/pulls/120224
This commit is contained in:
committed by
Hans Goudey
parent
7642a5452a
commit
6bafe65d28
@@ -6,10 +6,11 @@
|
||||
* \ingroup bke
|
||||
*/
|
||||
|
||||
#include "BLI_map.hh"
|
||||
#include "BLI_array_utils.hh"
|
||||
#include "BLI_ordered_edge.hh"
|
||||
#include "BLI_task.hh"
|
||||
#include "BLI_threads.h"
|
||||
#include "BLI_vector_set.hh"
|
||||
|
||||
#include "BKE_attribute.hh"
|
||||
#include "BKE_customdata.hh"
|
||||
@@ -28,12 +29,12 @@ static uint64_t edge_hash_2(const OrderedEdge &edge)
|
||||
return edge.v_low;
|
||||
}
|
||||
|
||||
/* The map first contains an edge pointer and later an index. */
|
||||
union OrigEdgeOrIndex {
|
||||
const int2 *original_edge;
|
||||
int index;
|
||||
};
|
||||
using EdgeMap = Map<OrderedEdge, OrigEdgeOrIndex>;
|
||||
using EdgeMap = VectorSet<OrderedEdge,
|
||||
DefaultProbingStrategy,
|
||||
DefaultHash<OrderedEdge>,
|
||||
DefaultEquality<OrderedEdge>,
|
||||
SimpleVectorSetSlot<OrderedEdge, int>,
|
||||
GuardedAllocator>;
|
||||
|
||||
static void reserve_hash_maps(const Mesh &mesh,
|
||||
const bool keep_existing_edges,
|
||||
@@ -52,11 +53,11 @@ static void add_existing_edges_to_hash_maps(const Mesh &mesh,
|
||||
const Span<int2> edges = mesh.edges();
|
||||
threading::parallel_for_each(edge_maps, [&](EdgeMap &edge_map) {
|
||||
const int task_index = &edge_map - edge_maps.data();
|
||||
for (const int2 &edge : edges) {
|
||||
const OrderedEdge ordered_edge(edge[0], edge[1]);
|
||||
for (const int2 edge : edges) {
|
||||
const OrderedEdge ordered_edge(edge);
|
||||
/* Only add the edge when it belongs into this map. */
|
||||
if (task_index == (parallel_mask & edge_hash_2(ordered_edge))) {
|
||||
edge_map.add_new(ordered_edge, {&edge});
|
||||
edge_map.add(ordered_edge);
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -76,11 +77,11 @@ static void add_face_edges_to_hash_maps(const Mesh &mesh,
|
||||
const int vert = corner_verts[corner];
|
||||
const int vert_prev = corner_verts[bke::mesh::face_corner_prev(face, corner)];
|
||||
/* Can only be the same when the mesh data is invalid. */
|
||||
if (vert_prev != vert) {
|
||||
if (LIKELY(vert_prev != vert)) {
|
||||
const OrderedEdge ordered_edge(vert_prev, vert);
|
||||
/* Only add the edge when it belongs into this map. */
|
||||
if (task_index == (parallel_mask & edge_hash_2(ordered_edge))) {
|
||||
edge_map.lookup_or_add(ordered_edge, {nullptr});
|
||||
edge_map.add(ordered_edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,38 +90,17 @@ static void add_face_edges_to_hash_maps(const Mesh &mesh,
|
||||
}
|
||||
|
||||
static void serialize_and_initialize_deduplicated_edges(MutableSpan<EdgeMap> edge_maps,
|
||||
const OffsetIndices<int> edge_offsets,
|
||||
MutableSpan<int2> new_edges)
|
||||
{
|
||||
/* All edges are distributed in the hash tables now. They have to be serialized into a single
|
||||
* array below. To be able to parallelize this, we have to compute edge index offsets for each
|
||||
* map. */
|
||||
Array<int> edge_sizes(edge_maps.size() + 1);
|
||||
for (const int i : edge_maps.index_range()) {
|
||||
edge_sizes[i] = edge_maps[i].size();
|
||||
}
|
||||
const OffsetIndices<int> edge_offsets = offset_indices::accumulate_counts_to_offsets(edge_sizes);
|
||||
|
||||
threading::parallel_for_each(edge_maps, [&](EdgeMap &edge_map) {
|
||||
const int task_index = &edge_map - edge_maps.data();
|
||||
if (edge_offsets[task_index].is_empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
int new_edge_index = edge_offsets[task_index].first();
|
||||
for (EdgeMap::MutableItem item : edge_map.items()) {
|
||||
int2 &new_edge = new_edges[new_edge_index];
|
||||
const int2 *orig_edge = item.value.original_edge;
|
||||
if (orig_edge != nullptr) {
|
||||
/* Copy values from original edge. */
|
||||
new_edge = *orig_edge;
|
||||
}
|
||||
else {
|
||||
/* Initialize new edge. */
|
||||
new_edge = int2(item.key.v_low, item.key.v_high);
|
||||
}
|
||||
item.value.index = new_edge_index;
|
||||
new_edge_index++;
|
||||
}
|
||||
MutableSpan<int2> result_edges = new_edges.slice(edge_offsets[task_index]);
|
||||
result_edges.copy_from(edge_map.as_span().cast<int2>());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -128,6 +108,7 @@ static void update_edge_indices_in_face_loops(const OffsetIndices<int> faces,
|
||||
const Span<int> corner_verts,
|
||||
const Span<EdgeMap> edge_maps,
|
||||
const uint32_t parallel_mask,
|
||||
const OffsetIndices<int> edge_offsets,
|
||||
MutableSpan<int> corner_edges)
|
||||
{
|
||||
threading::parallel_for(faces.index_range(), 100, [&](IndexRange range) {
|
||||
@@ -136,20 +117,19 @@ static void update_edge_indices_in_face_loops(const OffsetIndices<int> faces,
|
||||
for (const int corner : face) {
|
||||
const int vert = corner_verts[corner];
|
||||
const int vert_prev = corner_verts[bke::mesh::face_corner_next(face, corner)];
|
||||
|
||||
int edge_index;
|
||||
if (vert_prev != vert) {
|
||||
const OrderedEdge ordered_edge(vert_prev, vert);
|
||||
/* Double lookup: First find the map that contains the edge, then lookup the edge. */
|
||||
const EdgeMap &edge_map = edge_maps[parallel_mask & edge_hash_2(ordered_edge)];
|
||||
edge_index = edge_map.lookup(ordered_edge).index;
|
||||
}
|
||||
else {
|
||||
if (UNLIKELY(vert == vert_prev)) {
|
||||
/* This is an invalid edge; normally this does not happen in Blender,
|
||||
* but it can be part of an imported mesh with invalid geometry. See
|
||||
* #76514. */
|
||||
edge_index = 0;
|
||||
corner_edges[corner] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
const OrderedEdge ordered_edge(vert_prev, vert);
|
||||
const int task_index = parallel_mask & edge_hash_2(ordered_edge);
|
||||
const EdgeMap &edge_map = edge_maps[task_index];
|
||||
const int edge_i = edge_map.index_of(ordered_edge);
|
||||
const int edge_index = edge_offsets[task_index][edge_i];
|
||||
corner_edges[corner] = edge_index;
|
||||
}
|
||||
}
|
||||
@@ -173,6 +153,24 @@ static void clear_hash_tables(MutableSpan<EdgeMap> edge_maps)
|
||||
threading::parallel_for_each(edge_maps, [](EdgeMap &edge_map) { edge_map.clear_and_shrink(); });
|
||||
}
|
||||
|
||||
static void deselect_known_edges(const OffsetIndices<int> edge_offsets,
|
||||
const Span<EdgeMap> edge_maps,
|
||||
const uint32_t parallel_mask,
|
||||
const Span<int2> known_edges,
|
||||
MutableSpan<bool> selection)
|
||||
{
|
||||
threading::parallel_for(known_edges.index_range(), 2048, [&](const IndexRange range) {
|
||||
for (const int2 original_edge : known_edges.slice(range)) {
|
||||
const OrderedEdge ordered_edge(original_edge);
|
||||
const int task_index = parallel_mask & edge_hash_2(ordered_edge);
|
||||
const EdgeMap &edge_map = edge_maps[task_index];
|
||||
const int edge_i = edge_map.index_of(ordered_edge);
|
||||
const int edge_index = edge_offsets[task_index][edge_i];
|
||||
selection[edge_index] = false;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace calc_edges
|
||||
|
||||
void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new_edges)
|
||||
@@ -191,24 +189,35 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
|
||||
}
|
||||
calc_edges::add_face_edges_to_hash_maps(mesh, parallel_mask, edge_maps);
|
||||
|
||||
/* Compute total number of edges. */
|
||||
int new_totedge = 0;
|
||||
for (const calc_edges::EdgeMap &edge_map : edge_maps) {
|
||||
new_totedge += edge_map.size();
|
||||
Array<int> edge_sizes(edge_maps.size() + 1);
|
||||
for (const int i : edge_maps.index_range()) {
|
||||
edge_sizes[i] = edge_maps[i].size();
|
||||
}
|
||||
const OffsetIndices<int> edge_offsets = offset_indices::accumulate_counts_to_offsets(edge_sizes);
|
||||
|
||||
/* Create new edges. */
|
||||
MutableAttributeAccessor attributes = mesh.attributes_for_write();
|
||||
attributes.add<int>(".corner_edge", AttrDomain::Corner, AttributeInitConstruct());
|
||||
MutableSpan<int2> new_edges(MEM_cnew_array<int2>(new_totedge, __func__), new_totedge);
|
||||
calc_edges::serialize_and_initialize_deduplicated_edges(edge_maps, new_edges);
|
||||
calc_edges::update_edge_indices_in_face_loops(
|
||||
mesh.faces(), mesh.corner_verts(), edge_maps, parallel_mask, mesh.corner_edges_for_write());
|
||||
MutableSpan<int2> new_edges(MEM_cnew_array<int2>(edge_offsets.total_size(), __func__),
|
||||
edge_offsets.total_size());
|
||||
calc_edges::serialize_and_initialize_deduplicated_edges(edge_maps, edge_offsets, new_edges);
|
||||
calc_edges::update_edge_indices_in_face_loops(mesh.faces(),
|
||||
mesh.corner_verts(),
|
||||
edge_maps,
|
||||
parallel_mask,
|
||||
edge_offsets,
|
||||
mesh.corner_edges_for_write());
|
||||
|
||||
Array<int2> original_edges;
|
||||
if (keep_existing_edges && select_new_edges) {
|
||||
original_edges.reinitialize(mesh.edges_num);
|
||||
array_utils::copy(mesh.edges(), original_edges.as_mutable_span());
|
||||
}
|
||||
|
||||
/* Free old CustomData and assign new one. */
|
||||
CustomData_free(&mesh.edge_data, mesh.edges_num);
|
||||
CustomData_reset(&mesh.edge_data);
|
||||
mesh.edges_num = new_totedge;
|
||||
mesh.edges_num = edge_offsets.total_size();
|
||||
attributes.add<int2>(".edge_verts", AttrDomain::Edge, AttributeInitMoveArray(new_edges.data()));
|
||||
|
||||
if (select_new_edges) {
|
||||
@@ -216,14 +225,10 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
|
||||
SpanAttributeWriter<bool> select_edge = attributes.lookup_or_add_for_write_span<bool>(
|
||||
".select_edge", AttrDomain::Edge);
|
||||
if (select_edge) {
|
||||
int new_edge_index = 0;
|
||||
for (const calc_edges::EdgeMap &edge_map : edge_maps) {
|
||||
for (const calc_edges::EdgeMap::Item item : edge_map.items()) {
|
||||
if (item.value.original_edge == nullptr) {
|
||||
select_edge.span[new_edge_index] = true;
|
||||
}
|
||||
new_edge_index++;
|
||||
}
|
||||
select_edge.span.fill(true);
|
||||
if (!original_edges.is_empty()) {
|
||||
calc_edges::deselect_known_edges(
|
||||
edge_offsets, edge_maps, parallel_mask, original_edges, select_edge.span);
|
||||
}
|
||||
select_edge.finish();
|
||||
}
|
||||
@@ -235,7 +240,7 @@ void mesh_calc_edges(Mesh &mesh, bool keep_existing_edges, const bool select_new
|
||||
}
|
||||
|
||||
/* Explicitly clear edge maps, because that way it can be parallelized. */
|
||||
clear_hash_tables(edge_maps);
|
||||
calc_edges::clear_hash_tables(edge_maps);
|
||||
}
|
||||
|
||||
} // namespace blender::bke
|
||||
|
||||
@@ -32,7 +32,9 @@ namespace blender {
|
||||
* The simplest possible vector set slot. It stores the index and state in a signed integer. If the
|
||||
* value is negative, it represents empty or occupied state. Otherwise it represents the index.
|
||||
*/
|
||||
template<typename Key> class SimpleVectorSetSlot {
|
||||
template<typename Key, typename IndexT = int64_t> class SimpleVectorSetSlot {
|
||||
static_assert(std::is_integral_v<IndexT> && std::is_signed_v<IndexT>);
|
||||
|
||||
private:
|
||||
#define s_is_empty -1
|
||||
#define s_is_removed -2
|
||||
@@ -40,7 +42,7 @@ template<typename Key> class SimpleVectorSetSlot {
|
||||
/**
|
||||
* After the default constructor has run, the slot has to be in the empty state.
|
||||
*/
|
||||
int64_t state_ = s_is_empty;
|
||||
IndexT state_ = s_is_empty;
|
||||
|
||||
public:
|
||||
/**
|
||||
@@ -62,7 +64,7 @@ template<typename Key> class SimpleVectorSetSlot {
|
||||
/**
|
||||
* Return the stored index. It is assumed that the slot is occupied.
|
||||
*/
|
||||
int64_t index() const
|
||||
IndexT index() const
|
||||
{
|
||||
BLI_assert(this->is_occupied());
|
||||
return state_;
|
||||
@@ -88,7 +90,7 @@ template<typename Key> class SimpleVectorSetSlot {
|
||||
* Change the state of this slot from empty/removed to occupied. The hash can be used by other
|
||||
* slot implementations.
|
||||
*/
|
||||
void occupy(int64_t index, uint64_t /*hash*/)
|
||||
void occupy(IndexT index, uint64_t /*hash*/)
|
||||
{
|
||||
BLI_assert(!this->is_occupied());
|
||||
state_ = index;
|
||||
@@ -98,7 +100,7 @@ template<typename Key> class SimpleVectorSetSlot {
|
||||
* The key has changed its position in the vector, so the index has to be updated. This method
|
||||
* can assume that the slot is currently occupied.
|
||||
*/
|
||||
void update_index(int64_t index)
|
||||
void update_index(IndexT index)
|
||||
{
|
||||
BLI_assert(this->is_occupied());
|
||||
state_ = index;
|
||||
@@ -116,7 +118,7 @@ template<typename Key> class SimpleVectorSetSlot {
|
||||
/**
|
||||
* Return true if this slot is currently occupied and its corresponding key has the given index.
|
||||
*/
|
||||
bool has_index(int64_t index) const
|
||||
bool has_index(IndexT index) const
|
||||
{
|
||||
return state_ == index;
|
||||
}
|
||||
|
||||
Submodule tests/data updated: 19d97256ec...42979e6ec6
Reference in New Issue
Block a user