From a2fac05e9d19121567e7fb66fbdfaeb0cd5caa2b Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Tue, 29 Apr 2025 18:52:30 +0200 Subject: [PATCH] BLI: Add configurable inline buffer to VectorSet Allow configuring the inline buffer capacity for the slots array, and add an inline buffer for the keys vector. Previously there was always an allocation when adding an element. The inline capacity is manually configured in a few places as part of this commit. Pull Request: https://projects.blender.org/blender/blender/pulls/136461 --- source/blender/blenkernel/BKE_paint_bvh.hh | 1 + .../blenkernel/intern/mesh_calc_edges.cc | 1 + source/blender/blenlib/BLI_vector_set.hh | 180 ++++++++++++++---- .../blenlib/tests/BLI_vector_set_test.cc | 64 ++++++- .../blender/io/usd/intern/usd_reader_mesh.cc | 1 + .../nodes/NOD_geometry_nodes_execute.hh | 2 +- 6 files changed, 210 insertions(+), 39 deletions(-) diff --git a/source/blender/blenkernel/BKE_paint_bvh.hh b/source/blender/blenkernel/BKE_paint_bvh.hh index 029969694ed..e88cb04fa8f 100644 --- a/source/blender/blenkernel/BKE_paint_bvh.hh +++ b/source/blender/blenkernel/BKE_paint_bvh.hh @@ -127,6 +127,7 @@ struct MeshNode : public Node { * order to use 32 bit integers for slot values. . */ using LocalVertMap = VectorSet, DefaultEquality, diff --git a/source/blender/blenkernel/intern/mesh_calc_edges.cc b/source/blender/blenkernel/intern/mesh_calc_edges.cc index 3189a21439d..b7f1499a683 100644 --- a/source/blender/blenkernel/intern/mesh_calc_edges.cc +++ b/source/blender/blenkernel/intern/mesh_calc_edges.cc @@ -31,6 +31,7 @@ static uint64_t edge_hash_2(const OrderedEdge &edge) } using EdgeMap = VectorSet, DefaultEquality, diff --git a/source/blender/blenlib/BLI_vector_set.hh b/source/blender/blenlib/BLI_vector_set.hh index 3d4f18e68f7..c6f0df4f578 100644 --- a/source/blender/blenlib/BLI_vector_set.hh +++ b/source/blender/blenlib/BLI_vector_set.hh @@ -42,9 +42,6 @@ * - The default constructor is cheap. * - The `print_stats` method can be used to get information about the distribution of keys and * memory usage. - * - * Possible Improvements: - * - Small buffer optimization for the keys. */ #include "BLI_array.hh" @@ -62,6 +59,10 @@ template< * hash and is-equal functions have to support it. */ typename Key, + /** + * The number of values that can be stored in the container without a heap allocation. + */ + int64_t InlineBufferCapacity = default_inline_buffer_capacity(sizeof(Key)), /** * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. */ @@ -125,7 +126,7 @@ class VectorSet { /** This is called to check equality of two keys. */ BLI_NO_UNIQUE_ADDRESS IsEqual is_equal_; - /** The max load factor is 1/2 = 50% by default. */ +/** The max load factor is 1/2 = 50% by default. */ #define LOAD_FACTOR 1, 2 static constexpr LoadFactor max_load_factor_ = LoadFactor(LOAD_FACTOR); using SlotArray = Array; @@ -137,12 +138,15 @@ class VectorSet { */ SlotArray slots_; + /** A buffer for #keys_ that will remain uninitialized until it is used. */ + BLI_NO_UNIQUE_ADDRESS TypedBuffer inline_buffer_; + /** * Pointer to an array that contains all keys. The keys are sorted by insertion order as long as * no keys are removed. The first set->size() elements in this array are initialized. The * capacity of the array is usable_slots_. */ - Key *keys_ = nullptr; + Key *keys_; /** Iterate over a slot index sequence for a given hash. */ #define VECTOR_SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ @@ -150,6 +154,19 @@ class VectorSet { auto &R_SLOT = slots_[SLOT_INDEX]; #define VECTOR_SET_SLOT_PROBING_END() SLOT_PROBING_END() + /** + * Be a friend with other template instantiations. This is necessary to implement some memory + * management logic. + */ + template + friend class VectorSet; + public: /** * Initialize an empty vector set. This is a cheap operation and won't do an allocation. This is @@ -161,9 +178,9 @@ class VectorSet { occupied_and_removed_slots_(0), usable_slots_(0), slot_mask_(0), - slots_(1, allocator), - keys_(nullptr) + slots_(1, allocator) { + keys_ = inline_buffer_; } VectorSet(NoExceptConstructor, Allocator allocator = {}) : VectorSet(allocator) {} @@ -184,44 +201,80 @@ class VectorSet { ~VectorSet() { destruct_n(keys_, this->size()); - if (keys_ != nullptr) { + if (keys_ != inline_buffer_) { this->deallocate_keys_array(keys_); } } VectorSet(const VectorSet &other) : slots_(other.slots_) { - keys_ = this->allocate_keys_array(other.usable_slots_); + if (other.size() <= InlineBufferCapacity) { + usable_slots_ = other.size(); + keys_ = inline_buffer_; + } + else { + keys_ = this->allocate_keys_array(other.usable_slots_); + usable_slots_ = other.usable_slots_; + } try { uninitialized_copy_n(other.keys_, other.size(), keys_); } catch (...) { - this->deallocate_keys_array(keys_); + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); + } throw; } removed_slots_ = other.removed_slots_; occupied_and_removed_slots_ = other.occupied_and_removed_slots_; - usable_slots_ = other.usable_slots_; slot_mask_ = other.slot_mask_; hash_ = other.hash_; is_equal_ = other.is_equal_; } - VectorSet(VectorSet &&other) noexcept + template + VectorSet(VectorSet &&other) noexcept : removed_slots_(other.removed_slots_), occupied_and_removed_slots_(other.occupied_and_removed_slots_), - usable_slots_(other.usable_slots_), slot_mask_(other.slot_mask_), - slots_(std::move(other.slots_)), - keys_(other.keys_) + slots_(std::move(other.slots_)) { + if (other.is_inline()) { + const int64_t size = other.size(); + usable_slots_ = size; + + constexpr bool other_is_same_type = std::is_same_v>; + constexpr size_t max_full_copy_size = 32; + if constexpr (other_is_same_type && std::is_trivial_v && + sizeof(inline_buffer_) <= max_full_copy_size) + { + /* Optimize by copying the full inline buffer. Similar to #Vector move constructor. */ + keys_ = inline_buffer_; + if (size > 0) { + memcpy(inline_buffer_, other.inline_buffer_, sizeof(inline_buffer_)); + } + } + else { + if (OtherInlineBufferCapacity <= InlineBufferCapacity || size <= InlineBufferCapacity) { + keys_ = inline_buffer_; + } + else { + keys_ = this->allocate_keys_array(size); + } + uninitialized_relocate_n(other.keys_, size, keys_); + } + } + else { + keys_ = other.keys_; + usable_slots_ = other.usable_slots_; + } other.removed_slots_ = 0; other.occupied_and_removed_slots_ = 0; other.usable_slots_ = 0; other.slot_mask_ = 0; other.slots_ = SlotArray(1); - other.keys_ = nullptr; + other.keys_ = other.inline_buffer_; } VectorSet &operator=(const VectorSet &other) @@ -530,6 +583,11 @@ class VectorSet { stats.print(name); } + bool is_inline() const + { + return keys_ == inline_buffer_; + } + /** * Returns the number of keys stored in the vector set. */ @@ -632,18 +690,36 @@ class VectorSet { * Extracts all inserted values as a #Vector. The values are removed from the #VectorSet. This * takes O(1) time. * + * The caller does not need special handling for when the data is stored inline in the vector + * set. + * * One can use this to create a #Vector without duplicates efficiently. */ VectorT extract_vector() { + const int64_t size = this->size(); VectorData data; - data.data = keys_; - data.size = this->size(); - data.capacity = usable_slots_; + if (this->is_inline()) { + data.data = this->allocate_keys_array(size); + data.size = size; + data.capacity = size; + try { + uninitialized_relocate_n(keys_, size, data.data); + } + catch (...) { + this->deallocate_keys_array(data.data); + throw; + } + } + else { + data.data = keys_; + data.size = size; + data.capacity = usable_slots_; + } /* Reset some values so that the destructor does not free the data that is moved to the * #Vector. */ - keys_ = nullptr; + keys_ = inline_buffer_; occupied_and_removed_slots_ = 0; removed_slots_ = 0; std::destroy_at(this); @@ -665,11 +741,20 @@ class VectorSet { if (this->size() == 0) { try { slots_.reinitialize(total_slots); - if (keys_ != nullptr) { - this->deallocate_keys_array(keys_); - keys_ = nullptr; + + Key *new_keys; + if (usable_slots <= InlineBufferCapacity) { + new_keys = inline_buffer_; } - keys_ = this->allocate_keys_array(usable_slots); + else { + new_keys = this->allocate_keys_array(usable_slots); + } + + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); + } + + keys_ = new_keys; } catch (...) { this->noexcept_reset(); @@ -698,16 +783,34 @@ class VectorSet { throw; } - Key *new_keys = this->allocate_keys_array(usable_slots); - try { - uninitialized_relocate_n(keys_, this->size(), new_keys); + /* Allocate the new keys array, or use the inline buffer if possible. */ + Key *new_keys; + if (usable_slots <= InlineBufferCapacity) { + new_keys = inline_buffer_; } - catch (...) { - this->deallocate_keys_array(new_keys); - this->noexcept_reset(); - throw; + else { + new_keys = this->allocate_keys_array(usable_slots); + } + + /* Copy the keys to the new array. When the inline buffer isn't used before and after the + * reallocation (`new_keys` also references the inline buffer), no copying is necessary. */ + if (new_keys != keys_) { + try { + uninitialized_relocate_n(keys_, this->size(), new_keys); + } + catch (...) { + if (new_keys != inline_buffer_) { + this->deallocate_keys_array(new_keys); + } + this->noexcept_reset(); + throw; + } + } + + /* Free the old keys array. */ + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); } - this->deallocate_keys_array(keys_); keys_ = new_keys; occupied_and_removed_slots_ -= removed_slots_; @@ -985,11 +1088,13 @@ class VectorSet { * allocating memory with static storage duration. */ template, typename IsEqual = DefaultEquality, typename Slot = typename DefaultVectorSetSlot::type> -using RawVectorSet = VectorSet; +using RawVectorSet = + VectorSet; template struct CustomIDHash { using CustomIDType = decltype(GetIDFn{}(std::declval())); @@ -1028,8 +1133,11 @@ template struct CustomIDEqual { * #GetIDFn should have an implementation that returns a hashable and equality comparable type, * i.e. `StringRef operator()(const bNode *value) { return value->idname; }`. */ -template -using CustomIDVectorSet = - VectorSet, CustomIDEqual>; +template +using CustomIDVectorSet = VectorSet, + CustomIDEqual>; } // namespace blender diff --git a/source/blender/blenlib/tests/BLI_vector_set_test.cc b/source/blender/blenlib/tests/BLI_vector_set_test.cc index 1387cff80ed..5221aae634a 100644 --- a/source/blender/blenlib/tests/BLI_vector_set_test.cc +++ b/source/blender/blenlib/tests/BLI_vector_set_test.cc @@ -25,6 +25,12 @@ TEST(vector_set, InitializerListConstructor_WithoutDuplicates) EXPECT_EQ(set[0], 1); EXPECT_EQ(set[1], 4); EXPECT_EQ(set[2], 5); + VectorSet set_bigger_than_inline = {1, 4, 5, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + EXPECT_EQ(set_bigger_than_inline.size(), 14); + EXPECT_EQ(set_bigger_than_inline[0], 1); + EXPECT_EQ(set_bigger_than_inline[1], 4); + EXPECT_EQ(set_bigger_than_inline[2], 5); + EXPECT_EQ(set_bigger_than_inline.as_span().last(), 19); } TEST(vector_set, InitializerListConstructor_WithDuplicates) @@ -60,10 +66,26 @@ TEST(vector_set, CopyAssignment) TEST(vector_set, Move) { - VectorSet set1 = {1, 2, 3}; + { + VectorSet set1 = {1, 2, 3}; + VectorSet set2 = std::move(set1); + EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ + EXPECT_EQ(set2.size(), 3); + } + { + VectorSet set1 = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + VectorSet set2 = std::move(set1); + EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ + EXPECT_EQ(set2.size(), 9); + } +} + +TEST(vector_set, MoveNonInline) +{ + VectorSet set1 = {1, 2, 3, 5, 1, 6, 7, 8, 1, 4, 57, 8, 7, 34, 57, 8, 1231}; VectorSet set2 = std::move(set1); EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ - EXPECT_EQ(set2.size(), 3); + EXPECT_EQ(set2.size(), 11); } TEST(vector_set, MoveAssignment) @@ -310,6 +332,31 @@ TEST(vector_set, ExtractVector) Vector vec = set.extract_vector(); EXPECT_EQ(vec.size(), 5); EXPECT_EQ(vec.data(), data_ptr); + EXPECT_TRUE(set.is_empty()); +} + +TEST(vector_set, ExtractVectorInline) +{ + VectorSet set; + set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5}); + EXPECT_EQ(set.size(), 5); + + Vector vec = set.extract_vector(); + EXPECT_EQ(vec.size(), 5); + EXPECT_EQ(vec[2], 7); + EXPECT_TRUE(set.is_empty()); +} + +TEST(vector_set, ExtractVectorInlineExceptions) +{ + VectorSet set; + set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5}); + set[3].throw_during_copy = true; + set[3].throw_during_move = true; + EXPECT_EQ(set.size(), 5); + + EXPECT_ANY_THROW({ Vector vec = set.extract_vector(); }); + EXPECT_EQ(set.size(), 5); } TEST(vector_set, ExtractVectorEmpty) @@ -339,6 +386,19 @@ TEST(vector_set, CustomIDVectorSet) EXPECT_EQ(set.size(), 2); set.add(ThingWithID{3333, "test", 27}); EXPECT_EQ(set.size(), 2); + + /* Add more elements than the inline capacity. */ + CustomIDVectorSet larger_set; + larger_set.add_new(ThingWithID{12, "test", 9}); + EXPECT_EQ(larger_set.index_of_as("test"), 0); + larger_set.add_new(ThingWithID{123, "other", 8}); + larger_set.add(ThingWithID{1234, "test", 7}); + larger_set.add_new(ThingWithID{12345, "test_2", 6}); + larger_set.add_new(ThingWithID{123456, "test_4", 5}); + larger_set.add_new(ThingWithID{1234567, "test_5", 4}); + EXPECT_EQ(larger_set.index_of_as("test_4"), 3); + larger_set.add_new(ThingWithID{12345678, "test_6", 3}); + EXPECT_TRUE(larger_set.size() == 6); } namespace { diff --git a/source/blender/io/usd/intern/usd_reader_mesh.cc b/source/blender/io/usd/intern/usd_reader_mesh.cc index f0db807fd5d..7ed39b3c110 100644 --- a/source/blender/io/usd/intern/usd_reader_mesh.cc +++ b/source/blender/io/usd/intern/usd_reader_mesh.cc @@ -467,6 +467,7 @@ void USDMeshReader::read_edge_creases(Mesh *mesh, const double motionSampleTime) /* Build mapping from vert pairs to edge index. */ using EdgeMap = VectorSet, DefaultEquality, diff --git a/source/blender/nodes/NOD_geometry_nodes_execute.hh b/source/blender/nodes/NOD_geometry_nodes_execute.hh index c9431319598..c31e3567b09 100644 --- a/source/blender/nodes/NOD_geometry_nodes_execute.hh +++ b/source/blender/nodes/NOD_geometry_nodes_execute.hh @@ -42,7 +42,7 @@ struct IDPropNameGetter { * Use a #VectorSet to store properties for constant time lookup, to avoid slowdown with many * inputs. */ -using PropertiesVectorSet = CustomIDVectorSet; +using PropertiesVectorSet = CustomIDVectorSet; PropertiesVectorSet build_properties_vector_set(const IDProperty *properties); std::optional input_attribute_name_get(const PropertiesVectorSet &properties,