diff --git a/source/blender/blenkernel/BKE_paint_bvh.hh b/source/blender/blenkernel/BKE_paint_bvh.hh index 029969694ed..e88cb04fa8f 100644 --- a/source/blender/blenkernel/BKE_paint_bvh.hh +++ b/source/blender/blenkernel/BKE_paint_bvh.hh @@ -127,6 +127,7 @@ struct MeshNode : public Node { * order to use 32 bit integers for slot values. . */ using LocalVertMap = VectorSet, DefaultEquality, diff --git a/source/blender/blenkernel/intern/mesh_calc_edges.cc b/source/blender/blenkernel/intern/mesh_calc_edges.cc index 3189a21439d..b7f1499a683 100644 --- a/source/blender/blenkernel/intern/mesh_calc_edges.cc +++ b/source/blender/blenkernel/intern/mesh_calc_edges.cc @@ -31,6 +31,7 @@ static uint64_t edge_hash_2(const OrderedEdge &edge) } using EdgeMap = VectorSet, DefaultEquality, diff --git a/source/blender/blenlib/BLI_vector_set.hh b/source/blender/blenlib/BLI_vector_set.hh index 3d4f18e68f7..c6f0df4f578 100644 --- a/source/blender/blenlib/BLI_vector_set.hh +++ b/source/blender/blenlib/BLI_vector_set.hh @@ -42,9 +42,6 @@ * - The default constructor is cheap. * - The `print_stats` method can be used to get information about the distribution of keys and * memory usage. - * - * Possible Improvements: - * - Small buffer optimization for the keys. */ #include "BLI_array.hh" @@ -62,6 +59,10 @@ template< * hash and is-equal functions have to support it. */ typename Key, + /** + * The number of values that can be stored in the container without a heap allocation. + */ + int64_t InlineBufferCapacity = default_inline_buffer_capacity(sizeof(Key)), /** * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. */ @@ -125,7 +126,7 @@ class VectorSet { /** This is called to check equality of two keys. */ BLI_NO_UNIQUE_ADDRESS IsEqual is_equal_; - /** The max load factor is 1/2 = 50% by default. */ +/** The max load factor is 1/2 = 50% by default. */ #define LOAD_FACTOR 1, 2 static constexpr LoadFactor max_load_factor_ = LoadFactor(LOAD_FACTOR); using SlotArray = Array; @@ -137,12 +138,15 @@ class VectorSet { */ SlotArray slots_; + /** A buffer for #keys_ that will remain uninitialized until it is used. */ + BLI_NO_UNIQUE_ADDRESS TypedBuffer inline_buffer_; + /** * Pointer to an array that contains all keys. The keys are sorted by insertion order as long as * no keys are removed. The first set->size() elements in this array are initialized. The * capacity of the array is usable_slots_. */ - Key *keys_ = nullptr; + Key *keys_; /** Iterate over a slot index sequence for a given hash. */ #define VECTOR_SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ @@ -150,6 +154,19 @@ class VectorSet { auto &R_SLOT = slots_[SLOT_INDEX]; #define VECTOR_SET_SLOT_PROBING_END() SLOT_PROBING_END() + /** + * Be a friend with other template instantiations. This is necessary to implement some memory + * management logic. + */ + template + friend class VectorSet; + public: /** * Initialize an empty vector set. This is a cheap operation and won't do an allocation. This is @@ -161,9 +178,9 @@ class VectorSet { occupied_and_removed_slots_(0), usable_slots_(0), slot_mask_(0), - slots_(1, allocator), - keys_(nullptr) + slots_(1, allocator) { + keys_ = inline_buffer_; } VectorSet(NoExceptConstructor, Allocator allocator = {}) : VectorSet(allocator) {} @@ -184,44 +201,80 @@ class VectorSet { ~VectorSet() { destruct_n(keys_, this->size()); - if (keys_ != nullptr) { + if (keys_ != inline_buffer_) { this->deallocate_keys_array(keys_); } } VectorSet(const VectorSet &other) : slots_(other.slots_) { - keys_ = this->allocate_keys_array(other.usable_slots_); + if (other.size() <= InlineBufferCapacity) { + usable_slots_ = other.size(); + keys_ = inline_buffer_; + } + else { + keys_ = this->allocate_keys_array(other.usable_slots_); + usable_slots_ = other.usable_slots_; + } try { uninitialized_copy_n(other.keys_, other.size(), keys_); } catch (...) { - this->deallocate_keys_array(keys_); + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); + } throw; } removed_slots_ = other.removed_slots_; occupied_and_removed_slots_ = other.occupied_and_removed_slots_; - usable_slots_ = other.usable_slots_; slot_mask_ = other.slot_mask_; hash_ = other.hash_; is_equal_ = other.is_equal_; } - VectorSet(VectorSet &&other) noexcept + template + VectorSet(VectorSet &&other) noexcept : removed_slots_(other.removed_slots_), occupied_and_removed_slots_(other.occupied_and_removed_slots_), - usable_slots_(other.usable_slots_), slot_mask_(other.slot_mask_), - slots_(std::move(other.slots_)), - keys_(other.keys_) + slots_(std::move(other.slots_)) { + if (other.is_inline()) { + const int64_t size = other.size(); + usable_slots_ = size; + + constexpr bool other_is_same_type = std::is_same_v>; + constexpr size_t max_full_copy_size = 32; + if constexpr (other_is_same_type && std::is_trivial_v && + sizeof(inline_buffer_) <= max_full_copy_size) + { + /* Optimize by copying the full inline buffer. Similar to #Vector move constructor. */ + keys_ = inline_buffer_; + if (size > 0) { + memcpy(inline_buffer_, other.inline_buffer_, sizeof(inline_buffer_)); + } + } + else { + if (OtherInlineBufferCapacity <= InlineBufferCapacity || size <= InlineBufferCapacity) { + keys_ = inline_buffer_; + } + else { + keys_ = this->allocate_keys_array(size); + } + uninitialized_relocate_n(other.keys_, size, keys_); + } + } + else { + keys_ = other.keys_; + usable_slots_ = other.usable_slots_; + } other.removed_slots_ = 0; other.occupied_and_removed_slots_ = 0; other.usable_slots_ = 0; other.slot_mask_ = 0; other.slots_ = SlotArray(1); - other.keys_ = nullptr; + other.keys_ = other.inline_buffer_; } VectorSet &operator=(const VectorSet &other) @@ -530,6 +583,11 @@ class VectorSet { stats.print(name); } + bool is_inline() const + { + return keys_ == inline_buffer_; + } + /** * Returns the number of keys stored in the vector set. */ @@ -632,18 +690,36 @@ class VectorSet { * Extracts all inserted values as a #Vector. The values are removed from the #VectorSet. This * takes O(1) time. * + * The caller does not need special handling for when the data is stored inline in the vector + * set. + * * One can use this to create a #Vector without duplicates efficiently. */ VectorT extract_vector() { + const int64_t size = this->size(); VectorData data; - data.data = keys_; - data.size = this->size(); - data.capacity = usable_slots_; + if (this->is_inline()) { + data.data = this->allocate_keys_array(size); + data.size = size; + data.capacity = size; + try { + uninitialized_relocate_n(keys_, size, data.data); + } + catch (...) { + this->deallocate_keys_array(data.data); + throw; + } + } + else { + data.data = keys_; + data.size = size; + data.capacity = usable_slots_; + } /* Reset some values so that the destructor does not free the data that is moved to the * #Vector. */ - keys_ = nullptr; + keys_ = inline_buffer_; occupied_and_removed_slots_ = 0; removed_slots_ = 0; std::destroy_at(this); @@ -665,11 +741,20 @@ class VectorSet { if (this->size() == 0) { try { slots_.reinitialize(total_slots); - if (keys_ != nullptr) { - this->deallocate_keys_array(keys_); - keys_ = nullptr; + + Key *new_keys; + if (usable_slots <= InlineBufferCapacity) { + new_keys = inline_buffer_; } - keys_ = this->allocate_keys_array(usable_slots); + else { + new_keys = this->allocate_keys_array(usable_slots); + } + + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); + } + + keys_ = new_keys; } catch (...) { this->noexcept_reset(); @@ -698,16 +783,34 @@ class VectorSet { throw; } - Key *new_keys = this->allocate_keys_array(usable_slots); - try { - uninitialized_relocate_n(keys_, this->size(), new_keys); + /* Allocate the new keys array, or use the inline buffer if possible. */ + Key *new_keys; + if (usable_slots <= InlineBufferCapacity) { + new_keys = inline_buffer_; } - catch (...) { - this->deallocate_keys_array(new_keys); - this->noexcept_reset(); - throw; + else { + new_keys = this->allocate_keys_array(usable_slots); + } + + /* Copy the keys to the new array. When the inline buffer isn't used before and after the + * reallocation (`new_keys` also references the inline buffer), no copying is necessary. */ + if (new_keys != keys_) { + try { + uninitialized_relocate_n(keys_, this->size(), new_keys); + } + catch (...) { + if (new_keys != inline_buffer_) { + this->deallocate_keys_array(new_keys); + } + this->noexcept_reset(); + throw; + } + } + + /* Free the old keys array. */ + if (keys_ != inline_buffer_) { + this->deallocate_keys_array(keys_); } - this->deallocate_keys_array(keys_); keys_ = new_keys; occupied_and_removed_slots_ -= removed_slots_; @@ -985,11 +1088,13 @@ class VectorSet { * allocating memory with static storage duration. */ template, typename IsEqual = DefaultEquality, typename Slot = typename DefaultVectorSetSlot::type> -using RawVectorSet = VectorSet; +using RawVectorSet = + VectorSet; template struct CustomIDHash { using CustomIDType = decltype(GetIDFn{}(std::declval())); @@ -1028,8 +1133,11 @@ template struct CustomIDEqual { * #GetIDFn should have an implementation that returns a hashable and equality comparable type, * i.e. `StringRef operator()(const bNode *value) { return value->idname; }`. */ -template -using CustomIDVectorSet = - VectorSet, CustomIDEqual>; +template +using CustomIDVectorSet = VectorSet, + CustomIDEqual>; } // namespace blender diff --git a/source/blender/blenlib/tests/BLI_vector_set_test.cc b/source/blender/blenlib/tests/BLI_vector_set_test.cc index 1387cff80ed..5221aae634a 100644 --- a/source/blender/blenlib/tests/BLI_vector_set_test.cc +++ b/source/blender/blenlib/tests/BLI_vector_set_test.cc @@ -25,6 +25,12 @@ TEST(vector_set, InitializerListConstructor_WithoutDuplicates) EXPECT_EQ(set[0], 1); EXPECT_EQ(set[1], 4); EXPECT_EQ(set[2], 5); + VectorSet set_bigger_than_inline = {1, 4, 5, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + EXPECT_EQ(set_bigger_than_inline.size(), 14); + EXPECT_EQ(set_bigger_than_inline[0], 1); + EXPECT_EQ(set_bigger_than_inline[1], 4); + EXPECT_EQ(set_bigger_than_inline[2], 5); + EXPECT_EQ(set_bigger_than_inline.as_span().last(), 19); } TEST(vector_set, InitializerListConstructor_WithDuplicates) @@ -60,10 +66,26 @@ TEST(vector_set, CopyAssignment) TEST(vector_set, Move) { - VectorSet set1 = {1, 2, 3}; + { + VectorSet set1 = {1, 2, 3}; + VectorSet set2 = std::move(set1); + EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ + EXPECT_EQ(set2.size(), 3); + } + { + VectorSet set1 = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + VectorSet set2 = std::move(set1); + EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ + EXPECT_EQ(set2.size(), 9); + } +} + +TEST(vector_set, MoveNonInline) +{ + VectorSet set1 = {1, 2, 3, 5, 1, 6, 7, 8, 1, 4, 57, 8, 7, 34, 57, 8, 1231}; VectorSet set2 = std::move(set1); EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */ - EXPECT_EQ(set2.size(), 3); + EXPECT_EQ(set2.size(), 11); } TEST(vector_set, MoveAssignment) @@ -310,6 +332,31 @@ TEST(vector_set, ExtractVector) Vector vec = set.extract_vector(); EXPECT_EQ(vec.size(), 5); EXPECT_EQ(vec.data(), data_ptr); + EXPECT_TRUE(set.is_empty()); +} + +TEST(vector_set, ExtractVectorInline) +{ + VectorSet set; + set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5}); + EXPECT_EQ(set.size(), 5); + + Vector vec = set.extract_vector(); + EXPECT_EQ(vec.size(), 5); + EXPECT_EQ(vec[2], 7); + EXPECT_TRUE(set.is_empty()); +} + +TEST(vector_set, ExtractVectorInlineExceptions) +{ + VectorSet set; + set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5}); + set[3].throw_during_copy = true; + set[3].throw_during_move = true; + EXPECT_EQ(set.size(), 5); + + EXPECT_ANY_THROW({ Vector vec = set.extract_vector(); }); + EXPECT_EQ(set.size(), 5); } TEST(vector_set, ExtractVectorEmpty) @@ -339,6 +386,19 @@ TEST(vector_set, CustomIDVectorSet) EXPECT_EQ(set.size(), 2); set.add(ThingWithID{3333, "test", 27}); EXPECT_EQ(set.size(), 2); + + /* Add more elements than the inline capacity. */ + CustomIDVectorSet larger_set; + larger_set.add_new(ThingWithID{12, "test", 9}); + EXPECT_EQ(larger_set.index_of_as("test"), 0); + larger_set.add_new(ThingWithID{123, "other", 8}); + larger_set.add(ThingWithID{1234, "test", 7}); + larger_set.add_new(ThingWithID{12345, "test_2", 6}); + larger_set.add_new(ThingWithID{123456, "test_4", 5}); + larger_set.add_new(ThingWithID{1234567, "test_5", 4}); + EXPECT_EQ(larger_set.index_of_as("test_4"), 3); + larger_set.add_new(ThingWithID{12345678, "test_6", 3}); + EXPECT_TRUE(larger_set.size() == 6); } namespace { diff --git a/source/blender/io/usd/intern/usd_reader_mesh.cc b/source/blender/io/usd/intern/usd_reader_mesh.cc index f0db807fd5d..7ed39b3c110 100644 --- a/source/blender/io/usd/intern/usd_reader_mesh.cc +++ b/source/blender/io/usd/intern/usd_reader_mesh.cc @@ -467,6 +467,7 @@ void USDMeshReader::read_edge_creases(Mesh *mesh, const double motionSampleTime) /* Build mapping from vert pairs to edge index. */ using EdgeMap = VectorSet, DefaultEquality, diff --git a/source/blender/nodes/NOD_geometry_nodes_execute.hh b/source/blender/nodes/NOD_geometry_nodes_execute.hh index c9431319598..c31e3567b09 100644 --- a/source/blender/nodes/NOD_geometry_nodes_execute.hh +++ b/source/blender/nodes/NOD_geometry_nodes_execute.hh @@ -42,7 +42,7 @@ struct IDPropNameGetter { * Use a #VectorSet to store properties for constant time lookup, to avoid slowdown with many * inputs. */ -using PropertiesVectorSet = CustomIDVectorSet; +using PropertiesVectorSet = CustomIDVectorSet; PropertiesVectorSet build_properties_vector_set(const IDProperty *properties); std::optional input_attribute_name_get(const PropertiesVectorSet &properties,