BLI: Add configurable inline buffer to VectorSet

Allow configuring the inline buffer capacity for the slots array, and
add an inline buffer for the keys vector. Previously there was always
an allocation when adding an element.

The inline capacity is manually configured in a few places as part of
this commit.

Pull Request: https://projects.blender.org/blender/blender/pulls/136461
This commit is contained in:
Hans Goudey
2025-04-29 18:52:30 +02:00
committed by Hans Goudey
parent 6e190bde4a
commit a2fac05e9d
6 changed files with 210 additions and 39 deletions

View File

@@ -127,6 +127,7 @@ struct MeshNode : public Node {
* order to use 32 bit integers for slot values. .
*/
using LocalVertMap = VectorSet<int,
0,
DefaultProbingStrategy,
DefaultHash<int>,
DefaultEquality<int>,

View File

@@ -31,6 +31,7 @@ static uint64_t edge_hash_2(const OrderedEdge &edge)
}
using EdgeMap = VectorSet<OrderedEdge,
16,
DefaultProbingStrategy,
DefaultHash<OrderedEdge>,
DefaultEquality<OrderedEdge>,

View File

@@ -42,9 +42,6 @@
* - The default constructor is cheap.
* - The `print_stats` method can be used to get information about the distribution of keys and
* memory usage.
*
* Possible Improvements:
* - Small buffer optimization for the keys.
*/
#include "BLI_array.hh"
@@ -62,6 +59,10 @@ template<
* hash and is-equal functions have to support it.
*/
typename Key,
/**
* The number of values that can be stored in the container without a heap allocation.
*/
int64_t InlineBufferCapacity = default_inline_buffer_capacity(sizeof(Key)),
/**
* The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh.
*/
@@ -125,7 +126,7 @@ class VectorSet {
/** This is called to check equality of two keys. */
BLI_NO_UNIQUE_ADDRESS IsEqual is_equal_;
/** The max load factor is 1/2 = 50% by default. */
/** The max load factor is 1/2 = 50% by default. */
#define LOAD_FACTOR 1, 2
static constexpr LoadFactor max_load_factor_ = LoadFactor(LOAD_FACTOR);
using SlotArray = Array<Slot, LoadFactor::compute_total_slots(4, LOAD_FACTOR), Allocator>;
@@ -137,12 +138,15 @@ class VectorSet {
*/
SlotArray slots_;
/** A buffer for #keys_ that will remain uninitialized until it is used. */
BLI_NO_UNIQUE_ADDRESS TypedBuffer<Key, InlineBufferCapacity> inline_buffer_;
/**
* Pointer to an array that contains all keys. The keys are sorted by insertion order as long as
* no keys are removed. The first set->size() elements in this array are initialized. The
* capacity of the array is usable_slots_.
*/
Key *keys_ = nullptr;
Key *keys_;
/** Iterate over a slot index sequence for a given hash. */
#define VECTOR_SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \
@@ -150,6 +154,19 @@ class VectorSet {
auto &R_SLOT = slots_[SLOT_INDEX];
#define VECTOR_SET_SLOT_PROBING_END() SLOT_PROBING_END()
/**
* Be a friend with other template instantiations. This is necessary to implement some memory
* management logic.
*/
template<typename Other,
int64_t OtherInlineBufferCapacity,
typename OtherProbingStrategy,
typename OtherHash,
typename OtherIsEqual,
typename OtherSlot,
typename OtherAllocator>
friend class VectorSet;
public:
/**
* Initialize an empty vector set. This is a cheap operation and won't do an allocation. This is
@@ -161,9 +178,9 @@ class VectorSet {
occupied_and_removed_slots_(0),
usable_slots_(0),
slot_mask_(0),
slots_(1, allocator),
keys_(nullptr)
slots_(1, allocator)
{
keys_ = inline_buffer_;
}
VectorSet(NoExceptConstructor, Allocator allocator = {}) : VectorSet(allocator) {}
@@ -184,44 +201,80 @@ class VectorSet {
~VectorSet()
{
destruct_n(keys_, this->size());
if (keys_ != nullptr) {
if (keys_ != inline_buffer_) {
this->deallocate_keys_array(keys_);
}
}
VectorSet(const VectorSet &other) : slots_(other.slots_)
{
keys_ = this->allocate_keys_array(other.usable_slots_);
if (other.size() <= InlineBufferCapacity) {
usable_slots_ = other.size();
keys_ = inline_buffer_;
}
else {
keys_ = this->allocate_keys_array(other.usable_slots_);
usable_slots_ = other.usable_slots_;
}
try {
uninitialized_copy_n(other.keys_, other.size(), keys_);
}
catch (...) {
this->deallocate_keys_array(keys_);
if (keys_ != inline_buffer_) {
this->deallocate_keys_array(keys_);
}
throw;
}
removed_slots_ = other.removed_slots_;
occupied_and_removed_slots_ = other.occupied_and_removed_slots_;
usable_slots_ = other.usable_slots_;
slot_mask_ = other.slot_mask_;
hash_ = other.hash_;
is_equal_ = other.is_equal_;
}
VectorSet(VectorSet &&other) noexcept
template<int64_t OtherInlineBufferCapacity>
VectorSet(VectorSet<Key, OtherInlineBufferCapacity> &&other) noexcept
: removed_slots_(other.removed_slots_),
occupied_and_removed_slots_(other.occupied_and_removed_slots_),
usable_slots_(other.usable_slots_),
slot_mask_(other.slot_mask_),
slots_(std::move(other.slots_)),
keys_(other.keys_)
slots_(std::move(other.slots_))
{
if (other.is_inline()) {
const int64_t size = other.size();
usable_slots_ = size;
constexpr bool other_is_same_type = std::is_same_v<VectorSet, std::decay_t<decltype(other)>>;
constexpr size_t max_full_copy_size = 32;
if constexpr (other_is_same_type && std::is_trivial_v<Key> &&
sizeof(inline_buffer_) <= max_full_copy_size)
{
/* Optimize by copying the full inline buffer. Similar to #Vector move constructor. */
keys_ = inline_buffer_;
if (size > 0) {
memcpy(inline_buffer_, other.inline_buffer_, sizeof(inline_buffer_));
}
}
else {
if (OtherInlineBufferCapacity <= InlineBufferCapacity || size <= InlineBufferCapacity) {
keys_ = inline_buffer_;
}
else {
keys_ = this->allocate_keys_array(size);
}
uninitialized_relocate_n(other.keys_, size, keys_);
}
}
else {
keys_ = other.keys_;
usable_slots_ = other.usable_slots_;
}
other.removed_slots_ = 0;
other.occupied_and_removed_slots_ = 0;
other.usable_slots_ = 0;
other.slot_mask_ = 0;
other.slots_ = SlotArray(1);
other.keys_ = nullptr;
other.keys_ = other.inline_buffer_;
}
VectorSet &operator=(const VectorSet &other)
@@ -530,6 +583,11 @@ class VectorSet {
stats.print(name);
}
bool is_inline() const
{
return keys_ == inline_buffer_;
}
/**
* Returns the number of keys stored in the vector set.
*/
@@ -632,18 +690,36 @@ class VectorSet {
* Extracts all inserted values as a #Vector. The values are removed from the #VectorSet. This
* takes O(1) time.
*
* The caller does not need special handling for when the data is stored inline in the vector
* set.
*
* One can use this to create a #Vector without duplicates efficiently.
*/
VectorT extract_vector()
{
const int64_t size = this->size();
VectorData<Key, Allocator> data;
data.data = keys_;
data.size = this->size();
data.capacity = usable_slots_;
if (this->is_inline()) {
data.data = this->allocate_keys_array(size);
data.size = size;
data.capacity = size;
try {
uninitialized_relocate_n(keys_, size, data.data);
}
catch (...) {
this->deallocate_keys_array(data.data);
throw;
}
}
else {
data.data = keys_;
data.size = size;
data.capacity = usable_slots_;
}
/* Reset some values so that the destructor does not free the data that is moved to the
* #Vector. */
keys_ = nullptr;
keys_ = inline_buffer_;
occupied_and_removed_slots_ = 0;
removed_slots_ = 0;
std::destroy_at(this);
@@ -665,11 +741,20 @@ class VectorSet {
if (this->size() == 0) {
try {
slots_.reinitialize(total_slots);
if (keys_ != nullptr) {
this->deallocate_keys_array(keys_);
keys_ = nullptr;
Key *new_keys;
if (usable_slots <= InlineBufferCapacity) {
new_keys = inline_buffer_;
}
keys_ = this->allocate_keys_array(usable_slots);
else {
new_keys = this->allocate_keys_array(usable_slots);
}
if (keys_ != inline_buffer_) {
this->deallocate_keys_array(keys_);
}
keys_ = new_keys;
}
catch (...) {
this->noexcept_reset();
@@ -698,16 +783,34 @@ class VectorSet {
throw;
}
Key *new_keys = this->allocate_keys_array(usable_slots);
try {
uninitialized_relocate_n(keys_, this->size(), new_keys);
/* Allocate the new keys array, or use the inline buffer if possible. */
Key *new_keys;
if (usable_slots <= InlineBufferCapacity) {
new_keys = inline_buffer_;
}
catch (...) {
this->deallocate_keys_array(new_keys);
this->noexcept_reset();
throw;
else {
new_keys = this->allocate_keys_array(usable_slots);
}
/* Copy the keys to the new array. When the inline buffer isn't used before and after the
* reallocation (`new_keys` also references the inline buffer), no copying is necessary. */
if (new_keys != keys_) {
try {
uninitialized_relocate_n(keys_, this->size(), new_keys);
}
catch (...) {
if (new_keys != inline_buffer_) {
this->deallocate_keys_array(new_keys);
}
this->noexcept_reset();
throw;
}
}
/* Free the old keys array. */
if (keys_ != inline_buffer_) {
this->deallocate_keys_array(keys_);
}
this->deallocate_keys_array(keys_);
keys_ = new_keys;
occupied_and_removed_slots_ -= removed_slots_;
@@ -985,11 +1088,13 @@ class VectorSet {
* allocating memory with static storage duration.
*/
template<typename Key,
int64_t InlineBufferCapacity = 4,
typename ProbingStrategy = DefaultProbingStrategy,
typename Hash = DefaultHash<Key>,
typename IsEqual = DefaultEquality<Key>,
typename Slot = typename DefaultVectorSetSlot<Key>::type>
using RawVectorSet = VectorSet<Key, ProbingStrategy, Hash, IsEqual, Slot, RawAllocator>;
using RawVectorSet =
VectorSet<Key, InlineBufferCapacity, ProbingStrategy, Hash, IsEqual, Slot, RawAllocator>;
template<typename T, typename GetIDFn> struct CustomIDHash {
using CustomIDType = decltype(GetIDFn{}(std::declval<T>()));
@@ -1028,8 +1133,11 @@ template<typename T, typename GetIDFn> struct CustomIDEqual {
* #GetIDFn should have an implementation that returns a hashable and equality comparable type,
* i.e. `StringRef operator()(const bNode *value) { return value->idname; }`.
*/
template<typename T, typename GetIDFn>
using CustomIDVectorSet =
VectorSet<T, DefaultProbingStrategy, CustomIDHash<T, GetIDFn>, CustomIDEqual<T, GetIDFn>>;
template<typename T, typename GetIDFn, int64_t InlineBufferCapacity = 4>
using CustomIDVectorSet = VectorSet<T,
InlineBufferCapacity,
DefaultProbingStrategy,
CustomIDHash<T, GetIDFn>,
CustomIDEqual<T, GetIDFn>>;
} // namespace blender

View File

@@ -25,6 +25,12 @@ TEST(vector_set, InitializerListConstructor_WithoutDuplicates)
EXPECT_EQ(set[0], 1);
EXPECT_EQ(set[1], 4);
EXPECT_EQ(set[2], 5);
VectorSet<int> set_bigger_than_inline = {1, 4, 5, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19};
EXPECT_EQ(set_bigger_than_inline.size(), 14);
EXPECT_EQ(set_bigger_than_inline[0], 1);
EXPECT_EQ(set_bigger_than_inline[1], 4);
EXPECT_EQ(set_bigger_than_inline[2], 5);
EXPECT_EQ(set_bigger_than_inline.as_span().last(), 19);
}
TEST(vector_set, InitializerListConstructor_WithDuplicates)
@@ -60,10 +66,26 @@ TEST(vector_set, CopyAssignment)
TEST(vector_set, Move)
{
VectorSet<int> set1 = {1, 2, 3};
{
VectorSet<int> set1 = {1, 2, 3};
VectorSet<int> set2 = std::move(set1);
EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */
EXPECT_EQ(set2.size(), 3);
}
{
VectorSet<int, 24> set1 = {1, 2, 3, 4, 5, 6, 7, 8, 9};
VectorSet<int> set2 = std::move(set1);
EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */
EXPECT_EQ(set2.size(), 9);
}
}
TEST(vector_set, MoveNonInline)
{
VectorSet<int> set1 = {1, 2, 3, 5, 1, 6, 7, 8, 1, 4, 57, 8, 7, 34, 57, 8, 1231};
VectorSet<int> set2 = std::move(set1);
EXPECT_EQ(set1.size(), 0); /* NOLINT: bugprone-use-after-move */
EXPECT_EQ(set2.size(), 3);
EXPECT_EQ(set2.size(), 11);
}
TEST(vector_set, MoveAssignment)
@@ -310,6 +332,31 @@ TEST(vector_set, ExtractVector)
Vector<int> vec = set.extract_vector();
EXPECT_EQ(vec.size(), 5);
EXPECT_EQ(vec.data(), data_ptr);
EXPECT_TRUE(set.is_empty());
}
TEST(vector_set, ExtractVectorInline)
{
VectorSet<int, 32> set;
set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5});
EXPECT_EQ(set.size(), 5);
Vector<int> vec = set.extract_vector();
EXPECT_EQ(vec.size(), 5);
EXPECT_EQ(vec[2], 7);
EXPECT_TRUE(set.is_empty());
}
TEST(vector_set, ExtractVectorInlineExceptions)
{
VectorSet<ExceptionThrower, 32> set;
set.add_multiple({5, 2, 7, 4, 8, 5, 4, 5});
set[3].throw_during_copy = true;
set[3].throw_during_move = true;
EXPECT_EQ(set.size(), 5);
EXPECT_ANY_THROW({ Vector<ExceptionThrower> vec = set.extract_vector(); });
EXPECT_EQ(set.size(), 5);
}
TEST(vector_set, ExtractVectorEmpty)
@@ -339,6 +386,19 @@ TEST(vector_set, CustomIDVectorSet)
EXPECT_EQ(set.size(), 2);
set.add(ThingWithID{3333, "test", 27});
EXPECT_EQ(set.size(), 2);
/* Add more elements than the inline capacity. */
CustomIDVectorSet<ThingWithID, ThingGetter> larger_set;
larger_set.add_new(ThingWithID{12, "test", 9});
EXPECT_EQ(larger_set.index_of_as("test"), 0);
larger_set.add_new(ThingWithID{123, "other", 8});
larger_set.add(ThingWithID{1234, "test", 7});
larger_set.add_new(ThingWithID{12345, "test_2", 6});
larger_set.add_new(ThingWithID{123456, "test_4", 5});
larger_set.add_new(ThingWithID{1234567, "test_5", 4});
EXPECT_EQ(larger_set.index_of_as("test_4"), 3);
larger_set.add_new(ThingWithID{12345678, "test_6", 3});
EXPECT_TRUE(larger_set.size() == 6);
}
namespace {

View File

@@ -467,6 +467,7 @@ void USDMeshReader::read_edge_creases(Mesh *mesh, const double motionSampleTime)
/* Build mapping from vert pairs to edge index. */
using EdgeMap = VectorSet<OrderedEdge,
16,
DefaultProbingStrategy,
DefaultHash<OrderedEdge>,
DefaultEquality<OrderedEdge>,

View File

@@ -42,7 +42,7 @@ struct IDPropNameGetter {
* Use a #VectorSet to store properties for constant time lookup, to avoid slowdown with many
* inputs.
*/
using PropertiesVectorSet = CustomIDVectorSet<IDProperty *, IDPropNameGetter>;
using PropertiesVectorSet = CustomIDVectorSet<IDProperty *, IDPropNameGetter, 16>;
PropertiesVectorSet build_properties_vector_set(const IDProperty *properties);
std::optional<StringRef> input_attribute_name_get(const PropertiesVectorSet &properties,