Files
test/source/blender/blenlib/BLI_hash.hh
Jacques Lucke 64a13f4be2 Refactor: BLI: simplify extending get_default_hash function
This simplifies extending `blender::get_default_hash` to more than 4 parameters
by just adding extra factors to the list. The behavior should be the same as
before.

Pull Request: https://projects.blender.org/blender/blender/pulls/139392
2025-05-24 21:01:05 +02:00

265 lines
8.7 KiB
C++

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
/** \file
* \ingroup bli
*
* A specialization of `blender::DefaultHash<T>` provides a hash function for values of type T.
* This hash function is used by default in hash table implementations in blenlib.
*
* The actual hash function is in the `operator()` method of `DefaultHash<T>`. The following code
* computes the hash of some value using DefaultHash.
*
* T value = ...;
* DefaultHash<T> hash_function;
* uint32_t hash = hash_function(value);
*
* Hash table implementations like blender::Set support heterogeneous key lookups. That means that
* one can do a lookup with a key of type A in a hash table that stores keys of type B. This is
* commonly done when B is std::string, because the conversion from e.g. a #StringRef to
* std::string can be costly and is unnecessary. To make this work, values of type A and B that
* compare equal have to have the same hash value. This is achieved by defining potentially
* multiple `operator()` in a specialization of #DefaultHash. All those methods have to compute the
* same hash for values that compare equal.
*
* The computed hash is an unsigned 64 bit integer. Ideally, the hash function would generate
* uniformly random hash values for a set of keys. However, in many cases trivial hash functions
* are faster and produce a good enough distribution. In general it is better when more information
* is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash
* function are less noticeable though. In this context a good probing strategy is one that takes
* all bits of the hash into account eventually. One has to check on a case by case basis to see if
* a better but more expensive or trivial hash function works better.
*
* There are three main ways to provide a hash table implementation with a custom hash function.
*
* - When you want to provide a default hash function for your own custom type: Add a `hash()`
* member function to it. The function should return `uint64_t` and take no arguments. This
* method will be called by the default implementation of #DefaultHash. It will automatically be
* used by hash table implementations.
*
* - When you want to provide a default hash function for a type that you cannot modify: Add a new
* specialization to the #DefaultHash struct. This can be done by writing code like below in
* either global or `blender` namespace.
*
* template<> struct blender::DefaultHash<TheType> {
* uint64_t operator()(const TheType &value) const {
* return ...;
* }
* };
*
* - When you want to provide a different hash function for a type that already has a default hash
* function: Implement a struct like the one below and pass it as template parameter to the hash
* table explicitly.
*
* struct MyCustomHash {
* uint64_t operator()(const TheType &value) const {
* return ...;
* }
* };
*/
#include <memory>
#include <string>
#include <utility>
#include "BLI_hash_fwd.hh"
#include "BLI_string_ref.hh"
namespace blender {
/**
* If there is no other specialization of #DefaultHash for a given type, look for a hash function
* on the type itself. Implementing a `hash()` method on a type is often significantly easier than
* specializing #DefaultHash.
*
* To support heterogeneous lookup, a type can also implement a static `hash_as(const OtherType &)`
* function.
*
* In the case of an enum type, the default hash is just to cast the enum value to an integer.
*/
template<typename T> struct DefaultHash {
uint64_t operator()(const T &value) const
{
if constexpr (std::is_enum_v<T>) {
/* For enums use the value as hash directly. */
return uint64_t(value);
}
else {
/* Try to call the `hash()` function on the value. */
/* If this results in a compiler error, no hash function for the type has been found. */
return value.hash();
}
}
template<typename U> uint64_t operator()(const U &value) const
{
/* Try calling the static `T::hash_as(value)` function with the given value. The returned hash
* should be "compatible" with `T::hash()`. Usually that means that if `value` is converted to
* `T` its hash does not change. */
/* If this results in a compiler error, no hash function for the heterogeneous lookup has been
* found. */
return T::hash_as(value);
}
};
/**
* Use the same hash function for const and non const variants of a type.
*/
template<typename T> struct DefaultHash<const T> {
uint64_t operator()(const T &value) const
{
return DefaultHash<T>{}(value);
}
};
#define TRIVIAL_DEFAULT_INT_HASH(TYPE) \
template<> struct DefaultHash<TYPE> { \
uint64_t operator()(TYPE value) const \
{ \
return uint64_t(value); \
} \
}
/**
* We cannot make any assumptions about the distribution of keys, so use a trivial hash function by
* default. The default probing strategy is designed to take all bits of the hash into account
* to avoid worst case behavior when the lower bits are all zero. Special hash functions can be
* implemented when more knowledge about a specific key distribution is available.
*/
TRIVIAL_DEFAULT_INT_HASH(int8_t);
TRIVIAL_DEFAULT_INT_HASH(uint8_t);
TRIVIAL_DEFAULT_INT_HASH(int16_t);
TRIVIAL_DEFAULT_INT_HASH(uint16_t);
TRIVIAL_DEFAULT_INT_HASH(int32_t);
TRIVIAL_DEFAULT_INT_HASH(uint32_t);
TRIVIAL_DEFAULT_INT_HASH(int64_t);
TRIVIAL_DEFAULT_INT_HASH(uint64_t);
/**
* One should try to avoid using floats as keys in hash tables, but sometimes it is convenient.
*/
template<> struct DefaultHash<float> {
uint64_t operator()(float value) const
{
/* Explicit `uint64_t` cast to suppress CPPCHECK warning. */
return uint64_t(*reinterpret_cast<uint32_t *>(&value));
}
};
template<> struct DefaultHash<double> {
uint64_t operator()(double value) const
{
return *reinterpret_cast<uint64_t *>(&value);
}
};
template<> struct DefaultHash<bool> {
uint64_t operator()(bool value) const
{
return uint64_t((value != false) * 1298191);
}
};
inline uint64_t hash_string(StringRef str)
{
uint64_t hash = 5381;
for (char c : str) {
hash = hash * 33 + c;
}
return hash;
}
template<> struct DefaultHash<std::string> {
/**
* Take a #StringRef as parameter to support heterogeneous lookups in hash table implementations
* when std::string is used as key.
*/
uint64_t operator()(StringRef value) const
{
return hash_string(value);
}
};
template<> struct DefaultHash<StringRef> {
uint64_t operator()(StringRef value) const
{
return hash_string(value);
}
};
template<> struct DefaultHash<StringRefNull> {
uint64_t operator()(StringRef value) const
{
return hash_string(value);
}
};
template<> struct DefaultHash<std::string_view> {
uint64_t operator()(StringRef value) const
{
return hash_string(value);
}
};
/**
* While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case.
*/
template<typename T> struct DefaultHash<T *> {
uint64_t operator()(const T *value) const
{
uintptr_t ptr = uintptr_t(value);
uint64_t hash = uint64_t(ptr >> 4);
return hash;
}
};
namespace detail {
static constexpr std::array<uint64_t, 3> default_hash_factors = {19349669, 83492791, 3632623};
template<size_t... I, typename... Args>
inline uint64_t get_default_hash_array(std::index_sequence<I...> /*indices*/, const Args &...args)
{
static_assert(sizeof...(Args) == sizeof...(I));
static_assert(sizeof...(Args) <= default_hash_factors.size());
return (0 ^ ... ^ (default_hash_factors[I] * DefaultHash<std::decay_t<Args>>{}(args)));
}
} // namespace detail
template<typename T, typename... Args>
inline uint64_t get_default_hash(const T &v, const Args &...args)
{
return DefaultHash<std::decay_t<T>>{}(v) ^
detail::get_default_hash_array(std::make_index_sequence<sizeof...(Args)>(), args...);
}
/** Support hashing different kinds of pointer types. */
template<typename T> struct PointerHashes {
template<typename U> uint64_t operator()(const U &value) const
{
return get_default_hash(&*value);
}
};
template<typename T> struct DefaultHash<std::unique_ptr<T>> : public PointerHashes<T> {};
template<typename T> struct DefaultHash<std::shared_ptr<T>> : public PointerHashes<T> {};
template<typename T> struct DefaultHash<std::reference_wrapper<T>> {
uint64_t operator()(const std::reference_wrapper<T> &value) const
{
return get_default_hash(value.get());
}
};
template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> {
uint64_t operator()(const std::pair<T1, T2> &value) const
{
return get_default_hash(value.first, value.second);
}
};
} // namespace blender