Files
test2/source/blender/blenkernel/intern/id_hash.cc
Bastien Montagne fdfbb3ad20 Fix (unreported) memleak in IDHash when computing the hash fails.
Code would early-return on failure, forgetting to free the xxhash state.

Use the `BLI_SCOPED_DEFER` util to ensure the state is always properly freed
when it gets out of scope.

Pull Request: https://projects.blender.org/blender/blender/pulls/147189
2025-10-02 16:26:03 +02:00

251 lines
8.0 KiB
C++

/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include <fcntl.h>
#include <fmt/format.h>
#include <mutex>
#include <utility>
#include <xxhash.h>

#ifdef _WIN32
#  include <io.h>
#else
#  include <unistd.h>
#endif

#include "BKE_id_hash.hh"
#include "BKE_lib_id.hh"
#include "BKE_lib_query.hh"
#include "BKE_library.hh"
#include "BKE_main.hh"
#include "BLI_fileops.hh"
#include "BLI_mmap.h"
#include "BLI_mutex.hh"
#include "BLI_set.hh"
namespace blender::bke::id_hash {
/**
 * Read the whole content of the file at `path` into a buffer.
 *
 * \param path: Path of the file to read, opened in binary mode.
 * \return The file content, or `std::nullopt` when the file cannot be opened, its size cannot be
 * determined, or not all of its bytes could be read.
 */
static std::optional<Vector<char>> read_file(const StringRefNull path)
{
  blender::fstream stream{path.c_str(), std::ios_base::in | std::ios_base::binary};
  if (stream.fail()) {
    /* The file could not be opened, there is nothing to measure or read. */
    return std::nullopt;
  }

  /* Seek to the end to find out how many bytes have to be read. */
  stream.seekg(0, std::ios_base::end);
  const int64_t size = stream.tellg();
  if (size < 0) {
    /* `tellg()` reports failure as -1, which must never be used as a buffer size. */
    return std::nullopt;
  }
  stream.seekg(0, std::ios_base::beg);

  blender::Vector<char> buffer(size);
  stream.read(buffer.data(), size);
  /* `fail()` also covers a short read (e.g. the file shrank in the meantime), in which case part
   * of the buffer would not have been filled from the file. */
  if (stream.fail()) {
    return std::nullopt;
  }
  return buffer;
}
static std::optional<XXH128_hash_t> compute_file_hash_with_file_read(const StringRefNull path)
{
const std::optional<Vector<char>> buffer = read_file(path);
if (!buffer) {
return std::nullopt;
}
return XXH3_128bits(buffer->data(), buffer->size());
}
/**
 * Hash the content of the file at `path` through a memory mapping of the file.
 *
 * \return The 128 bit XXH3 hash of the file content, or `std::nullopt` if the file cannot be
 * opened, cannot be memory mapped, or an IO error was reported for the mapping while hashing.
 */
static std::optional<XXH128_hash_t> compute_file_hash_with_memory_map(const StringRefNull path)
{
  const int file = BLI_open(path.c_str(), O_BINARY | O_RDONLY, 0);
  if (file == -1) {
    return std::nullopt;
  }
  /* The descriptor is opened here and nothing else in this function releases it, so close it on
   * every exit path. It is declared before the mapping guard below, so the mapping is freed
   * first and the descriptor is closed afterwards. */
  BLI_SCOPED_DEFER([&]() { close(file); });

  BLI_mmap_file *mmap_file = BLI_mmap_open(file);
  if (!mmap_file) {
    return std::nullopt;
  }
  BLI_SCOPED_DEFER([&]() { BLI_mmap_free(mmap_file); });

  const size_t size = BLI_mmap_get_length(mmap_file);
  const void *data = BLI_mmap_get_pointer(mmap_file);
  const XXH128_hash_t hash = XXH3_128bits(data, size);
  /* The error state is checked only after hashing, because that is when the mapped memory is
   * actually accessed. */
  if (BLI_mmap_any_io_error(mmap_file)) {
    return std::nullopt;
  }
  return hash;
}
static std::optional<XXH128_hash_t> compute_file_hash(const StringRefNull path)
{
/* First try the memory map the file, because it avoids an extra copy. */
if (const std::optional<XXH128_hash_t> hash = compute_file_hash_with_memory_map(path)) {
/* Make sure both code paths are tested even if memory mapping should almost always work. */
BLI_assert(hash->low64 == compute_file_hash_with_file_read(path)->low64);
return hash;
}
if (const std::optional<XXH128_hash_t> hash = compute_file_hash_with_file_read(path)) {
return hash;
}
return std::nullopt;
}
/** Entry of the per-file hash cache: a file hash and the modification time it belongs to. */
struct CachedFileHash {
  /** Modification time (`st_mtime`) the file had when #hash was computed. */
  int64_t last_modified = 0;
  /** Hash of the file content. Zero-initialized so that a default constructed entry never
   * contains indeterminate data. */
  XXH128_hash_t hash = {};
};
static std::optional<XXH128_hash_t> get_source_file_hash(const ID &id, DeepHashErrors &r_errors)
{
static Map<std::string, CachedFileHash> cache;
static Mutex mutex;
const StringRefNull path = id.lib->runtime->filepath_abs;
BLI_stat_t stat;
if (BLI_stat(path.c_str(), &stat) == -1) {
r_errors.missing_files.add_as(path);
return std::nullopt;
}
std::lock_guard lock(mutex);
if (const CachedFileHash *cached_hash = cache.lookup_ptr_as(path)) {
if (cached_hash->last_modified == stat.st_mtime) {
return cached_hash->hash;
}
}
if (stat.st_mtime != id.runtime->src_blend_modifification_time) {
r_errors.updated_files.add_as(path);
return std::nullopt;
}
if (const std::optional<XXH128_hash_t> hash = compute_file_hash(path)) {
cache.add_overwrite(path, CachedFileHash{stat.st_mtime, *hash});
return hash;
}
r_errors.missing_files.add_as(path);
return std::nullopt;
}
static std::optional<XXH128_hash_t> get_id_shallow_hash(const ID &id, DeepHashErrors &r_errors)
{
BLI_assert(ID_IS_LINKED(&id));
const StringRefNull id_name = id.name;
const std::optional<XXH128_hash_t> file_hash = get_source_file_hash(id, r_errors);
if (!file_hash) {
return std::nullopt;
}
XXH3_state_t *hash_state = XXH3_createState();
XXH3_128bits_reset(hash_state);
XXH3_128bits_update(hash_state, id_name.data(), id_name.size());
XXH3_128bits_update(hash_state, &*file_hash, sizeof(XXH128_hash_t));
XXH128_hash_t shallow_hash = XXH3_128bits_digest(hash_state);
XXH3_freeState(hash_state);
return shallow_hash;
}
static void compute_deep_hash_recursive(const Main &bmain,
const ID &id,
Set<const ID *> &current_stack,
Map<const ID *, IDHash> &r_hashes,
DeepHashErrors &r_errors)
{
if (r_hashes.contains(&id)) {
return;
}
if (!id.deep_hash.is_null()) {
r_hashes.add(&id, id.deep_hash);
return;
}
current_stack.add(&id);
const std::optional<XXH128_hash_t> id_shallow_hash = get_id_shallow_hash(id, r_errors);
if (!id_shallow_hash) {
return;
}
XXH3_state_t *hash_state = XXH3_createState();
BLI_SCOPED_DEFER([&hash_state]() -> void { XXH3_freeState(hash_state); })
XXH3_128bits_reset(hash_state);
XXH3_128bits_update(hash_state, &*id_shallow_hash, sizeof(XXH128_hash_t));
bool success = true;
BKE_library_foreach_ID_link(
const_cast<Main *>(&bmain),
const_cast<ID *>(&id),
[&](LibraryIDLinkCallbackData *cb_data) {
if (cb_data->cb_flag & IDWALK_CB_LOOPBACK) {
/* Loopback pointer (e.g. from a shapekey to its owner geometry ID, or from a collection
* to its parents) should always be ignored, as they do not represent an actual
* dependency. The dependency relationship should already have been processed from the
* owner to its dependency anyway (if applicable). */
return IDWALK_RET_NOP;
}
if (cb_data->cb_flag & (IDWALK_CB_EMBEDDED | IDWALK_CB_EMBEDDED_NOT_OWNING)) {
/* Embedded data are part of their owner's internal data, and as such already computed as
* part of the owner's shallow hash. */
return IDWALK_RET_NOP;
}
ID *referenced_id = *cb_data->id_pointer;
if (!referenced_id) {
/* Need to update the hash even if there is no id. There is a difference between the case
* where there is no id and the case where this callback is not called at all.*/
const int random_data = 452942579;
XXH3_128bits_update(hash_state, &random_data, sizeof(int));
return IDWALK_RET_NOP;
}
/* All embedded ID usages should already have been excluded above. */
BLI_assert((referenced_id->flag & ID_FLAG_EMBEDDED_DATA) == 0);
if (current_stack.contains(referenced_id)) {
/* Somehow encode that we had a circular reference here. */
const int random_data = 234632342;
XXH3_128bits_update(hash_state, &random_data, sizeof(int));
return IDWALK_RET_NOP;
}
compute_deep_hash_recursive(bmain, *referenced_id, current_stack, r_hashes, r_errors);
const IDHash *referenced_id_hash = r_hashes.lookup_ptr(referenced_id);
if (!referenced_id_hash) {
success = false;
return IDWALK_RET_STOP_ITER;
}
XXH3_128bits_update(hash_state, referenced_id_hash->data, sizeof(IDHash));
return IDWALK_RET_NOP;
},
nullptr,
IDWALK_READONLY);
if (!success) {
return;
}
IDHash new_deep_hash;
const XXH128_hash_t new_deep_hash_xxh128 = XXH3_128bits_digest(hash_state);
static_assert(sizeof(IDHash) == sizeof(XXH128_hash_t));
memcpy(new_deep_hash.data, &new_deep_hash_xxh128, sizeof(IDHash));
r_hashes.add(&id, new_deep_hash);
}
/**
 * Compute the deep hashes of the given linked IDs and of all IDs they depend on.
 *
 * \param bmain: Main database used to iterate over the ID references.
 * \param ids: The IDs to compute hashes for, all of them have to be linked (asserted).
 * \return The computed hashes, or the collected errors if any file was reported as missing or
 * updated while computing them.
 */
IDHashResult compute_linked_id_deep_hashes(const Main &bmain, Span<const ID *> ids)
{
#ifndef NDEBUG
  for (const ID *id : ids) {
    BLI_assert(ID_IS_LINKED(id));
  }
#endif
  if (ids.is_empty()) {
    return ValidDeepHashes{};
  }

  Map<const ID *, IDHash> hashes;
  Set<const ID *> current_stack;
  DeepHashErrors errors;
  for (const ID *id : ids) {
    compute_deep_hash_recursive(bmain, *id, current_stack, hashes, errors);
  }

  if (!errors.missing_files.is_empty() || !errors.updated_files.is_empty()) {
    return errors;
  }
  /* The local map is not used anymore, move it into the result instead of copying it. */
  return ValidDeepHashes{std::move(hashes)};
}
/**
 * Convert a hash to a string containing two lowercase hexadecimal digits for every byte of the
 * hash, in the order in which the bytes are stored.
 */
std::string id_hash_to_hex(const IDHash &hash)
{
  std::string result;
  /* Every byte is formatted as exactly two characters. */
  result.reserve(sizeof(hash.data) * 2);
  for (const uint8_t value : hash.data) {
    result.append(fmt::format("{:02x}", value));
  }
  return result;
}
} // namespace blender::bke::id_hash