From cadb3fe5c542dcdebab19b4a9369a0581d09a66c Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Mon, 29 Sep 2025 13:56:13 +0200 Subject: [PATCH] Blenloader: stable pointers in .blend files When writing .blend files, Blender traditionally wrote raw runtime-pointers into the file. Since these pointers are different whenever Blender is restarted or the file is loaded again, the written .blend file will be very different every time. The file always changing is a problem with tools that use change-detection on .blend files such as: * Change detection during undo in Blender. When a serialized data-block is the same in two consecutive undo steps, it's known that the data didn't change and the data-block does not have to be reloaded and can potentially skip depsgraph evaluation. Also see #141262. * BAT: https://projects.blender.org/studio/flamenco/issues/104437 * Generally using .blend files in version control. The diffs can be smaller when pointers aren't changing all the time and have a lower memory footprint. This PR makes pointers in .blend files across multiples saves much more consistent, improving the situation for the cases above. Although there is still other data that changes on almost every save currently; that needs to be addressed separately. The basic design for pointer stability in blend files stable pointers, described in #127706, is fairly straight forward. This patch implements a slightly modified variant of that design. When reading .blend files, Blender already does not care if the stored pointer values are actual pointer values, or just some arbitrary identifiers. Therefore, we mainly just have to change the write-file code. This also implies that this change is fully forward and backward compatible. The main non-obvious aspect of this patch is how to actually do the remapping of runtime pointers to stable identifiers. In theory, having a single integer that increments for every newly detected pointer works. But in practice that leads to many changes in the .blend file because one pointer is added or removed somewhere, all subsequent pointers will be different too. So some kind of scoping is required to make sure that one small change does not affect everything else. This PR starts a new scope pointer identifier scope whenever a new ID data-block starts. At first I thought it would be good to have separate maps for id-local and global pointers. However, that's tricky currently, because at write-time, we don't always have enough information to know if a specific pointer is local or global. I worked on #146136 to improve the situation but the problem is bigger than that since we also have various void pointers in DNA structs. Fortunately, the solution implemented now also works fine with a single global map. Implicit sharing in undo steps also had to be changed slightly to work with the stable address identifiers instead of raw pointers. There's also new code to find all pointers in DNA structs in the first place. This is done once when writing starts. Then whenever a struct is written, a copy is made, all pointers are replaced and the modified struct is written to the .blend file. There is an optimization for the case when a struct does not contain any pointers because then the copy can be skipped. For checking the diff between two saved .blend files, I recommend enabling `GENERATE_DEBUG_BLEND_FILE` and then diffing the text version of the .blend files. Co-authored-by: Hans Goudey Pull Request: https://projects.blender.org/blender/blender/pulls/127729 --- source/blender/blenloader/BLO_undofile.hh | 8 +- source/blender/blenloader/CMakeLists.txt | 1 + source/blender/blenloader/intern/readfile.cc | 20 +- source/blender/blenloader/intern/undofile.cc | 4 +- source/blender/blenloader/intern/writefile.cc | 177 ++++++++++++++++-- source/blender/makesdna/DNA_sdna_pointers.hh | 54 ++++++ source/blender/makesdna/intern/dna_genfile.cc | 53 ++++++ 7 files changed, 287 insertions(+), 30 deletions(-) create mode 100644 source/blender/makesdna/DNA_sdna_pointers.hh diff --git a/source/blender/blenloader/BLO_undofile.hh b/source/blender/blenloader/BLO_undofile.hh index 479e832fb67..b891f30d10c 100644 --- a/source/blender/blenloader/BLO_undofile.hh +++ b/source/blender/blenloader/BLO_undofile.hh @@ -10,20 +10,18 @@ */ #include "BLI_filereader.h" +#include "BLI_implicit_sharing.hh" #include "BLI_listbase.h" #include "BLI_map.hh" -namespace blender { -class ImplicitSharingInfo; -} struct Main; struct Scene; struct MemFileSharedStorage { /** - * Maps the data pointer to the sharing info that it is owned by. + * Maps the address id to the shared data and corresponding sharing info.. */ - blender::Map map; + blender::Map sharing_info_by_address_id; ~MemFileSharedStorage(); }; diff --git a/source/blender/blenloader/CMakeLists.txt b/source/blender/blenloader/CMakeLists.txt index 6925567427f..4942215bd56 100644 --- a/source/blender/blenloader/CMakeLists.txt +++ b/source/blender/blenloader/CMakeLists.txt @@ -74,6 +74,7 @@ set(LIB PRIVATE bf::render PRIVATE bf::sequencer PRIVATE bf::windowmanager + PRIVATE bf::extern::xxhash ) if(WITH_BUILDINFO) diff --git a/source/blender/blenloader/intern/readfile.cc b/source/blender/blenloader/intern/readfile.cc index 469d3d5a99a..43755bf61dc 100644 --- a/source/blender/blenloader/intern/readfile.cc +++ b/source/blender/blenloader/intern/readfile.cc @@ -576,10 +576,10 @@ struct BlendDataReader { FileData *fd; /** - * The key is the old pointer to shared data that's written to a file, typically an array. The - * corresponding value is the shared data at run-time. + * The key is the old address id referencing shared data that's written to a file, typically an + * array. The corresponding value is the shared data at run-time. */ - blender::Map shared_data_by_stored_address; + blender::Map shared_data_by_stored_address; }; struct BlendLibReader { @@ -5835,26 +5835,26 @@ blender::ImplicitSharingInfoAndData blo_read_shared_impl( const void **ptr_p, const blender::FunctionRef read_fn) { - const void *old_address = *ptr_p; + const uint64_t old_address_id = uint64_t(*ptr_p); if (BLO_read_data_is_undo(reader)) { if (reader->fd->flags & FD_FLAGS_IS_MEMFILE) { UndoReader *undo_reader = reinterpret_cast(reader->fd->file); const MemFile &memfile = *undo_reader->memfile; if (memfile.shared_storage) { /* Check if the data was saved with sharing-info. */ - if (const blender::ImplicitSharingInfo *sharing_info = - memfile.shared_storage->map.lookup_default(old_address, nullptr)) + if (const blender::ImplicitSharingInfoAndData *sharing_info_data = + memfile.shared_storage->sharing_info_by_address_id.lookup_ptr(old_address_id)) { /* Add a new owner of the data that is passed to the caller. */ - sharing_info->add_user(); - return {sharing_info, old_address}; + sharing_info_data->sharing_info->add_user(); + return *sharing_info_data; } } } } if (const blender::ImplicitSharingInfoAndData *shared_data = - reader->shared_data_by_stored_address.lookup_ptr(old_address)) + reader->shared_data_by_stored_address.lookup_ptr(old_address_id)) { /* The data was loaded before. No need to load it again. Just increase the user count to * indicate that it is shared. */ @@ -5869,7 +5869,7 @@ blender::ImplicitSharingInfoAndData blo_read_shared_impl( const blender::ImplicitSharingInfo *sharing_info = read_fn(); const void *new_address = *ptr_p; const blender::ImplicitSharingInfoAndData shared_data{sharing_info, new_address}; - reader->shared_data_by_stored_address.add(old_address, shared_data); + reader->shared_data_by_stored_address.add(old_address_id, shared_data); return shared_data; } diff --git a/source/blender/blenloader/intern/undofile.cc b/source/blender/blenloader/intern/undofile.cc index 78f1320753f..de1cf3aca25 100644 --- a/source/blender/blenloader/intern/undofile.cc +++ b/source/blender/blenloader/intern/undofile.cc @@ -50,9 +50,9 @@ void BLO_memfile_free(MemFile *memfile) MemFileSharedStorage::~MemFileSharedStorage() { - for (const blender::ImplicitSharingInfo *sharing_info : map.values()) { + for (const blender::ImplicitSharingInfoAndData &data : sharing_info_by_address_id.values()) { /* Removing the user makes sure shared data is freed when the undo step was its last owner. */ - sharing_info->remove_user_and_delete_if_last(); + data.sharing_info->remove_user_and_delete_if_last(); } } diff --git a/source/blender/blenloader/intern/writefile.cc b/source/blender/blenloader/intern/writefile.cc index 4198a5b02ef..f02be4cef1b 100644 --- a/source/blender/blenloader/intern/writefile.cc +++ b/source/blender/blenloader/intern/writefile.cc @@ -67,6 +67,7 @@ #include #include #include +#include #ifdef WIN32 # include "BLI_winstuff.h" @@ -89,6 +90,7 @@ #include "DNA_genfile.h" #include "DNA_key_types.h" #include "DNA_print.hh" +#include "DNA_sdna_pointers.hh" #include "DNA_sdna_types.h" #include "DNA_userdef_types.h" #include "DNA_windowmanager_types.h" @@ -447,6 +449,46 @@ struct WriteData { blender::Set per_id_addresses_set; } validation_data; + struct { + /** + * Knows which DNA members are pointers. Those members are overridden when serializing the + * .blend file to get more stable pointer identifiers. + */ + std::unique_ptr sdna_pointers; + /** + * Maps each runtime-pointer to a unique identifier that's written in the .blend file. + * + * Currently, no pointers are ever removed from this map during writing of a single file. + * Correctness wise, this is fine. However, when some data-blocks write temporary addresses, + * those may be reused across IDs while actually pointing to different data. This can break + * address id stability in some situations. In the future this could be improved by clearing + * such temporary pointers before writing the next data-block. + */ + blender::Map pointer_map; + /** + * Contains all the #pointer_map.values(). This is used to make sure that the same id is never + * reused for a different pointer. While this is technically allowed in .blend files (when the + * pointers are local data of different objects), we currently don't always know what type a + * pointer points to when writing it. So we can't determine if a pointer is local or not. + */ + blender::Set used_ids; + /** + * The next stable address id is derived from this. This is modified in + * two cases: + * - A new stable address is needed, in which case this is just incremented. + * - A new "section" of the .blend file starts. In this case, this should be reinitialized with + * some hash of an identifier of the next section. This makes sure that if the number of + * pointers in the previous section is modified, the pointers in the new section are not + * affected. A "section" can be anything, but currently a section simply starts when a new + * data-block starts. In the future, an API could be added that allows sections to start + * within a data-block which could isolate stable pointer ids even more. + * + * When creating the new address id, keep in mind that this may be 0 and it may collide with + * previous hints. + */ + uint64_t next_id_hint = 0; + } stable_address_ids; + /** * Keeps track of which shared data has been written for the current ID. This is necessary to * avoid writing the same data more than once. @@ -475,6 +517,8 @@ static WriteData *writedata_new(WriteWrap *ww) WriteData *wd = MEM_new(__func__); wd->sdna = DNA_sdna_current_get(); + wd->stable_address_ids.sdna_pointers = std::make_unique( + *wd->sdna); wd->ww = ww; @@ -641,6 +685,18 @@ static bool mywrite_end(WriteData *wd) return err; } +static uint64_t get_stable_pointer_hint_for_id(const ID &id) +{ + /* Make the stable pointer dependend on the data-block name. This is somewhat arbitrary but the + * name is at least something that doesn't really change automatically unexpectedly. */ + const uint64_t name_hash = XXH3_64bits(id.name, strlen(id.name)); + if (id.lib) { + const uint64_t lib_hash = XXH3_64bits(id.lib->id.name, strlen(id.lib->id.name)); + return name_hash ^ lib_hash; + } + return name_hash; +} + /** * Start writing of data related to a single ID. * @@ -653,6 +709,8 @@ static void mywrite_id_begin(WriteData *wd, ID *id) BLI_assert(wd->validation_data.per_id_addresses_set.is_empty()); + wd->stable_address_ids.next_id_hint = get_stable_pointer_hint_for_id(*id); + if (wd->use_memfile) { wd->mem.current_id_session_uid = id->session_uid; @@ -767,6 +825,48 @@ static void write_bhead(WriteData *wd, const BHead &bhead) mywrite(wd, &bh, sizeof(bh)); } +static uint64_t stable_id_from_hint(const uint64_t hint) +{ + /* Add a stride. This is not strictly necessary but may help with debugging later on because it's + * easier to identify bad ids. */ + uint64_t stable_id = hint << 4; + if (stable_id == 0) { + /* Null values are reserved for nullptr. */ + stable_id = (1 << 4); + } + return stable_id; +} + +static uint64_t get_next_stable_address_id(WriteData &wd) +{ + uint64_t stable_id = stable_id_from_hint(wd.stable_address_ids.next_id_hint); + while (!wd.stable_address_ids.used_ids.add(stable_id)) { + /* Generate a new hint because there is a collision. Collisions are generally expected to be + * very rare. It can happen when #get_stable_pointer_hint_for_id produces values that are very + * close for different IDs. */ + wd.stable_address_ids.next_id_hint = XXH3_64bits(&wd.stable_address_ids.next_id_hint, + sizeof(uint64_t)); + stable_id = stable_id_from_hint(wd.stable_address_ids.next_id_hint); + } + wd.stable_address_ids.next_id_hint++; + return stable_id; +} + +static uint64_t get_address_id_int(WriteData &wd, const void *address) +{ + if (address == nullptr) { + return 0; + } + /* Either reuse an existing identifier or create a new one. */ + return wd.stable_address_ids.pointer_map.lookup_or_add_cb( + address, [&]() { return get_next_stable_address_id(wd); }); +} + +static const void *get_address_id(WriteData &wd, const void *address) +{ + return reinterpret_cast(get_address_id_int(wd, address)); +} + static void writestruct_at_address_nr(WriteData *wd, const int filecode, const int struct_nr, @@ -794,9 +894,38 @@ static void writestruct_at_address_nr(WriteData *wd, } } + /* Get the address identifier that will be written to the file.*/ + const void *address_id = get_address_id(*wd, adr); + + const blender::dna::pointers::StructInfo &struct_info = + wd->stable_address_ids.sdna_pointers->get_for_struct(struct_nr); + const bool can_write_raw_runtime_data = struct_info.pointers.is_empty(); + + blender::DynamicStackBuffer<16 * 1024> buffer_owner(len_in_bytes, 64); + const void *data_to_write; + if (can_write_raw_runtime_data) { + /* The passed in data contains no pointers, so it can be written without an additional copy. */ + data_to_write = data; + } + else { + void *buffer = buffer_owner.buffer(); + data_to_write = buffer; + memcpy(buffer, data, len_in_bytes); + + /* Overwrite pointers with their corresponding address identifiers. */ + for (const int i : blender::IndexRange(nr)) { + for (const blender::dna::pointers::PointerInfo &pointer_info : struct_info.pointers) { + const int offset = i * struct_info.size_in_bytes + pointer_info.offset; + const void **p_ptr = reinterpret_cast(POINTER_OFFSET(buffer, offset)); + const void *address_id = get_address_id(*wd, *p_ptr); + *p_ptr = address_id; + } + } + } + BHead bh; bh.code = filecode; - bh.old = adr; + bh.old = address_id; bh.nr = nr; bh.SDNAnr = struct_nr; bh.len = len_in_bytes; @@ -806,11 +935,12 @@ static void writestruct_at_address_nr(WriteData *wd, } if (wd->debug_dst) { - blender::dna::print_structs_at_address(*wd->sdna, struct_nr, data, adr, nr, *wd->debug_dst); + blender::dna::print_structs_at_address( + *wd->sdna, struct_nr, data_to_write, address_id, nr, *wd->debug_dst); } write_bhead(wd, bh); - mywrite(wd, data, size_t(bh.len)); + mywrite(wd, data_to_write, size_t(bh.len)); } static void writestruct_nr( @@ -819,12 +949,15 @@ static void writestruct_nr( writestruct_at_address_nr(wd, filecode, struct_nr, nr, adr, adr); } -static void write_raw_data_in_debug_file(WriteData *wd, const size_t len, const void *adr) +static void write_raw_data_in_debug_file(WriteData *wd, + const size_t len, + const void *address_id, + const void *data) { fmt::memory_buffer buf; fmt::appender dst{buf}; - fmt::format_to(dst, " at {} ({} bytes)\n", adr, len); + fmt::format_to(dst, " at {} ({} bytes)\n", address_id, len); constexpr int bytes_per_row = 8; const int len_digits = std::to_string(std::max(0, len - 1)).size(); @@ -833,7 +966,7 @@ static void write_raw_data_in_debug_file(WriteData *wd, const size_t len, const if (i % bytes_per_row == 0) { fmt::format_to(dst, " {:{}}: ", i, len_digits); } - fmt::format_to(dst, "{:02x} ", reinterpret_cast(adr)[i]); + fmt::format_to(dst, "{:02x} ", reinterpret_cast(data)[i]); if (i % bytes_per_row == bytes_per_row - 1) { fmt::format_to(dst, "\n"); } @@ -848,9 +981,10 @@ static void write_raw_data_in_debug_file(WriteData *wd, const size_t len, const /** * \warning Do not use for structs. */ -static void writedata(WriteData *wd, const int filecode, const size_t len, const void *adr) +static void writedata( + WriteData *wd, const int filecode, const void *data, const size_t len, const void *adr) { - if (adr == nullptr || len == 0) { + if (data == nullptr || len == 0) { return; } @@ -866,20 +1000,27 @@ static void writedata(WriteData *wd, const int filecode, const size_t len, const return; } + const void *address_id = get_address_id(*wd, adr); + BHead bh; bh.code = filecode; - bh.old = adr; + bh.old = address_id; bh.nr = 1; BLI_STATIC_ASSERT(SDNA_RAW_DATA_STRUCT_INDEX == 0, "'raw data' SDNA struct index should be 0") bh.SDNAnr = SDNA_RAW_DATA_STRUCT_INDEX; bh.len = int64_t(len); if (wd->debug_dst) { - write_raw_data_in_debug_file(wd, len, adr); + write_raw_data_in_debug_file(wd, len, address_id, adr); } write_bhead(wd, bh); - mywrite(wd, adr, len); + mywrite(wd, data, len); +} + +static void writedata(WriteData *wd, const int filecode, const size_t len, const void *adr) +{ + writedata(wd, filecode, adr, len, adr); } /** @@ -2079,7 +2220,15 @@ void BLO_write_double_array(BlendWriter *writer, const int64_t num, const double void BLO_write_pointer_array(BlendWriter *writer, const int64_t num, const void *data_ptr) { - BLO_write_raw(writer, sizeof(void *) * size_t(num), data_ptr); + /* Create a temporary copy of the pointer array, because all pointers need to be remapped to + * their stable address ids. */ + blender::Array data = blender::Span( + reinterpret_cast(data_ptr), num); + for (const int64_t i : data.index_range()) { + data[i] = get_address_id(*writer->wd, data[i]); + } + + writedata(writer->wd, BLO_CODE_DATA, data.data(), data.as_span().size_in_bytes(), data_ptr); } void BLO_write_float3_array(BlendWriter *writer, const int64_t num, const float *data_ptr) @@ -2103,13 +2252,15 @@ void BLO_write_shared(BlendWriter *writer, if (data == nullptr) { return; } + const uint64_t address_id = get_address_id_int(*writer->wd, data); if (BLO_write_is_undo(writer)) { MemFile &memfile = *writer->wd->mem.written_memfile; if (sharing_info != nullptr) { if (memfile.shared_storage == nullptr) { memfile.shared_storage = MEM_new(__func__); } - if (memfile.shared_storage->map.add(data, sharing_info)) { + if (memfile.shared_storage->sharing_info_by_address_id.add(address_id, {sharing_info, data})) + { /* The undo-step takes (shared) ownership of the data, which also makes it immutable. */ sharing_info->add_user(); /* This size is an estimate, but good enough to count data with many users less. */ diff --git a/source/blender/makesdna/DNA_sdna_pointers.hh b/source/blender/makesdna/DNA_sdna_pointers.hh new file mode 100644 index 00000000000..bb5d6aa517b --- /dev/null +++ b/source/blender/makesdna/DNA_sdna_pointers.hh @@ -0,0 +1,54 @@ +/* SPDX-FileCopyrightText: 2025 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include "BLI_vector.hh" + +#include "DNA_sdna_types.h" + +namespace blender::dna::pointers { + +/** Information about a single pointer in a DNA struct. */ +struct PointerInfo { + /** Offset in bytes from the start of the struct. */ + int64_t offset; + /** Additional information about the pointer which can be useful for debugging. */ + const char *member_type_name = nullptr; + const char *name = nullptr; +}; + +/** All pointers within a DNA struct (including nested structs). */ +struct StructInfo { + /** All pointers in that struct. */ + Vector pointers; + /** Size of the struct in bytes. */ + int size_in_bytes = 0; +}; + +/** + * Contains information about where pointers are stored in DNA structs. + */ +class PointersInDNA { + private: + /** The SDNA that this class belongs to. */ + const SDNA &sdna_; + /** Pointer information about all structs. */ + Vector structs_; + + public: + explicit PointersInDNA(const SDNA &sdna); + + const StructInfo &get_for_struct(const int struct_nr) const + { + return structs_[struct_nr]; + } + + private: + void gather_pointer_members_recursive(const SDNA_Struct &sdna_struct, + int initial_offset, + StructInfo &r_struct_info) const; +}; + +} // namespace blender::dna::pointers diff --git a/source/blender/makesdna/intern/dna_genfile.cc b/source/blender/makesdna/intern/dna_genfile.cc index d76a3c896d6..363b6e56827 100644 --- a/source/blender/makesdna/intern/dna_genfile.cc +++ b/source/blender/makesdna/intern/dna_genfile.cc @@ -32,6 +32,7 @@ #include "DNA_genfile.h" #include "DNA_print.hh" +#include "DNA_sdna_pointers.hh" #include "DNA_sdna_types.h" /* for SDNA ;-) */ /** @@ -1962,6 +1963,58 @@ void DNA_sdna_alias_data_ensure_structs_map(SDNA *sdna) #endif } +namespace blender::dna::pointers { + +PointersInDNA::PointersInDNA(const SDNA &sdna) : sdna_(sdna) +{ + structs_.resize(sdna.structs_num); + for (const int struct_i : IndexRange(sdna.structs_num)) { + const SDNA_Struct &sdna_struct = *sdna.structs[struct_i]; + StructInfo &struct_info = structs_[struct_i]; + + struct_info.size_in_bytes = 0; + for (const int member_i : IndexRange(sdna_struct.members_num)) { + struct_info.size_in_bytes += get_member_size_in_bytes(&sdna_, + &sdna_struct.members[member_i]); + } + + this->gather_pointer_members_recursive(sdna_struct, 0, structs_[struct_i]); + } +} + +void PointersInDNA::gather_pointer_members_recursive(const SDNA_Struct &sdna_struct, + int initial_offset, + StructInfo &r_struct_info) const +{ + int offset = initial_offset; + for (const int member_i : IndexRange(sdna_struct.members_num)) { + const SDNA_StructMember &member = sdna_struct.members[member_i]; + const char *member_type_name = sdna_.types[member.type_index]; + const eStructMemberCategory member_category = get_struct_member_category(&sdna_, &member); + const int array_elem_num = sdna_.members_array_num[member.member_index]; + + if (member_category == STRUCT_MEMBER_CATEGORY_POINTER) { + for (int elem_i = 0; elem_i < array_elem_num; elem_i++) { + const char *member_name = sdna_.members[member.member_index]; + r_struct_info.pointers.append( + {offset + elem_i * sdna_.pointer_size, member_type_name, member_name}); + } + } + else if (member_category == STRUCT_MEMBER_CATEGORY_STRUCT) { + const int substruct_i = DNA_struct_find_index_without_alias(&sdna_, member_type_name); + const SDNA_Struct &sub_sdna_struct = *sdna_.structs[substruct_i]; + int substruct_size = sdna_.types_size[member.type_index]; + for (int elem_i = 0; elem_i < array_elem_num; elem_i++) { + this->gather_pointer_members_recursive( + sub_sdna_struct, offset + elem_i * substruct_size, r_struct_info); + } + } + offset += get_member_size_in_bytes(&sdna_, &member); + } +} + +} // namespace blender::dna::pointers + /** \} */ /* -------------------------------------------------------------------- */