diff --git a/source/blender/blenkernel/BKE_bake_items_serialize.hh b/source/blender/blenkernel/BKE_bake_items_serialize.hh index 4f73b20e9f8..349b8c41ce9 100644 --- a/source/blender/blenkernel/BKE_bake_items_serialize.hh +++ b/source/blender/blenkernel/BKE_bake_items_serialize.hh @@ -72,6 +72,24 @@ class BlobWriteSharing : NonCopyable, NonMovable { */ Map stored_by_runtime_; + struct SliceHash { + uint64_t a; + uint64_t b; + + BLI_STRUCT_EQUALITY_OPERATORS_2(SliceHash, a, b) + + uint64_t hash() const + { + return get_default_hash(this->a, this->b); + } + }; + + /** + * Remembers where data was stored based on the hash of the data. This allows us to skip writing + * the same array again if it has the same hash. + */ + Map slice_by_content_hash_; + public: ~BlobWriteSharing(); @@ -84,6 +102,14 @@ class BlobWriteSharing : NonCopyable, NonMovable { [[nodiscard]] std::shared_ptr write_implicitly_shared( const ImplicitSharingInfo *sharing_info, FunctionRef()> write_fn); + + /** + * Checks if the given data was written before. If it was, it's not written again, but a + * reference to the previously written data is returned. If the data is new, it's written now. + * Its hash is remembered so that the same data won't be written again. + */ + [[nodiscard]] std::shared_ptr write_deduplicated( + BlobWriter &writer, const void *data, int64_t size_in_bytes); }; /** diff --git a/source/blender/blenkernel/intern/bake_items_serialize.cc b/source/blender/blenkernel/intern/bake_items_serialize.cc index 55c36c21c9d..938d3bcc46e 100644 --- a/source/blender/blenkernel/intern/bake_items_serialize.cc +++ b/source/blender/blenkernel/intern/bake_items_serialize.cc @@ -13,6 +13,7 @@ #include "BLI_endian_defines.h" #include "BLI_endian_switch.h" +#include "BLI_hash_md5.hh" #include "BLI_math_matrix_types.hh" #include "BLI_path_util.h" @@ -131,6 +132,16 @@ DictionaryValuePtr BlobWriteSharing::write_implicitly_shared( }); } +std::shared_ptr BlobWriteSharing::write_deduplicated( + BlobWriter &writer, const void *data, const int64_t size_in_bytes) +{ + SliceHash content_hash; + BLI_hash_md5_buffer(static_cast(data), size_in_bytes, &content_hash); + const BlobSlice slice = slice_by_content_hash_.lookup_or_add_cb( + content_hash, [&]() { return writer.write(data, size_in_bytes); }); + return slice.serialize(); +} + std::optional BlobReadSharing::read_shared( const DictionaryValue &io_data, FunctionRef()> read_fn) const @@ -202,9 +213,12 @@ static std::optional get_data_type_from_io_name(const StringRef * Write the data and remember which endianness the data had. */ static std::shared_ptr write_blob_raw_data_with_endian( - BlobWriter &blob_writer, const void *data, const int64_t size_in_bytes) + BlobWriter &blob_writer, + BlobWriteSharing &blob_sharing, + const void *data, + const int64_t size_in_bytes) { - auto io_data = blob_writer.write(data, size_in_bytes).serialize(); + auto io_data = blob_sharing.write_deduplicated(blob_writer, data, size_in_bytes); if (ENDIAN_ORDER == B_ENDIAN) { io_data->append_str("endian", get_endian_io_name(ENDIAN_ORDER)); } @@ -255,10 +269,11 @@ static std::shared_ptr write_blob_raw_data_with_endian( /** Write bytes ignoring endianness. */ static std::shared_ptr write_blob_raw_bytes(BlobWriter &blob_writer, + BlobWriteSharing &blob_sharing, const void *data, const int64_t size_in_bytes) { - return blob_writer.write(data, size_in_bytes).serialize(); + return blob_sharing.write_deduplicated(blob_writer, data, size_in_bytes); } /** Read bytes ignoring endianness. */ @@ -278,14 +293,16 @@ static std::shared_ptr write_blob_raw_bytes(BlobWriter &blob_wr } static std::shared_ptr write_blob_simple_gspan(BlobWriter &blob_writer, + BlobWriteSharing &blob_sharing, const GSpan data) { const CPPType &type = data.type(); BLI_assert(type.is_trivial()); if (type.size() == 1 || type.is()) { - return write_blob_raw_bytes(blob_writer, data.data(), data.size_in_bytes()); + return write_blob_raw_bytes(blob_writer, blob_sharing, data.data(), data.size_in_bytes()); } - return write_blob_raw_data_with_endian(blob_writer, data.data(), data.size_in_bytes()); + return write_blob_raw_data_with_endian( + blob_writer, blob_sharing, data.data(), data.size_in_bytes()); } [[nodiscard]] static bool read_blob_simple_gspan(const BlobReader &blob_reader, @@ -327,7 +344,7 @@ static std::shared_ptr write_blob_shared_simple_gspan( const ImplicitSharingInfo *sharing_info) { return blob_sharing.write_implicitly_shared( - sharing_info, [&]() { return write_blob_simple_gspan(blob_writer, data); }); + sharing_info, [&]() { return write_blob_simple_gspan(blob_writer, blob_sharing, data); }); } [[nodiscard]] static const void *read_blob_shared_simple_gspan( @@ -814,10 +831,11 @@ static std::shared_ptr serialize_geometry_set(const GeometrySet serialize_geometry_set(reference.geometry_set(), blob_writer, blob_sharing)); } - io_instances->append("transforms", - write_blob_simple_gspan(blob_writer, instances.transforms())); - io_instances->append("handles", - write_blob_simple_gspan(blob_writer, instances.reference_handles())); + io_instances->append( + "transforms", write_blob_simple_gspan(blob_writer, blob_sharing, instances.transforms())); + io_instances->append( + "handles", + write_blob_simple_gspan(blob_writer, blob_sharing, instances.reference_handles())); auto io_attributes = serialize_attributes( instances.attributes(), blob_writer, blob_sharing, {"position"}); @@ -1037,7 +1055,8 @@ static void serialize_bake_item(const BakeItem &item, r_io_item.append_str("data", string_state_item->value()); } else { - r_io_item.append("data", write_blob_raw_bytes(blob_writer, str.data(), str.size())); + r_io_item.append("data", + write_blob_raw_bytes(blob_writer, blob_sharing, str.data(), str.size())); } } else if (const auto *primitive_state_item = dynamic_cast(&item)) {