/* SPDX-FileCopyrightText: 2023 Blender Authors * * SPDX-License-Identifier: GPL-2.0-or-later */ #include #include #include #include #include #include #include #include "MEM_guardedalloc.h" #include "mallocn_intern.hh" #include "../../source/blender/blenlib/BLI_strict_flags.h" namespace { struct Local; struct Global; /** * This is stored per thread. Align to cache line size to avoid false sharing. */ struct alignas(128) Local { /** * Retain shared ownership of #Global to make sure that it is not destructed. */ std::shared_ptr global; /** Helps to find bugs during program shutdown. */ bool destructed = false; /** * This is the first created #Local and on the main thread. When the main local data is * destructed, we know that Blender is quitting and that we can't rely on thread locals being * available still. */ bool is_main = false; /** * Number of bytes. This can be negative when e.g. one thread allocates a lot of memory, and * another frees it. It has to be an atomic, because it may be accessed by other threads when the * total memory usage is counted. */ std::atomic mem_in_use = 0; /** * Number of allocated blocks. Can be negative and is atomic for the same reason as above. */ std::atomic blocks_num = 0; /** * Amount of memory used when the peak was last updated. This is used so that we don't have to * update the peak memory usage after every memory allocation. Instead it's only updated when "a * lot" of new memory has been allocated. This makes the peak memory usage a little bit less * accurate, but it's still good enough for practical purposes. */ std::atomic mem_in_use_during_peak_update = 0; Local(); ~Local(); }; /** * This is a singleton that stores global data. It's owned by a `std::shared_ptr` which is owned by * the static variable in #get_global_ptr and all #Local objects. */ struct Global { /** * Mutex that protects the vector below. */ std::mutex locals_mutex; /** * All currently constructed #Local. This must only be accessed when the mutex above is * locked. Individual threads insert and remove themselves here. */ std::vector locals; /** * Number of bytes that are not tracked by #Local. This is necessary because when a thread exits, * its #Local data is freed. The memory counts stored there would be lost. The memory counts may * be non-zero during thread destruction, if the thread did an unequal amount of allocations and * frees (which is perfectly valid behavior as long as other threads have the responsibility to * free any memory that the thread allocated). * * To solve this, the memory counts are added to these global counters when the thread * exists. The global counters are also used when the entire process starts to exit, because the * #Local data of the main thread is already destructed when the leak detection happens (during * destruction of static variables which happens after destruction of thread-locals). */ std::atomic mem_in_use_outside_locals = 0; /** * Number of blocks that are not tracked by #Local, for the same reason as above. */ std::atomic blocks_num_outside_locals = 0; /** * Peak memory usage since the last reset. */ std::atomic peak = 0; }; } // namespace /** * This is true for most of the lifetime of the program. Only when it starts exiting this becomes * false indicating that global counters should be used for correctness. */ static std::atomic use_local_counters = true; /** * When a thread allocated this amount of memory, the peak memory usage is updated. An alternative * would be to update the global peak memory after every allocation, but that would cause much more * overhead with little benefit. */ static constexpr int64_t peak_update_threshold = 1024 * 1024; static std::shared_ptr &get_global_ptr() { static std::shared_ptr global = std::make_shared(); return global; } static Global &get_global() { return *get_global_ptr(); } static Local &get_local_data() { static thread_local Local local; assert(!local.destructed); return local; } Local::Local() { this->global = get_global_ptr(); std::lock_guard lock{this->global->locals_mutex}; if (this->global->locals.empty()) { /* This is the first thread creating #Local, it is therefore the main thread because it's * created through #memory_usage_init. */ this->is_main = true; } /* Register self in the global list. */ this->global->locals.push_back(this); } Local::~Local() { std::lock_guard lock{this->global->locals_mutex}; /* Unregister self from the global list. */ this->global->locals.erase( std::find(this->global->locals.begin(), this->global->locals.end(), this)); /* Don't forget the memory counts stored locally. */ this->global->blocks_num_outside_locals.fetch_add(this->blocks_num, std::memory_order_relaxed); this->global->mem_in_use_outside_locals.fetch_add(this->mem_in_use, std::memory_order_relaxed); if (this->is_main) { /* The main thread started shutting down. Use global counters from now on to avoid accessing * thread-locals after they have been destructed. */ use_local_counters.store(false, std::memory_order_relaxed); } /* Helps to detect when thread locals are accidentally accessed after destruction. */ this->destructed = true; } /** Check if the current memory usage is higher than the peak and update it if yes. */ static void update_global_peak() { Global &global = get_global(); /* Update peak. */ global.peak = std::max(global.peak, memory_usage_current()); std::lock_guard lock{global.locals_mutex}; for (Local *local : global.locals) { assert(!local->destructed); /* Updating this makes sure that the peak is not updated too often, which would degrade * performance. */ local->mem_in_use_during_peak_update = local->mem_in_use.load(std::memory_order_relaxed); } } void memory_usage_init() { /* Makes sure that the static and thread-local variables on the main thread are initialized. */ get_local_data(); } void memory_usage_block_alloc(const size_t size) { if (LIKELY(use_local_counters.load(std::memory_order_relaxed))) { Local &local = get_local_data(); /* Increase local memory counts. This does not cause thread synchronization in the majority of * cases, because each thread has these counters on a separate cache line. It may only cause * synchronization if another thread is computing the total current memory usage at the same * time, which is very rare compared to doing allocations. */ local.blocks_num.fetch_add(1, std::memory_order_relaxed); local.mem_in_use.fetch_add(int64_t(size), std::memory_order_relaxed); /* If a certain amount of new memory has been allocated, update the peak. */ if (local.mem_in_use - local.mem_in_use_during_peak_update > peak_update_threshold) { update_global_peak(); } } else { Global &global = get_global(); /* Increase global memory counts. */ global.blocks_num_outside_locals.fetch_add(1, std::memory_order_relaxed); global.mem_in_use_outside_locals.fetch_add(int64_t(size), std::memory_order_relaxed); } } void memory_usage_block_free(const size_t size) { if (LIKELY(use_local_counters)) { /* Decrease local memory counts. See comment in #memory_usage_block_alloc for details regarding * thread synchronization. */ Local &local = get_local_data(); local.mem_in_use.fetch_sub(int64_t(size), std::memory_order_relaxed); local.blocks_num.fetch_sub(1, std::memory_order_relaxed); } else { Global &global = get_global(); /* Decrease global memory counts. */ global.blocks_num_outside_locals.fetch_sub(1, std::memory_order_relaxed); global.mem_in_use_outside_locals.fetch_sub(int64_t(size), std::memory_order_relaxed); } } size_t memory_usage_block_num() { Global &global = get_global(); std::lock_guard lock{global.locals_mutex}; /* Count the number of active blocks. */ int64_t blocks_num = global.blocks_num_outside_locals; for (const Local *local : global.locals) { blocks_num += local->blocks_num; } return size_t(blocks_num); } size_t memory_usage_current() { Global &global = get_global(); std::lock_guard lock{global.locals_mutex}; /* Count the memory that's currently in use. */ int64_t mem_in_use = global.mem_in_use_outside_locals; for (const Local *local : global.locals) { mem_in_use += local->mem_in_use; } return size_t(mem_in_use); } size_t memory_usage_peak() { update_global_peak(); Global &global = get_global(); return global.peak; } void memory_usage_peak_reset() { Global &global = get_global(); global.peak = memory_usage_current(); }