Files
test2/source/blender/draw/engines/eevee/eevee_lightcache.cc
Jacques Lucke b7a1325c3c BLI: use blender::Mutex by default which wraps tbb::mutex
This patch adds a new `BLI_mutex.hh` header which adds `blender::Mutex` as alias
for either `tbb::mutex` or `std::mutex` depending on whether TBB is enabled.

Description copied from the patch:
```
/**
 * blender::Mutex should be used as the default mutex in Blender. It implements a subset of the API
 * of std::mutex but has overall better guaranteed properties. It can be used with RAII helpers
 * like std::lock_guard. However, it is not compatible with e.g. std::condition_variable. So one
 * still has to use std::mutex for that case.
 *
 * The mutex provided by TBB has these properties:
 * - It's as fast as a spin-lock in the non-contended case, i.e. when no other thread is trying to
 *   lock the mutex at the same time.
 * - In the contended case, it spins a couple of times but then blocks to avoid draining system
 *   resources by spinning for a long time.
 * - It's only 1 byte large, compared to e.g. 40 bytes when using the std::mutex of GCC. This makes
 *   it more feasible to have many smaller mutexes which can improve scalability of algorithms
 *   compared to using fewer larger mutexes. Also it just reduces "memory slop" across Blender.
 * - It is *not* a fair mutex, i.e. it's not guaranteed that a thread will ever be able to lock the
 *   mutex when there is always more than one thread trying to lock it. In the majority of
 *   cases, using a fair mutex just causes extra overhead without any benefit. std::mutex is not
 *   guaranteed to be fair either.
 */
 ```

The performance benchmark suggests that the impact is negligible in almost
all cases. The only benchmarks that show interesting behavior are the ones
testing foreach zones in Geometry Nodes. These tests are explicitly testing
overhead, which I still have to reduce over time. So it's not unexpected that
changing the mutex has an impact there. What's interesting is that on macos the
performance improves a lot while on linux it gets worse. Since that overhead
should eventually be removed almost entirely, I don't really consider that
blocking.

Links:
* Documentation of different mutex flavors in TBB:
  https://www.intel.com/content/www/us/en/docs/onetbb/developer-guide-api-reference/2021-12/mutex-flavors.html
* Older implementation of a similar mutex by me:
  https://archive.blender.org/developer/differential/0016/0016711/index.html
* Interesting read regarding how a mutex can be this small:
  https://webkit.org/blog/6161/locking-in-webkit/

Pull Request: https://projects.blender.org/blender/blender/pulls/138370
2025-05-07 04:53:16 +02:00

364 lines
11 KiB
C++

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup eevee
*
* Contains everything about light baking.
*/
#include "DRW_engine.hh"
#include "DRW_render.hh"
#include "BKE_global.hh"
#include "BKE_lightprobe.h"
#include "DNA_lightprobe_types.h"
#include "BLI_mutex.hh"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "DEG_depsgraph_build.hh"
#include "DEG_depsgraph_query.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "WM_api.hh"
#include "WM_types.hh"
#include "wm_window.hh"
#include "eevee_instance.hh"
#include "eevee_lightcache.hh"
/* -------------------------------------------------------------------- */
/** \name Light Probe Baking
* \{ */
namespace blender::eevee {
/**
 * State and logic of one light-probe baking session.
 *
 * Construction, `update()` and destruction are asserted to happen on the main thread, while
 * `run()` performs the actual bake and is written so that it can execute on a worker thread.
 * Finished grid caches are parked in `bake_result_` by `run()` and handed over to the original
 * objects by `update()`; `result_mutex_` guards every access to that hand-over buffer.
 */
class LightBake {
 private:
  /** Depsgraph created for this bake (render evaluation mode). Freed in the destructor. */
  Depsgraph *depsgraph_;
  /** Scene frame to evaluate the depsgraph at. */
  int frame_;
  /** Milliseconds. Delay the start of the baking to not slow down interactions (TODO: remove). */
  int delay_ms_;
  /**
   * Reference to the operator report string to print messages to the UI.
   * Should be threadsafe to write to as it gets read by the operator code only if the job is
   * finished.
   */
  std::string &report_;
  /**
   * If running in parallel (in a separate thread), use this context.
   * Created on main thread but first bound in worker thread.
   */
  void *gl_context_ = nullptr;
  /** Context associated to `gl_context_`. Created in the worker thread. */
  GPUContext *gpu_context_ = nullptr;
  /** Baking instance. Created and freed in the worker thread. */
  Instance *instance_ = nullptr;
  /** Manager used for command submission. Created and freed in the worker thread. */
  draw::Manager *manager_ = nullptr;
  /** Light-probe original objects to bake. */
  Vector<Object *> original_probes_;
  /**
   * Frame to copy to original objects during update. This is needed to avoid race conditions.
   * One slot per entry of `original_probes_`. A non-null slot is owned by this class until
   * `update()` passes it to the original object.
   */
  Vector<LightProbeGridCacheFrame *> bake_result_;
  /** Protects `bake_result_`, which is written by `run()` and consumed by `update()`. */
  Mutex result_mutex_;

 public:
  /**
   * Must be called from the main thread.
   *
   * \param probes: Original light-probe objects to bake, copied into the instance.
   * \param run_as_job: When true and the main-context workaround is not in use, a system GPU
   * context is created here so that `run()` can bind it later.
   * \param report: String receiving the info message of the baking instance.
   * \param frame: Frame the depsgraph is evaluated at before baking.
   * \param delay_ms: Time `run()` sleeps before it starts using the GPU.
   */
  LightBake(Main *bmain,
            ViewLayer *view_layer,
            Scene *scene,
            Span<Object *> probes,
            bool run_as_job,
            std::string &report,
            int frame,
            int delay_ms = 0)
      : depsgraph_(DEG_graph_new(bmain, scene, view_layer, DAG_EVAL_RENDER)),
        frame_(frame),
        delay_ms_(delay_ms),
        report_(report),
        original_probes_(probes)
  {
    BLI_assert(BLI_thread_is_main());
    bake_result_.resize(probes.size());
    bake_result_.fill(nullptr);
    if (run_as_job && !GPU_use_main_context_workaround()) {
      /* This needs to happen in main thread. */
      gl_context_ = WM_system_gpu_context_create();
      wm_window_reset_drawable();
    }
  }

  /**
   * Must be called from the main thread.
   * Releases the depsgraph and any bake result that `update()` never transferred.
   */
  ~LightBake()
  {
    BLI_assert(BLI_thread_is_main());
    {
      std::scoped_lock lock(result_mutex_);
      /* Results still stored here are owned by this object: free them so they do not leak. */
      for (auto i : bake_result_.index_range()) {
        if (bake_result_[i] != nullptr) {
          BKE_lightprobe_grid_cache_frame_free(bake_result_[i]);
          bake_result_[i] = nullptr;
        }
      }
    }
    DEG_graph_free(depsgraph_);
  }

  /**
   * Called from main thread.
   * Copy result to original scene data.
   * Note that since this is in the main thread, the viewport cannot be using the light cache.
   * So there is no race condition here.
   */
  void update()
  {
    BLI_assert(BLI_thread_is_main());
    for (auto i : bake_result_.index_range()) {
      Object *orig_ob = original_probes_[i];
      {
        std::scoped_lock lock(result_mutex_);
        /* The slot is inspected under the lock because `run()` may be replacing it from the
         * worker thread at the same time. */
        if (bake_result_[i] == nullptr) {
          continue;
        }
        /* NOTE(review): assumes `lightprobe_cache` was allocated before the bake started. */
        LightProbeObjectCache *cache = orig_ob->lightprobe_cache;
        /* Delete any existing cache. */
        if (cache->grid_static_cache != nullptr) {
          BKE_lightprobe_grid_cache_frame_free(cache->grid_static_cache);
        }
        /* Pass ownership to original object. */
        cache->grid_static_cache = bake_result_[i];
        bake_result_[i] = nullptr;
      }
      /* Propagate the cache to evaluated object. */
      DEG_id_tag_update(&orig_ob->id, ID_RECALC_SYNC_TO_EVAL | ID_RECALC_SHADING);
    }
  }

  /**
   * Called from worker thread.
   *
   * Evaluates the depsgraph, creates the baking instance, bakes every probe in order and finally
   * releases the GPU resources.
   *
   * \param stop: Optional flag polled to abort the bake (in addition to `G.is_break`).
   * \param do_update: Optional flag set to true each time a new result is available.
   * \param progress: Optional output receiving the overall progress in the 0..1 range.
   */
  void run(const bool *stop = nullptr, bool *do_update = nullptr, float *progress = nullptr)
  {
    DEG_graph_relations_update(depsgraph_);
    DEG_evaluate_on_framechange(depsgraph_, frame_);

    if (delay_ms_ > 0) {
      BLI_time_sleep_ms(delay_ms_);
    }

    context_enable();
    manager_ = new draw::Manager();
    instance_ = new eevee::Instance();
    instance_->init_light_bake(depsgraph_, manager_);
    context_disable();

    for (auto i : original_probes_.index_range()) {
      Object *eval_ob = DEG_get_evaluated(depsgraph_, original_probes_[i]);

      instance_->light_bake_irradiance(
          *eval_ob,
          [this]() { context_enable(); },
          [this]() { context_disable(); },
          [&]() { return (G.is_break == true) || ((stop != nullptr) ? *stop : false); },
          [&](LightProbeGridCacheFrame *cache_frame, float grid_progress) {
            {
              std::scoped_lock lock(result_mutex_);
              /* Delete any existing cache that wasn't transferred to the original object. */
              if (bake_result_[i] != nullptr) {
                BKE_lightprobe_grid_cache_frame_free(bake_result_[i]);
              }
              bake_result_[i] = cache_frame;
            }
            if (do_update) {
              *do_update = true;
            }
            if (progress) {
              /* Finished probes plus the fraction of the current one. */
              *progress = (i + grid_progress) / original_probes_.size();
            }
          });

      if (StringRefNull(instance_->info_get()) != "") {
        /* Pipe report to operator. */
        report_ = instance_->info_get();
      }

      if ((G.is_break == true) || (stop != nullptr && *stop == true)) {
        break;
      }
    }

    delete_resources();
  }

 private:
  /**
   * Bind the GPU context matching the current execution mode.
   *
   * \param render_begin: When true, `GPU_render_begin()` is called once the context is bound.
   */
  void context_enable(bool render_begin = true)
  {
    if (GPU_use_main_context_workaround() && !BLI_thread_is_main()) {
      /* Reuse main draw context. */
      GPU_context_main_lock();
      DRW_gpu_context_enable();
    }
    else if (gl_context_ == nullptr) {
      /* Main thread case. */
      DRW_gpu_context_enable();
    }
    else {
      /* Worker thread case. */
      DRW_system_gpu_render_context_enable(gl_context_);
      if (gpu_context_ == nullptr) {
        /* Create GPUContext in worker thread as it needs the correct gl context bound (which can
         * only be bound in worker thread because of some GL driver requirements). */
        gpu_context_ = GPU_context_create(nullptr, gl_context_);
      }
      DRW_blender_gpu_render_context_enable(gpu_context_);
    }

    if (render_begin) {
      GPU_render_begin();
    }
  }

  /**
   * Unbind the context bound by `context_enable()`. `GPU_render_end()` is called in every branch.
   */
  void context_disable()
  {
    if (GPU_use_main_context_workaround() && !BLI_thread_is_main()) {
      /* Reuse main draw context. */
      DRW_gpu_context_disable();
      GPU_render_end();
      GPU_context_main_unlock();
    }
    else if (gl_context_ == nullptr) {
      /* Main thread case. */
      DRW_gpu_context_disable();
      GPU_render_end();
    }
    else {
      /* Worker thread case. */
      DRW_blender_gpu_render_context_disable(gpu_context_);
      GPU_render_end();
      DRW_system_gpu_render_context_disable(gl_context_);
    }
  }

  /**
   * Delete the engine instance and the optional contexts.
   * This needs to run on the worker thread because the OpenGL context can only be ever bound to a
   * single thread (because of some driver implementation), and the resources (textures,
   * buffers,...) need to be freed with the right context bound.
   */
  void delete_resources()
  {
    /* Bind context without GPU_render_begin(). */
    context_enable(false);

    /* Free GPU data (Textures, Frame-buffers, etc...). */
    delete instance_;
    delete manager_;

    /* Delete / unbind the GL & GPU context. Assumes it is currently bound. */
    if (GPU_use_main_context_workaround() && !BLI_thread_is_main()) {
      /* Reuse main draw context. */
      DRW_gpu_context_disable();
      GPU_context_main_unlock();
    }
    else if (gl_context_ == nullptr) {
      /* Main thread case. */
      DRW_gpu_context_disable();
    }
    else {
      /* Worker thread case. */
      if (gpu_context_ != nullptr) {
        GPU_context_discard(gpu_context_);
      }
      DRW_system_gpu_render_context_disable(gl_context_);
      WM_system_gpu_context_dispose(gl_context_);
    }
  }
};
} // namespace blender::eevee
/** \} */
/* -------------------------------------------------------------------- */
/** \name Light Bake Job
* \{ */
using namespace blender::eevee;
/**
 * Set up the window-manager job that bakes `original_probes`.
 *
 * The job is configured (custom data, timer, callbacks) but not started by this function.
 * Any light-bake job already registered is stopped first, and `G.is_break` is reset.
 *
 * \param report: String the bake writes its info message to.
 * \param delay_ms: Delay forwarded to the bake before it starts working.
 * \param frame: Frame the bake evaluates its depsgraph at.
 * \return The configured job, or null when a render job is active for `scene`.
 */
wmJob *EEVEE_lightbake_job_create(wmWindowManager *wm,
                                  wmWindow *win,
                                  Main *bmain,
                                  ViewLayer *view_layer,
                                  Scene *scene,
                                  blender::Vector<Object *> original_probes,
                                  std::string &report,
                                  int delay_ms,
                                  int frame)
{
  /* Do not bake if there is a render going on. */
  if (WM_jobs_test(wm, scene, WM_JOB_TYPE_RENDER)) {
    return nullptr;
  }

  /* Stop existing baking job. */
  WM_jobs_stop_type(wm, nullptr, WM_JOB_TYPE_LIGHT_BAKE);

  const auto job_flags = WM_JOB_EXCL_RENDER | WM_JOB_PRIORITY | WM_JOB_PROGRESS;
  wmJob *job = WM_jobs_get(wm, win, scene, "Bake Lighting", job_flags, WM_JOB_TYPE_LIGHT_BAKE);

  /* The job owns the bake data: it is released through `EEVEE_lightbake_job_data_free`. */
  LightBake *bake_data = new LightBake(bmain,
                                       view_layer,
                                       scene,
                                       std::move(original_probes),
                                       /*run_as_job*/ true,
                                       report,
                                       frame,
                                       delay_ms);
  WM_jobs_customdata_set(job, bake_data, EEVEE_lightbake_job_data_free);

  WM_jobs_timer(job, 0.4, NC_SCENE | NA_EDITED, 0);
  /* `EEVEE_lightbake_update` is registered for both of the last two callback slots. */
  WM_jobs_callbacks(
      job, EEVEE_lightbake_job, nullptr, EEVEE_lightbake_update, EEVEE_lightbake_update);

  G.is_break = false;

  return job;
}
/**
 * Allocate the bake data for a bake that is not run as a window-manager job
 * (`run_as_job` is false, no delay).
 *
 * \return Type-erased `LightBake` pointer, to be released with `EEVEE_lightbake_job_data_free`.
 */
void *EEVEE_lightbake_job_data_alloc(Main *bmain,
                                     ViewLayer *view_layer,
                                     Scene *scene,
                                     blender::Vector<Object *> original_probes,
                                     std::string &report,
                                     int frame)
{
  /* TODO(fclem): Can remove this cast once we remove the previous EEVEE light cache. */
  return static_cast<void *>(new LightBake(bmain,
                                           view_layer,
                                           scene,
                                           std::move(original_probes),
                                           /*run_as_job*/ false,
                                           report,
                                           frame));
}
void EEVEE_lightbake_job_data_free(void *job_data)
{
delete static_cast<LightBake *>(job_data);
}
void EEVEE_lightbake_update(void *job_data)
{
static_cast<LightBake *>(job_data)->update();
}
void EEVEE_lightbake_job(void *job_data, wmJobWorkerStatus *worker_status)
{
static_cast<LightBake *>(job_data)->run(
&worker_status->stop, &worker_status->do_update, &worker_status->progress);
}
/** \} */