Draw: Batch shader compilation for image render

Enable deferred parallel batch compilation for image renders.
This replaces the use of the `WM_job` system with a regular thread,
since `WM_job` requires access to the main context,
which is not available from the render thread.
This also simplifies the system: a single compilation thread is now
created at application startup and deleted at exit.
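
The heart of the change is the worker model: instead of spawning a `WM_job` per
compilation request, a single long-lived thread drains a mutex-protected queue
and sleeps on a condition variable when the queue is empty. Below is a minimal
sketch of that pattern in standard C++. The names (`Material`,
`compile_material`, `queue_append`, `compiler_exit`) are hypothetical
stand-ins, and the sketch folds the patch's explicit stop-flag check and
re-loop into a single predicate wait; it is not the Blender implementation.

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>

struct Material {}; /* Hypothetical stand-in for GPUMaterial. */
static void compile_material(Material * /*mat*/) { /* Expensive shader build. */ }

struct Compiler {
  std::vector<Material *> queue;
  std::mutex queue_mutex;
  std::condition_variable queue_cv;
  bool stop = false; /* Only accessed with `queue_mutex` held. */
};

static Compiler &compiler_data()
{
  static Compiler data;
  return data;
}

/* Worker body: a single thread that lives for the whole application. */
static void compilation_worker()
{
  for (;;) {
    Material *mat = nullptr;
    {
      std::unique_lock lock(compiler_data().queue_mutex);
      /* Sleep until there is work or shutdown is requested. The predicate
       * guards against spurious and missed wakeups. */
      compiler_data().queue_cv.wait(
          lock, [] { return compiler_data().stop || !compiler_data().queue.empty(); });
      if (compiler_data().stop) {
        return;
      }
      /* Pop the tail; the producer side removes canceled entries itself. */
      mat = compiler_data().queue.back();
      compiler_data().queue.pop_back();
    }
    /* Compile outside the lock so producers are never blocked. */
    compile_material(mat);
  }
}

/* Producer side: append a request, then wake the worker. */
static void queue_append(Material *mat)
{
  {
    std::scoped_lock lock(compiler_data().queue_mutex);
    compiler_data().queue.push_back(mat);
  }
  compiler_data().queue_cv.notify_one();
}

/* Shutdown: raise the flag under the lock, notify, and join. */
static void compiler_exit(std::thread &worker)
{
  {
    std::scoped_lock lock(compiler_data().queue_mutex);
    compiler_data().stop = true;
  }
  compiler_data().queue_cv.notify_one();
  worker.join();
}

The actual patch keeps two queues (plain compilations always drain before
optimization passes), hands the worker its own GPU context, and routes thread
creation through `BLI_threadpool_init()` so it integrates with the existing
`BLI_threads` API.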

Pull Request: https://projects.blender.org/blender/blender/pulls/125005
Miguel Pozo
2024-09-06 18:13:43 +02:00
parent 2952498724
commit eab640e044
5 changed files with 142 additions and 182 deletions


@@ -517,6 +517,14 @@ void Instance::render_frame(RenderEngine *engine, RenderLayer *render_layer, con
 {
   /* TODO: Break on RE_engine_test_break(engine) */
   while (!sampling.finished()) {
+    if (materials.queued_shaders_count > 0) {
+      /* Leave some time for shaders to compile. */
+      BLI_time_sleep_ms(50);
+      /** WORKAROUND: Re-sync to check if all shaders are already compiled. */
+      this->render_sync();
+      continue;
+    }
     this->render_sample();
     if ((sampling.sample_index() == 1) || ((sampling.sample_index() % 25) == 0) ||


@@ -167,7 +167,7 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
                                  blender_mat->nodetree :
                                  default_surface_ntree_.nodetree_get(blender_mat);
-  bool use_deferred_compilation = inst_.is_viewport();
+  bool use_deferred_compilation = inst_.is_viewport() || GPU_use_parallel_compilation();
   MaterialPass matpass = MaterialPass();
   matpass.gpumat = inst_.shaders.material_shader_get(


@@ -260,6 +260,8 @@ void DRW_texture_free(GPUTexture *tex);
   } while (0)

 /* Shaders */
+void DRW_shader_init();
+void DRW_shader_exit();
 GPUMaterial *DRW_shader_from_world(World *wo,
                                    bNodeTree *ntree,


@@ -3255,7 +3255,9 @@ void DRW_gpu_context_create()
   WM_system_gpu_context_activate(DST.system_gpu_context);
   /* Be sure to create blender_gpu_context too. */
   DST.blender_gpu_context = GPU_context_create(nullptr, DST.system_gpu_context);
-  /* So we activate the window's one afterwards. */
+  /* Setup compilation context. */
+  DRW_shader_init();
+  /* Activate the window's context afterwards. */
   wm_window_reset_drawable();
 }
@@ -3263,6 +3265,7 @@ void DRW_gpu_context_destroy()
 {
   BLI_assert(BLI_thread_is_main());
   if (DST.system_gpu_context != nullptr) {
+    DRW_shader_exit();
     WM_system_gpu_context_activate(DST.system_gpu_context);
     GPU_context_active_set(DST.blender_gpu_context);
     GPU_context_discard(DST.blender_gpu_context);


@@ -34,11 +34,17 @@
 #include "draw_manager_c.hh"

+#include <atomic>
+#include <condition_variable>
+#include <mutex>

 extern "C" char datatoc_gpu_shader_depth_only_frag_glsl[];
 extern "C" char datatoc_common_fullscreen_vert_glsl[];

-#define USE_DEFERRED_COMPILATION 1
+using namespace blender;

 /* -------------------------------------------------------------------- */
 /** \name Deferred Compilation (DRW_deferred)
  *
@@ -49,59 +55,63 @@ extern "C" char datatoc_common_fullscreen_vert_glsl[];
 struct DRWShaderCompiler {
   /** Default compilation queue. */
-  ListBase queue; /* GPUMaterial */
-  SpinLock list_lock;
+  Vector<GPUMaterial *> queue;
   /** Optimization queue. */
-  ListBase optimize_queue; /* GPUMaterial */
+  Vector<GPUMaterial *> optimize_queue;
+  std::mutex queue_mutex;
+  std::condition_variable queue_cv;

   void *system_gpu_context;
   GPUContext *blender_gpu_context;
-  bool own_context;
+  std::atomic<bool> stop;
 };

-static void drw_deferred_shader_compilation_exec(void *custom_data,
-                                                 wmJobWorkerStatus *worker_status)
+/** NOTE: While the `BLI_threads` API requires a List,
+ * we only create a single thread at application startup and delete it at exit. */
+static ListBase &compilation_threadpool()
+{
+  static ListBase compilation_threadpool_ = {};
+  return compilation_threadpool_;
+}
+
+static DRWShaderCompiler &compiler_data()
+{
+  static DRWShaderCompiler compiler_data_ = {};
+  return compiler_data_;
+}
+
+static void *drw_deferred_shader_compilation_exec(void *)
 {
-  using namespace blender;
-  GPU_render_begin();
-  DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
-  void *system_gpu_context = comp->system_gpu_context;
-  GPUContext *blender_gpu_context = comp->blender_gpu_context;
+  void *system_gpu_context = compiler_data().system_gpu_context;
+  GPUContext *blender_gpu_context = compiler_data().blender_gpu_context;
   BLI_assert(system_gpu_context != nullptr);
   BLI_assert(blender_gpu_context != nullptr);

-  const bool use_main_context_workaround = GPU_use_main_context_workaround();
-  if (use_main_context_workaround) {
-    BLI_assert(system_gpu_context == DST.system_gpu_context);
-    GPU_context_main_lock();
-  }
+  const bool use_parallel_compilation = GPU_use_parallel_compilation();

+  GPU_render_begin();
   WM_system_gpu_context_activate(system_gpu_context);
   GPU_context_active_set(blender_gpu_context);

-  const bool use_parallel_compilation = GPU_use_parallel_compilation();
   Vector<GPUMaterial *> async_mats;

   while (true) {
-    if (worker_status->stop) {
+    if (compiler_data().stop) {
       break;
     }

-    BLI_spin_lock(&comp->list_lock);
-    /* Pop tail because it will be less likely to lock the main thread
+    compiler_data().queue_mutex.lock();
+    /* Pop last because it will be less likely to lock the main thread
      * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-    LinkData *link = (LinkData *)BLI_poptail(&comp->queue);
-    GPUMaterial *mat = link ? (GPUMaterial *)link->data : nullptr;
+    GPUMaterial *mat = compiler_data().queue.is_empty() ? nullptr :
+                                                          compiler_data().queue.pop_last();
     if (mat) {
       /* Avoid another thread freeing the material mid compilation. */
       GPU_material_acquire(mat);
-      MEM_freeN(link);
     }
-    BLI_spin_unlock(&comp->list_lock);
+    compiler_data().queue_mutex.unlock();

     if (mat) {
       /* We have a new material that must be compiled,
@@ -129,26 +139,27 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     else {
       /* Check for Material Optimization job once there are no more
        * shaders to compile. */
-      BLI_spin_lock(&comp->list_lock);
-      /* Pop tail because it will be less likely to lock the main thread
+      compiler_data().queue_mutex.lock();
+      /* Pop last because it will be less likely to lock the main thread
        * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-      LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
-      GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : nullptr;
+      GPUMaterial *optimize_mat = compiler_data().optimize_queue.is_empty() ?
+                                      nullptr :
+                                      compiler_data().optimize_queue.pop_last();
       if (optimize_mat) {
         /* Avoid another thread freeing the material during optimization. */
         GPU_material_acquire(optimize_mat);
       }
-      BLI_spin_unlock(&comp->list_lock);
+      compiler_data().queue_mutex.unlock();

       if (optimize_mat) {
         /* Compile optimized material shader. */
         GPU_material_optimize(optimize_mat);
         GPU_material_release(optimize_mat);
-        MEM_freeN(link);
       }
       else {
         /* No more materials to optimize, or shaders to compile. */
-        break;
+        std::unique_lock lock(compiler_data().queue_mutex);
+        compiler_data().queue_cv.wait(lock);
       }
     }
@@ -158,7 +169,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
   }

   /* We have to wait until all the requested batches are ready,
-   * even if worker_status->stop is true. */
+   * even if compiler_data().stop is true. */
   while (!async_mats.is_empty()) {
     async_mats.remove_if([](GPUMaterial *mat) {
       if (GPU_material_async_try_finalize(mat)) {
@@ -171,118 +182,88 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
   GPU_context_active_set(nullptr);
   WM_system_gpu_context_release(system_gpu_context);
-  if (use_main_context_workaround) {
-    GPU_context_main_unlock();
-  }
   GPU_render_end();
+  return nullptr;
 }

-static void drw_deferred_shader_compilation_free(void *custom_data)
+void DRW_shader_init()
 {
-  DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
-  BLI_spin_lock(&comp->list_lock);
-  LISTBASE_FOREACH (LinkData *, link, &comp->queue) {
-    GPU_material_status_set(static_cast<GPUMaterial *>(link->data), GPU_MAT_CREATED);
-  }
-  LISTBASE_FOREACH (LinkData *, link, &comp->optimize_queue) {
-    GPU_material_optimization_status_set(static_cast<GPUMaterial *>(link->data),
-                                         GPU_MAT_OPTIMIZATION_READY);
-  }
-  BLI_freelistN(&comp->queue);
-  BLI_freelistN(&comp->optimize_queue);
-  BLI_spin_unlock(&comp->list_lock);
-
-  if (comp->own_context) {
-    /* Only destroy if the job owns the context. */
-    WM_system_gpu_context_activate(comp->system_gpu_context);
-    GPU_context_active_set(comp->blender_gpu_context);
-    GPU_context_discard(comp->blender_gpu_context);
-    WM_system_gpu_context_dispose(comp->system_gpu_context);
-    wm_window_reset_drawable();
-  }
-  MEM_freeN(comp);
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  static bool initialized = false;
+  if (initialized) {
+    BLI_assert_unreachable();
+    return;
+  }
+  initialized = true;
+
+  compiler_data().stop = false;
+
+  compiler_data().system_gpu_context = WM_system_gpu_context_create();
+  compiler_data().blender_gpu_context = GPU_context_create(nullptr,
+                                                           compiler_data().system_gpu_context);
+  GPU_context_active_set(nullptr);
+  WM_system_gpu_context_activate(DST.system_gpu_context);
+  GPU_context_active_set(DST.blender_gpu_context);
+
+  BLI_threadpool_init(&compilation_threadpool(), drw_deferred_shader_compilation_exec, 1);
+  BLI_threadpool_insert(&compilation_threadpool(), nullptr);
 }

+void DRW_shader_exit()
+{
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  compiler_data().stop = true;
+  compiler_data().queue_cv.notify_one();
+  BLI_threadpool_end(&compilation_threadpool());
+
+  /* Revert the queued state for the materials that have not been compiled.
+   * Note that this is not strictly needed since this function is called at program exit. */
+  {
+    std::scoped_lock queue_lock(compiler_data().queue_mutex);
+    while (!compiler_data().queue.is_empty()) {
+      GPU_material_status_set(compiler_data().queue.pop_last(), GPU_MAT_CREATED);
+    }
+    while (!compiler_data().optimize_queue.is_empty()) {
+      GPU_material_optimization_status_set(compiler_data().optimize_queue.pop_last(),
+                                           GPU_MAT_OPTIMIZATION_READY);
+    }
+  }
+
+  WM_system_gpu_context_activate(compiler_data().system_gpu_context);
+  GPU_context_active_set(compiler_data().blender_gpu_context);
+  GPU_context_discard(compiler_data().blender_gpu_context);
+  WM_system_gpu_context_dispose(compiler_data().system_gpu_context);
+}

 /**
- * Append either shader compilation or optimization job to deferred queue and
- * ensure shader compilation worker is active.
+ * Append either shader compilation or optimization job to deferred queue.
  * We keep two separate queue's to ensure core compilations always complete before optimization.
  */
 static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
 {
-  const bool use_main_context = GPU_use_main_context_workaround();
-  const bool job_own_context = !use_main_context;
-  BLI_assert(DST.draw_ctx.evil_C);
-  wmWindowManager *wm = CTX_wm_manager(DST.draw_ctx.evil_C);
-  wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C);
-
-  /* Get the running job or a new one if none is running. Can only have one job per type & owner.
-   */
-  wmJob *wm_job = WM_jobs_get(
-      wm, win, wm, "Shaders Compilation", eWM_JobFlag(0), WM_JOB_TYPE_SHADER_COMPILATION);
-
-  DRWShaderCompiler *old_comp = (DRWShaderCompiler *)WM_jobs_customdata_get(wm_job);
-
-  DRWShaderCompiler *comp = static_cast<DRWShaderCompiler *>(
-      MEM_callocN(sizeof(DRWShaderCompiler), "DRWShaderCompiler"));
-  BLI_spin_init(&comp->list_lock);
-
-  if (old_comp) {
-    BLI_spin_lock(&old_comp->list_lock);
-    BLI_movelisttolist(&comp->queue, &old_comp->queue);
-    BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
-    BLI_spin_unlock(&old_comp->list_lock);
-    /* Do not recreate context, just pass ownership. */
-    if (old_comp->system_gpu_context) {
-      comp->system_gpu_context = old_comp->system_gpu_context;
-      comp->blender_gpu_context = old_comp->blender_gpu_context;
-      old_comp->own_context = false;
-      comp->own_context = job_own_context;
-    }
-  }
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);

   /* Add to either compilation or optimization queue. */
   if (is_optimization_job) {
     BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
     GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->optimize_queue, node);
+    compiler_data().optimize_queue.append(mat);
   }
   else {
     GPU_material_status_set(mat, GPU_MAT_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->queue, node);
+    compiler_data().queue.append(mat);
   }

-  /* Create only one context. */
-  if (comp->system_gpu_context == nullptr) {
-    if (use_main_context) {
-      comp->system_gpu_context = DST.system_gpu_context;
-      comp->blender_gpu_context = DST.blender_gpu_context;
-    }
-    else {
-      comp->system_gpu_context = WM_system_gpu_context_create();
-      comp->blender_gpu_context = GPU_context_create(nullptr, comp->system_gpu_context);
-      GPU_context_active_set(nullptr);
-      WM_system_gpu_context_activate(DST.system_gpu_context);
-      GPU_context_active_set(DST.blender_gpu_context);
-    }
-    comp->own_context = job_own_context;
-  }
-
-  WM_jobs_customdata_set(wm_job, comp, drw_deferred_shader_compilation_free);
-  WM_jobs_timer(wm_job, 0.1, NC_MATERIAL | ND_SHADING_DRAW, 0);
-  WM_jobs_delay_start(wm_job, 0.1);
-  WM_jobs_callbacks(wm_job, drw_deferred_shader_compilation_exec, nullptr, nullptr, nullptr);
-
-  G.is_break = false;
-
-  WM_jobs_start(wm, wm_job);
+  compiler_data().queue_cv.notify_one();
 }

 static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
@@ -291,16 +272,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
     return;
   }

-  /* Do not defer the compilation if we are rendering for image.
-   * deferred rendering is only possible when `evil_C` is available */
-  if (DST.draw_ctx.evil_C == nullptr || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
-    deferred = false;
-  }
-
-  /* Avoid crashes with RenderDoc on Windows + Nvidia. */
-  if (G.debug & G_DEBUG_GPU_RENDERDOC &&
-      GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL))
-  {
+  if (GPU_use_main_context_workaround()) {
     deferred = false;
   }
@@ -363,59 +335,39 @@ static void drw_register_shader_vlattrs(GPUMaterial *mat)
 void DRW_deferred_shader_remove(GPUMaterial *mat)
 {
-  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
-    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
-      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
-          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
-      if (comp != nullptr) {
-        BLI_spin_lock(&comp->list_lock);
-
-        /* Search for compilation job in queue. */
-        LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data));
-        if (link) {
-          BLI_remlink(&comp->queue, link);
-          GPU_material_status_set(static_cast<GPUMaterial *>(link->data), GPU_MAT_CREATED);
-        }
-        MEM_SAFE_FREE(link);
-
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(static_cast<GPUMaterial *>(opti_link->data),
-                                               GPU_MAT_OPTIMIZATION_READY);
-        }
-        BLI_spin_unlock(&comp->list_lock);
-        MEM_SAFE_FREE(opti_link);
-      }
-    }
-  }
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);
+
+  /* Search for compilation job in queue. */
+  if (compiler_data().queue.contains(mat)) {
+    compiler_data().queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_status_set(mat, GPU_MAT_CREATED);
+  }
+
+  /* Search for optimization job in queue. */
+  if (compiler_data().optimize_queue.contains(mat)) {
+    compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+  }
 }

 void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
 {
-  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
-    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
-      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
-          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
-      if (comp != nullptr) {
-        BLI_spin_lock(&comp->list_lock);
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(static_cast<GPUMaterial *>(opti_link->data),
-                                               GPU_MAT_OPTIMIZATION_READY);
-        }
-        BLI_spin_unlock(&comp->list_lock);
-        MEM_SAFE_FREE(opti_link);
-      }
-    }
-  }
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);
+
+  /* Search for optimization job in queue. */
+  if (compiler_data().optimize_queue.contains(mat)) {
+    compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+  }
 }
@@ -485,11 +437,6 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
   drw_register_shader_vlattrs(mat);

-  if (DRW_state_is_image_render()) {
-    /* Do not deferred if doing render. */
-    deferred = false;
-  }
-
   drw_deferred_shader_add(mat, deferred);
   DRW_shader_queue_optimize_material(mat);
   return mat;