GPU: Support deferred compilation in ShaderCompilerGeneric

Update the `ShaderCompilerGeneric` to support deferred compilation
using the batch compilation API, so we can get rid of
`drw_manager_shader`.
This approach also allows supporting non-blocking compilation
for static shaders.

This shouldn't cause any behavior changes at the moment, since batch
compilation is not yet used when parallel compilation is disabled.

This adds `GPUWorker` and `GPUSecondaryContext` as easy-to-use
wrappers for managing secondary GPU contexts.

(Part of #133674)
Pull Request: https://projects.blender.org/blender/blender/pulls/136518
This commit is contained in:
Miguel Pozo
2025-04-07 15:26:25 +02:00
parent 47471ca2b0
commit a5ed5dc4bf
24 changed files with 378 additions and 35 deletions

View File

@@ -98,7 +98,8 @@ set(SRC
intern/gpu_vertex_format.cc
intern/gpu_vertex_format_normals.cc
intern/gpu_viewport.cc
intern/gpu_worker.cc
GPU_attribute_convert.hh
GPU_batch.hh
GPU_batch_presets.hh
@@ -132,6 +133,7 @@ set(SRC
GPU_vertex_buffer.hh
GPU_vertex_format.hh
GPU_viewport.hh
GPU_worker.hh
intern/gpu_backend.hh
intern/gpu_capabilities_private.hh

View File

@@ -85,3 +85,23 @@ void GPU_render_step(bool force_resource_release = false);
/* For when we need access to a system context in order to create a GPU context. */
void GPU_backend_ghost_system_set(void *ghost_system_handle);
void *GPU_backend_ghost_system_get();
namespace blender::gpu {
/**
 * Abstracts secondary GHOST and GPU context creation, activation and deletion.
 *
 * Must be created from the main thread and destructed from the thread it was activated in
 * (see GPUWorker for a usage example).
 *
 * An instance owns both of its context handles and disposes of them in its destructor, so it
 * is neither copyable nor movable: a copy would dispose of the same handles twice.
 */
class GPUSecondaryContext {
 private:
  /* GHOST context handle, stored type-erased. */
  void *ghost_context_ = nullptr;
  /* GPU context created on top of `ghost_context_`. */
  GPUContext *gpu_context_ = nullptr;

 public:
  GPUSecondaryContext();
  ~GPUSecondaryContext();

  GPUSecondaryContext(const GPUSecondaryContext &) = delete;
  GPUSecondaryContext &operator=(const GPUSecondaryContext &) = delete;
  GPUSecondaryContext(GPUSecondaryContext &&) = delete;
  GPUSecondaryContext &operator=(GPUSecondaryContext &&) = delete;

  /* Makes the contexts active on the calling thread.
   * Must be called from a secondary thread. */
  void activate();
};
} // namespace blender::gpu

View File

@@ -0,0 +1,49 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_vector.hh"
#include "GPU_context.hh"
#include <atomic>
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
namespace blender::gpu {
/* Abstracts the creation and management of secondary threads with GPU contexts.
 * Must be created from the main thread.
 * Threads and their context remain alive until destruction. */
class GPUWorker {
private:
/* Worker threads, joined in the destructor. */
Vector<std::unique_ptr<std::thread>> threads_;
/* Notified by `wake_up()` (one thread) and by the destructor (all threads). */
std::condition_variable condition_var_;
/* Mutex the worker threads hold while waiting on `condition_var_`. */
std::mutex mutex_;
/* Set by the destructor to make the worker threads leave their loop. */
std::atomic_bool terminate_ = false;
public:
/**
 * \param threads_count: Number of threads to spawn.
 * \param share_context: If true, all threads will use the same secondary GPUContext,
 * otherwise each thread will have its own unique GPUContext.
 * \param run_cb: The callback function that will be called by a thread on `wake_up()`.
 */
GPUWorker(uint32_t threads_count, bool share_context, std::function<void()> run_cb);
/* Requests termination, wakes every thread and joins them. */
~GPUWorker();
/* Wake up a single thread.
 * NOTE(review): this only notifies the condition variable, nothing is recorded. A call made
 * while no thread is waiting (e.g. all of them are still inside `run_cb`) looks like it is
 * dropped - confirm callers can tolerate that or add a pending-work flag. */
void wake_up()
{
condition_var_.notify_one();
}
private:
/* Thread entry point: activates `context` on the new thread, then calls `run_cb` every time
 * the thread is woken up, until termination is requested. */
void run(std::shared_ptr<GPUSecondaryContext> context, std::function<void()> run_cb);
};
} // namespace blender::gpu

View File

@@ -32,6 +32,7 @@ class DummyBackend : public GPUBackend {
"",
GPU_ARCHITECTURE_IMR);
}
void init_resources() override {}
void delete_resources() override {}
void samplers_update() override {}
void compute_dispatch(int /*groups_x_len*/, int /*groups_y_len*/, int /*groups_z_len*/) override

View File

@@ -32,6 +32,11 @@ class DummyContext : public Context {
void flush() override {}
void finish() override {}
/* The dummy context has no shader compiler: always returns null. */
ShaderCompiler *get_compiler() override
{
return nullptr;
}
void memory_statistics_get(int * /*r_total_mem*/, int * /*r_free_mem*/) override {}
void debug_group_begin(const char * /*unused*/, int /*unused*/) override {}

View File

@@ -33,6 +33,10 @@ class VertBuf;
class GPUBackend {
public:
virtual ~GPUBackend() = default;
/* Called after the main context creation and activation. */
virtual void init_resources() = 0;
/* Called before the main context deletion and deactivation. */
virtual void delete_resources() = 0;
static GPUBackend *get();

View File

@@ -16,8 +16,12 @@
#include "BKE_global.hh"
#include "BLI_assert.h"
#include "BLI_threads.h"
#include "BLI_vector_set.hh"
#include "DNA_userdef_types.h"
#include "GHOST_C-api.h"
#include "GHOST_Types.h"
#include "GPU_context.hh"
@@ -450,6 +454,12 @@ static void gpu_backend_create()
}
}
/* Forwards to `GPUBackend::init_resources()` of the global backend, which must already exist.
 * Called from `GPU_init()`. */
void gpu_backend_init_resources()
{
BLI_assert(g_backend);
g_backend->init_resources();
}
void gpu_backend_delete_resources()
{
BLI_assert(g_backend);
@@ -493,3 +503,96 @@ GPUBackend *GPUBackend::get()
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUSecondaryContext
* \{ */
/* Maps the selected GPU backend to the matching GHOST drawing context type.
 * Only backends that are compiled in are recognized; any other value trips the
 * unreachable assert and yields `GHOST_kDrawingContextTypeNone`. */
static GHOST_TDrawingContextType ghost_context_type()
{
  const auto selected_backend = GPU_backend_type_selection_get();

#ifdef WITH_OPENGL_BACKEND
  if (selected_backend == GPU_BACKEND_OPENGL) {
    return GHOST_kDrawingContextTypeOpenGL;
  }
#endif
#ifdef WITH_VULKAN_BACKEND
  if (selected_backend == GPU_BACKEND_VULKAN) {
    return GHOST_kDrawingContextTypeVulkan;
  }
#endif
#ifdef WITH_METAL_BACKEND
  if (selected_backend == GPU_BACKEND_METAL) {
    return GHOST_kDrawingContextTypeMetal;
  }
#endif

  BLI_assert_unreachable();
  return GHOST_kDrawingContextTypeNone;
}
/* Creates a secondary GHOST context and a GPU context on top of it, then re-activates the
 * contexts that were active on the main thread before the call. */
GPUSecondaryContext::GPUSecondaryContext()
{
  /* Contexts can only be created on the main thread. */
  BLI_assert(BLI_thread_is_main());

  /* Remember what is active now, so it can be restored at the end. */
  GHOST_ContextHandle main_thread_ghost_context = GHOST_GetActiveGPUContext();
  GPUContext *main_thread_gpu_context = GPU_context_active_get();

  /* GPU settings for context creation. */
  GHOST_GPUSettings gpu_settings = {0};
  gpu_settings.context_type = ghost_context_type();
  if (G.debug & G_DEBUG_GPU) {
    gpu_settings.flags |= GHOST_gpuDebugContext;
  }
  gpu_settings.preferred_device.index = U.gpu_preferred_index;
  gpu_settings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
  gpu_settings.preferred_device.device_id = U.gpu_preferred_device_id;

  /* Grab the system handle. */
  GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
      GPU_backend_ghost_system_get());
  BLI_assert(ghost_system);

  /* Create a Ghost GPU Context using the system handle. */
  ghost_context_ = GHOST_CreateGPUContext(ghost_system, gpu_settings);
  BLI_assert(ghost_context_);

  /* Create a GPU context for the secondary thread to use. */
  gpu_context_ = GPU_context_create(nullptr, ghost_context_);
  BLI_assert(gpu_context_);

  /* Release the Ghost GPU Context from this thread.
   * The result is only read by the assert, so mark it `[[maybe_unused]]` to keep builds
   * where the assert expands to nothing free of unused-variable warnings. */
  [[maybe_unused]] const GHOST_TSuccess success = GHOST_ReleaseGPUContext(
      reinterpret_cast<GHOST_ContextHandle>(ghost_context_));
  BLI_assert(success);

  /* Restore the main thread contexts.
   * (required as the above context creation also makes it active). */
  GHOST_ActivateGPUContext(main_thread_ghost_context);
  GPU_context_active_set(main_thread_gpu_context);
}
/* Discards the GPU context, then releases and disposes of the GHOST context. */
GPUSecondaryContext::~GPUSecondaryContext()
{
  /* Contexts should be destructed on the thread they were activated. */
  BLI_assert(!BLI_thread_is_main());

  GHOST_ContextHandle ghost_handle = reinterpret_cast<GHOST_ContextHandle>(ghost_context_);

  GPU_context_discard(gpu_context_);
  GHOST_ReleaseGPUContext(ghost_handle);

  /* Disposing goes through the GHOST system that was registered with the backend. */
  void *system_ptr = GPU_backend_ghost_system_get();
  GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(system_ptr);
  BLI_assert(ghost_system);
  GHOST_DisposeGPUContext(ghost_system, ghost_handle);
}
void GPUSecondaryContext::activate()
{
/* Contexts need to be activated in the thread they're going to be used. */
BLI_assert(!BLI_thread_is_main());
GHOST_ActivateGPUContext(reinterpret_cast<GHOST_ContextHandle>(ghost_context_));
GPU_context_active_set(gpu_context_);
}
/** \} */

View File

@@ -37,8 +37,6 @@ class Context {
StateManager *state_manager = nullptr;
Immediate *imm = nullptr;
ShaderCompiler *compiler = nullptr;
/**
* All 4 window frame-buffers.
* None of them are valid in an off-screen context.
@@ -99,6 +97,8 @@ class Context {
/* Will wait until the GPU has finished executing all command. */
virtual void finish() = 0;
virtual ShaderCompiler *get_compiler() = 0;
virtual void memory_statistics_get(int *r_total_mem, int *r_free_mem) = 0;
virtual void debug_group_begin(const char * /*name*/, int /*index*/){};

View File

@@ -29,6 +29,8 @@ void GPU_init()
initialized = true;
gpu_backend_init_resources();
gpu_shader_dependency_init();
gpu_shader_create_info_init();

View File

@@ -10,4 +10,5 @@
/* gpu_backend.cc */
void gpu_backend_init_resources();
void gpu_backend_delete_resources();

View File

@@ -8,6 +8,7 @@
#include "BLI_math_matrix.h"
#include "BLI_string.h"
#include "BLI_time.h"
#include "GPU_capabilities.hh"
#include "GPU_debug.hh"
@@ -265,7 +266,7 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
{
using namespace blender::gpu::shader;
const ShaderCreateInfo &info = *reinterpret_cast<const ShaderCreateInfo *>(_info);
return wrap(Context::get()->compiler->compile(info, false));
return wrap(Context::get()->get_compiler()->compile(info, false));
}
static std::string preprocess_source(StringRefNull original)
@@ -293,7 +294,7 @@ GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info)
info.geometry_source_generated = preprocess_source(info.geometry_source_generated);
info.compute_source_generated = preprocess_source(info.compute_source_generated);
GPUShader *result = wrap(Context::get()->compiler->compile(info, false));
GPUShader *result = wrap(Context::get()->get_compiler()->compile(info, false));
info.vertex_source_generated = vertex_source_original;
info.fragment_source_generated = fragment_source_original;
@@ -365,17 +366,17 @@ BatchHandle GPU_shader_batch_create_from_infos(Span<const GPUShaderCreateInfo *>
using namespace blender::gpu::shader;
Span<const ShaderCreateInfo *> &infos_ = reinterpret_cast<Span<const ShaderCreateInfo *> &>(
infos);
return Context::get()->compiler->batch_compile(infos_);
return Context::get()->get_compiler()->batch_compile(infos_);
}
bool GPU_shader_batch_is_ready(BatchHandle handle)
{
return Context::get()->compiler->batch_is_ready(handle);
return Context::get()->get_compiler()->batch_is_ready(handle);
}
Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle)
{
Vector<Shader *> result = Context::get()->compiler->batch_finalize(handle);
Vector<Shader *> result = Context::get()->get_compiler()->batch_finalize(handle);
return reinterpret_cast<Vector<GPUShader *> &>(result);
}
@@ -543,12 +544,12 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
SpecializationBatchHandle GPU_shader_batch_specializations(
blender::Span<ShaderSpecialization> specializations)
{
return Context::get()->compiler->precompile_specializations(specializations);
return Context::get()->get_compiler()->precompile_specializations(specializations);
}
bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle)
{
return Context::get()->compiler->specialization_batch_is_ready(handle);
return Context::get()->get_compiler()->specialization_batch_is_ready(handle);
}
/** \} */
@@ -953,23 +954,43 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba
/** \name ShaderCompilerGeneric
* \{ */
/* Spawns the compilation worker, unless the main context workaround is in use. */
ShaderCompilerGeneric::ShaderCompilerGeneric()
{
  if (GPU_use_main_context_workaround()) {
    /* No worker thread: `compilation_thread_` stays empty. */
    return;
  }

  /* A single thread with a shared secondary context, running `run_thread()` on wake up. */
  auto compile_queued = [this]() { this->run_thread(); };
  compilation_thread_ = std::make_unique<GPUWorker>(1, true, compile_queued);
}
ShaderCompilerGeneric::~ShaderCompilerGeneric()
{
compilation_thread_.reset();
/* Ensure all the requested batches have been retrieved. */
BLI_assert(batches.is_empty());
BLI_assert(batches_.is_empty());
}
BatchHandle ShaderCompilerGeneric::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
{
std::lock_guard lock(mutex_);
std::unique_lock lock(mutex_);
BatchHandle handle = next_batch_handle++;
batches.add(handle, {{}, infos, true});
Batch &batch = batches.lookup(handle);
batch.shaders.reserve(infos.size());
for (const shader::ShaderCreateInfo *info : infos) {
batch.shaders.append(compile(*info, true));
BatchHandle handle = next_batch_handle_++;
batches_.add(handle, std::make_unique<Batch>());
Batch *batch = batches_.lookup(handle).get();
batch->infos = infos;
batch->shaders.reserve(infos.size());
if (compilation_thread_) {
compilation_queue_.push_back(batch);
lock.unlock();
compilation_thread_->wake_up();
}
else {
for (const shader::ShaderCreateInfo *info : infos) {
batch->shaders.append(compile(*info, false));
}
batch->is_ready = true;
}
return handle;
}
@@ -977,19 +998,47 @@ bool ShaderCompilerGeneric::batch_is_ready(BatchHandle handle)
{
std::lock_guard lock(mutex_);
bool is_ready = batches.lookup(handle).is_ready;
bool is_ready = batches_.lookup(handle)->is_ready;
return is_ready;
}
Vector<Shader *> ShaderCompilerGeneric::batch_finalize(BatchHandle &handle)
{
while (!batch_is_ready(handle)) {
BLI_time_sleep_ms(1);
}
std::lock_guard lock(mutex_);
Vector<Shader *> shaders = batches.pop(handle).shaders;
Vector<Shader *> shaders = batches_.lookup(handle)->shaders;
batches_.pop(handle);
handle = 0;
return shaders;
}
void ShaderCompilerGeneric::run_thread()
{
while (true) {
Batch *batch = nullptr;
{
std::unique_lock<std::mutex> lock(mutex_);
if (compilation_queue_.empty()) {
return;
}
batch = compilation_queue_.front();
compilation_queue_.pop_front();
}
/* Compile */
for (const shader::ShaderCreateInfo *info : batch->infos) {
batch->shaders.append(compile(*info, false));
}
batch->is_ready = true;
}
}
/** \} */
} // namespace blender::gpu

View File

@@ -8,15 +8,16 @@
#pragma once
#include "BLI_map.hh"
#include "BLI_span.hh"
#include "BLI_string_ref.hh"
#include "GPU_shader.hh"
#include "GPU_worker.hh"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_interface.hh"
#include "BLI_map.hh"
#include <deque>
#include <string>
namespace blender::gpu {
@@ -188,19 +189,25 @@ class ShaderCompiler {
};
};
/* Generic (fully synchronous) implementation used as fallback. */
/* Generic implementation used as fallback. */
class ShaderCompilerGeneric : public ShaderCompiler {
private:
struct Batch {
Vector<Shader *> shaders;
Vector<const shader::ShaderCreateInfo *> infos;
bool is_ready = false;
std::atomic_bool is_ready = false;
};
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
BatchHandle next_batch_handle_ = 1;
Map<BatchHandle, std::unique_ptr<Batch>> batches_;
std::mutex mutex_;
std::deque<Batch *> compilation_queue_;
std::unique_ptr<GPUWorker> compilation_thread_;
void run_thread();
public:
ShaderCompilerGeneric();
~ShaderCompilerGeneric() override;
BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;

View File

@@ -0,0 +1,52 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "GPU_worker.hh"
namespace blender::gpu {
/* Creates the secondary context(s) on the calling thread and starts `threads_count` threads,
 * each one executing `run()` with its context and a copy of `run_cb`. */
GPUWorker::GPUWorker(uint32_t threads_count, bool share_context, std::function<void()> run_cb)
{
  /* When sharing, a single context is created up front and handed to every thread. */
  std::shared_ptr<GPUSecondaryContext> common_context;
  if (share_context) {
    common_context = std::make_shared<GPUSecondaryContext>();
  }

  for (uint32_t thread_index = 0; thread_index < threads_count; thread_index++) {
    std::shared_ptr<GPUSecondaryContext> context_for_thread = common_context;
    if (!share_context) {
      context_for_thread = std::make_shared<GPUSecondaryContext>();
    }
    threads_.append(std::make_unique<std::thread>(
        [this, context_for_thread, run_cb]() { this->run(context_for_thread, run_cb); }));
  }
}
/* Requests termination, wakes up every worker thread and waits for all of them to exit. */
GPUWorker::~GPUWorker()
{
  {
    /* Raise the flag while holding `mutex_`. Workers re-check it under the same mutex right
     * before waiting, so a worker either sees the flag and skips the wait, or is already
     * waiting when the notification below is sent. Without this the notification could land
     * between the worker's check and its wait, and `join()` would never return. */
    std::lock_guard<std::mutex> lock(mutex_);
    terminate_ = true;
  }
  condition_var_.notify_all();
  for (std::unique_ptr<std::thread> &thread : threads_) {
    thread->join();
  }
}

/**
 * Thread entry point.
 *
 * \param context: Context activated on this thread before anything else runs.
 * \param run_cb: Called once per wake up, until termination is requested.
 */
void GPUWorker::run(std::shared_ptr<GPUSecondaryContext> context, std::function<void()> run_cb)
{
  context->activate();

  /* Loop until we get the terminate signal. */
  while (!terminate_) {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      /* Checked under the lock, see the destructor. */
      if (!terminate_) {
        /* Wait until wake_up() or destruction.
         * NOTE(review): a `wake_up()` issued while this thread is inside `run_cb()` is not
         * remembered, so the work it announced is only picked up on the next wake up. */
        condition_var_.wait(lock);
      }
    }
    if (terminate_) {
      break;
    }
    run_cb();
  }
}
} // namespace blender::gpu

View File

@@ -40,6 +40,11 @@ class MTLBackend : public GPUBackend {
MTLBackend::platform_exit();
}
/* GPUBackend hook for creating resources that need an active context.
 * Intentionally empty: nothing is created here at the moment. */
void init_resources() override
{
/* Create any resources with context active. */
}
void delete_resources() override
{
/* Delete any resources with context active. */

View File

@@ -776,6 +776,8 @@ class MTLContext : public Context {
GPUVertFormat dummy_vertformat_[GPU_SAMPLER_TYPE_MAX];
VertBuf *dummy_verts_[GPU_SAMPLER_TYPE_MAX] = {nullptr};
ShaderCompiler *compiler;
public:
/* GPUContext interface. */
MTLContext(void *ghost_window, void *ghost_context);
@@ -791,6 +793,11 @@ class MTLContext : public Context {
void flush() override;
void finish() override;
/* Returns the compiler stored in this context's `compiler` member. */
ShaderCompiler *get_compiler() override
{
return compiler;
}
void memory_statistics_get(int *r_total_mem, int *r_free_mem) override;
static MTLContext *get()

View File

@@ -1695,7 +1695,8 @@ void MTLParallelShaderCompiler::parallel_compilation_thread_func(
GPU_context_active_set(blender_gpu_context);
MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);
MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(
metal_context->get_compiler());
/* This context is only for compilation, it does not need it's own instance of the compiler */
shader_compiler->release_parallel_shader_compiler();

View File

@@ -8,6 +8,7 @@
#pragma once
#include "GPU_capabilities.hh"
#include "gpu_backend.hh"
#include "BLI_vector.hh"
@@ -39,7 +40,7 @@ class GLBackend : public GPUBackend {
renderdoc::api::Renderdoc renderdoc_;
#endif
GLShaderCompiler compiler_;
ShaderCompiler *compiler_;
public:
GLBackend()
@@ -55,10 +56,21 @@ class GLBackend : public GPUBackend {
GLBackend::platform_exit();
}
/* Creates the shader compiler: the GL specific one when parallel compilation is enabled,
 * the generic one otherwise. */
void init_resources() override
{
  if (!GPU_use_parallel_compilation()) {
    compiler_ = new ShaderCompilerGeneric();
    return;
  }
  compiler_ = new GLShaderCompiler();
}
/* Frees the samplers and the shader compiler created by `init_resources()`. */
void delete_resources() override
{
  /* Delete any resources with context active. */
  GLTexture::samplers_free();
  delete compiler_;
  /* `get_compiler()` hands out this pointer, don't leave it dangling. */
  compiler_ = nullptr;
}
static GLBackend *get()
@@ -66,9 +78,9 @@ class GLBackend : public GPUBackend {
return static_cast<GLBackend *>(GPUBackend::get());
}
GLShaderCompiler *get_compiler()
ShaderCompiler *get_compiler()
{
return &compiler_;
return compiler_;
}
void samplers_update() override

View File

@@ -10,10 +10,12 @@
# include "BLI_fileops.hh"
# include "BLI_hash.hh"
# include "BLI_path_utils.hh"
# include "BLI_threads.h"
# include "CLG_log.h"
# include "GHOST_C-api.h"
# include "GPU_context.hh"
# include "GPU_init_exit.hh"
# include "gpu_capabilities_private.hh"
# include <iostream>
# include <string>
@@ -153,6 +155,10 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
# endif
CLG_init();
BLI_threadapi_init();
/* Prevent the ShaderCompiler from spawning extra threads/contexts, we don't need them. */
GCaps.use_main_context_workaround = true;
std::string name = subprocess_name;
SharedMemory shared_mem(name, compilation_subprocess_shared_memory_size, false);

View File

@@ -84,8 +84,6 @@ GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list
active_fb = back_left;
static_cast<GLStateManager *>(state_manager)->active_fb = static_cast<GLFrameBuffer *>(
active_fb);
compiler = GLBackend::get()->get_compiler();
}
GLContext::~GLContext()
@@ -183,6 +181,17 @@ void GLContext::finish()
/** \} */
/* -------------------------------------------------------------------- */
/** \name ShaderCompiler
* \{ */
/* The compiler is owned by the backend, every GL context hands out the same one. */
ShaderCompiler *GLContext::get_compiler()
{
  GLBackend *backend = GLBackend::get();
  return backend->get_compiler();
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Safe object deletion
*

View File

@@ -123,6 +123,8 @@ class GLContext : public Context {
void flush() override;
void finish() override;
ShaderCompiler *get_compiler() override;
void memory_statistics_get(int *r_total_mem, int *r_free_mem) override;
static GLContext *get()

View File

@@ -447,6 +447,7 @@ void VKBackend::platform_exit()
}
}
void VKBackend::init_resources() {}
void VKBackend::delete_resources() {}
void VKBackend::samplers_update()

View File

@@ -54,6 +54,7 @@ class VKBackend : public GPUBackend {
*/
static bool is_supported();
void init_resources() override;
void delete_resources() override;
void samplers_update() override;

View File

@@ -32,8 +32,6 @@ VKContext::VKContext(void *ghost_window, void *ghost_context)
back_left = new VKFrameBuffer("back_left");
front_left = new VKFrameBuffer("front_left");
active_fb = back_left;
compiler = &VKBackend::get().shader_compiler;
}
VKContext::~VKContext()
@@ -48,7 +46,6 @@ VKContext::~VKContext()
VKBackend::get().device.context_unregister(*this);
imm = nullptr;
compiler = nullptr;
}
void VKContext::sync_backbuffer(bool cycle_resource_pool)
@@ -184,6 +181,11 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags,
void VKContext::finish() {}
/* Returns the address of the backend's `shader_compiler` member. */
ShaderCompiler *VKContext::get_compiler()
{
  auto &backend = VKBackend::get();
  return &backend.shader_compiler;
}
void VKContext::memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb)
{
const VKDevice &device = VKBackend::get().device;

View File

@@ -79,6 +79,8 @@ class VKContext : public Context, NonCopyable {
VkFence signal_fence = VK_NULL_HANDLE);
void finish() override;
ShaderCompiler *get_compiler() override;
void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override;
void debug_group_begin(const char *, int) override;