GPU: Non-blocking specialization constants compilation

Update the batch specialization-constants compilation so that it can be
used asynchronously.

The implementation has 2 main limitations:
- Only one batch can be processed at a time; extra batches are
  added to a queue.
- Binding a specialization variant that is still being compiled will fail.

Pull Request: https://projects.blender.org/blender/blender/pulls/123015
This commit is contained in:
Miguel Pozo
2024-06-20 18:02:44 +02:00
parent 8cb0b347ae
commit 33005ad716
9 changed files with 208 additions and 88 deletions

View File

@@ -112,6 +112,11 @@ void Instance::init(const int2 &output_res,
volume_probes.init();
volume.init();
lookdev.init(visible_rect);
/* Pre-compile specialization constants in parallel (if supported). */
shaders.precompile_specializations(
render_buffers.data.shadow_id, shadows.get_data().ray_count, shadows.get_data().step_count);
shaders_are_ready_ = shaders.is_ready(is_image_render());
}
void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager)

View File

@@ -501,30 +501,6 @@ void DeferredLayerBase::gbuffer_pass_sync(Instance &inst)
void DeferredLayer::begin_sync()
{
if (GPU_use_parallel_compilation()) {
/* Pre-compile specialization constants in parallel. */
Vector<ShaderSpecialization> specializations;
for (int i = 0; i < 3; i++) {
GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
for (bool use_split_indirect : {false, true}) {
for (bool use_lightprobe_eval : {false, true}) {
for (bool use_transmission : {false, true}) {
specializations.append(
{sh,
{{"render_pass_shadow_id", inst_.render_buffers.data.shadow_id},
{"use_split_indirect", use_split_indirect},
{"use_lightprobe_eval", use_lightprobe_eval},
{"use_transmission", use_transmission},
{"shadow_ray_count", inst_.shadows.get_data().ray_count},
{"shadow_ray_step_count", inst_.shadows.get_data().step_count}}});
}
}
}
}
GPU_shaders_precompile_specializations(specializations);
}
{
prepass_ps_.init();
/* Textures. */

View File

@@ -90,20 +90,55 @@ ShaderModule::~ShaderModule()
*
* \{ */
bool ShaderModule::is_ready(bool block)
void ShaderModule::precompile_specializations(int render_buffers_shadow_id,
int shadow_ray_count,
int shadow_ray_step_count)
{
if (compilation_handle_ == 0) {
return true;
BLI_assert(specialization_handle_ == 0);
if (!GPU_use_parallel_compilation()) {
return;
}
if (block || GPU_shader_batch_is_ready(compilation_handle_)) {
Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
for (int i : IndexRange(MAX_SHADER_TYPE)) {
shaders_[i] = shaders[i];
Vector<ShaderSpecialization> specializations;
for (int i = 0; i < 3; i++) {
GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
for (bool use_split_indirect : {false, true}) {
for (bool use_lightprobe_eval : {false, true}) {
for (bool use_transmission : {false, true}) {
specializations.append({sh,
{{"render_pass_shadow_id", render_buffers_shadow_id},
{"use_split_indirect", use_split_indirect},
{"use_lightprobe_eval", use_lightprobe_eval},
{"use_transmission", use_transmission},
{"shadow_ray_count", shadow_ray_count},
{"shadow_ray_step_count", shadow_ray_step_count}}});
}
}
}
}
return compilation_handle_ == 0;
specialization_handle_ = GPU_shader_batch_specializations(specializations);
}
/* Poll (or, when \a block is true, wait on) the state of the async shader
 * compilation batches owned by this module.
 *
 * \param block: When true, wait until both the static-shader batch and the
 * specialization-constants batch have finished; when false, poll once and
 * return immediately.
 * \return true when no compilation work remains pending.
 *
 * NOTE(review): this relies on `GPU_shader_batch_finalize()` and
 * `GPU_shader_batch_specializations_is_ready()` taking the handle by
 * reference and resetting it to 0 once the batch is consumed — the final
 * return value depends on that side effect; confirm against the GPU API. */
bool ShaderModule::is_ready(bool block)
{
  if (compilation_handle_) {
    /* Finalize when the batch is done, or when we were asked to block
     * (finalize is then presumed to wait internally — TODO confirm). */
    if (GPU_shader_batch_is_ready(compilation_handle_) || block) {
      Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
      for (int i : IndexRange(MAX_SHADER_TYPE)) {
        shaders_[i] = shaders[i];
      }
    }
  }
  if (specialization_handle_) {
    /* Repeated polling is required for the specialization compilation to
     * keep progressing, hence the busy-wait loop when blocking. When
     * `block` is false this still polls exactly once. */
    while (!GPU_shader_batch_specializations_is_ready(specialization_handle_) && block) {
      /* Block until ready. */
    }
  }
  /* Handles are zeroed by the calls above once their batch completes. */
  return compilation_handle_ == 0 && specialization_handle_ == 0;
}
const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)

View File

@@ -159,6 +159,7 @@ class ShaderModule {
private:
std::array<GPUShader *, MAX_SHADER_TYPE> shaders_;
BatchHandle compilation_handle_ = 0;
SpecializationBatchHandle specialization_handle_ = 0;
/** Shared shader module across all engine instances. */
static ShaderModule *g_shader_module;
@@ -169,6 +170,10 @@ class ShaderModule {
bool is_ready(bool block = false);
void precompile_specializations(int render_buffers_shadow_id,
int shadow_ray_count,
int shadow_ray_step_count);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);

View File

@@ -220,12 +220,30 @@ void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int valu
void GPU_shader_constant_float(GPUShader *sh, const char *name, float value);
void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value);
using SpecializationBatchHandle = int64_t;
/* A request to pre-compile one specialization-constant variant of a shader. */
struct ShaderSpecialization {
  /* Shader to specialize (non-owning). */
  GPUShader *shader;
  /* Constant name/value pairs that define the variant to compile. */
  blender::Vector<blender::gpu::shader::SpecializationConstant> constants;
};
void GPU_shaders_precompile_specializations(blender::Span<ShaderSpecialization> specializations);
/**
* Request the compilation of multiple specialization constant variations at once,
* allowing the backend to use multithreaded compilation.
* Returns a handle that can be used to poll if all variations have been compiled.
* NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal.
* Batches are processed one by one in FIFO order.
* WARNING: Binding a specialization before the batch finishes will fail.
*/
SpecializationBatchHandle GPU_shader_batch_specializations(
blender::Span<ShaderSpecialization> specializations);
/**
* Returns true if all the specializations from the batch have finished their compilation.
* NOTE: Polling this function is required for the compilation process to keep progressing.
* WARNING: Invalidates the handle if it returns true.
*/
bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle);
/** \} */

View File

@@ -515,9 +515,15 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}
void GPU_shaders_precompile_specializations(Span<ShaderSpecialization> specializations)
SpecializationBatchHandle GPU_shader_batch_specializations(
blender::Span<ShaderSpecialization> specializations)
{
Context::get()->compiler->precompile_specializations(specializations);
return Context::get()->compiler->precompile_specializations(specializations);
}
/* Poll the active context's compiler for completion of a specialization
 * batch. The backend clears the handle once the batch has finished. */
bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle)
{
  auto &context = *Context::get();
  return context.compiler->specialization_batch_is_ready(handle);
}
/** \} */

View File

@@ -181,7 +181,18 @@ class ShaderCompiler {
virtual bool batch_is_ready(BatchHandle handle) = 0;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) = 0;
virtual void precompile_specializations(Span<ShaderSpecialization> /*specializations*/){};
/* Request compilation of specialization-constant variants.
 * Base implementation: backends without their own support do nothing and
 * return a null (0) handle, which reads as "already ready". */
virtual SpecializationBatchHandle precompile_specializations(
    Span<ShaderSpecialization> /*specializations*/)
{
  /* No-op. */
  return 0;
};
/* Poll whether a specialization batch has finished compiling.
 * Base implementation reports immediate completion and invalidates the
 * handle, matching the contract that a true result clears it. */
virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
  handle = 0;
  return true;
};
};
/* Generic (fully synchronous) implementation for backends that don't implement their own

View File

@@ -1820,6 +1820,8 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
{
std::scoped_lock lock(mutex_);
BLI_assert(batches.contains(handle));
Batch &batch = batches.lookup(handle);
if (batch.is_ready) {
return true;
@@ -1879,6 +1881,8 @@ Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
BLI_time_sleep_ms(1);
}
std::scoped_lock lock(mutex_);
BLI_assert(batches.contains(handle));
Batch batch = batches.pop(handle);
Vector<Shader *> result;
for (CompilationWork &item : batch.items) {
@@ -1888,24 +1892,33 @@ Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
return result;
}
void GLShaderCompiler::precompile_specializations(Span<ShaderSpecialization> specializations)
SpecializationBatchHandle GLShaderCompiler::precompile_specializations(
Span<ShaderSpecialization> specializations)
{
BLI_assert(GPU_use_parallel_compilation());
struct SpecializationWork {
GLShader *shader = nullptr;
GLuint program;
GLSourcesBaked sources;
std::scoped_lock lock(mutex_);
GLCompilerWorker *worker = nullptr;
bool do_async_compilation = false;
bool is_ready = false;
};
SpecializationBatchHandle handle = next_batch_handle++;
Vector<SpecializationWork> items;
items.reserve(specializations.size());
specialization_queue.append({handle, specializations});
for (auto &specialization : specializations) {
return handle;
}
void GLShaderCompiler::prepare_next_specialization_batch()
{
BLI_assert(current_specialization_batch.is_ready && !specialization_queue.is_empty());
SpecializationRequest &next = specialization_queue.first();
SpecializationBatch &batch = current_specialization_batch;
batch.handle = next.handle;
batch.is_ready = false;
Vector<SpecializationWork> &items = batch.items;
items.clear();
items.reserve(next.specializations.size());
for (auto &specialization : next.specializations) {
GLShader *sh = static_cast<GLShader *>(unwrap(specialization.shader));
for (const SpecializationConstant &constant : specialization.constants) {
const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
@@ -1932,53 +1945,72 @@ void GLShaderCompiler::precompile_specializations(Span<ShaderSpecialization> spe
item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
}
bool is_ready = false;
while (!is_ready) {
/* Loop until ready, we can't defer the compilation of required specialization constants. */
is_ready = true;
specialization_queue.remove(0);
}
for (SpecializationWork &item : items) {
if (item.is_ready) {
continue;
}
std::scoped_lock lock(mutex_);
bool GLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
std::scoped_lock lock(mutex_);
if (!item.do_async_compilation) {
/* Compilation will happen locally on shader bind. */
glDeleteProgram(item.program);
item.program = 0;
item.shader->program_active_->program_id = 0;
item.shader->constants.is_dirty = true;
SpecializationBatch &batch = current_specialization_batch;
if (handle < batch.handle || (handle == batch.handle && batch.is_ready)) {
handle = 0;
return true;
}
if (batch.is_ready) {
prepare_next_specialization_batch();
}
bool is_ready = true;
for (SpecializationWork &item : batch.items) {
if (item.is_ready) {
continue;
}
if (!item.do_async_compilation) {
/* Compilation will happen locally on shader bind. */
glDeleteProgram(item.program);
item.program = 0;
item.shader->program_active_->program_id = 0;
item.shader->constants.is_dirty = true;
item.is_ready = true;
continue;
}
if (item.worker == nullptr) {
/* Try to acquire an available worker. */
item.worker = get_compiler_worker(item.sources);
}
else if (item.worker->is_ready()) {
/* Retrieve the binary compiled by the worker. */
if (item.worker->load_program_binary(item.program)) {
item.worker->release();
item.worker = nullptr;
item.is_ready = true;
continue;
}
if (item.worker == nullptr) {
/* Try to acquire an available worker. */
item.worker = get_compiler_worker(item.sources);
}
else if (item.worker->is_ready()) {
/* Retrieve the binary compiled by the worker. */
if (item.worker->load_program_binary(item.program)) {
item.worker->release();
item.worker = nullptr;
item.is_ready = true;
}
else {
/* Compilation failed, local compilation will be tried later on shader bind. */
item.do_async_compilation = false;
}
}
else if (worker_is_lost(item.worker)) {
/* We lost the worker, local compilation will be tried later on shader bind. */
else {
/* Compilation failed, local compilation will be tried later on shader bind. */
item.do_async_compilation = false;
}
}
else if (worker_is_lost(item.worker)) {
/* We lost the worker, local compilation will be tried later on shader bind. */
item.do_async_compilation = false;
}
if (!item.is_ready) {
is_ready = false;
}
if (!item.is_ready) {
is_ready = false;
}
}
if (is_ready) {
batch.is_ready = true;
handle = 0;
}
return is_ready;
}
/** \} */

View File

@@ -288,9 +288,38 @@ class GLShaderCompiler : public ShaderCompiler {
bool is_ready = false;
};
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
struct SpecializationRequest {
BatchHandle handle;
Vector<ShaderSpecialization> specializations;
};
Vector<SpecializationRequest> specialization_queue;
struct SpecializationWork {
GLShader *shader = nullptr;
GLuint program;
GLSourcesBaked sources;
GLCompilerWorker *worker = nullptr;
bool do_async_compilation = false;
bool is_ready = false;
};
struct SpecializationBatch {
SpecializationBatchHandle handle = 0;
Vector<SpecializationWork> items;
bool is_ready = true;
};
SpecializationBatch current_specialization_batch;
void prepare_next_specialization_batch();
/* Shared across regular and specialization batches,
 * to prevent the use of a wrong handle type. */
int64_t next_batch_handle = 1;
GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
bool worker_is_lost(GLCompilerWorker *&worker);
@@ -301,7 +330,10 @@ class GLShaderCompiler : public ShaderCompiler {
virtual bool batch_is_ready(BatchHandle handle) override;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
virtual void precompile_specializations(Span<ShaderSpecialization> specializations) override;
virtual SpecializationBatchHandle precompile_specializations(
Span<ShaderSpecialization> specializations) override;
virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override;
};
#else