GPU: Non-blocking specialization constants compilation
Update the batch compilation of specialization constants so that it can be used asynchronously. The implementation has two main limitations:
- Only one batch can be processed at a time; extra batches are added to a queue.
- Binding a specialization variant that is still being compiled will fail.

Pull Request: https://projects.blender.org/blender/blender/pulls/123015
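A minimal caller-side sketch of the new flow (illustrative only, not part of the commit; it assumes the caller already owns a compiled GPUShader *sh whose create info declares the constants being specialized):

    /* Request one batch of specialization variants up front. */
    SpecializationBatchHandle handle = 0;
    if (GPU_use_parallel_compilation()) {
      blender::Vector<ShaderSpecialization> specializations;
      for (bool use_transmission : {false, true}) {
        specializations.append({sh, {{"use_transmission", use_transmission}}});
      }
      handle = GPU_shader_batch_specializations(specializations);
    }
    /* The handle must then be polled (see GPU_shader_batch_specializations_is_ready in the
     * header below) before any of the requested variants are bound. */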
@@ -112,6 +112,11 @@ void Instance::init(const int2 &output_res,
  volume_probes.init();
  volume.init();
  lookdev.init(visible_rect);

  /* Pre-compile specialization constants in parallel (if supported). */
  shaders.precompile_specializations(
      render_buffers.data.shadow_id, shadows.get_data().ray_count, shadows.get_data().step_count);
  shaders_are_ready_ = shaders.is_ready(is_image_render());
}

void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager)
@@ -501,30 +501,6 @@ void DeferredLayerBase::gbuffer_pass_sync(Instance &inst)

void DeferredLayer::begin_sync()
{
  if (GPU_use_parallel_compilation()) {
    /* Pre-compile specialization constants in parallel. */
    Vector<ShaderSpecialization> specializations;
    for (int i = 0; i < 3; i++) {
      GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
      for (bool use_split_indirect : {false, true}) {
        for (bool use_lightprobe_eval : {false, true}) {
          for (bool use_transmission : {false, true}) {
            specializations.append(
                {sh,
                 {{"render_pass_shadow_id", inst_.render_buffers.data.shadow_id},
                  {"use_split_indirect", use_split_indirect},
                  {"use_lightprobe_eval", use_lightprobe_eval},
                  {"use_transmission", use_transmission},
                  {"shadow_ray_count", inst_.shadows.get_data().ray_count},
                  {"shadow_ray_step_count", inst_.shadows.get_data().step_count}}});
          }
        }
      }
    }

    GPU_shaders_precompile_specializations(specializations);
  }

  {
    prepass_ps_.init();
    /* Textures. */
@@ -90,20 +90,55 @@ ShaderModule::~ShaderModule()
 *
 * \{ */

bool ShaderModule::is_ready(bool block)
void ShaderModule::precompile_specializations(int render_buffers_shadow_id,
                                              int shadow_ray_count,
                                              int shadow_ray_step_count)
{
  if (compilation_handle_ == 0) {
    return true;
  BLI_assert(specialization_handle_ == 0);

  if (!GPU_use_parallel_compilation()) {
    return;
  }

  if (block || GPU_shader_batch_is_ready(compilation_handle_)) {
    Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
    for (int i : IndexRange(MAX_SHADER_TYPE)) {
      shaders_[i] = shaders[i];
  Vector<ShaderSpecialization> specializations;
  for (int i = 0; i < 3; i++) {
    GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
    for (bool use_split_indirect : {false, true}) {
      for (bool use_lightprobe_eval : {false, true}) {
        for (bool use_transmission : {false, true}) {
          specializations.append({sh,
                                  {{"render_pass_shadow_id", render_buffers_shadow_id},
                                   {"use_split_indirect", use_split_indirect},
                                   {"use_lightprobe_eval", use_lightprobe_eval},
                                   {"use_transmission", use_transmission},
                                   {"shadow_ray_count", shadow_ray_count},
                                   {"shadow_ray_step_count", shadow_ray_step_count}}});
        }
      }
    }
  }

  return compilation_handle_ == 0;
  specialization_handle_ = GPU_shader_batch_specializations(specializations);
}

bool ShaderModule::is_ready(bool block)
{
  if (compilation_handle_) {
    if (GPU_shader_batch_is_ready(compilation_handle_) || block) {
      Vector<GPUShader *> shaders = GPU_shader_batch_finalize(compilation_handle_);
      for (int i : IndexRange(MAX_SHADER_TYPE)) {
        shaders_[i] = shaders[i];
      }
    }
  }

  if (specialization_handle_) {
    while (!GPU_shader_batch_specializations_is_ready(specialization_handle_) && block) {
      /* Block until ready. */
    }
  }

  return compilation_handle_ == 0 && specialization_handle_ == 0;
}

const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type)
@@ -159,6 +159,7 @@ class ShaderModule {
 private:
  std::array<GPUShader *, MAX_SHADER_TYPE> shaders_;
  BatchHandle compilation_handle_ = 0;
  SpecializationBatchHandle specialization_handle_ = 0;

  /** Shared shader module across all engine instances. */
  static ShaderModule *g_shader_module;
@@ -169,6 +170,10 @@ class ShaderModule {

  bool is_ready(bool block = false);

  void precompile_specializations(int render_buffers_shadow_id,
                                  int shadow_ray_count,
                                  int shadow_ray_step_count);

  GPUShader *static_shader_get(eShaderType shader_type);
  GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
                                           eMaterialGeometry geometry_type);
@@ -220,12 +220,30 @@ void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int valu
void GPU_shader_constant_float(GPUShader *sh, const char *name, float value);
void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value);

using SpecializationBatchHandle = int64_t;

struct ShaderSpecialization {
  GPUShader *shader;
  blender::Vector<blender::gpu::shader::SpecializationConstant> constants;
};

void GPU_shaders_precompile_specializations(blender::Span<ShaderSpecialization> specializations);
/**
 * Request the compilation of multiple specialization constant variations at once,
 * allowing the backend to use multithreaded compilation.
 * Returns a handle that can be used to poll if all variations have been compiled.
 * NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal.
 * Batches are processed one by one in FIFO order.
 * WARNING: Binding a specialization before the batch finishes will fail.
 */
SpecializationBatchHandle GPU_shader_batch_specializations(
    blender::Span<ShaderSpecialization> specializations);

/**
 * Returns true if all the specializations from the batch have finished their compilation.
 * NOTE: Polling this function is required for the compilation process to keep progressing.
 * WARNING: Invalidates the handle if it returns true.
 */
bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle);

/** \} */
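A hedged usage sketch for the two calls above (illustrative only, assuming a batch was requested elsewhere and its handle kept): poll once per frame for interactive use, or spin for blocking use, since each poll advances the batch and a successful poll clears the handle.

    /* Non-blocking: call once per frame until it returns true (which also zeroes the handle). */
    if (handle != 0 && GPU_shader_batch_specializations_is_ready(handle)) {
      /* All variants of this batch are compiled; binding them is now safe. */
    }

    /* Blocking (e.g. image renders): spinning is valid because polling drives the batch. */
    while (handle != 0 && !GPU_shader_batch_specializations_is_ready(handle)) {
      /* Keep polling. */
    }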
@@ -515,9 +515,15 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
  GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}

void GPU_shaders_precompile_specializations(Span<ShaderSpecialization> specializations)
SpecializationBatchHandle GPU_shader_batch_specializations(
    blender::Span<ShaderSpecialization> specializations)
{
  Context::get()->compiler->precompile_specializations(specializations);
  return Context::get()->compiler->precompile_specializations(specializations);
}

bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle)
{
  return Context::get()->compiler->specialization_batch_is_ready(handle);
}

/** \} */
@@ -181,7 +181,18 @@ class ShaderCompiler {
  virtual bool batch_is_ready(BatchHandle handle) = 0;
  virtual Vector<Shader *> batch_finalize(BatchHandle &handle) = 0;

  virtual void precompile_specializations(Span<ShaderSpecialization> /*specializations*/){};
  virtual SpecializationBatchHandle precompile_specializations(
      Span<ShaderSpecialization> /*specializations*/)
  {
    /* No-op.*/
    return 0;
  };

  virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle)
  {
    handle = 0;
    return true;
  };
};

/* Generic (fully synchronous) implementation for backends that don't implement their own
@@ -1820,6 +1820,8 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
{
  std::scoped_lock lock(mutex_);

  BLI_assert(batches.contains(handle));
  Batch &batch = batches.lookup(handle);
  if (batch.is_ready) {
    return true;
@@ -1879,6 +1881,8 @@ Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
    BLI_time_sleep_ms(1);
  }
  std::scoped_lock lock(mutex_);

  BLI_assert(batches.contains(handle));
  Batch batch = batches.pop(handle);
  Vector<Shader *> result;
  for (CompilationWork &item : batch.items) {
@@ -1888,24 +1892,33 @@ Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
  return result;
}

void GLShaderCompiler::precompile_specializations(Span<ShaderSpecialization> specializations)
SpecializationBatchHandle GLShaderCompiler::precompile_specializations(
    Span<ShaderSpecialization> specializations)
{
  BLI_assert(GPU_use_parallel_compilation());

  struct SpecializationWork {
    GLShader *shader = nullptr;
    GLuint program;
    GLSourcesBaked sources;
  std::scoped_lock lock(mutex_);

    GLCompilerWorker *worker = nullptr;
    bool do_async_compilation = false;
    bool is_ready = false;
  };
  SpecializationBatchHandle handle = next_batch_handle++;

  Vector<SpecializationWork> items;
  items.reserve(specializations.size());
  specialization_queue.append({handle, specializations});

  for (auto &specialization : specializations) {
  return handle;
}

void GLShaderCompiler::prepare_next_specialization_batch()
{
  BLI_assert(current_specialization_batch.is_ready && !specialization_queue.is_empty());

  SpecializationRequest &next = specialization_queue.first();
  SpecializationBatch &batch = current_specialization_batch;
  batch.handle = next.handle;
  batch.is_ready = false;
  Vector<SpecializationWork> &items = batch.items;
  items.clear();
  items.reserve(next.specializations.size());

  for (auto &specialization : next.specializations) {
    GLShader *sh = static_cast<GLShader *>(unwrap(specialization.shader));
    for (const SpecializationConstant &constant : specialization.constants) {
      const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
@@ -1932,53 +1945,72 @@ void GLShaderCompiler::precompile_specializations(Span<ShaderSpecialization> spe
    item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
  }

  bool is_ready = false;
  while (!is_ready) {
    /* Loop until ready, we can't defer the compilation of required specialization constants. */
    is_ready = true;
  specialization_queue.remove(0);
}

    for (SpecializationWork &item : items) {
      if (item.is_ready) {
        continue;
      }
      std::scoped_lock lock(mutex_);
bool GLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
  std::scoped_lock lock(mutex_);

      if (!item.do_async_compilation) {
        /* Compilation will happen locally on shader bind. */
        glDeleteProgram(item.program);
        item.program = 0;
        item.shader->program_active_->program_id = 0;
        item.shader->constants.is_dirty = true;
  SpecializationBatch &batch = current_specialization_batch;

  if (handle < batch.handle || (handle == batch.handle && batch.is_ready)) {
    handle = 0;
    return true;
  }

  if (batch.is_ready) {
    prepare_next_specialization_batch();
  }

  bool is_ready = true;
  for (SpecializationWork &item : batch.items) {
    if (item.is_ready) {
      continue;
    }

    if (!item.do_async_compilation) {
      /* Compilation will happen locally on shader bind. */
      glDeleteProgram(item.program);
      item.program = 0;
      item.shader->program_active_->program_id = 0;
      item.shader->constants.is_dirty = true;
      item.is_ready = true;
      continue;
    }

      if (item.worker == nullptr) {
        /* Try to acquire an available worker. */
        item.worker = get_compiler_worker(item.sources);
      }
      else if (item.worker->is_ready()) {
        /* Retrieve the binary compiled by the worker. */
        if (item.worker->load_program_binary(item.program)) {
          item.worker->release();
          item.worker = nullptr;
          item.is_ready = true;
          continue;
        }

    if (item.worker == nullptr) {
      /* Try to acquire an available worker. */
      item.worker = get_compiler_worker(item.sources);
    }
    else if (item.worker->is_ready()) {
      /* Retrieve the binary compiled by the worker. */
      if (item.worker->load_program_binary(item.program)) {
        item.worker->release();
        item.worker = nullptr;
        item.is_ready = true;
      }
      else {
        /* Compilation failed, local compilation will be tried later on shader bind. */
        item.do_async_compilation = false;
      }
    }
    else if (worker_is_lost(item.worker)) {
      /* We lost the worker, local compilation will be tried later on shader bind. */
        else {
          /* Compilation failed, local compilation will be tried later on shader bind. */
          item.do_async_compilation = false;
        }
      }
      else if (worker_is_lost(item.worker)) {
        /* We lost the worker, local compilation will be tried later on shader bind. */
        item.do_async_compilation = false;
      }

      if (!item.is_ready) {
        is_ready = false;
      }
    if (!item.is_ready) {
      is_ready = false;
    }
  }

  if (is_ready) {
    batch.is_ready = true;
    handle = 0;
  }

  return is_ready;
}

/** \} */
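The GL implementation above reduces to one active batch plus a FIFO queue of pending requests, with progress made only inside the polling call. A stripped-down sketch of that pattern (illustrative names and plain std containers, not the actual Blender API):

    #include <cstdint>
    #include <deque>

    class SpecializationQueue {
      struct Batch {
        int64_t handle = 0;
        int remaining = 0; /* Work items left to compile. */
      };

      Batch active_;
      std::deque<Batch> pending_;
      int64_t next_handle_ = 1;

     public:
      /* Queue a batch of work_items compilations and return its handle. */
      int64_t submit(int work_items)
      {
        pending_.push_back({next_handle_++, work_items});
        return pending_.back().handle;
      }

      /* Returns true once the batch behind handle is done, and then invalidates the handle,
       * mirroring GPU_shader_batch_specializations_is_ready(). */
      bool poll(int64_t &handle)
      {
        /* Handles grow monotonically, so anything older than the active batch is done. */
        if (handle < active_.handle || (handle == active_.handle && active_.remaining == 0)) {
          handle = 0;
          return true;
        }
        /* Start the next queued batch once the active one has finished (FIFO order). */
        if (active_.remaining == 0 && !pending_.empty()) {
          active_ = pending_.front();
          pending_.pop_front();
        }
        /* Stand-in for "make some asynchronous compilation progress". */
        if (active_.remaining > 0) {
          active_.remaining--;
        }
        if (handle == active_.handle && active_.remaining == 0) {
          handle = 0;
          return true;
        }
        return false;
      }
    };

Polling this from the render loop each frame plays the same role as ShaderModule::is_ready() in the EEVEE-side changes above.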
@@ -288,9 +288,38 @@ class GLShaderCompiler : public ShaderCompiler {
    bool is_ready = false;
  };

  BatchHandle next_batch_handle = 1;
  Map<BatchHandle, Batch> batches;

  struct SpecializationRequest {
    BatchHandle handle;
    Vector<ShaderSpecialization> specializations;
  };

  Vector<SpecializationRequest> specialization_queue;

  struct SpecializationWork {
    GLShader *shader = nullptr;
    GLuint program;
    GLSourcesBaked sources;

    GLCompilerWorker *worker = nullptr;
    bool do_async_compilation = false;
    bool is_ready = false;
  };

  struct SpecializationBatch {
    SpecializationBatchHandle handle = 0;
    Vector<SpecializationWork> items;
    bool is_ready = true;
  };

  SpecializationBatch current_specialization_batch;
  void prepare_next_specialization_batch();

  /* Shared across regular and specialization batches,
   * to prevent the use of a wrong handle type. */
  int64_t next_batch_handle = 1;

  GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
  bool worker_is_lost(GLCompilerWorker *&worker);

@@ -301,7 +330,10 @@ class GLShaderCompiler : public ShaderCompiler {
  virtual bool batch_is_ready(BatchHandle handle) override;
  virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;

  virtual void precompile_specializations(Span<ShaderSpecialization> specializations) override;
  virtual SpecializationBatchHandle precompile_specializations(
      Span<ShaderSpecialization> specializations) override;

  virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override;
};

#else