From 33005ad716aa11ed3e672eccfaced16b3c30734e Mon Sep 17 00:00:00 2001 From: Miguel Pozo Date: Thu, 20 Jun 2024 18:02:44 +0200 Subject: [PATCH] GPU: Non-blocking specialization constants compilation Update the batch specializations compilation to allow using it in an async way. The implementation has 2 main limitations: - Only one batch at a time can be processed, extra batches will be added to a queue. - Binding a specialization variant that is still being compiled will fail. Pull Request: https://projects.blender.org/blender/blender/pulls/123015 --- .../draw/engines/eevee_next/eevee_instance.cc | 5 + .../draw/engines/eevee_next/eevee_pipeline.cc | 24 ---- .../draw/engines/eevee_next/eevee_shader.cc | 51 +++++-- .../draw/engines/eevee_next/eevee_shader.hh | 5 + source/blender/gpu/GPU_shader.hh | 20 ++- source/blender/gpu/intern/gpu_shader.cc | 10 +- .../blender/gpu/intern/gpu_shader_private.hh | 13 +- source/blender/gpu/opengl/gl_shader.cc | 132 +++++++++++------- source/blender/gpu/opengl/gl_shader.hh | 36 ++++- 9 files changed, 208 insertions(+), 88 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index 7d7e91f593b..41b68d41e87 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -112,6 +112,11 @@ void Instance::init(const int2 &output_res, volume_probes.init(); volume.init(); lookdev.init(visible_rect); + + /* Pre-compile specialization constants in parallel (if supported). 
*/ + shaders.precompile_specializations( + render_buffers.data.shadow_id, shadows.get_data().ray_count, shadows.get_data().step_count); + shaders_are_ready_ = shaders.is_ready(is_image_render()); } void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager) diff --git a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc index 98aa6f436e3..871ece23dd7 100644 --- a/source/blender/draw/engines/eevee_next/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee_next/eevee_pipeline.cc @@ -501,30 +501,6 @@ void DeferredLayerBase::gbuffer_pass_sync(Instance &inst) void DeferredLayer::begin_sync() { - if (GPU_use_parallel_compilation()) { - /* Pre-compile specialization constants in parallel. */ - Vector specializations; - for (int i = 0; i < 3; i++) { - GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)); - for (bool use_split_indirect : {false, true}) { - for (bool use_lightprobe_eval : {false, true}) { - for (bool use_transmission : {false, true}) { - specializations.append( - {sh, - {{"render_pass_shadow_id", inst_.render_buffers.data.shadow_id}, - {"use_split_indirect", use_split_indirect}, - {"use_lightprobe_eval", use_lightprobe_eval}, - {"use_transmission", use_transmission}, - {"shadow_ray_count", inst_.shadows.get_data().ray_count}, - {"shadow_ray_step_count", inst_.shadows.get_data().step_count}}}); - } - } - } - } - - GPU_shaders_precompile_specializations(specializations); - } - { prepass_ps_.init(); /* Textures. 
*/ diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index aef6ddda05f..c2be8ac7b4e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -90,20 +90,55 @@ ShaderModule::~ShaderModule() * * \{ */ -bool ShaderModule::is_ready(bool block) +void ShaderModule::precompile_specializations(int render_buffers_shadow_id, + int shadow_ray_count, + int shadow_ray_step_count) { - if (compilation_handle_ == 0) { - return true; + BLI_assert(specialization_handle_ == 0); + + if (!GPU_use_parallel_compilation()) { + return; } - if (block || GPU_shader_batch_is_ready(compilation_handle_)) { - Vector shaders = GPU_shader_batch_finalize(compilation_handle_); - for (int i : IndexRange(MAX_SHADER_TYPE)) { - shaders_[i] = shaders[i]; + Vector specializations; + for (int i = 0; i < 3; i++) { + GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)); + for (bool use_split_indirect : {false, true}) { + for (bool use_lightprobe_eval : {false, true}) { + for (bool use_transmission : {false, true}) { + specializations.append({sh, + {{"render_pass_shadow_id", render_buffers_shadow_id}, + {"use_split_indirect", use_split_indirect}, + {"use_lightprobe_eval", use_lightprobe_eval}, + {"use_transmission", use_transmission}, + {"shadow_ray_count", shadow_ray_count}, + {"shadow_ray_step_count", shadow_ray_step_count}}}); + } + } } } - return compilation_handle_ == 0; + specialization_handle_ = GPU_shader_batch_specializations(specializations); +} + +bool ShaderModule::is_ready(bool block) +{ + if (compilation_handle_) { + if (GPU_shader_batch_is_ready(compilation_handle_) || block) { + Vector shaders = GPU_shader_batch_finalize(compilation_handle_); + for (int i : IndexRange(MAX_SHADER_TYPE)) { + shaders_[i] = shaders[i]; + } + } + } + + if (specialization_handle_) { + while 
(!GPU_shader_batch_specializations_is_ready(specialization_handle_) && block) { + /* Block until ready. */ + } + } + + return compilation_handle_ == 0 && specialization_handle_ == 0; } const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_type) diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index bc886584c76..d83e495922e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -159,6 +159,7 @@ class ShaderModule { private: std::array shaders_; BatchHandle compilation_handle_ = 0; + SpecializationBatchHandle specialization_handle_ = 0; /** Shared shader module across all engine instances. */ static ShaderModule *g_shader_module; @@ -169,6 +170,10 @@ class ShaderModule { bool is_ready(bool block = false); + void precompile_specializations(int render_buffers_shadow_id, + int shadow_ray_count, + int shadow_ray_step_count); + GPUShader *static_shader_get(eShaderType shader_type); GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type); diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index ad34231a9c8..d78a0825c30 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -220,12 +220,30 @@ void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int valu void GPU_shader_constant_float(GPUShader *sh, const char *name, float value); void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value); +using SpecializationBatchHandle = int64_t; + struct ShaderSpecialization { GPUShader *shader; blender::Vector constants; }; -void GPU_shaders_precompile_specializations(blender::Span specializations); +/** + * Request the compilation of multiple specialization constant variations at once, + * allowing the backend to use multithreaded compilation. 
+ * Returns a handle that can be used to poll if all variations have been compiled. + * NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal. + * Batches are processed one by one in FIFO order. + * WARNING: Binding a specialization before the batch finishes will fail. + */ +SpecializationBatchHandle GPU_shader_batch_specializations( + blender::Span specializations); + +/** + * Returns true if all the specializations from the batch have finished their compilation. + * NOTE: Polling this function is required for the compilation process to keep progressing. + * WARNING: Invalidates the handle if it returns true. + */ +bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle); /** \} */ diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index c71942ebb7d..cf69d286fda 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -515,9 +515,15 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value) GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); } -void GPU_shaders_precompile_specializations(Span specializations) +SpecializationBatchHandle GPU_shader_batch_specializations( + blender::Span specializations) { - Context::get()->compiler->precompile_specializations(specializations); + return Context::get()->compiler->precompile_specializations(specializations); +} + +bool GPU_shader_batch_specializations_is_ready(SpecializationBatchHandle &handle) +{ + return Context::get()->compiler->specialization_batch_is_ready(handle); } /** \} */ diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index 19706d24303..d823ca1d2bf 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -181,7 +181,18 @@ class ShaderCompiler { virtual bool batch_is_ready(BatchHandle handle) 
= 0; virtual Vector batch_finalize(BatchHandle &handle) = 0; - virtual void precompile_specializations(Span /*specializations*/){}; + virtual SpecializationBatchHandle precompile_specializations( + Span /*specializations*/) + { + /* No-op.*/ + return 0; + }; + + virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) + { + handle = 0; + return true; + }; }; /* Generic (fully synchronous) implementation for backends that don't implement their own diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 23b43096f82..7eacdb7634b 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -1820,6 +1820,8 @@ BatchHandle GLShaderCompiler::batch_compile(Span GLShaderCompiler::batch_finalize(BatchHandle &handle) BLI_time_sleep_ms(1); } std::scoped_lock lock(mutex_); + + BLI_assert(batches.contains(handle)); Batch batch = batches.pop(handle); Vector result; for (CompilationWork &item : batch.items) { @@ -1888,24 +1892,33 @@ Vector GLShaderCompiler::batch_finalize(BatchHandle &handle) return result; } -void GLShaderCompiler::precompile_specializations(Span specializations) +SpecializationBatchHandle GLShaderCompiler::precompile_specializations( + Span specializations) { BLI_assert(GPU_use_parallel_compilation()); - struct SpecializationWork { - GLShader *shader = nullptr; - GLuint program; - GLSourcesBaked sources; + std::scoped_lock lock(mutex_); - GLCompilerWorker *worker = nullptr; - bool do_async_compilation = false; - bool is_ready = false; - }; + SpecializationBatchHandle handle = next_batch_handle++; - Vector items; - items.reserve(specializations.size()); + specialization_queue.append({handle, specializations}); - for (auto &specialization : specializations) { + return handle; +} + +void GLShaderCompiler::prepare_next_specialization_batch() +{ + BLI_assert(current_specialization_batch.is_ready && !specialization_queue.is_empty()); + + SpecializationRequest &next = 
specialization_queue.first(); + SpecializationBatch &batch = current_specialization_batch; + batch.handle = next.handle; + batch.is_ready = false; + Vector &items = batch.items; + items.clear(); + items.reserve(next.specializations.size()); + + for (auto &specialization : next.specializations) { GLShader *sh = static_cast(unwrap(specialization.shader)); for (const SpecializationConstant &constant : specialization.constants) { const ShaderInput *input = sh->interface->constant_get(constant.name.c_str()); @@ -1932,53 +1945,72 @@ void GLShaderCompiler::precompile_specializations(Span spe item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources); } - bool is_ready = false; - while (!is_ready) { - /* Loop until ready, we can't defer the compilation of required specialization constants. */ - is_ready = true; + specialization_queue.remove(0); +} - for (SpecializationWork &item : items) { - if (item.is_ready) { - continue; - } - std::scoped_lock lock(mutex_); +bool GLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle) +{ + std::scoped_lock lock(mutex_); - if (!item.do_async_compilation) { - /* Compilation will happen locally on shader bind. */ - glDeleteProgram(item.program); - item.program = 0; - item.shader->program_active_->program_id = 0; - item.shader->constants.is_dirty = true; + SpecializationBatch &batch = current_specialization_batch; + + if (handle < batch.handle || (handle == batch.handle && batch.is_ready)) { + handle = 0; + return true; + } + + if (batch.is_ready) { + prepare_next_specialization_batch(); + } + + bool is_ready = true; + for (SpecializationWork &item : batch.items) { + if (item.is_ready) { + continue; + } + + if (!item.do_async_compilation) { + /* Compilation will happen locally on shader bind. 
*/ + glDeleteProgram(item.program); + item.program = 0; + item.shader->program_active_->program_id = 0; + item.shader->constants.is_dirty = true; + item.is_ready = true; + continue; + } + + if (item.worker == nullptr) { + /* Try to acquire an available worker. */ + item.worker = get_compiler_worker(item.sources); + } + else if (item.worker->is_ready()) { + /* Retrieve the binary compiled by the worker. */ + if (item.worker->load_program_binary(item.program)) { + item.worker->release(); + item.worker = nullptr; item.is_ready = true; - continue; } - - if (item.worker == nullptr) { - /* Try to acquire an available worker. */ - item.worker = get_compiler_worker(item.sources); - } - else if (item.worker->is_ready()) { - /* Retrieve the binary compiled by the worker. */ - if (item.worker->load_program_binary(item.program)) { - item.worker->release(); - item.worker = nullptr; - item.is_ready = true; - } - else { - /* Compilation failed, local compilation will be tried later on shader bind. */ - item.do_async_compilation = false; - } - } - else if (worker_is_lost(item.worker)) { - /* We lost the worker, local compilation will be tried later on shader bind. */ + else { + /* Compilation failed, local compilation will be tried later on shader bind. */ item.do_async_compilation = false; } + } + else if (worker_is_lost(item.worker)) { + /* We lost the worker, local compilation will be tried later on shader bind. 
*/ + item.do_async_compilation = false; + } - if (!item.is_ready) { - is_ready = false; - } + if (!item.is_ready) { + is_ready = false; } } + + if (is_ready) { + batch.is_ready = true; + handle = 0; + } + + return is_ready; } /** \} */ diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 5688f85835e..0e5f74f2b1c 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -288,9 +288,38 @@ class GLShaderCompiler : public ShaderCompiler { bool is_ready = false; }; - BatchHandle next_batch_handle = 1; Map batches; + struct SpecializationRequest { + BatchHandle handle; + Vector specializations; + }; + + Vector specialization_queue; + + struct SpecializationWork { + GLShader *shader = nullptr; + GLuint program; + GLSourcesBaked sources; + + GLCompilerWorker *worker = nullptr; + bool do_async_compilation = false; + bool is_ready = false; + }; + + struct SpecializationBatch { + SpecializationBatchHandle handle = 0; + Vector items; + bool is_ready = true; + }; + + SpecializationBatch current_specialization_batch; + void prepare_next_specialization_batch(); + + /* Shared across regular and specialization batches, + * to prevent the use of a wrong handle type. */ + int64_t next_batch_handle = 1; + GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources); bool worker_is_lost(GLCompilerWorker *&worker); @@ -301,7 +330,10 @@ class GLShaderCompiler : public ShaderCompiler { virtual bool batch_is_ready(BatchHandle handle) override; virtual Vector batch_finalize(BatchHandle &handle) override; - virtual void precompile_specializations(Span specializations) override; + virtual SpecializationBatchHandle precompile_specializations( + Span specializations) override; + + virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override; }; #else