diff --git a/source/blender/gpu/GPU_common_types.hh b/source/blender/gpu/GPU_common_types.hh index decad05a10c..5b207bb106d 100644 --- a/source/blender/gpu/GPU_common_types.hh +++ b/source/blender/gpu/GPU_common_types.hh @@ -8,6 +8,8 @@ #pragma once +#include "BLI_string_ref.hh" + /** * Describes the load operation of a frame-buffer attachment at the start of a render pass. */ @@ -70,3 +72,151 @@ enum eGPUFrontFace { GPU_CLOCKWISE, GPU_COUNTERCLOCKWISE, }; + +namespace blender::gpu::shader { + +enum class Type { + /* Types supported natively across all GPU back-ends. */ + FLOAT = 0, + VEC2, + VEC3, + VEC4, + MAT3, + MAT4, + UINT, + UVEC2, + UVEC3, + UVEC4, + INT, + IVEC2, + IVEC3, + IVEC4, + BOOL, + /* Additionally supported types to enable data optimization and native + * support in some GPU back-ends. + * NOTE: These types must be representable in all APIs. E.g. `VEC3_101010I2` is aliased as vec3 + * in the GL back-end, as implicit type conversions from packed normal attribute data to vec3 is + * supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid + * additional data conversions for `GPU_COMP_U8` vertex attributes. */ + VEC3_101010I2, + UCHAR, + UCHAR2, + UCHAR3, + UCHAR4, + CHAR, + CHAR2, + CHAR3, + CHAR4, + USHORT, + USHORT2, + USHORT3, + USHORT4, + SHORT, + SHORT2, + SHORT3, + SHORT4 +}; + +BLI_INLINE int to_component_count(const Type &type) +{ + switch (type) { + case Type::FLOAT: + case Type::UINT: + case Type::INT: + case Type::BOOL: + return 1; + case Type::VEC2: + case Type::UVEC2: + case Type::IVEC2: + return 2; + case Type::VEC3: + case Type::UVEC3: + case Type::IVEC3: + return 3; + case Type::VEC4: + case Type::UVEC4: + case Type::IVEC4: + return 4; + case Type::MAT3: + return 9; + case Type::MAT4: + return 16; + /* Alias special types. 
*/ + case Type::UCHAR: + case Type::USHORT: + return 1; + case Type::UCHAR2: + case Type::USHORT2: + return 2; + case Type::UCHAR3: + case Type::USHORT3: + return 3; + case Type::UCHAR4: + case Type::USHORT4: + return 4; + case Type::CHAR: + case Type::SHORT: + return 1; + case Type::CHAR2: + case Type::SHORT2: + return 2; + case Type::CHAR3: + case Type::SHORT3: + return 3; + case Type::CHAR4: + case Type::SHORT4: + return 4; + case Type::VEC3_101010I2: + return 3; + } + BLI_assert_unreachable(); + return -1; +} + +struct SpecializationConstant { + struct Value { + union { + uint32_t u = 0; /* Zero by default so an unset value compares and hashes deterministically. */ + int32_t i; + float f; + }; + + inline bool operator==(const Value &other) const + { + return u == other.u; + } + }; + + Type type = Type::FLOAT; /* Defined default; the empty constructor below sets nothing. */ + StringRefNull name; + Value value; + + SpecializationConstant() {} + + SpecializationConstant(const char *name, uint32_t value) : type(Type::UINT), name(name) + { + this->value.u = value; + } + + SpecializationConstant(const char *name, int value) : type(Type::INT), name(name) + { + this->value.i = value; + } + + SpecializationConstant(const char *name, float value) : type(Type::FLOAT), name(name) + { + this->value.f = value; + } + + SpecializationConstant(const char *name, bool value) : type(Type::BOOL), name(name) + { + this->value.u = value ? 
1 : 0; + } + + inline bool operator==(const SpecializationConstant &b) const + { + return this->type == b.type && this->name == b.name && this->value == b.value; + } +}; + +} // namespace blender::gpu::shader diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index f11594665b3..ad34231a9c8 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -12,6 +12,7 @@ #include "BLI_span.hh" #include "BLI_vector.hh" +#include "GPU_common_types.hh" #include "GPU_shader_builtin.hh" namespace blender::gpu { @@ -219,6 +220,13 @@ void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int valu void GPU_shader_constant_float(GPUShader *sh, const char *name, float value); void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value); +struct ShaderSpecialization { + GPUShader *shader; + blender::Vector constants; +}; + +void GPU_shaders_precompile_specializations(blender::Span specializations); + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 574a7bbefe0..c71942ebb7d 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -465,9 +465,9 @@ void GPU_shader_transform_feedback_disable(GPUShader *shader) void Shader::specialization_constants_init(const shader::ShaderCreateInfo &info) { using namespace shader; - for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) { + for (const SpecializationConstant &sc : info.specialization_constants_) { constants.types.append(sc.type); - constants.values.append(sc.default_value); + constants.values.append(sc.value); } constants.is_dirty = true; } @@ -515,6 +515,11 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value) GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); } +void 
GPU_shaders_precompile_specializations(Span specializations) +{ + Context::get()->compiler->precompile_specializations(specializations); +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 2ca04ebefcb..f3c34be577d 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -16,6 +16,7 @@ #include "BLI_hash.hh" #include "BLI_string_ref.hh" #include "BLI_vector.hh" +#include "GPU_common_types.hh" #include "GPU_material.hh" #include "GPU_texture.hh" @@ -33,104 +34,6 @@ namespace blender::gpu::shader { _info #endif -enum class Type { - /* Types supported natively across all GPU back-ends. */ - FLOAT = 0, - VEC2, - VEC3, - VEC4, - MAT3, - MAT4, - UINT, - UVEC2, - UVEC3, - UVEC4, - INT, - IVEC2, - IVEC3, - IVEC4, - BOOL, - /* Additionally supported types to enable data optimization and native - * support in some GPU back-ends. - * NOTE: These types must be representable in all APIs. E.g. `VEC3_101010I2` is aliased as vec3 - * in the GL back-end, as implicit type conversions from packed normal attribute data to vec3 is - * supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid - * additional data conversions for `GPU_COMP_U8` vertex attributes. 
*/ - VEC3_101010I2, - UCHAR, - UCHAR2, - UCHAR3, - UCHAR4, - CHAR, - CHAR2, - CHAR3, - CHAR4, - USHORT, - USHORT2, - USHORT3, - USHORT4, - SHORT, - SHORT2, - SHORT3, - SHORT4 -}; - -BLI_INLINE int to_component_count(const Type &type) -{ - switch (type) { - case Type::FLOAT: - case Type::UINT: - case Type::INT: - case Type::BOOL: - return 1; - case Type::VEC2: - case Type::UVEC2: - case Type::IVEC2: - return 2; - case Type::VEC3: - case Type::UVEC3: - case Type::IVEC3: - return 3; - case Type::VEC4: - case Type::UVEC4: - case Type::IVEC4: - return 4; - case Type::MAT3: - return 9; - case Type::MAT4: - return 16; - /* Alias special types. */ - case Type::UCHAR: - case Type::USHORT: - return 1; - case Type::UCHAR2: - case Type::USHORT2: - return 2; - case Type::UCHAR3: - case Type::USHORT3: - return 3; - case Type::UCHAR4: - case Type::USHORT4: - return 4; - case Type::CHAR: - case Type::SHORT: - return 1; - case Type::CHAR2: - case Type::SHORT2: - return 2; - case Type::CHAR3: - case Type::SHORT3: - return 3; - case Type::CHAR4: - case Type::SHORT4: - return 4; - case Type::VEC3_101010I2: - return 3; - } - BLI_assert_unreachable(); - return -1; -} - /* All of these functions is a bit out of place */ static inline Type to_type(const eGPUType type) { @@ -554,32 +457,6 @@ struct ShaderCreateInfo { using SubpassIn = FragOut; Vector subpass_inputs_; - struct SpecializationConstant { - struct Value { - union { - uint32_t u; - int32_t i; - float f; - }; - - bool operator==(const Value &other) const - { - return u == other.u; - } - }; - - Type type; - StringRefNull name; - Value default_value; - - bool operator==(const SpecializationConstant &b) const - { - TEST_EQUAL(*this, b, type); - TEST_EQUAL(*this, b, name); - TEST_EQUAL(*this, b, default_value); - return true; - } - }; Vector specialization_constants_; struct Sampler { @@ -832,14 +709,14 @@ struct ShaderCreateInfo { constant.name = name; switch (type) { case Type::INT: - constant.default_value.i = 
static_cast(default_value); + constant.value.i = static_cast(default_value); break; case Type::BOOL: case Type::UINT: - constant.default_value.u = static_cast(default_value); + constant.value.u = static_cast(default_value); break; case Type::FLOAT: - constant.default_value.f = static_cast(default_value); + constant.value.f = static_cast(default_value); break; default: BLI_assert_msg(0, "Only scalar types can be used as constants"); @@ -1226,13 +1103,11 @@ struct ShaderCreateInfo { } // namespace blender::gpu::shader namespace blender { -template<> -struct DefaultHash> { - uint64_t operator()( - const Vector &key) const +template<> struct DefaultHash> { + uint64_t operator()(const Vector &key) const { uint64_t hash = 0; - for (const gpu::shader::ShaderCreateInfo::SpecializationConstant::Value &value : key) { + for (const blender::gpu::shader::SpecializationConstant::Value &value : key) { hash = hash * 33 ^ uint64_t(value.u); } return hash; diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index aa6b81d8d83..19706d24303 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -50,7 +50,7 @@ class Shader { * The backend is free to implement their support as they see fit. */ struct Constants { - using Value = shader::ShaderCreateInfo::SpecializationConstant::Value; + using Value = shader::SpecializationConstant::Value; Vector types; /* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret * that however it wants (i.e: bind another shader instead). 
*/ @@ -180,6 +180,8 @@ class ShaderCompiler { virtual BatchHandle batch_compile(Span &infos) = 0; virtual bool batch_is_ready(BatchHandle handle) = 0; virtual Vector batch_finalize(BatchHandle &handle) = 0; + + virtual void precompile_specializations(Span /*specializations*/){}; }; /* Generic (fully synchronous) implementation for backends that don't implement their own diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index e6a94ea6630..3bf2bb1ab38 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -876,7 +876,7 @@ static void generate_specialization_constant_declarations(const shader::ShaderCr std::stringstream &ss) { uint index = MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID; - for (const ShaderCreateInfo::SpecializationConstant &sc : info->specialization_constants_) { + for (const SpecializationConstant &sc : info->specialization_constants_) { /* TODO(Metal): Output specialization constant chain. */ ss << "constant " << sc.type << " " << sc.name << " [[function_constant(" << index << ")]];\n"; index++; diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 01641359b3f..9d1dfcd398b 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -68,7 +68,7 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat async_compilation_ = is_batch_compilation; /* Extract the constants names from info and store them locally. 
*/ - for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) { + for (const SpecializationConstant &constant : info.specialization_constants_) { specialization_constant_names_.append(constant.name.c_str()); } } @@ -617,8 +617,7 @@ std::string GLShader::constants_declare() const for (int constant_index : IndexRange(constants.types.size())) { const StringRefNull name = specialization_constant_names_[constant_index]; gpu::shader::Type constant_type = constants.types[constant_index]; - const shader::ShaderCreateInfo::SpecializationConstant::Value &value = - constants.values[constant_index]; + const SpecializationConstant::Value &value = constants.values[constant_index]; switch (constant_type) { case Type::INT: @@ -1888,6 +1887,99 @@ Vector GLShaderCompiler::batch_finalize(BatchHandle &handle) return result; } +void GLShaderCompiler::precompile_specializations(Span specializations) +{ + BLI_assert(GPU_use_parallel_compilation()); + + struct SpecializationWork { + GLShader *shader = nullptr; + GLuint program; + GLSourcesBaked sources; + + GLCompilerWorker *worker = nullptr; + bool do_async_compilation = false; + bool is_ready = false; + }; + + Vector items; + items.reserve(specializations.size()); + + for (auto &specialization : specializations) { + GLShader *sh = static_cast(unwrap(specialization.shader)); + for (const SpecializationConstant &constant : specialization.constants) { + const ShaderInput *input = sh->interface->constant_get(constant.name.c_str()); + BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists"); + sh->constants.values[input->location].u = constant.value.u; + } + sh->constants.is_dirty = true; + if (sh->program_cache_.contains(sh->constants.values)) { + /* Already compiled. */ + continue; + } + items.append({}); + SpecializationWork &item = items.last(); + item.shader = sh; + + /** WORKAROUND: Set async_compilation to true, so only the sources are generated. 
*/ + sh->async_compilation_ = true; + item.program = sh->program_get(); + sh->async_compilation_ = false; + + item.sources = sh->get_sources(); + + size_t required_size = item.sources.size(); + item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources); + } + + bool is_ready = false; + while (!is_ready) { + /* Loop until ready, we can't defer the compilation of required specialization constants. */ + is_ready = true; + + for (SpecializationWork &item : items) { + if (item.is_ready) { + continue; + } + std::scoped_lock lock(mutex_); + + if (!item.do_async_compilation) { + /* Compilation will happen locally on shader bind. */ + glDeleteProgram(item.program); + item.program = 0; + item.shader->program_active_->program_id = 0; + item.shader->constants.is_dirty = true; + item.is_ready = true; + continue; + } + + if (item.worker == nullptr) { + /* Try to acquire an available worker. */ + item.worker = get_compiler_worker(item.sources); + } + else if (item.worker->is_ready()) { + /* Retrieve the binary compiled by the worker, then release the worker on both outcomes. */ + if (item.worker->load_program_binary(item.program)) { + item.is_ready = true; + } + else { + /* Compilation failed, local compilation will be tried later on shader bind. */ + item.do_async_compilation = false; + } + item.worker->release(); + item.worker = nullptr; + } + else if (worker_is_lost(item.worker)) { + /* We lost the worker, local compilation will be tried later on shader bind. 
*/ + item.do_async_compilation = false; + } + + if (!item.is_ready) { + is_ready = false; + } + } + } +} + /** \} */ #endif diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 0e15a494683..5688f85835e 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -96,7 +96,7 @@ class GLShader : public Shader { ~GLProgram(); }; - using GLProgramCacheKey = Vector; + using GLProgramCacheKey = Vector; Map program_cache_; /** @@ -105,6 +105,8 @@ class GLShader : public Shader { */ GLProgram *program_active_ = nullptr; + /* When true, the shader generates its GLSources but it's not compiled. + * (Used for batch compilation) */ bool async_compilation_ = false; /** @@ -298,6 +300,8 @@ class GLShaderCompiler : public ShaderCompiler { virtual BatchHandle batch_compile(Span &infos) override; virtual bool batch_is_ready(BatchHandle handle) override; virtual Vector batch_finalize(BatchHandle &handle) override; + + virtual void precompile_specializations(Span specializations) override; }; #else diff --git a/source/blender/gpu/opengl/gl_shader_interface.cc b/source/blender/gpu/opengl/gl_shader_interface.cc index fbe2eb90b60..3396b115dda 100644 --- a/source/blender/gpu/opengl/gl_shader_interface.cc +++ b/source/blender/gpu/opengl/gl_shader_interface.cc @@ -531,7 +531,7 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI /* Constants */ int constant_id = 0; - for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) { + for (const SpecializationConstant &constant : info.specialization_constants_) { copy_input_name(input, constant.name, name_buffer_, name_buffer_offset); input->location = constant_id++; input++; diff --git a/source/blender/gpu/vulkan/vk_pipeline_pool.hh b/source/blender/gpu/vulkan/vk_pipeline_pool.hh index 2bf5faf8cf2..25427d05c9a 100644 --- a/source/blender/gpu/vulkan/vk_pipeline_pool.hh +++ 
b/source/blender/gpu/vulkan/vk_pipeline_pool.hh @@ -24,7 +24,7 @@ namespace gpu { struct VKComputeInfo { VkShaderModule vk_shader_module; VkPipelineLayout vk_pipeline_layout; - Vector specialization_constants; + Vector specialization_constants; bool operator==(const VKComputeInfo &other) const { diff --git a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc index 9c5f2737844..d20b077befb 100644 --- a/source/blender/gpu/vulkan/vk_shader.cc +++ b/source/blender/gpu/vulkan/vk_shader.cc @@ -802,22 +802,22 @@ std::string VKShader::resources_declare(const shader::ShaderCreateInfo &info) co ss << "\n/* Specialization Constants (pass-through). */\n"; uint constant_id = 0; - for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) { + for (const SpecializationConstant &sc : info.specialization_constants_) { ss << "layout (constant_id=" << constant_id++ << ") const "; switch (sc.type) { case Type::INT: - ss << "int " << sc.name << "=" << std::to_string(sc.default_value.i) << ";\n"; + ss << "int " << sc.name << "=" << std::to_string(sc.value.i) << ";\n"; break; case Type::UINT: - ss << "uint " << sc.name << "=" << std::to_string(sc.default_value.u) << "u;\n"; + ss << "uint " << sc.name << "=" << std::to_string(sc.value.u) << "u;\n"; break; case Type::BOOL: - ss << "bool " << sc.name << "=" << (sc.default_value.u ? "true" : "false") << ";\n"; + ss << "bool " << sc.name << "=" << (sc.value.u ? "true" : "false") << ";\n"; break; case Type::FLOAT: /* Use uint representation to allow exact same bit pattern even if NaN. uintBitsToFloat * isn't supported during global const initialization. 
*/ - ss << "uint " << sc.name << "_uint=" << std::to_string(sc.default_value.u) << "u;\n"; + ss << "uint " << sc.name << "_uint=" << std::to_string(sc.value.u) << "u;\n"; ss << "#define " << sc.name << " uintBitsToFloat(" << sc.name << "_uint)\n"; break; default: diff --git a/source/blender/gpu/vulkan/vk_shader_interface.cc b/source/blender/gpu/vulkan/vk_shader_interface.cc index 3d24fbaf53a..060f83b6407 100644 --- a/source/blender/gpu/vulkan/vk_shader_interface.cc +++ b/source/blender/gpu/vulkan/vk_shader_interface.cc @@ -138,7 +138,7 @@ void VKShaderInterface::init(const shader::ShaderCreateInfo &info) /* Constants */ int constant_id = 0; - for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) { + for (const SpecializationConstant &constant : info.specialization_constants_) { copy_input_name(input, constant.name, name_buffer_, name_buffer_offset); input->location = constant_id++; input++;