GPU: Add GPU_shaders_precompile_specializations

Allow precompiling specialization constant variations in parallel.
This is only supported in OpenGL, like the rest of the batch compilation API;
on the other backends the function is a no-op.
This also moves `SpecializationConstant` from
`gpu_shader_create_info` (private API) into `GPU_common_types`
(public API).

Pull Request: https://projects.blender.org/blender/blender/pulls/122796
This commit is contained in:
Miguel Pozo
2024-06-07 18:45:31 +02:00
parent dae3554d5c
commit 22652b305e
12 changed files with 284 additions and 148 deletions

View File

@@ -8,6 +8,8 @@
#pragma once
#include "BLI_string_ref.hh"
/**
* Describes the load operation of a frame-buffer attachment at the start of a render pass.
*/
@@ -70,3 +72,151 @@ enum eGPUFrontFace {
GPU_CLOCKWISE,
GPU_COUNTERCLOCKWISE,
};
namespace blender::gpu::shader {
/**
 * Identifiers for the data types known to the GPU shader module.
 *
 * The first group is supported natively by every back-end; the second group are packed /
 * small-integer types (see the note below). The enumerators rely on implicit numbering starting
 * at `FLOAT = 0`, so their order must not be changed casually.
 */
enum class Type {
/* Types supported natively across all GPU back-ends. */
FLOAT = 0,
VEC2,
VEC3,
VEC4,
MAT3,
MAT4,
UINT,
UVEC2,
UVEC3,
UVEC4,
INT,
IVEC2,
IVEC3,
IVEC4,
BOOL,
/* Additionally supported types to enable data optimization and native
* support in some GPU back-ends.
* NOTE: These types must be representable in all APIs. E.g. `VEC3_101010I2` is aliased as vec3
* in the GL back-end, as implicit type conversions from packed normal attribute data to vec3 is
* supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid
* additional data conversions for `GPU_COMP_U8` vertex attributes. */
VEC3_101010I2,
UCHAR,
UCHAR2,
UCHAR3,
UCHAR4,
CHAR,
CHAR2,
CHAR3,
CHAR4,
USHORT,
USHORT2,
USHORT3,
USHORT4,
SHORT,
SHORT2,
SHORT3,
SHORT4
};
/**
 * Return the number of scalar components stored in a value of \a type
 * (e.g. 1 for `FLOAT`, 3 for `VEC3`, 16 for `MAT4`).
 *
 * The packed and small-integer types report the same count as the vector they stand for.
 * A value that is not a valid enumerator triggers #BLI_assert_unreachable and yields -1.
 */
BLI_INLINE int to_component_count(const Type &type)
{
  switch (type) {
    /* Scalars, including the small-integer aliases. */
    case Type::FLOAT:
    case Type::UINT:
    case Type::INT:
    case Type::BOOL:
    case Type::UCHAR:
    case Type::CHAR:
    case Type::USHORT:
    case Type::SHORT:
      return 1;

    /* Two component vectors. */
    case Type::VEC2:
    case Type::UVEC2:
    case Type::IVEC2:
    case Type::UCHAR2:
    case Type::CHAR2:
    case Type::USHORT2:
    case Type::SHORT2:
      return 2;

    /* Three component vectors, including the packed 10_10_10_2 format. */
    case Type::VEC3:
    case Type::UVEC3:
    case Type::IVEC3:
    case Type::UCHAR3:
    case Type::CHAR3:
    case Type::USHORT3:
    case Type::SHORT3:
    case Type::VEC3_101010I2:
      return 3;

    /* Four component vectors. */
    case Type::VEC4:
    case Type::UVEC4:
    case Type::IVEC4:
    case Type::UCHAR4:
    case Type::CHAR4:
    case Type::USHORT4:
    case Type::SHORT4:
      return 4;

    /* Square matrices. */
    case Type::MAT3:
      return 9;
    case Type::MAT4:
      return 16;
  }
  BLI_assert_unreachable();
  return -1;
}
/**
 * A named scalar specialization constant: a #Type tag, a name and a 32-bit value.
 *
 * The convenience constructors pick the type tag from the C++ type of the value they are given.
 */
struct SpecializationConstant {
  /** 32-bit payload. Which member is meaningful is given by #SpecializationConstant::type. */
  struct Value {
    union {
      uint32_t u;
      int32_t i;
      float f;
    };

    /* Equality is decided on the 32-bit pattern read through `u`, whatever the stored type. */
    inline bool operator==(const Value &other) const
    {
      return u == other.u;
    }
  };

  /* `type` and `value` carry default initializers so that a default-constructed constant has a
   * determinate state: comparing (or hashing the value of) such an object must not read
   * uninitialized memory. */
  Type type = Type::FLOAT;
  StringRefNull name;
  Value value = {};

  SpecializationConstant() = default;

  /** Unsigned integer constant. */
  SpecializationConstant(const char *name, uint32_t value) : type(Type::UINT), name(name)
  {
    this->value.u = value;
  }

  /** Signed integer constant. */
  SpecializationConstant(const char *name, int value) : type(Type::INT), name(name)
  {
    this->value.i = value;
  }

  /** Floating point constant. */
  SpecializationConstant(const char *name, float value) : type(Type::FLOAT), name(name)
  {
    this->value.f = value;
  }

  /** Boolean constant, stored as 0 or 1 in the unsigned member. */
  SpecializationConstant(const char *name, bool value) : type(Type::BOOL), name(name)
  {
    this->value.u = value ? 1 : 0;
  }

  /** Two constants are equal when type, name and value all match. */
  inline bool operator==(const SpecializationConstant &b) const
  {
    return this->type == b.type && this->name == b.name && this->value == b.value;
  }
};
} // namespace blender::gpu::shader

View File

@@ -12,6 +12,7 @@
#include "BLI_span.hh"
#include "BLI_vector.hh"
#include "GPU_common_types.hh"
#include "GPU_shader_builtin.hh"
namespace blender::gpu {
@@ -219,6 +220,13 @@ void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int valu
void GPU_shader_constant_float(GPUShader *sh, const char *name, float value);
void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value);
/**
 * A shader paired with a set of specialization constant values.
 * One entry of the list passed to #GPU_shaders_precompile_specializations.
 */
struct ShaderSpecialization {
/* Shader the constants apply to. */
GPUShader *shader;
/* Constants (type, name and value) describing the requested variation of `shader`. */
blender::Vector<blender::gpu::shader::SpecializationConstant> constants;
};
/**
 * Request the compilation of the given shader specializations ahead of their first use.
 * The call is forwarded to the shader compiler of the active context; the base
 * `ShaderCompiler` implementation does nothing, so back-ends that do not override it treat
 * this as a no-op.
 */
void GPU_shaders_precompile_specializations(blender::Span<ShaderSpecialization> specializations);
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -465,9 +465,9 @@ void GPU_shader_transform_feedback_disable(GPUShader *shader)
/**
 * Fill `constants` from the specialization constants declared in \a info: the type and the
 * value of each declared constant are appended in declaration order, then the constants are
 * flagged as dirty.
 */
void Shader::specialization_constants_init(const shader::ShaderCreateInfo &info)
{
using namespace shader;
for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) {
for (const SpecializationConstant &sc : info.specialization_constants_) {
constants.types.append(sc.type);
constants.values.append(sc.default_value);
constants.values.append(sc.value);
}
constants.is_dirty = true;
}
@@ -515,6 +515,11 @@ void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}
/**
 * Forward the list of specializations to the shader compiler owned by the active GPU context.
 */
void GPU_shaders_precompile_specializations(Span<ShaderSpecialization> specializations)
{
  Context *active_context = Context::get();
  active_context->compiler->precompile_specializations(specializations);
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -16,6 +16,7 @@
#include "BLI_hash.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#include "GPU_common_types.hh"
#include "GPU_material.hh"
#include "GPU_texture.hh"
@@ -33,104 +34,6 @@ namespace blender::gpu::shader {
_info
#endif
/**
 * Identifiers for the data types known to the GPU shader module.
 *
 * The first group is supported natively by every back-end; the second group are packed /
 * small-integer types (see the note below). The enumerators rely on implicit numbering starting
 * at `FLOAT = 0`, so their order must not be changed casually.
 */
enum class Type {
/* Types supported natively across all GPU back-ends. */
FLOAT = 0,
VEC2,
VEC3,
VEC4,
MAT3,
MAT4,
UINT,
UVEC2,
UVEC3,
UVEC4,
INT,
IVEC2,
IVEC3,
IVEC4,
BOOL,
/* Additionally supported types to enable data optimization and native
* support in some GPU back-ends.
* NOTE: These types must be representable in all APIs. E.g. `VEC3_101010I2` is aliased as vec3
* in the GL back-end, as implicit type conversions from packed normal attribute data to vec3 is
* supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid
* additional data conversions for `GPU_COMP_U8` vertex attributes. */
VEC3_101010I2,
UCHAR,
UCHAR2,
UCHAR3,
UCHAR4,
CHAR,
CHAR2,
CHAR3,
CHAR4,
USHORT,
USHORT2,
USHORT3,
USHORT4,
SHORT,
SHORT2,
SHORT3,
SHORT4
};
/**
 * Return the number of scalar components stored in a value of \a type
 * (e.g. 1 for `FLOAT`, 3 for `VEC3`, 16 for `MAT4`).
 *
 * The packed and small-integer types report the same count as the vector they stand for.
 * A value that is not a valid enumerator triggers #BLI_assert_unreachable and yields -1.
 */
BLI_INLINE int to_component_count(const Type &type)
{
  switch (type) {
    /* Scalars, including the small-integer aliases. */
    case Type::FLOAT:
    case Type::UINT:
    case Type::INT:
    case Type::BOOL:
    case Type::UCHAR:
    case Type::CHAR:
    case Type::USHORT:
    case Type::SHORT:
      return 1;

    /* Two component vectors. */
    case Type::VEC2:
    case Type::UVEC2:
    case Type::IVEC2:
    case Type::UCHAR2:
    case Type::CHAR2:
    case Type::USHORT2:
    case Type::SHORT2:
      return 2;

    /* Three component vectors, including the packed 10_10_10_2 format. */
    case Type::VEC3:
    case Type::UVEC3:
    case Type::IVEC3:
    case Type::UCHAR3:
    case Type::CHAR3:
    case Type::USHORT3:
    case Type::SHORT3:
    case Type::VEC3_101010I2:
      return 3;

    /* Four component vectors. */
    case Type::VEC4:
    case Type::UVEC4:
    case Type::IVEC4:
    case Type::UCHAR4:
    case Type::CHAR4:
    case Type::USHORT4:
    case Type::SHORT4:
      return 4;

    /* Square matrices. */
    case Type::MAT3:
      return 9;
    case Type::MAT4:
      return 16;
  }
  BLI_assert_unreachable();
  return -1;
}
/* All of these functions are a bit out of place. */
static inline Type to_type(const eGPUType type)
{
@@ -554,32 +457,6 @@ struct ShaderCreateInfo {
using SubpassIn = FragOut;
Vector<SubpassIn> subpass_inputs_;
/**
 * A specialization constant declared by a shader create-info: a type tag, a name and the
 * default value of the constant.
 */
struct SpecializationConstant {
/* 32-bit payload; the member to read depends on the `type` of the owning constant. */
struct Value {
union {
uint32_t u;
int32_t i;
float f;
};
/* Equality is decided on the 32-bit pattern read through `u`. */
bool operator==(const Value &other) const
{
return u == other.u;
}
};
Type type;
StringRefNull name;
Value default_value;
/* Member-wise comparison of type, name and default value.
 * NOTE(review): `TEST_EQUAL` is defined elsewhere; presumably it returns false from this
 * function as soon as the named member differs — confirm against the macro definition. */
bool operator==(const SpecializationConstant &b) const
{
TEST_EQUAL(*this, b, type);
TEST_EQUAL(*this, b, name);
TEST_EQUAL(*this, b, default_value);
return true;
}
};
Vector<SpecializationConstant> specialization_constants_;
struct Sampler {
@@ -832,14 +709,14 @@ struct ShaderCreateInfo {
constant.name = name;
switch (type) {
case Type::INT:
constant.default_value.i = static_cast<int>(default_value);
constant.value.i = static_cast<int>(default_value);
break;
case Type::BOOL:
case Type::UINT:
constant.default_value.u = static_cast<uint>(default_value);
constant.value.u = static_cast<uint>(default_value);
break;
case Type::FLOAT:
constant.default_value.f = static_cast<float>(default_value);
constant.value.f = static_cast<float>(default_value);
break;
default:
BLI_assert_msg(0, "Only scalar types can be used as constants");
@@ -1226,13 +1103,11 @@ struct ShaderCreateInfo {
} // namespace blender::gpu::shader
namespace blender {
template<>
struct DefaultHash<Vector<gpu::shader::ShaderCreateInfo::SpecializationConstant::Value>> {
uint64_t operator()(
const Vector<gpu::shader::ShaderCreateInfo::SpecializationConstant::Value> &key) const
template<> struct DefaultHash<Vector<blender::gpu::shader::SpecializationConstant::Value>> {
uint64_t operator()(const Vector<blender::gpu::shader::SpecializationConstant::Value> &key) const
{
uint64_t hash = 0;
for (const gpu::shader::ShaderCreateInfo::SpecializationConstant::Value &value : key) {
for (const blender::gpu::shader::SpecializationConstant::Value &value : key) {
hash = hash * 33 ^ uint64_t(value.u);
}
return hash;

View File

@@ -50,7 +50,7 @@ class Shader {
* The backend is free to implement their support as they see fit.
*/
struct Constants {
using Value = shader::ShaderCreateInfo::SpecializationConstant::Value;
using Value = shader::SpecializationConstant::Value;
Vector<gpu::shader::Type> types;
/* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret
* that however it wants (i.e: bind another shader instead). */
@@ -180,6 +180,8 @@ class ShaderCompiler {
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) = 0;
virtual bool batch_is_ready(BatchHandle handle) = 0;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) = 0;
/* Default implementation: the argument is ignored and nothing is compiled. Back-ends that
 * support precompiling specializations override this. */
virtual void precompile_specializations(Span<ShaderSpecialization> /*specializations*/){};
};
/* Generic (fully synchronous) implementation for backends that don't implement their own

View File

@@ -876,7 +876,7 @@ static void generate_specialization_constant_declarations(const shader::ShaderCr
std::stringstream &ss)
{
uint index = MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
for (const ShaderCreateInfo::SpecializationConstant &sc : info->specialization_constants_) {
for (const SpecializationConstant &sc : info->specialization_constants_) {
/* TODO(Metal): Output specialization constant chain. */
ss << "constant " << sc.type << " " << sc.name << " [[function_constant(" << index << ")]];\n";
index++;

View File

@@ -68,7 +68,7 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
async_compilation_ = is_batch_compilation;
/* Extract the constants names from info and store them locally. */
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
for (const SpecializationConstant &constant : info.specialization_constants_) {
specialization_constant_names_.append(constant.name.c_str());
}
}
@@ -617,8 +617,7 @@ std::string GLShader::constants_declare() const
for (int constant_index : IndexRange(constants.types.size())) {
const StringRefNull name = specialization_constant_names_[constant_index];
gpu::shader::Type constant_type = constants.types[constant_index];
const shader::ShaderCreateInfo::SpecializationConstant::Value &value =
constants.values[constant_index];
const SpecializationConstant::Value &value = constants.values[constant_index];
switch (constant_type) {
case Type::INT:
@@ -1888,6 +1887,99 @@ Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
return result;
}
/**
 * Compile the requested shader specializations using the compilation workers and block until
 * every one of them has either been loaded from a worker or been scheduled for local
 * compilation on the next shader bind.
 *
 * For each entry the constants are written into the shader's current constant values.
 * Variations already present in the shader's program cache are skipped. For the others only
 * the sources are generated here; they are handed to a worker when they fit in
 * `ShaderSourceHeader::sources`, otherwise (or when the worker fails or is lost) the program is
 * reset so that it gets compiled locally later.
 *
 * NOTE: The constant values of the shaders passed in are left set to the last specialization
 * processed for that shader.
 */
void GLShaderCompiler::precompile_specializations(Span<ShaderSpecialization> specializations)
{
  BLI_assert(GPU_use_parallel_compilation());

  /* Book-keeping for one specialization that is not in the program cache yet. */
  struct SpecializationWork {
    GLShader *shader = nullptr;
    GLuint program = 0;
    GLSourcesBaked sources;

    GLCompilerWorker *worker = nullptr;
    bool do_async_compilation = false;
    bool is_ready = false;
  };

  Vector<SpecializationWork> items;
  items.reserve(specializations.size());

  for (const ShaderSpecialization &specialization : specializations) {
    GLShader *sh = static_cast<GLShader *>(unwrap(specialization.shader));
    for (const SpecializationConstant &constant : specialization.constants) {
      const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
      BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists");
      if (input == nullptr) {
        /* Unknown constant: skip it rather than dereferencing null when asserts are disabled. */
        continue;
      }
      sh->constants.values[input->location].u = constant.value.u;
    }
    sh->constants.is_dirty = true;
    if (sh->program_cache_.contains(sh->constants.values)) {
      /* Already compiled. */
      continue;
    }
    items.append({});
    SpecializationWork &item = items.last();
    item.shader = sh;

    /** WORKAROUND: Set async_compilation to true, so only the sources are generated. */
    sh->async_compilation_ = true;
    item.program = sh->program_get();
    sh->async_compilation_ = false;

    item.sources = sh->get_sources();

    /* Only sources that fit in the header's source buffer can be sent to a worker. */
    size_t required_size = item.sources.size();
    item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
  }

  bool is_ready = false;
  while (!is_ready) {
    /* Loop until ready, we can't defer the compilation of required specialization constants.
     * NOTE(review): this polls the workers without sleeping or yielding between passes. */
    is_ready = true;

    for (SpecializationWork &item : items) {
      if (item.is_ready) {
        continue;
      }
      std::scoped_lock lock(mutex_);

      if (!item.do_async_compilation) {
        /* Compilation will happen locally on shader bind.
         * NOTE(review): `program_active_` is whatever the last `program_get()` call on this
         * shader selected; if the same shader appears several times in `specializations` this
         * may not be the entry that owns `item.program` — confirm. */
        glDeleteProgram(item.program);
        item.program = 0;
        item.shader->program_active_->program_id = 0;
        item.shader->constants.is_dirty = true;
        item.is_ready = true;
        continue;
      }

      if (item.worker == nullptr) {
        /* Try to acquire an available worker. */
        item.worker = get_compiler_worker(item.sources);
      }
      else if (item.worker->is_ready()) {
        /* Retrieve the binary compiled by the worker. */
        if (item.worker->load_program_binary(item.program)) {
          item.is_ready = true;
        }
        else {
          /* Compilation failed, local compilation will be tried later on shader bind. */
          item.do_async_compilation = false;
        }
        /* The worker has finished with this item whether or not the binary could be loaded.
         * Always hand it back, otherwise a failed compilation would keep it acquired forever. */
        item.worker->release();
        item.worker = nullptr;
      }
      else if (worker_is_lost(item.worker)) {
        /* We lost the worker, local compilation will be tried later on shader bind. */
        item.do_async_compilation = false;
      }

      if (!item.is_ready) {
        is_ready = false;
      }
    }
  }
}
/** \} */
#endif

View File

@@ -96,7 +96,7 @@ class GLShader : public Shader {
~GLProgram();
};
using GLProgramCacheKey = Vector<shader::ShaderCreateInfo::SpecializationConstant::Value>;
using GLProgramCacheKey = Vector<shader::SpecializationConstant::Value>;
Map<GLProgramCacheKey, GLProgram> program_cache_;
/**
@@ -105,6 +105,8 @@ class GLShader : public Shader {
*/
GLProgram *program_active_ = nullptr;
/* When true, the shader generates its GLSources but it's not compiled.
* (Used for batch compilation) */
bool async_compilation_ = false;
/**
@@ -298,6 +300,8 @@ class GLShaderCompiler : public ShaderCompiler {
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
virtual bool batch_is_ready(BatchHandle handle) override;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
virtual void precompile_specializations(Span<ShaderSpecialization> specializations) override;
};
#else

View File

@@ -531,7 +531,7 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI
/* Constants */
int constant_id = 0;
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
for (const SpecializationConstant &constant : info.specialization_constants_) {
copy_input_name(input, constant.name, name_buffer_, name_buffer_offset);
input->location = constant_id++;
input++;

View File

@@ -24,7 +24,7 @@ namespace gpu {
struct VKComputeInfo {
VkShaderModule vk_shader_module;
VkPipelineLayout vk_pipeline_layout;
Vector<shader::ShaderCreateInfo::SpecializationConstant::Value> specialization_constants;
Vector<shader::SpecializationConstant::Value> specialization_constants;
bool operator==(const VKComputeInfo &other) const
{

View File

@@ -802,22 +802,22 @@ std::string VKShader::resources_declare(const shader::ShaderCreateInfo &info) co
ss << "\n/* Specialization Constants (pass-through). */\n";
uint constant_id = 0;
for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) {
for (const SpecializationConstant &sc : info.specialization_constants_) {
ss << "layout (constant_id=" << constant_id++ << ") const ";
switch (sc.type) {
case Type::INT:
ss << "int " << sc.name << "=" << std::to_string(sc.default_value.i) << ";\n";
ss << "int " << sc.name << "=" << std::to_string(sc.value.i) << ";\n";
break;
case Type::UINT:
ss << "uint " << sc.name << "=" << std::to_string(sc.default_value.u) << "u;\n";
ss << "uint " << sc.name << "=" << std::to_string(sc.value.u) << "u;\n";
break;
case Type::BOOL:
ss << "bool " << sc.name << "=" << (sc.default_value.u ? "true" : "false") << ";\n";
ss << "bool " << sc.name << "=" << (sc.value.u ? "true" : "false") << ";\n";
break;
case Type::FLOAT:
/* Use uint representation to allow exact same bit pattern even if NaN. uintBitsToFloat
* isn't supported during global const initialization. */
ss << "uint " << sc.name << "_uint=" << std::to_string(sc.default_value.u) << "u;\n";
ss << "uint " << sc.name << "_uint=" << std::to_string(sc.value.u) << "u;\n";
ss << "#define " << sc.name << " uintBitsToFloat(" << sc.name << "_uint)\n";
break;
default:

View File

@@ -138,7 +138,7 @@ void VKShaderInterface::init(const shader::ShaderCreateInfo &info)
/* Constants */
int constant_id = 0;
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
for (const SpecializationConstant &constant : info.specialization_constants_) {
copy_input_name(input, constant.name, name_buffer_, name_buffer_offset);
input->location = constant_id++;
input++;