GPU: OpenGL: Support parallel geometry and compute shader compilation

Support parallel compilation of geometry and compute shaders when
using the `GPU_shader_batch` API.

Pull Request: https://projects.blender.org/blender/blender/pulls/122792
This commit is contained in:
Miguel Pozo
2024-06-07 16:10:43 +02:00
parent 1790314f89
commit ec0dd18de5
4 changed files with 194 additions and 73 deletions

View File

@@ -25,35 +25,57 @@
namespace blender::gpu {
class SubprocessShader {
GLuint comp_ = 0;
GLuint vert_ = 0;
GLuint geom_ = 0;
GLuint frag_ = 0;
GLuint program_ = 0;
bool success_ = false;
public:
SubprocessShader(const char *vert_src, const char *frag_src)
SubprocessShader(const char *comp_src,
const char *vert_src,
const char *geom_src,
const char *frag_src)
{
GLint status;
vert_ = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vert_, 1, &vert_src, nullptr);
glCompileShader(vert_);
glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
frag_ = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(frag_, 1, &frag_src, nullptr);
glCompileShader(frag_);
glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
if (!status) {
return;
}
program_ = glCreateProgram();
glAttachShader(program_, vert_);
glAttachShader(program_, frag_);
auto compile_stage = [&](const char *src, GLenum stage) -> GLuint {
if (src == nullptr) {
/* We only want status errors if compilation fails. */
status = GL_TRUE;
return 0;
}
GLuint shader = glCreateShader(stage);
glShaderSource(shader, 1, &src, nullptr);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
glAttachShader(program_, shader);
return shader;
};
comp_ = compile_stage(comp_src, GL_COMPUTE_SHADER);
if (!status) {
return;
}
vert_ = compile_stage(vert_src, GL_VERTEX_SHADER);
if (!status) {
return;
}
geom_ = compile_stage(geom_src, GL_GEOMETRY_SHADER);
if (!status) {
return;
}
frag_ = compile_stage(frag_src, GL_FRAGMENT_SHADER);
if (!status) {
return;
}
glLinkProgram(program_);
glGetProgramiv(program_, GL_LINK_STATUS, &status);
if (!status) {
@@ -65,7 +87,9 @@ class SubprocessShader {
~SubprocessShader()
{
glDeleteShader(comp_);
glDeleteShader(vert_);
glDeleteShader(geom_);
glDeleteShader(frag_);
glDeleteProgram(program_);
}
@@ -78,8 +102,8 @@ class SubprocessShader {
if (success_) {
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
if (bin->size <= sizeof(ShaderBinaryHeader::data)) {
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, bin->data);
}
}
@@ -92,7 +116,7 @@ static bool validate_binary(void *binary)
{
ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
GLuint program = glCreateProgram();
glProgramBinary(program, bin->format, &bin->data_start, bin->size);
glProgramBinary(program, bin->format, bin->data, bin->size);
GLint status;
glGetProgramiv(program, GL_LINK_STATUS, &status);
glDeleteProgram(program);
@@ -165,15 +189,34 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
break;
}
const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
const char *vert_src = shaders;
const char *frag_src = shaders + strlen(shaders) + 1;
ShaderSourceHeader *source = reinterpret_cast<ShaderSourceHeader *>(shared_mem.get_data());
const char *next_src = source->sources;
const char *comp_src = nullptr;
const char *vert_src = nullptr;
const char *geom_src = nullptr;
const char *frag_src = nullptr;
DefaultHash<StringRefNull> hasher;
uint64_t vert_hash = hasher(vert_src);
uint64_t frag_hash = hasher(frag_src);
std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
std::string hash_str = "_";
auto get_src = [&]() {
const char *src = next_src;
next_src += strlen(src) + sizeof('\0');
hash_str += std::to_string(hasher(src)) + "_";
return src;
};
if (source->type == ShaderSourceHeader::Type::COMPUTE) {
comp_src = get_src();
}
else {
vert_src = get_src();
if (source->type == ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE) {
geom_src = get_src();
}
frag_src = get_src();
}
std::string cache_path = cache_dir + SEP_STR + hash_str;
/* TODO: This should lock the files? */
@@ -203,14 +246,14 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
}
}
SubprocessShader shader(vert_src, frag_src);
SubprocessShader shader(comp_src, vert_src, geom_src, frag_src);
ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
end_semaphore.increment();
fstream file(cache_path, std::ios::binary | std::ios::out);
file.write(reinterpret_cast<char *>(shared_mem.get_data()),
binary->size + offsetof(ShaderBinaryHeader, data_start));
binary->size + offsetof(ShaderBinaryHeader, data));
}
GPU_exit();

View File

@@ -13,7 +13,19 @@
namespace blender::gpu {
/* The size of the memory pools shared by Blender and the compilation subprocesses. */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB */
/* Layout used to read the shared memory pool when it carries the shader sources that a
 * compilation subprocess has to compile. */
struct ShaderSourceHeader {
/* COMPUTE: a single compute source is stored.
 * GRAPHICS: a vertex source followed by a fragment source.
 * GRAPHICS_WITH_GEOMETRY_STAGE: vertex, geometry and fragment sources, in that order. */
enum Type { COMPUTE, GRAPHICS, GRAPHICS_WITH_GEOMETRY_STAGE };
/* The type of program being compiled. */
Type type;
/* The source code for all the shader stages (each one ends with a null terminator).
* The stages follow the execution order (e.g. vert > geom > frag). */
char sources[compilation_subprocess_shared_memory_size - sizeof(type)];
};
/* Guards against padding or size changes: the struct must cover the whole pool exactly. */
static_assert(sizeof(ShaderSourceHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
struct ShaderBinaryHeader {
/* Size of the shader binary data. */
@@ -21,11 +33,13 @@ struct ShaderBinaryHeader {
/* Magic number that identifies the format of this shader binary (Driver-defined).
* This (and size) is set to 0 when the shader has failed to compile. */
uint32_t format;
/* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
* shader binary data. */
uint8_t data_start;
/* The serialized shader binary data. */
uint8_t data[compilation_subprocess_shared_memory_size - sizeof(size) - sizeof(format)];
};
static_assert(sizeof(ShaderBinaryHeader) == compilation_subprocess_shared_memory_size,
"Size must match the shared memory size");
} // namespace blender::gpu
#endif

View File

@@ -1472,6 +1472,30 @@ Vector<const char *> GLSources::sources_get() const
return result;
}
/* Concatenate every entry of this source list, in order, into one string. */
std::string GLSources::to_string() const
{
  std::string baked;
  for (const GLSource &entry : *this) {
    /* Prefer `source_ref` when it is set, otherwise fall back to `source`. */
    if (!entry.source_ref) {
      baked.append(entry.source);
      continue;
    }
    baked.append(entry.source_ref);
  }
  return baked;
}
/* Total number of bytes needed to store all the non-empty stage sources back to back,
 * each one followed by its null terminator. */
size_t GLSourcesBaked::size()
{
  /* Bytes taken by one stage: nothing when empty, else its text plus the terminator. */
  auto stage_bytes = [](const std::string &stage) -> size_t {
    if (stage.empty()) {
      return 0;
    }
    return stage.size() + sizeof('\0');
  };
  return stage_bytes(comp) + stage_bytes(vert) + stage_bytes(geom) + stage_bytes(frag);
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -1588,6 +1612,16 @@ GLuint GLShader::program_get()
return program_active_->program_id;
}
/* Bake the source list of every stage into one string per stage.
 * A stage whose list has no entries yields an empty string. */
GLSourcesBaked GLShader::get_sources()
{
  GLSourcesBaked baked;
  /* Graphics stages. */
  baked.vert = vertex_sources_.to_string();
  baked.geom = geometry_sources_.to_string();
  baked.frag = fragment_sources_.to_string();
  /* Compute stage. */
  baked.comp = compute_sources_.to_string();
  return baked;
}
/** \} */
#if BLI_SUBPROCESS_SUPPORT
@@ -1620,12 +1654,37 @@ GLCompilerWorker::~GLCompilerWorker()
start_semaphore_->increment();
}
void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
void GLCompilerWorker::compile(const GLSourcesBaked &sources)
{
BLI_assert(state_ == AVAILABLE);
strcpy((char *)shared_mem_->get_data(), vert.c_str());
strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str());
ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
char *next_src = shared_src->sources;
auto add_src = [&](const std::string &src) {
if (!src.empty()) {
strcpy(next_src, src.c_str());
next_src += src.size() + sizeof('\0');
}
};
add_src(sources.comp);
add_src(sources.vert);
add_src(sources.geom);
add_src(sources.frag);
BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
if (!sources.comp.empty()) {
BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
shared_src->type = ShaderSourceHeader::Type::COMPUTE;
}
else {
BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
shared_src->type = sources.geom.empty() ?
ShaderSourceHeader::Type::GRAPHICS :
ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
}
start_semaphore_->increment();
@@ -1668,7 +1727,7 @@ bool GLCompilerWorker::load_program_binary(GLint program)
state_ = COMPILATION_FINISHED;
if (binary->size > 0) {
glProgramBinary(program, binary->format, &binary->data_start, binary->size);
glProgramBinary(program, binary->format, binary->data, binary->size);
return true;
}
@@ -1695,7 +1754,7 @@ GLShaderCompiler::~GLShaderCompiler()
}
}
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const GLSourcesBaked &sources)
{
GLCompilerWorker *result = nullptr;
for (GLCompilerWorker *compiler : workers_) {
@@ -1709,7 +1768,7 @@ GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const
workers_.append(result);
}
if (result) {
result->compile(vert, frag);
result->compile(sources);
}
return result;
}
@@ -1739,31 +1798,21 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
for (const shader::ShaderCreateInfo *info : infos) {
const_cast<ShaderCreateInfo *>(info)->finalize();
CompilationWork item = {};
batch.items.append({});
CompilationWork &item = batch.items.last();
item.info = info;
item.do_async_compilation = !info->vertex_source_.is_empty() &&
!info->fragment_source_.is_empty() &&
info->compute_source_.is_empty() &&
info->geometry_source_.is_empty();
if (item.do_async_compilation) {
item.shader = static_cast<GLShader *>(compile(*info, true));
for (const char *src : item.shader->vertex_sources_.sources_get()) {
item.vertex_src.append(src);
}
for (const char *src : item.shader->fragment_sources_.sources_get()) {
item.fragment_src.append(src);
}
item.shader = static_cast<GLShader *>(compile(*info, true));
item.sources = item.shader->get_sources();
size_t required_size = item.vertex_src.size() + item.fragment_src.size();
if (required_size < compilation_subprocess_shared_memory_size) {
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
}
else {
delete item.shader;
item.do_async_compilation = false;
}
size_t required_size = item.sources.size();
item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
if (item.do_async_compilation) {
item.worker = get_compiler_worker(item.sources);
}
else {
delete item.shader;
item.sources = {};
}
batch.items.append(item);
}
return handle;
}
@@ -1791,7 +1840,7 @@ bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
if (!item.worker) {
/* Try to acquire an available worker. */
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
item.worker = get_compiler_worker(item.sources);
}
else if (item.worker->is_ready()) {
/* Retrieve the binary compiled by the worker. */

View File

@@ -44,6 +44,21 @@ class GLSources : public Vector<GLSource> {
public:
GLSources &operator=(Span<const char *> other);
Vector<const char *> sources_get() const;
std::string to_string() const;
};
/**
* The full sources for each shader stage, baked into a single string from their respective
* GLSources. (Can be retrieved from GLShader::get_sources())
*/
struct GLSourcesBaked : NonCopyable {
/* Baked source of each stage. An empty string means the stage is not used. */
std::string comp;
std::string vert;
std::string geom;
std::string frag;
/* Returns the size (in bytes) required to store the source of all the used stages.
* Each used (non-empty) stage also counts one extra byte for its null terminator. */
size_t size();
};
/**
@@ -194,6 +209,8 @@ class GLShader : public Shader {
return program_active_->compute_shader != 0;
}
GLSourcesBaked get_sources();
private:
const char *glsl_patch_get(GLenum gl_stage);
@@ -240,7 +257,7 @@ class GLCompilerWorker {
GLCompilerWorker();
~GLCompilerWorker();
void compile(StringRefNull vert, StringRefNull frag);
void compile(const GLSourcesBaked &sources);
bool is_ready();
bool load_program_binary(GLint program);
void release();
@@ -255,14 +272,12 @@ class GLShaderCompiler : public ShaderCompiler {
Vector<GLCompilerWorker *> workers_;
struct CompilationWork {
GLCompilerWorker *worker = nullptr;
GLShader *shader = nullptr;
const shader::ShaderCreateInfo *info = nullptr;
GLShader *shader = nullptr;
GLSourcesBaked sources;
GLCompilerWorker *worker = nullptr;
bool do_async_compilation = false;
std::string vertex_src;
std::string fragment_src;
bool is_ready = false;
};
@@ -274,7 +289,7 @@ class GLShaderCompiler : public ShaderCompiler {
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
bool worker_is_lost(GLCompilerWorker *&worker);
public: