GPU: OpenGL: Support parallel geometry and compute shader compilation
Support parallel compilation of geometry and compute shaders when using the `GPU_shader_batch` API. Pull Request: https://projects.blender.org/blender/blender/pulls/122792
This commit is contained in:
@@ -25,35 +25,57 @@
|
||||
namespace blender::gpu {
|
||||
|
||||
class SubprocessShader {
|
||||
GLuint comp_ = 0;
|
||||
GLuint vert_ = 0;
|
||||
GLuint geom_ = 0;
|
||||
GLuint frag_ = 0;
|
||||
GLuint program_ = 0;
|
||||
bool success_ = false;
|
||||
|
||||
public:
|
||||
SubprocessShader(const char *vert_src, const char *frag_src)
|
||||
SubprocessShader(const char *comp_src,
|
||||
const char *vert_src,
|
||||
const char *geom_src,
|
||||
const char *frag_src)
|
||||
{
|
||||
GLint status;
|
||||
|
||||
vert_ = glCreateShader(GL_VERTEX_SHADER);
|
||||
glShaderSource(vert_, 1, &vert_src, nullptr);
|
||||
glCompileShader(vert_);
|
||||
glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
frag_ = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
glShaderSource(frag_, 1, &frag_src, nullptr);
|
||||
glCompileShader(frag_);
|
||||
glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
program_ = glCreateProgram();
|
||||
glAttachShader(program_, vert_);
|
||||
glAttachShader(program_, frag_);
|
||||
|
||||
auto compile_stage = [&](const char *src, GLenum stage) -> GLuint {
|
||||
if (src == nullptr) {
|
||||
/* We only want status errors if compilation fails. */
|
||||
status = GL_TRUE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLuint shader = glCreateShader(stage);
|
||||
glShaderSource(shader, 1, &src, nullptr);
|
||||
glCompileShader(shader);
|
||||
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
|
||||
glAttachShader(program_, shader);
|
||||
return shader;
|
||||
};
|
||||
|
||||
comp_ = compile_stage(comp_src, GL_COMPUTE_SHADER);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
vert_ = compile_stage(vert_src, GL_VERTEX_SHADER);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
geom_ = compile_stage(geom_src, GL_GEOMETRY_SHADER);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
frag_ = compile_stage(frag_src, GL_FRAGMENT_SHADER);
|
||||
if (!status) {
|
||||
return;
|
||||
}
|
||||
|
||||
glLinkProgram(program_);
|
||||
glGetProgramiv(program_, GL_LINK_STATUS, &status);
|
||||
if (!status) {
|
||||
@@ -65,7 +87,9 @@ class SubprocessShader {
|
||||
|
||||
~SubprocessShader()
{
  /* Release every stage object first, in pipeline declaration order, then the program itself.
   * Stages that were never created still hold their initial name of 0. */
  const GLuint stage_objects[] = {comp_, vert_, geom_, frag_};
  for (const GLuint stage_object : stage_objects) {
    glDeleteShader(stage_object);
  }
  glDeleteProgram(program_);
}
|
||||
@@ -78,8 +102,8 @@ class SubprocessShader {
|
||||
|
||||
if (success_) {
|
||||
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &bin->size);
|
||||
if (bin->size + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
|
||||
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, &bin->data_start);
|
||||
if (bin->size <= sizeof(ShaderBinaryHeader::data)) {
|
||||
glGetProgramBinary(program_, bin->size, nullptr, &bin->format, bin->data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,7 +116,7 @@ static bool validate_binary(void *binary)
|
||||
{
|
||||
ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
|
||||
GLuint program = glCreateProgram();
|
||||
glProgramBinary(program, bin->format, &bin->data_start, bin->size);
|
||||
glProgramBinary(program, bin->format, bin->data, bin->size);
|
||||
GLint status;
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &status);
|
||||
glDeleteProgram(program);
|
||||
@@ -165,15 +189,34 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
|
||||
break;
|
||||
}
|
||||
|
||||
const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
|
||||
|
||||
const char *vert_src = shaders;
|
||||
const char *frag_src = shaders + strlen(shaders) + 1;
|
||||
ShaderSourceHeader *source = reinterpret_cast<ShaderSourceHeader *>(shared_mem.get_data());
|
||||
const char *next_src = source->sources;
|
||||
const char *comp_src = nullptr;
|
||||
const char *vert_src = nullptr;
|
||||
const char *geom_src = nullptr;
|
||||
const char *frag_src = nullptr;
|
||||
|
||||
DefaultHash<StringRefNull> hasher;
|
||||
uint64_t vert_hash = hasher(vert_src);
|
||||
uint64_t frag_hash = hasher(frag_src);
|
||||
std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
|
||||
std::string hash_str = "_";
|
||||
|
||||
auto get_src = [&]() {
|
||||
const char *src = next_src;
|
||||
next_src += strlen(src) + sizeof('\0');
|
||||
hash_str += std::to_string(hasher(src)) + "_";
|
||||
return src;
|
||||
};
|
||||
|
||||
if (source->type == ShaderSourceHeader::Type::COMPUTE) {
|
||||
comp_src = get_src();
|
||||
}
|
||||
else {
|
||||
vert_src = get_src();
|
||||
if (source->type == ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE) {
|
||||
geom_src = get_src();
|
||||
}
|
||||
frag_src = get_src();
|
||||
}
|
||||
|
||||
std::string cache_path = cache_dir + SEP_STR + hash_str;
|
||||
|
||||
/* TODO: This should lock the files? */
|
||||
@@ -203,14 +246,14 @@ void GPU_compilation_subprocess_run(const char *subprocess_name)
|
||||
}
|
||||
}
|
||||
|
||||
SubprocessShader shader(vert_src, frag_src);
|
||||
SubprocessShader shader(comp_src, vert_src, geom_src, frag_src);
|
||||
ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
|
||||
|
||||
end_semaphore.increment();
|
||||
|
||||
fstream file(cache_path, std::ios::binary | std::ios::out);
|
||||
file.write(reinterpret_cast<char *>(shared_mem.get_data()),
|
||||
binary->size + offsetof(ShaderBinaryHeader, data_start));
|
||||
binary->size + offsetof(ShaderBinaryHeader, data));
|
||||
}
|
||||
|
||||
GPU_exit();
|
||||
|
||||
@@ -13,7 +13,19 @@
|
||||
namespace blender::gpu {
|
||||
|
||||
/* The size of the memory pools shared by Blender and the compilation subprocesses. */
|
||||
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
|
||||
constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5 MiB */
|
||||
|
||||
/* Layout of the data stored at the start of the shared memory pool when shader sources are
 * handed over for compilation: a program type tag followed by the packed stage sources. */
struct ShaderSourceHeader {
|
||||
/* Which set of stages is packed into `sources`. */
enum Type { COMPUTE, GRAPHICS, GRAPHICS_WITH_GEOMETRY_STAGE };
|
||||
/* The type of program being compiled. */
|
||||
Type type;
|
||||
/* The source code for all the shader stages (separated by a null terminator).
|
||||
* The stages follow the execution order (e.g. vert > geom > frag). */
|
||||
char sources[compilation_subprocess_shared_memory_size - sizeof(type)];
|
||||
};
|
||||
|
||||
static_assert(sizeof(ShaderSourceHeader) == compilation_subprocess_shared_memory_size,
|
||||
"Size must match the shared memory size");
|
||||
|
||||
struct ShaderBinaryHeader {
|
||||
/* Size of the shader binary data. */
|
||||
@@ -21,11 +33,13 @@ struct ShaderBinaryHeader {
|
||||
/* Magic number that identifies the format of this shader binary (Driver-defined).
|
||||
* This (and size) is set to 0 when the shader has failed to compile. */
|
||||
uint32_t format;
|
||||
/* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
|
||||
* shader binary data. */
|
||||
uint8_t data_start;
|
||||
/* The serialized shader binary data. */
|
||||
uint8_t data[compilation_subprocess_shared_memory_size - sizeof(size) - sizeof(format)];
|
||||
};
|
||||
|
||||
static_assert(sizeof(ShaderBinaryHeader) == compilation_subprocess_shared_memory_size,
|
||||
"Size must match the shared memory size");
|
||||
|
||||
} // namespace blender::gpu
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1472,6 +1472,30 @@ Vector<const char *> GLSources::sources_get() const
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string GLSources::to_string() const
|
||||
{
|
||||
std::string result;
|
||||
for (const GLSource &source : *this) {
|
||||
if (source.source_ref) {
|
||||
result.append(source.source_ref);
|
||||
}
|
||||
else {
|
||||
result.append(source.source);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t GLSourcesBaked::size()
|
||||
{
|
||||
size_t result = 0;
|
||||
result += comp.empty() ? 0 : comp.size() + sizeof('\0');
|
||||
result += vert.empty() ? 0 : vert.size() + sizeof('\0');
|
||||
result += geom.empty() ? 0 : geom.size() + sizeof('\0');
|
||||
result += frag.empty() ? 0 : frag.size() + sizeof('\0');
|
||||
return result;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -1588,6 +1612,16 @@ GLuint GLShader::program_get()
|
||||
return program_active_->program_id;
|
||||
}
|
||||
|
||||
/* Bake the per-stage source lists of this shader into one string per stage. */
GLSourcesBaked GLShader::get_sources()
{
  GLSourcesBaked baked;
  /* Each stage is flattened independently from its own source list. */
  baked.comp = compute_sources_.to_string();
  baked.vert = vertex_sources_.to_string();
  baked.geom = geometry_sources_.to_string();
  baked.frag = fragment_sources_.to_string();
  return baked;
}
|
||||
|
||||
/** \} */
|
||||
|
||||
#if BLI_SUBPROCESS_SUPPORT
|
||||
@@ -1620,12 +1654,37 @@ GLCompilerWorker::~GLCompilerWorker()
|
||||
start_semaphore_->increment();
|
||||
}
|
||||
|
||||
void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
|
||||
void GLCompilerWorker::compile(const GLSourcesBaked &sources)
|
||||
{
|
||||
BLI_assert(state_ == AVAILABLE);
|
||||
|
||||
strcpy((char *)shared_mem_->get_data(), vert.c_str());
|
||||
strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str());
|
||||
ShaderSourceHeader *shared_src = reinterpret_cast<ShaderSourceHeader *>(shared_mem_->get_data());
|
||||
char *next_src = shared_src->sources;
|
||||
|
||||
auto add_src = [&](const std::string &src) {
|
||||
if (!src.empty()) {
|
||||
strcpy(next_src, src.c_str());
|
||||
next_src += src.size() + sizeof('\0');
|
||||
}
|
||||
};
|
||||
|
||||
add_src(sources.comp);
|
||||
add_src(sources.vert);
|
||||
add_src(sources.geom);
|
||||
add_src(sources.frag);
|
||||
|
||||
BLI_assert(size_t(next_src) <= size_t(shared_src) + compilation_subprocess_shared_memory_size);
|
||||
|
||||
if (!sources.comp.empty()) {
|
||||
BLI_assert(sources.vert.empty() && sources.geom.empty() && sources.frag.empty());
|
||||
shared_src->type = ShaderSourceHeader::Type::COMPUTE;
|
||||
}
|
||||
else {
|
||||
BLI_assert(sources.comp.empty() && !sources.vert.empty() && !sources.frag.empty());
|
||||
shared_src->type = sources.geom.empty() ?
|
||||
ShaderSourceHeader::Type::GRAPHICS :
|
||||
ShaderSourceHeader::Type::GRAPHICS_WITH_GEOMETRY_STAGE;
|
||||
}
|
||||
|
||||
start_semaphore_->increment();
|
||||
|
||||
@@ -1668,7 +1727,7 @@ bool GLCompilerWorker::load_program_binary(GLint program)
|
||||
state_ = COMPILATION_FINISHED;
|
||||
|
||||
if (binary->size > 0) {
|
||||
glProgramBinary(program, binary->format, &binary->data_start, binary->size);
|
||||
glProgramBinary(program, binary->format, binary->data, binary->size);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1695,7 +1754,7 @@ GLShaderCompiler::~GLShaderCompiler()
|
||||
}
|
||||
}
|
||||
|
||||
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
|
||||
GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const GLSourcesBaked &sources)
|
||||
{
|
||||
GLCompilerWorker *result = nullptr;
|
||||
for (GLCompilerWorker *compiler : workers_) {
|
||||
@@ -1709,7 +1768,7 @@ GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const
|
||||
workers_.append(result);
|
||||
}
|
||||
if (result) {
|
||||
result->compile(vert, frag);
|
||||
result->compile(sources);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -1739,31 +1798,21 @@ BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo
|
||||
|
||||
for (const shader::ShaderCreateInfo *info : infos) {
|
||||
const_cast<ShaderCreateInfo *>(info)->finalize();
|
||||
CompilationWork item = {};
|
||||
batch.items.append({});
|
||||
CompilationWork &item = batch.items.last();
|
||||
item.info = info;
|
||||
item.do_async_compilation = !info->vertex_source_.is_empty() &&
|
||||
!info->fragment_source_.is_empty() &&
|
||||
info->compute_source_.is_empty() &&
|
||||
info->geometry_source_.is_empty();
|
||||
if (item.do_async_compilation) {
|
||||
item.shader = static_cast<GLShader *>(compile(*info, true));
|
||||
for (const char *src : item.shader->vertex_sources_.sources_get()) {
|
||||
item.vertex_src.append(src);
|
||||
}
|
||||
for (const char *src : item.shader->fragment_sources_.sources_get()) {
|
||||
item.fragment_src.append(src);
|
||||
}
|
||||
item.shader = static_cast<GLShader *>(compile(*info, true));
|
||||
item.sources = item.shader->get_sources();
|
||||
|
||||
size_t required_size = item.vertex_src.size() + item.fragment_src.size();
|
||||
if (required_size < compilation_subprocess_shared_memory_size) {
|
||||
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
|
||||
}
|
||||
else {
|
||||
delete item.shader;
|
||||
item.do_async_compilation = false;
|
||||
}
|
||||
size_t required_size = item.sources.size();
|
||||
item.do_async_compilation = required_size <= sizeof(ShaderSourceHeader::sources);
|
||||
if (item.do_async_compilation) {
|
||||
item.worker = get_compiler_worker(item.sources);
|
||||
}
|
||||
else {
|
||||
delete item.shader;
|
||||
item.sources = {};
|
||||
}
|
||||
batch.items.append(item);
|
||||
}
|
||||
return handle;
|
||||
}
|
||||
@@ -1791,7 +1840,7 @@ bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
|
||||
|
||||
if (!item.worker) {
|
||||
/* Try to acquire an available worker. */
|
||||
item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
|
||||
item.worker = get_compiler_worker(item.sources);
|
||||
}
|
||||
else if (item.worker->is_ready()) {
|
||||
/* Retrieve the binary compiled by the worker. */
|
||||
|
||||
@@ -44,6 +44,21 @@ class GLSources : public Vector<GLSource> {
|
||||
public:
|
||||
GLSources &operator=(Span<const char *> other);
|
||||
Vector<const char *> sources_get() const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
|
||||
/**
|
||||
* The full sources for each shader stage, baked into a single string from their respective
|
||||
* GLSources. (Can be retrieved from GLShader::get_sources())
|
||||
*/
|
||||
struct GLSourcesBaked : NonCopyable {
|
||||
/* Baked compute stage source. Empty when the stage is not used. */
std::string comp;
|
||||
/* Baked vertex stage source. Empty when the stage is not used. */
std::string vert;
|
||||
/* Baked geometry stage source. Empty when the stage is not used. */
std::string geom;
|
||||
/* Baked fragment stage source. Empty when the stage is not used. */
std::string frag;
|
||||
|
||||
/* Returns the size (in bytes) required to store the source of all the used stages.
 * Each non-empty stage counts its characters plus one null terminator. */
|
||||
size_t size();
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -194,6 +209,8 @@ class GLShader : public Shader {
|
||||
return program_active_->compute_shader != 0;
|
||||
}
|
||||
|
||||
GLSourcesBaked get_sources();
|
||||
|
||||
private:
|
||||
const char *glsl_patch_get(GLenum gl_stage);
|
||||
|
||||
@@ -240,7 +257,7 @@ class GLCompilerWorker {
|
||||
GLCompilerWorker();
|
||||
~GLCompilerWorker();
|
||||
|
||||
void compile(StringRefNull vert, StringRefNull frag);
|
||||
void compile(const GLSourcesBaked &sources);
|
||||
bool is_ready();
|
||||
bool load_program_binary(GLint program);
|
||||
void release();
|
||||
@@ -255,14 +272,12 @@ class GLShaderCompiler : public ShaderCompiler {
|
||||
Vector<GLCompilerWorker *> workers_;
|
||||
|
||||
struct CompilationWork {
|
||||
GLCompilerWorker *worker = nullptr;
|
||||
GLShader *shader = nullptr;
|
||||
const shader::ShaderCreateInfo *info = nullptr;
|
||||
GLShader *shader = nullptr;
|
||||
GLSourcesBaked sources;
|
||||
|
||||
GLCompilerWorker *worker = nullptr;
|
||||
bool do_async_compilation = false;
|
||||
|
||||
std::string vertex_src;
|
||||
std::string fragment_src;
|
||||
|
||||
bool is_ready = false;
|
||||
};
|
||||
|
||||
@@ -274,7 +289,7 @@ class GLShaderCompiler : public ShaderCompiler {
|
||||
BatchHandle next_batch_handle = 1;
|
||||
Map<BatchHandle, Batch> batches;
|
||||
|
||||
GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
|
||||
GLCompilerWorker *get_compiler_worker(const GLSourcesBaked &sources);
|
||||
bool worker_is_lost(GLCompilerWorker *&worker);
|
||||
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user