GPU: Optimize GLStorageBuf::read performance

Add a separate persistent mapped buffer where the main SSBO can be
copied, so its contents can be read from the CPU without stalling the
GPU.

Pull Request: https://projects.blender.org/blender/blender/pulls/117521
This commit is contained in:
Miguel Pozo
2024-02-09 16:11:33 +01:00
parent c065ef94ee
commit 98231ea880
3 changed files with 71 additions and 13 deletions

View File

@@ -63,9 +63,8 @@ void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo);
* If pending GPU updates to the storage buffer are not yet visible to the host, the command will
* stall until dependent GPU work has completed.
*
* Otherwise, this command is synchronized against this call and will stall the CPU until the
* buffer content can be read by the host.
*/
void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data);

View File

@@ -33,6 +33,25 @@ GLStorageBuf::GLStorageBuf(size_t size, GPUUsageType usage, const char *name)
/**
 * Release the GL objects held by this storage buffer: the read fence (if any), the persistent
 * mapping and the read-back buffer it belongs to (if any), and the main SSBO.
 */
GLStorageBuf::~GLStorageBuf()
{
  if (read_fence_ != nullptr) {
    glDeleteSync(read_fence_);
  }

  if (persistent_ptr_ != nullptr) {
    /* Drop the mapping before the read-back buffer itself is freed below. */
    if (!GLContext::direct_state_access_support) {
      /* Without direct state access the buffer has to be bound to be unmapped. */
      glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_);
      glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
      glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    }
    else {
      glUnmapNamedBuffer(read_ssbo_id_);
    }
  }

  if (read_ssbo_id_ != 0) {
    GLContext::buf_free(read_ssbo_id_);
  }

  GLContext::buf_free(ssbo_id_);
}
@@ -58,6 +77,7 @@ void GLStorageBuf::update(const void *data)
if (ssbo_id_ == 0) {
this->init();
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
@@ -162,25 +182,60 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin
}
/**
 * Queue a copy of the storage buffer into a separate, persistently mapped read-back buffer and
 * place a fence behind it. #GLStorageBuf::read waits on that fence and then copies from the
 * mapping.
 *
 * The read-back buffer and its mapping are created the first time this is called.
 */
void GLStorageBuf::async_flush_to_host()
{
  if (ssbo_id_ == 0) {
    this->init();
  }

  if (read_ssbo_id_ == 0) {
    /* Create the read-back buffer once and keep it mapped for reading. */
    glGenBuffers(1, &read_ssbo_id_);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_);
    glBufferStorage(GL_SHADER_STORAGE_BUFFER,
                    size_in_bytes_,
                    nullptr,
                    GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT);
    persistent_ptr_ = glMapBufferRange(
        GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT);
    BLI_assert(persistent_ptr_);
    debug::object_label(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_, name_);
    /* Restore the generic binding, as the other methods of this class do. */
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
  }

  if (GLContext::direct_state_access_support) {
    glCopyNamedBufferSubData(ssbo_id_, read_ssbo_id_, 0, 0, size_in_bytes_);
  }
  else {
    glBindBuffer(GL_COPY_READ_BUFFER, ssbo_id_);
    glBindBuffer(GL_COPY_WRITE_BUFFER, read_ssbo_id_);
    /* The source buffer is bound to GL_COPY_READ_BUFFER just above, so that binding point (and
     * not GL_SHADER_STORAGE_BUFFER) has to be named as the read target. */
    glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0, size_in_bytes_);
    glBindBuffer(GL_COPY_READ_BUFFER, 0);
    glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
  }

  glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);

  /* Only one fence is tracked: replace any previous one with a fence behind this copy. */
  if (read_fence_) {
    glDeleteSync(read_fence_);
  }
  read_fence_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
/**
 * Copy the buffer content into `data`, blocking the CPU until the read fence stops reporting a
 * timeout.
 *
 * \param data: Destination that receives `size_in_bytes_` bytes. Nothing is done when null.
 */
void GLStorageBuf::read(void *data)
{
  if (!data) {
    return;
  }

  /* Without both a mapping and a fence there is nothing to wait on yet: request the flush. */
  const bool has_pending_flush = persistent_ptr_ && read_fence_;
  if (!has_pending_flush) {
    this->async_flush_to_host();
  }

  /* Poll the fence (flushing queued commands) for as long as the wait times out. */
  for (;;) {
    if (glClientWaitSync(read_fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 1000) != GL_TIMEOUT_EXPIRED) {
      break;
    }
  }

  /* The fence is consumed by this read. */
  glDeleteSync(read_fence_);
  read_fence_ = nullptr;

  memcpy(data, persistent_ptr_, size_in_bytes_);
}
void GLStorageBuf::sync_as_indirect_buffer()

View File

@@ -26,6 +26,10 @@ class GLStorageBuf : public StorageBuf {
GLuint ssbo_id_ = 0;
/** Usage type. */
GPUUsageType usage_;
/* Read */
GLuint read_ssbo_id_ = 0;
GLsync read_fence_ = 0;
void *persistent_ptr_ = nullptr;
public:
GLStorageBuf(size_t size, GPUUsageType usage, const char *name);