From 98231ea880bb03111b2a03a37bf15474fde79d14 Mon Sep 17 00:00:00 2001 From: Miguel Pozo Date: Fri, 9 Feb 2024 16:11:33 +0100 Subject: [PATCH] GPU: Optimize GLStorageBuf::read performance Add a separate persistent mapped buffer where the main SSBO can be copied, so its contents can be read from the CPU without stalling the GPU. Pull Request: https://projects.blender.org/blender/blender/pulls/117521 --- source/blender/gpu/GPU_storage_buffer.h | 5 +- .../blender/gpu/opengl/gl_storage_buffer.cc | 75 ++++++++++++++++--- .../blender/gpu/opengl/gl_storage_buffer.hh | 4 + 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/source/blender/gpu/GPU_storage_buffer.h b/source/blender/gpu/GPU_storage_buffer.h index e9f6084b1cf..2f29333e45d 100644 --- a/source/blender/gpu/GPU_storage_buffer.h +++ b/source/blender/gpu/GPU_storage_buffer.h @@ -63,9 +63,8 @@ void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo); * If pending GPU updates to the storage buffer are not yet visible to the host, the command will * stall until dependent GPU work has completed. * - * Otherwise, this command is unsynchronized and will return current visible storage buffer - * contents immediately. - * Alternatively, use appropriate barrier or GPU_finish before reading. + * Otherwise, this command is synchronized against this call and will stall the CPU until the + * buffer content can be read by the host. */ void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data); diff --git a/source/blender/gpu/opengl/gl_storage_buffer.cc b/source/blender/gpu/opengl/gl_storage_buffer.cc index 85492228b81..75958f9c153 100644 --- a/source/blender/gpu/opengl/gl_storage_buffer.cc +++ b/source/blender/gpu/opengl/gl_storage_buffer.cc @@ -33,6 +33,25 @@ GLStorageBuf::GLStorageBuf(size_t size, GPUUsageType usage, const char *name) GLStorageBuf::~GLStorageBuf() { + if (read_fence_) { + glDeleteSync(read_fence_); + } + + if (persistent_ptr_) { + if (GLContext::direct_state_access_support) { + glUnmapNamedBuffer(read_ssbo_id_); + } + else { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + } + + if (read_ssbo_id_) { + GLContext::buf_free(read_ssbo_id_); + } + GLContext::buf_free(ssbo_id_); } @@ -58,6 +77,7 @@ void GLStorageBuf::update(const void *data) if (ssbo_id_ == 0) { this->init(); } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_); glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); @@ -162,25 +182,60 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin } void GLStorageBuf::async_flush_to_host() -{ - GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE); -} - -void GLStorageBuf::read(void *data) { if (ssbo_id_ == 0) { this->init(); } + if (read_ssbo_id_ == 0) { + glGenBuffers(1, &read_ssbo_id_); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_); + glBufferStorage(GL_SHADER_STORAGE_BUFFER, + size_in_bytes_, + nullptr, + GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT); + persistent_ptr_ = glMapBufferRange( + GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT); + BLI_assert(persistent_ptr_); + debug::object_label(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_, name_); + } + if (GLContext::direct_state_access_support) { - glGetNamedBufferSubData(ssbo_id_, 0, size_in_bytes_, data); + glCopyNamedBufferSubData(ssbo_id_, read_ssbo_id_, 0, 0, size_in_bytes_); } else { - /* This binds the buffer to GL_ARRAY_BUFFER and upload the data if any. */ - glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_); - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + glBindBuffer(GL_COPY_READ_BUFFER, ssbo_id_); + glBindBuffer(GL_COPY_WRITE_BUFFER, read_ssbo_id_); + glCopyBufferSubData(GL_SHADER_STORAGE_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0, size_in_bytes_); + glBindBuffer(GL_COPY_READ_BUFFER, 0); + glBindBuffer(GL_COPY_WRITE_BUFFER, 0); } + + glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); + + if (read_fence_) { + glDeleteSync(read_fence_); + } + read_fence_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); +} + +void GLStorageBuf::read(void *data) +{ + if (data == nullptr) { + return; + } + + if (!persistent_ptr_ || !read_fence_) { + this->async_flush_to_host(); + } + + while (glClientWaitSync(read_fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 1000) == GL_TIMEOUT_EXPIRED) { + /* Repeat until the data is ready.*/ + } + glDeleteSync(read_fence_); + read_fence_ = 0; + + memcpy(data, persistent_ptr_, size_in_bytes_); } void GLStorageBuf::sync_as_indirect_buffer() diff --git a/source/blender/gpu/opengl/gl_storage_buffer.hh b/source/blender/gpu/opengl/gl_storage_buffer.hh index af3fdacb7b2..be2eb3e8df2 100644 --- a/source/blender/gpu/opengl/gl_storage_buffer.hh +++ b/source/blender/gpu/opengl/gl_storage_buffer.hh @@ -26,6 +26,10 @@ class GLStorageBuf : public StorageBuf { GLuint ssbo_id_ = 0; /** Usage type. */ GPUUsageType usage_; + /* Read */ + GLuint read_ssbo_id_ = 0; + GLsync read_fence_ = 0; + void *persistent_ptr_ = nullptr; public: GLStorageBuf(size_t size, GPUUsageType usage, const char *name);