GPU: Optimize GLStorageBuf::read performance
Add a separate, persistently mapped buffer into which the main SSBO can be copied, so that its contents can be read from the CPU without stalling the GPU.
Pull Request: https://projects.blender.org/blender/blender/pulls/117521
This commit is contained in:
@@ -63,9 +63,8 @@ void GPU_storagebuf_sync_to_host(GPUStorageBuf *ssbo);
|
||||
* If pending GPU updates to the storage buffer are not yet visible to the host, the command will
|
||||
* stall until dependent GPU work has completed.
|
||||
*
|
||||
* Otherwise, this command is unsynchronized and will return current visible storage buffer
|
||||
* contents immediately.
|
||||
* Alternatively, use appropriate barrier or GPU_finish before reading.
|
||||
* Otherwise, this command is synchronized against this call and will stall the CPU until the
|
||||
* buffer content can be read by the host.
|
||||
*/
|
||||
void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data);
|
||||
|
||||
|
||||
@@ -33,6 +33,25 @@ GLStorageBuf::GLStorageBuf(size_t size, GPUUsageType usage, const char *name)
|
||||
|
||||
/**
 * Release the GL objects owned by this storage buffer: the read fence, the mapping of the
 * read-back buffer, the read-back buffer itself and finally the main SSBO.
 */
GLStorageBuf::~GLStorageBuf()
{
  /* A fence may still be alive if a read-back was never consumed. */
  if (read_fence_) {
    glDeleteSync(read_fence_);
  }

  /* The read-back buffer is still mapped: unmap it before it gets freed. */
  if (persistent_ptr_) {
    if (!GLContext::direct_state_access_support) {
      /* Without direct state access the buffer has to be bound to be unmapped. */
      glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_);
      glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
      glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
    }
    else {
      glUnmapNamedBuffer(read_ssbo_id_);
    }
  }

  /* The read-back buffer only exists once a read-back has been requested. */
  if (read_ssbo_id_) {
    GLContext::buf_free(read_ssbo_id_);
  }
  GLContext::buf_free(ssbo_id_);
}
|
||||
|
||||
@@ -58,6 +77,7 @@ void GLStorageBuf::update(const void *data)
|
||||
if (ssbo_id_ == 0) {
|
||||
this->init();
|
||||
}
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_);
|
||||
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||
@@ -162,25 +182,60 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin
|
||||
}
|
||||
|
||||
void GLStorageBuf::async_flush_to_host()
|
||||
{
|
||||
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
|
||||
}
|
||||
|
||||
void GLStorageBuf::read(void *data)
|
||||
{
|
||||
if (ssbo_id_ == 0) {
|
||||
this->init();
|
||||
}
|
||||
|
||||
if (read_ssbo_id_ == 0) {
|
||||
glGenBuffers(1, &read_ssbo_id_);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_);
|
||||
glBufferStorage(GL_SHADER_STORAGE_BUFFER,
|
||||
size_in_bytes_,
|
||||
nullptr,
|
||||
GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT);
|
||||
persistent_ptr_ = glMapBufferRange(
|
||||
GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT);
|
||||
BLI_assert(persistent_ptr_);
|
||||
debug::object_label(GL_SHADER_STORAGE_BUFFER, read_ssbo_id_, name_);
|
||||
}
|
||||
|
||||
if (GLContext::direct_state_access_support) {
|
||||
glGetNamedBufferSubData(ssbo_id_, 0, size_in_bytes_, data);
|
||||
glCopyNamedBufferSubData(ssbo_id_, read_ssbo_id_, 0, 0, size_in_bytes_);
|
||||
}
|
||||
else {
|
||||
/* This binds the buffer to GL_ARRAY_BUFFER and upload the data if any. */
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_);
|
||||
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||
glBindBuffer(GL_COPY_READ_BUFFER, ssbo_id_);
|
||||
glBindBuffer(GL_COPY_WRITE_BUFFER, read_ssbo_id_);
|
||||
glCopyBufferSubData(GL_SHADER_STORAGE_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0, size_in_bytes_);
|
||||
glBindBuffer(GL_COPY_READ_BUFFER, 0);
|
||||
glBindBuffer(GL_COPY_WRITE_BUFFER, 0);
|
||||
}
|
||||
|
||||
glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||
|
||||
if (read_fence_) {
|
||||
glDeleteSync(read_fence_);
|
||||
}
|
||||
read_fence_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
}
|
||||
|
||||
void GLStorageBuf::read(void *data)
|
||||
{
|
||||
if (data == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!persistent_ptr_ || !read_fence_) {
|
||||
this->async_flush_to_host();
|
||||
}
|
||||
|
||||
while (glClientWaitSync(read_fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 1000) == GL_TIMEOUT_EXPIRED) {
|
||||
/* Repeat until the data is ready.*/
|
||||
}
|
||||
glDeleteSync(read_fence_);
|
||||
read_fence_ = 0;
|
||||
|
||||
memcpy(data, persistent_ptr_, size_in_bytes_);
|
||||
}
|
||||
|
||||
void GLStorageBuf::sync_as_indirect_buffer()
|
||||
|
||||
@@ -26,6 +26,10 @@ class GLStorageBuf : public StorageBuf {
|
||||
GLuint ssbo_id_ = 0;
|
||||
/** Usage type. */
|
||||
GPUUsageType usage_;
|
||||
/* Read */
|
||||
GLuint read_ssbo_id_ = 0;
|
||||
GLsync read_fence_ = 0;
|
||||
void *persistent_ptr_ = nullptr;
|
||||
|
||||
public:
|
||||
GLStorageBuf(size_t size, GPUUsageType usage, const char *name);
|
||||
|
||||
Reference in New Issue
Block a user