The vulkan backend was implemented with async in mind, however the one place where Blender uses for async was implemented blocking. This PR splits the readback into flushing the command and waiting for readback. **Performance** Improvement of animation playback performance of shader balls.blend is around 10%. Shader balls.blend frame: 1-100, 10 x animation playback | Branch | Total time | Average time | | -------------------- | ---------- | ------------ | | blender-v4.4-release | 26851 ms | 2685 ms | | This PR | 23675 ms | 2367 ms | Pull Request: https://projects.blender.org/blender/blender/pulls/134227
129 lines
3.5 KiB
C++
129 lines
3.5 KiB
C++
/* SPDX-FileCopyrightText: 2022 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup gpu
|
|
*/
|
|
#include "vk_shader.hh"
|
|
#include "vk_shader_interface.hh"
|
|
#include "vk_staging_buffer.hh"
|
|
#include "vk_state_manager.hh"
|
|
#include "vk_vertex_buffer.hh"
|
|
|
|
#include "vk_storage_buffer.hh"
|
|
|
|
namespace blender::gpu {
|
|
|
|
VKStorageBuffer::VKStorageBuffer(size_t size, GPUUsageType usage, const char *name)
|
|
: StorageBuf(size, name), usage_(usage)
|
|
{
|
|
UNUSED_VARS(usage_);
|
|
}
|
|
VKStorageBuffer::~VKStorageBuffer()
|
|
{
|
|
if (async_read_buffer_) {
|
|
MEM_delete(async_read_buffer_);
|
|
async_read_buffer_ = nullptr;
|
|
}
|
|
}
|
|
|
|
void VKStorageBuffer::update(const void *data)
|
|
{
|
|
VKContext &context = *VKContext::get();
|
|
ensure_allocated();
|
|
VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
|
|
staging_buffer.host_buffer_get().update_immediately(data);
|
|
staging_buffer.copy_to_device(context);
|
|
}
|
|
|
|
void VKStorageBuffer::ensure_allocated()
|
|
{
|
|
if (!buffer_.is_allocated()) {
|
|
allocate();
|
|
}
|
|
}
|
|
|
|
void VKStorageBuffer::allocate()
|
|
{
|
|
const VkBufferUsageFlags buffer_usage_flags = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
|
buffer_.create(size_in_bytes_,
|
|
buffer_usage_flags,
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
|
VkMemoryPropertyFlags(0),
|
|
VmaAllocationCreateFlags(0));
|
|
BLI_assert(buffer_.is_allocated());
|
|
debug::object_label(buffer_.vk_handle(), name_);
|
|
}
|
|
|
|
void VKStorageBuffer::bind(int slot)
|
|
{
|
|
VKContext &context = *VKContext::get();
|
|
context.state_manager_get().storage_buffer_bind(
|
|
BindSpaceStorageBuffers::Type::StorageBuffer, this, slot);
|
|
}
|
|
|
|
void VKStorageBuffer::unbind()
|
|
{
|
|
VKContext *context = VKContext::get();
|
|
if (context) {
|
|
context->state_manager_get().storage_buffer_unbind(this);
|
|
}
|
|
}
|
|
|
|
void VKStorageBuffer::clear(uint32_t clear_value)
|
|
{
|
|
ensure_allocated();
|
|
VKContext &context = *VKContext::get();
|
|
buffer_.clear(context, clear_value);
|
|
}
|
|
|
|
void VKStorageBuffer::copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size)
|
|
{
|
|
ensure_allocated();
|
|
|
|
VKVertexBuffer &src_vertex_buffer = *unwrap(src);
|
|
src_vertex_buffer.upload();
|
|
|
|
render_graph::VKCopyBufferNode::CreateInfo copy_buffer = {};
|
|
copy_buffer.src_buffer = src_vertex_buffer.vk_handle();
|
|
copy_buffer.dst_buffer = vk_handle();
|
|
copy_buffer.region.srcOffset = src_offset;
|
|
copy_buffer.region.dstOffset = dst_offset;
|
|
copy_buffer.region.size = copy_size;
|
|
|
|
VKContext &context = *VKContext::get();
|
|
context.render_graph().add_node(copy_buffer);
|
|
}
|
|
|
|
void VKStorageBuffer::async_flush_to_host()
|
|
{
|
|
if (async_read_buffer_ != nullptr) {
|
|
return;
|
|
}
|
|
ensure_allocated();
|
|
VKContext &context = *VKContext::get();
|
|
|
|
async_read_buffer_ = MEM_new<VKStagingBuffer>(
|
|
__func__, buffer_, VKStagingBuffer::Direction::DeviceToHost);
|
|
async_read_buffer_->copy_from_device(context);
|
|
async_read_buffer_->host_buffer_get().async_flush_to_host(context);
|
|
}
|
|
|
|
void VKStorageBuffer::read(void *data)
|
|
{
|
|
if (async_read_buffer_ == nullptr) {
|
|
async_flush_to_host();
|
|
}
|
|
|
|
VKContext &context = *VKContext::get();
|
|
async_read_buffer_->host_buffer_get().read_async(context, data);
|
|
MEM_delete(async_read_buffer_);
|
|
async_read_buffer_ = nullptr;
|
|
}
|
|
|
|
} // namespace blender::gpu
|