Files
test/source/blender/gpu/vulkan/vk_buffer.cc
Jeroen Bakker 15d88e544a GPU: Storage buffer allocation alignment
Since the introduction of storage buffers in Blender, the calling
code has been responsible for ensuring the buffer meets allocation
requirements. All backends require the allocation size to be divisible
by 16 bytes. Until now, this was sufficient, but with GPU subdivision
changes, an external library must also adhere to these requirements.

For OpenSubdiv (OSD), some buffers are not 16-byte aligned, leading
to potential misallocation. Currently, this is mitigated by allocating
a few extra bytes, but this approach has the drawback of potentially
reading unintended bytes beyond the source buffer.

This PR adopts a similar approach to vertex buffers: the backend handles
extra byte allocation while ensuring data uploads and downloads function
correctly without requiring those additional bytes.

No changes were needed for Metal, as its allocation size is already
aligned to 256 bytes.

**Alternative solutions considered**:

- Copying the CPU buffer to a larger buffer when needed (performance impact).
- Modifying OSD buffers to allocate extra space (requires changes to an external library).
- Implementing GPU_storagebuf_update_sub.

Ref #135873

Pull Request: https://projects.blender.org/blender/blender/pulls/135716
2025-03-13 15:05:16 +01:00

211 lines
6.1 KiB
C++

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "vk_buffer.hh"
#include "vk_backend.hh"
#include "vk_context.hh"
namespace blender::gpu {
VKBuffer::~VKBuffer()
{
  /* Nothing to do when the buffer was never allocated (or already freed). */
  if (!is_allocated()) {
    return;
  }
  free();
}
bool VKBuffer::is_allocated() const
{
  /* The buffer is considered allocated as long as it holds a VMA allocation handle. */
  return !(allocation_ == VK_NULL_HANDLE);
}
/**
 * Create the underlying `VkBuffer` and allocate its device memory via VMA.
 *
 * The allocation size is rounded up to a multiple of 16 bytes (and is at least 16 bytes)
 * so calling code doesn't have to meet the backend alignment requirement itself.
 * Host-visible allocations are mapped immediately and stay mapped.
 *
 * \param size_in_bytes: Logical buffer size requested by the caller.
 * \param buffer_usage: Vulkan buffer usage flags.
 * \param required_flags: Memory property flags the allocation must have.
 * \param preferred_flags: Memory property flags VMA should prefer.
 * \param allocation_flags: Additional VMA allocation creation flags.
 * \return false when buffer creation failed, or when mapping a host-visible buffer failed.
 */
bool VKBuffer::create(size_t size_in_bytes,
                      VkBufferUsageFlags buffer_usage,
                      VkMemoryPropertyFlags required_flags,
                      VkMemoryPropertyFlags preferred_flags,
                      VmaAllocationCreateFlags allocation_flags)
{
  BLI_assert(!is_allocated());
  BLI_assert(vk_buffer_ == VK_NULL_HANDLE);
  BLI_assert(mapped_memory_ == nullptr);

  size_in_bytes_ = size_in_bytes;
  /* Round the actual allocation up to a 16 byte multiple (minimum 16) so callers don't
   * need to pad their own sizes. */
  alloc_size_in_bytes_ = ceil_to_multiple_ul(max_ulul(size_in_bytes_, 16), 16);

  VKDevice &device = VKBackend::get().device;
  VmaAllocator allocator = device.mem_allocator_get();
  VkBufferCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  create_info.flags = 0;
  /*
   * Vulkan doesn't allow empty buffers but some areas (DrawManager Instance data, PyGPU) create
   * them.
   */
  create_info.size = alloc_size_in_bytes_;
  create_info.usage = buffer_usage;
  /* We use the same command queue for the compute and graphics pipeline, so it is safe to use
   * exclusive resource handling. */
  create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  create_info.queueFamilyIndexCount = 1;
  const uint32_t queue_family_indices[1] = {device.queue_family_get()};
  create_info.pQueueFamilyIndices = queue_family_indices;

  VmaAllocationCreateInfo vma_create_info = {};
  vma_create_info.flags = allocation_flags;
  vma_create_info.priority = 1.0f;
  vma_create_info.requiredFlags = required_flags;
  vma_create_info.preferredFlags = preferred_flags;
  vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;

  VkResult result = vmaCreateBuffer(
      allocator, &create_info, &vma_create_info, &vk_buffer_, &allocation_, nullptr);
  if (result != VK_SUCCESS) {
    return false;
  }
  /* Track the buffer so the device can validate/cleanup resources. */
  device.resources.add_buffer(vk_buffer_);

  vmaGetAllocationMemoryProperties(allocator, allocation_, &vk_memory_property_flags_);
  /* Keep host-visible buffers persistently mapped for direct CPU access. */
  if (vk_memory_property_flags_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
    return map();
  }
  return true;
}
/**
 * Copy the whole logical buffer content from \a data through the persistent host mapping.
 * The buffer must be mapped (host visible).
 */
void VKBuffer::update_immediately(const void *data) const
{
  /* A full update is a sub-update covering the logical size, starting at offset zero. */
  update_sub_immediately(0, size_in_bytes_, data);
}
void VKBuffer::update_sub_immediately(size_t start_offset,
size_t data_size,
const void *data) const
{
BLI_assert_msg(is_mapped(), "Cannot update a non-mapped buffer.");
memcpy(static_cast<uint8_t *>(mapped_memory_) + start_offset, data, data_size);
}
/**
 * Schedule an update of the buffer content via the render graph.
 *
 * The update node maps to an inline buffer update (`vkCmdUpdateBuffer`), which Vulkan
 * limits to at most 65536 bytes and sizes that are a multiple of 4 — hence the assert.
 *
 * \param context: Context whose render graph receives the update node.
 * \param data: Source bytes covering `size_in_bytes_`.
 *     NOTE(review): presumably \a data must stay valid until the render graph executes
 *     the node — confirm against the render-graph node ownership rules.
 */
void VKBuffer::update_render_graph(VKContext &context, void *data) const
{
  /* Inline update limits: max 64KiB and 4 byte granularity. */
  BLI_assert(size_in_bytes_ <= 65536 && size_in_bytes_ % 4 == 0);
  render_graph::VKUpdateBufferNode::CreateInfo update_buffer = {};
  update_buffer.dst_buffer = vk_buffer_;
  update_buffer.data_size = size_in_bytes_;
  update_buffer.data = data;
  context.render_graph().add_node(update_buffer);
}
/**
 * Flush host writes to the device for non-coherent memory.
 * Flushes at least one byte so a zero logical size never produces an empty flush range.
 */
void VKBuffer::flush() const
{
  VmaAllocator allocator = VKBackend::get().device.mem_allocator_get();
  vmaFlushAllocation(allocator, allocation_, 0, max_ulul(size_in_bytes(), 1));
}
/**
 * Schedule filling the whole allocation with \a clear_value via the render graph.
 * The padded allocation size is used so the entire buffer (including alignment
 * padding) ends up cleared.
 */
void VKBuffer::clear(VKContext &context, uint32_t clear_value)
{
  render_graph::VKFillBufferNode::CreateInfo node_info = {};
  node_info.vk_buffer = vk_buffer_;
  node_info.data = clear_value;
  node_info.size = alloc_size_in_bytes_;
  context.render_graph().add_node(node_info);
}
/**
 * Submit all recorded work so the device-side content of this buffer becomes readable
 * from the host, without waiting for completion.
 *
 * The timeline value of the submission is stored in `async_timeline_`; `read_async`
 * waits on it before touching the mapped memory.
 *
 * \param context: Context whose render graph is submitted.
 */
void VKBuffer::async_flush_to_host(VKContext &context)
{
  /* A previous asynchronous flush must already have been consumed (see `read_async`). */
  BLI_assert(async_timeline_ == 0);
  context.rendering_end();
  context.descriptor_set_get().upload_descriptor_sets();
  /* Submit and start a fresh render graph; remember the timeline to wait on later. */
  async_timeline_ = context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
                                               RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
}
/**
 * Read back the buffer content made available by a previous `async_flush_to_host`.
 *
 * When no asynchronous flush is pending, one is issued first. Blocks until the
 * submission identified by `async_timeline_` finished on the device, then copies
 * `size_in_bytes_` bytes from the mapped memory into \a data.
 *
 * \param context: Context used in case a flush still has to be issued.
 * \param data: Destination; must be able to hold at least `size_in_bytes_` bytes.
 */
void VKBuffer::read_async(VKContext &context, void *data)
{
  BLI_assert_msg(is_mapped(), "Cannot read a non-mapped buffer.");
  if (async_timeline_ == 0) {
    async_flush_to_host(context);
  }
  VKDevice &device = VKBackend::get().device;
  device.wait_for_timeline(async_timeline_);
  /* Mark the pending flush as consumed so a new one can be started. */
  async_timeline_ = 0;
  memcpy(data, mapped_memory_, size_in_bytes_);
}
/**
 * Synchronously read back the buffer content.
 *
 * Submits all recorded work, waits for device completion, then copies
 * `size_in_bytes_` bytes from the mapped memory into \a data.
 *
 * \param context: Context whose render graph is submitted.
 * \param data: Destination; must be able to hold at least `size_in_bytes_` bytes.
 */
void VKBuffer::read(VKContext &context, void *data) const
{
  BLI_assert_msg(is_mapped(), "Cannot read a non-mapped buffer.");
  /* Not allowed while an asynchronous read-back is still pending. */
  BLI_assert(async_timeline_ == 0);
  context.rendering_end();
  context.descriptor_set_get().upload_descriptor_sets();
  context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
                             RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
                             RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
  memcpy(data, mapped_memory_, size_in_bytes_);
}
/**
 * Direct pointer to the host-mapped memory of this buffer.
 * Only valid while the buffer is mapped (host visible).
 */
void *VKBuffer::mapped_memory_get() const
{
  BLI_assert_msg(is_mapped(), "Cannot access a non-mapped buffer.");
  return mapped_memory_;
}
/** True when the buffer memory is currently mapped into host address space. */
bool VKBuffer::is_mapped() const
{
  return !(mapped_memory_ == nullptr);
}
bool VKBuffer::map()
{
BLI_assert(!is_mapped());
const VKDevice &device = VKBackend::get().device;
VmaAllocator allocator = device.mem_allocator_get();
VkResult result = vmaMapMemory(allocator, allocation_, &mapped_memory_);
return result == VK_SUCCESS;
}
void VKBuffer::unmap()
{
BLI_assert(is_mapped());
const VKDevice &device = VKBackend::get().device;
VmaAllocator allocator = device.mem_allocator_get();
vmaUnmapMemory(allocator, allocation_);
mapped_memory_ = nullptr;
}
/**
 * Release the buffer by handing its handles to the discard pool for deferred
 * destruction (contrast with `free_immediately`). Unmaps first when needed.
 *
 * \return always true.
 */
bool VKBuffer::free()
{
  if (is_mapped()) {
    unmap();
  }
  VKDiscardPool::discard_pool_get().discard_buffer(vk_buffer_, allocation_);
  /* Reset handles so `is_allocated` reports false from now on. */
  vk_buffer_ = VK_NULL_HANDLE;
  allocation_ = VK_NULL_HANDLE;
  return true;
}
/**
 * Destroy the buffer and its memory right away, bypassing the discard pool.
 * The buffer must still hold valid handles; it is unmapped first when needed.
 *
 * \param device: Device that tracks the buffer and owns the allocator.
 */
void VKBuffer::free_immediately(VKDevice &device)
{
  BLI_assert(vk_buffer_ != VK_NULL_HANDLE);
  BLI_assert(allocation_ != VK_NULL_HANDLE);
  if (is_mapped()) {
    unmap();
  }
  /* Stop tracking, then destroy buffer and allocation in one VMA call. */
  device.resources.remove_buffer(vk_buffer_);
  vmaDestroyBuffer(device.mem_allocator_get(), vk_buffer_, allocation_);
  /* Reset handles so `is_allocated` reports false from now on. */
  vk_buffer_ = VK_NULL_HANDLE;
  allocation_ = VK_NULL_HANDLE;
}
} // namespace blender::gpu