Vulkan: Add support for ReBAR
This adds initial support for ReBAR-capable platforms. When allocating buffers that do not need to be host visible, the backend now still tries to place them in host-visible memory. When there is space in that memory heap, the buffer is automatically mapped to host memory. For a newly created, mapped buffer the staging buffer can then be skipped.

To make better use of ReBAR, the `VKBuffer::create` function will need to be revisited: it currently hides too many options for allocating in the correct memory heap. That change is not part of this PR.

Rendering the first 50 frames of shader_balls.blend takes 1516 ms in main and 1416 ms with ReBAR.

```
Operating system: Linux-6.8.0-49-generic-x86_64-with-glibc2.39 64 Bits, X11 UI
Graphics card: AMD Radeon Pro W7700 (RADV NAVI32) Advanced Micro Devices radv Mesa 24.3.1 - kisak-mesa PPA
Vulkan Backend
```

Pull Request: https://projects.blender.org/blender/blender/pulls/131856
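As a rough sketch of the allocation pattern the diff below adopts (illustrative VMA code, not the Blender implementation, and simplified: the actual change keeps `VMA_MEMORY_USAGE_AUTO` together with the host-access flags visible in the first hunk), the idea is to require `DEVICE_LOCAL`, merely prefer `HOST_VISIBLE`, and then inspect the memory type the allocator actually picked to decide whether the buffer can be written through a persistent mapping instead of a staging copy. `make_rebar_buffer` and `ReBarBuffer` are hypothetical names used only for this sketch:

```
/* Minimal sketch, assuming an existing VmaAllocator. Not Blender code. */
#include <vk_mem_alloc.h>

struct ReBarBuffer {
  VkBuffer buffer = VK_NULL_HANDLE;
  VmaAllocation allocation = VK_NULL_HANDLE;
  void *mapped = nullptr; /* Non-null only when the allocation is host visible. */
};

static bool make_rebar_buffer(VmaAllocator allocator, VkDeviceSize size, ReBarBuffer &out)
{
  VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
  buffer_info.size = size;
  buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

  /* Usage is left at VMA_MEMORY_USAGE_UNKNOWN so only the required/preferred
   * flags steer the memory-type choice. Device-local is required; host-visible
   * is only preferred, so the allocation also succeeds on non-ReBAR devices. */
  VmaAllocationCreateInfo alloc_info = {};
  alloc_info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
  alloc_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

  if (vmaCreateBuffer(
          allocator, &buffer_info, &alloc_info, &out.buffer, &out.allocation, nullptr) !=
      VK_SUCCESS)
  {
    return false;
  }

  /* Check which memory type the allocator actually picked. Only when it is host
   * visible can the staging copy be skipped and a direct mapping be used. */
  VkMemoryPropertyFlags properties = 0;
  vmaGetAllocationMemoryProperties(allocator, out.allocation, &properties);
  if (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
    if (vmaMapMemory(allocator, out.allocation, &out.mapped) != VK_SUCCESS) {
      out.mapped = nullptr;
    }
  }
  return true;
}
```

On a ReBAR system the whole device-local heap is host visible, so the preferred flags are satisfied and the mapping path is taken; on other systems the allocation silently falls back to plain device-local memory and the staging path remains.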
```
@@ -40,9 +40,13 @@ static VmaAllocationCreateFlags vma_allocation_flags(GPUUsageType usage)
   return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
 }
 
-static VkMemoryPropertyFlags vma_preferred_flags()
+static VkMemoryPropertyFlags vma_preferred_flags(const bool is_host_visible)
 {
-  return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+  /* When is_host_visible is true, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in
+   * `vma_required_flags`. We set the reverse to support ReBAR. */
+  return is_host_visible ?
+             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT :
+             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
 }
 
 static VkMemoryPropertyFlags vma_required_flags(const bool is_host_visible)
@@ -55,12 +59,6 @@ bool VKBuffer::create(size_t size_in_bytes,
                       VkBufferUsageFlags buffer_usage,
                       const bool is_host_visible)
 {
-  /*
-   * TODO: Check which memory is selected and adjust the creation flag to add mapping. This way the
-   * staging buffer can be skipped, or in case of a vertex buffer an intermediate buffer can be
-   * removed.
-   */
-
   BLI_assert(!is_allocated());
   BLI_assert(vk_buffer_ == VK_NULL_HANDLE);
   BLI_assert(mapped_memory_ == nullptr);
@@ -89,7 +87,7 @@ bool VKBuffer::create(size_t size_in_bytes,
   vma_create_info.flags = vma_allocation_flags(usage);
   vma_create_info.priority = 1.0f;
   vma_create_info.requiredFlags = vma_required_flags(is_host_visible);
-  vma_create_info.preferredFlags = vma_preferred_flags();
+  vma_create_info.preferredFlags = vma_preferred_flags(is_host_visible);
   vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;
 
   VkResult result = vmaCreateBuffer(
@@ -100,7 +98,9 @@ bool VKBuffer::create(size_t size_in_bytes,
 
   device.resources.add_buffer(vk_buffer_);
 
-  if (is_host_visible) {
+  vmaGetAllocationMemoryProperties(allocator, allocation_, &vk_memory_property_flags_);
+
+  if (vk_memory_property_flags_ & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
     return map();
   }
   return true;
@@ -110,7 +110,6 @@ void VKBuffer::update_immediately(const void *data) const
 {
   BLI_assert_msg(is_mapped(), "Cannot update a non-mapped buffer.");
   memcpy(mapped_memory_, data, size_in_bytes_);
-  flush();
 }
 
 void VKBuffer::update_render_graph(VKContext &context, void *data) const
@@ -24,6 +24,8 @@ class VKBuffer : public NonCopyable {
   size_t size_in_bytes_ = 0;
   VkBuffer vk_buffer_ = VK_NULL_HANDLE;
   VmaAllocation allocation_ = VK_NULL_HANDLE;
+  VkMemoryPropertyFlags vk_memory_property_flags_;
+
   /* Pointer to the virtually mapped memory. */
   void *mapped_memory_ = nullptr;
 
@@ -33,10 +35,19 @@ class VKBuffer : public NonCopyable {
 
   /** Has this buffer been allocated? */
   bool is_allocated() const;
+
+  /**
+   * Allocate the buffer.
+   *
+   * When `is_host_visible` is set to true it will allocate from a host visible memory heap. When
+   * `is_host_visible` is false it will try to allocate from a host visible memory heap. When not
+   * available it will allocate from a not host visible memory heap. This is also known as
+   * Resizable BAR or ReBAR.
+   */
   bool create(size_t size,
               GPUUsageType usage,
               VkBufferUsageFlags buffer_usage,
-              bool is_host_visible = true);
+              bool is_host_visible);
   void clear(VKContext &context, uint32_t clear_value);
   void update_immediately(const void *data) const;
 
```
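The doc comment above spells out the new `VKBuffer::create` contract: `is_host_visible = true` still requires host-visible memory, while `false` only drops that requirement and lets the allocator prefer a host-visible, device-local heap when one exists. Below is a hedged caller-side sketch; the helper name and buffer usage bits are made up for illustration, and only `create`, `is_mapped`, `update_immediately` and the `VKStagingBuffer` API are taken from this diff:

```
/* Hypothetical caller-side sketch, not code from this PR. */
void upload_initial_data(VKBuffer &buffer, const void *data, size_t size, VKContext &context)
{
  buffer.create(size,
                GPU_USAGE_DEVICE_ONLY,
                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                false);

  if (buffer.is_mapped()) {
    /* ReBAR path: the allocation ended up in host-visible, device-local memory,
     * so the data can be written straight into the mapping. */
    buffer.update_immediately(data);
  }
  else {
    /* Fallback: copy through a staging buffer, mirroring the pattern used by
     * VKIndexBuffer::ensure_updated in this diff. */
    VKStagingBuffer staging_buffer(buffer, VKStagingBuffer::Direction::HostToDevice);
    staging_buffer.host_buffer_get().update_immediately(data);
    staging_buffer.copy_to_device(context);
  }
}
```

This is essentially the pattern the index-buffer and uniform-buffer hunks below implement, with the extra `data_uploaded_` flag ensuring the direct write only happens while the buffer is not yet in flight.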
```
@@ -196,7 +196,8 @@ void VKDevice::init_dummy_buffer()
 {
   dummy_buffer.create(sizeof(float4x4),
                       GPU_USAGE_DEVICE_ONLY,
-                      VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+                      VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                      true);
   debug::object_label(dummy_buffer.vk_handle(), "DummyBuffer");
   /* Default dummy buffer. Set the 4th element to 1 to fix missing orcos. */
   float data[16] = {
@@ -169,7 +169,8 @@ VKBuffer &VKImmediate::ensure_space(VkDeviceSize bytes_needed, VkDeviceSize offs
   result.create(alloc_size,
                 GPU_USAGE_DYNAMIC,
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
-                    VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+                    VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                true);
   debug::object_label(result.vk_handle(), "Immediate");
 
   return result;
@@ -29,11 +29,19 @@ void VKIndexBuffer::ensure_updated()
     return;
   }
 
-  VKContext &context = *VKContext::get();
-  VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
-  staging_buffer.host_buffer_get().update_immediately(data_);
-  staging_buffer.copy_to_device(context);
-  MEM_SAFE_FREE(data_);
+  if (!data_uploaded_ && buffer_.is_mapped()) {
+    buffer_.update_immediately(data_);
+    MEM_SAFE_FREE(data_);
+  }
+  else {
+    VKContext &context = *VKContext::get();
+    VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
+    staging_buffer.host_buffer_get().update_immediately(data_);
+    staging_buffer.copy_to_device(context);
+    MEM_SAFE_FREE(data_);
+  }
+
+  data_uploaded_ = true;
 }
 
 void VKIndexBuffer::upload_data()
@@ -16,6 +16,7 @@ namespace blender::gpu {
 
 class VKIndexBuffer : public IndexBuf {
   VKBuffer buffer_;
+  bool data_uploaded_ = false;
 
  public:
   void upload_data() override;
@@ -151,6 +151,7 @@ void VKPushConstants::update_uniform_buffer()
   BLI_assert(data_ != nullptr);
   VKContext &context = *VKContext::get();
   std::unique_ptr<VKUniformBuffer> &uniform_buffer = tracked_resource_for(context, is_dirty_);
+  uniform_buffer->reset_data_uploaded();
   uniform_buffer->update(data_);
   is_dirty_ = false;
 }
@@ -194,7 +194,8 @@ void VKTexture::read_sub(
   /* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. */
   VKBuffer staging_buffer;
   size_t device_memory_size = sample_len * to_bytesize(device_format_);
-  staging_buffer.create(device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+  staging_buffer.create(
+      device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_DST_BIT, true);
 
   render_graph::VKCopyImageToBufferNode::CreateInfo copy_image_to_buffer = {};
   render_graph::VKCopyImageToBufferNode::Data &node_data = copy_image_to_buffer.node_data;
@@ -300,7 +301,8 @@ void VKTexture::update_sub(
   }
 
   VKBuffer staging_buffer;
-  staging_buffer.create(device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
+  staging_buffer.create(
+      device_memory_size, GPU_USAGE_DYNAMIC, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, true);
   /* Rows are sequentially stored, when unpack row length is 0, or equal to the extent width. In
    * other cases we unpack the rows to reduce the size of the staging buffer and data transfer. */
   const uint texture_unpack_row_length =
```
```
@@ -21,11 +21,16 @@ void VKUniformBuffer::update(const void *data)
     allocate();
   }
 
-  /* TODO: when buffer is mapped and newly created we should use `buffer_.update_immediately`. */
-  void *data_copy = MEM_mallocN(size_in_bytes_, __func__);
-  memcpy(data_copy, data, size_in_bytes_);
-  VKContext &context = *VKContext::get();
-  buffer_.update_render_graph(context, data_copy);
+  if (!data_uploaded_ && buffer_.is_mapped()) {
+    buffer_.update_immediately(data);
+  }
+  else {
+    void *data_copy = MEM_mallocN(size_in_bytes_, __func__);
+    memcpy(data_copy, data, size_in_bytes_);
+    VKContext &context = *VKContext::get();
+    buffer_.update_render_graph(context, data_copy);
+  }
+  data_uploaded_ = true;
 }
 
 void VKUniformBuffer::allocate()
@@ -45,6 +50,7 @@ void VKUniformBuffer::clear_to_zero()
   }
   VKContext &context = *VKContext::get();
   buffer_.clear(context, 0);
+  data_uploaded_ = true;
 }
 
 void VKUniformBuffer::ensure_updated()
@@ -55,10 +61,17 @@ void VKUniformBuffer::ensure_updated()
 
   /* Upload attached data, during bind time. */
   if (data_) {
-    /* TODO: when buffer is mapped and newly created we should use `buffer_.update_immediately`. */
-    VKContext &context = *VKContext::get();
-    buffer_.update_render_graph(context, std::move(data_));
-    data_ = nullptr;
+    if (!data_uploaded_ && buffer_.is_mapped()) {
+      buffer_.update_immediately(data_);
+      MEM_freeN(data_);
+      data_ = nullptr;
+    }
+    else {
+      VKContext &context = *VKContext::get();
+      buffer_.update_render_graph(context, std::move(data_));
+      data_ = nullptr;
+    }
+    data_uploaded_ = true;
   }
 }
 
@@ -19,6 +19,12 @@ namespace blender::gpu {
 class VKUniformBuffer : public UniformBuf, NonCopyable {
   VKBuffer buffer_;
 
+  /**
+   * Has this uniform data already been fed with data. When so we are not allowed to directly
+   * overwrite the data as it could still be in use.
+   */
+  bool data_uploaded_ = false;
+
  public:
   VKUniformBuffer(size_t size, const char *name) : UniformBuf(size, name) {}
 
@@ -44,6 +50,15 @@ class VKUniformBuffer : public UniformBuf, NonCopyable {
 
   void ensure_updated();
 
+  /**
+   * Reset data uploaded flag. When the resource is sure it isn't used, the caller can call
+   * reset_data_uploaded so the next update can use ReBAR when available.
+   */
+  void reset_data_uploaded()
+  {
+    data_uploaded_ = false;
+  }
+
 private:
   void allocate();
 };
```
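The comment above describes why `reset_data_uploaded` exists: the direct ReBAR write is only safe while the GPU cannot still be reading the buffer, so the flag is cleared exactly when the owner knows the tracked resource is unused. A small hedged sketch of the intended call order (the function below is illustrative only; in this diff the real caller is `VKPushConstants::update_uniform_buffer`):

```
/* Sketch only: `uniform_buffer` stands in for the tracked VKUniformBuffer
 * returned by tracked_resource_for() in the push-constants hunk above. */
void update_push_constants(VKUniformBuffer &uniform_buffer, const void *data)
{
  /* The tracked resource is known to be unused at this point, so the uploaded
   * flag can be cleared... */
  uniform_buffer.reset_data_uploaded();

  /* ...which lets update() take the `!data_uploaded_ && buffer_.is_mapped()`
   * branch and memcpy straight into the ReBAR mapping. Subsequent updates of
   * the same buffer go through the render-graph upload until the flag is
   * reset again. */
  uniform_buffer.update(data);
}
```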
```
@@ -148,7 +148,7 @@ void VKVertexBuffer::upload_data()
 
   if (flag & GPU_VERTBUF_DATA_DIRTY) {
     device_format_ensure();
-    if (buffer_.is_mapped()) {
+    if (buffer_.is_mapped() && !data_uploaded_) {
       upload_data_direct(buffer_);
     }
     else {
@@ -158,6 +158,7 @@ void VKVertexBuffer::upload_data()
     if (usage_ == GPU_USAGE_STATIC) {
       MEM_SAFE_FREE(data_);
     }
+    data_uploaded_ = true;
 
     flag &= ~GPU_VERTBUF_DATA_DIRTY;
     flag |= GPU_VERTBUF_DATA_UPLOADED;
@@ -21,6 +21,7 @@ class VKVertexBuffer : public VertBuf {
   VkBufferView vk_buffer_view_ = VK_NULL_HANDLE;
 
   VertexFormatConverter vertex_format_converter;
+  bool data_uploaded_ = false;
 
  public:
   ~VKVertexBuffer();
```