Vulkan: Bundle Calls in DrawList

A draw list bundles multiple draw commands for the same geometry
and sends them to the GPU in a single command. This reduces the
overhead of pipeline state checking and resource validation, and
keeps GPU utilization higher because more work is submitted at once.

Previously the draw list didn't bundle any commands and still sent
each call to the GPU separately. This PR implements the bundling of
the commands.

Pull Request: https://projects.blender.org/blender/blender/pulls/117548
This commit is contained in:
Jeroen Bakker
2024-01-26 17:45:18 +01:00
parent 3f0f26ee8a
commit ec80264d09
6 changed files with 149 additions and 11 deletions

View File

@@ -177,9 +177,9 @@ Batch *VKBackend::batch_alloc()
return new VKBatch();
}
/* Allocate a draw list that can bundle up to `list_length` indirect draw
 * commands before they are flushed to the GPU. */
DrawList *VKBackend::drawlist_alloc(int list_length)
{
  return new VKDrawList(list_length);
}
Fence *VKBackend::fence_alloc()

View File

@@ -31,4 +31,9 @@ class VKBatch : public Batch {
void draw_setup();
};
/* Cast a generic GPUBatch handle to the Vulkan backend implementation. */
BLI_INLINE VKBatch *unwrap(GPUBatch *batch)
{
  VKBatch *vk_batch = static_cast<VKBatch *>(batch);
  return vk_batch;
}
} // namespace blender::gpu

View File

@@ -8,15 +8,81 @@
#include "GPU_batch.h"
#include "vk_batch.hh"
#include "vk_common.hh"
#include "vk_drawlist.hh"
#include "vk_index_buffer.hh"
#include "vk_vertex_buffer.hh"
namespace blender::gpu {
void VKDrawList::append(GPUBatch *batch, int instance_first, int instance_count)
VKDrawList::VKDrawList(int list_length)
: command_buffer_(
list_length * sizeof(VkDrawIndexedIndirectCommand), GPU_USAGE_STREAM, __func__),
length_(list_length)
{
GPU_batch_draw_advanced(batch, 0, 0, instance_first, instance_count);
command_buffer_.ensure_allocated();
}
void VKDrawList::submit() {}
/**
 * Append a new draw command for the given batch to the draw list.
 *
 * \param gpu_batch: Batch to record a command for. When it differs from the batch of the
 *        previously recorded commands, the pending commands are flushed first, as a single
 *        multi-draw call can only reference one batch.
 * \param instance_first: First instance to draw.
 * \param instance_count: Number of instances to draw.
 */
void VKDrawList::append(GPUBatch *gpu_batch, int instance_first, int instance_count)
{
  /* Check for different batch. When batch is different the previous commands should be flushed
   * to the gpu. */
  VKBatch *batch = unwrap(gpu_batch);
  if (batch_ != batch) {
    submit();
    batch_ = batch;
  }

  /* Record the new command. */
  const VKIndexBuffer *index_buffer = batch_->index_buffer_get();
  const bool is_indexed = index_buffer != nullptr;
  if (is_indexed) {
    VkDrawIndexedIndirectCommand &command = get_command<VkDrawIndexedIndirectCommand>();
    /* `firstIndex` is the offset into the index buffer; `vertexOffset` is the value added to
     * each fetched index. The previous code had these two assignments swapped. */
    command.firstIndex = index_buffer->index_start_get();
    command.vertexOffset = index_buffer->index_base_get();
    command.indexCount = index_buffer->index_len_get();
    command.firstInstance = instance_first;
    command.instanceCount = instance_count;
  }
  else {
    const VKVertexBuffer *vertex_buffer = batch_->vertex_buffer_get(0);
    if (vertex_buffer == nullptr) {
      /* Without vertex data there is nothing to draw; drop the command and reset the batch. */
      batch_ = nullptr;
      return;
    }
    VkDrawIndirectCommand &command = get_command<VkDrawIndirectCommand>();
    command.vertexCount = vertex_buffer->vertex_len;
    command.instanceCount = instance_count;
    command.firstVertex = 0;
    command.firstInstance = instance_first;
  }
  command_index_++;

  /* Submit commands when command buffer is full. */
  if (command_index_ == length_) {
    submit();
  }
}
/**
 * Flush all recorded commands of the active batch as a single multi-draw-indirect call.
 *
 * Afterwards `command_index_` and `batch_` are always reset so the list is ready to record
 * commands for a new batch.
 */
void VKDrawList::submit()
{
  const bool has_commands = batch_ != nullptr && command_index_ != 0;
  if (has_commands) {
    const bool is_indexed = batch_->index_buffer_get() != nullptr;
    const auto stride = is_indexed ? sizeof(VkDrawIndexedIndirectCommand) :
                                     sizeof(VkDrawIndirectCommand);
    /* Make the host-written commands visible to the device before drawing. */
    command_buffer_.buffer_get().flush();
    batch_->multi_draw_indirect(wrap(wrap(&command_buffer_)), command_index_, 0, stride);
  }
  command_index_ = 0;
  batch_ = nullptr;
}
} // namespace blender::gpu

View File

@@ -10,12 +10,68 @@
#include "gpu_drawlist_private.hh"
#include "vk_storage_buffer.hh"
namespace blender::gpu {
class VKBatch;

/**
 * Vulkan implementation of a GPU draw list.
 *
 * Bundles multiple indirect draw commands for the same batch and submits them in a single
 * multi-draw-indirect call, reducing per-draw validation overhead.
 */
class VKDrawList : public DrawList {
 private:
  /**
   * Batch from which the commands are being recorded.
   */
  VKBatch *batch_ = nullptr;

  /**
   * Storage buffer containing the commands.
   *
   * The storage buffer is host visible and new commands are directly added to the buffer,
   * reducing the need to copy the commands from an intermediate buffer to the GPU. The commands
   * are only written once and used once.
   *
   * The data can be used to record `VkDrawIndirectCommand`s or `VkDrawIndexedIndirectCommand`s.
   */
  VKStorageBuffer command_buffer_;

  /**
   * Maximum number of commands that can be recorded per batch. Commands will be flushed when
   * this number of commands are added.
   */
  const int length_;

  /**
   * Current number of recorded commands.
   */
  int command_index_ = 0;

 public:
  /* `explicit`: a draw list must never be created by implicit conversion from an int. */
  explicit VKDrawList(int list_length);

  /**
   * Append a new command for the given batch to the draw list.
   *
   * Will flush when the batch is different than the previous one or when the `command_buffer_`
   * is full.
   */
  void append(GPUBatch *batch, int instance_first, int instance_count) override;

  /**
   * Submit buffered commands to the GPU.
   *
   * NOTE: after calling this method the `command_index_` and the `batch_` are reset.
   */
  void submit() override;

 private:
  /**
   * Retrieve the command slot to write to. The returned memory is part of the mapped memory of
   * the `command_buffer_`.
   */
  template<typename CommandType> CommandType &get_command() const
  {
    return MutableSpan<CommandType>(
        static_cast<CommandType *>(command_buffer_.buffer_get().mapped_memory_get()),
        length_)[command_index_];
  }
};
} // namespace blender::gpu

View File

@@ -38,12 +38,14 @@ void VKStorageBuffer::ensure_allocated()
void VKStorageBuffer::allocate()
{
const bool is_host_visible = false;
buffer_.create(size_in_bytes_,
usage_,
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
is_host_visible);
const bool is_host_visible = ELEM(usage_, GPU_USAGE_STREAM);
VkBufferUsageFlags buffer_usage_flags = ELEM(usage_, GPU_USAGE_STREAM) ?
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT :
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
buffer_.create(size_in_bytes_, usage_, buffer_usage_flags, is_host_visible);
debug::object_label(buffer_.vk_handle(), name_);
}

View File

@@ -49,6 +49,11 @@ class VKStorageBuffer : public StorageBuf, public VKBindableResource {
void ensure_allocated();
  /** Read-only access to the underlying Vulkan buffer. */
  const VKBuffer &buffer_get() const
  {
    return buffer_;
  }
private:
void allocate();
};
@@ -57,5 +62,9 @@ BLI_INLINE VKStorageBuffer *unwrap(StorageBuf *storage_buffer)
{
return static_cast<VKStorageBuffer *>(storage_buffer);
}
/* Cast a Vulkan storage buffer back to its generic GPU module base type. */
BLI_INLINE StorageBuf *wrap(VKStorageBuffer *storage_buffer)
{
  StorageBuf *buf = static_cast<StorageBuf *>(storage_buffer);
  return buf;
}
} // namespace blender::gpu