Vulkan: Bundle Calls in DrawList

A draw list bundles multiple draw commands for the same geometry
and sends them to the GPU in a single command. This reduces the
overhead of pipeline state checking and resource validation, and
keeps GPU utilization higher because more work is submitted at once.

Previously the draw list didn't bundle any commands and still sent
each call to the GPU separately. This PR implements the bundling of
the commands.

Pull Request: https://projects.blender.org/blender/blender/pulls/117548
This commit is contained in:
Jeroen Bakker
2024-01-26 17:45:18 +01:00
parent 3f0f26ee8a
commit ec80264d09
6 changed files with 149 additions and 11 deletions

View File

@@ -177,9 +177,9 @@ Batch *VKBackend::batch_alloc()
return new VKBatch();
}
/* Allocate a draw list that can bundle up to `list_length` indirect draw
 * commands before they are flushed to the GPU. */
DrawList *VKBackend::drawlist_alloc(int list_length)
{
  return new VKDrawList(list_length);
}
Fence *VKBackend::fence_alloc()

View File

@@ -31,4 +31,9 @@ class VKBatch : public Batch {
void draw_setup();
};
/* Cast a generic GPUBatch handle to the Vulkan backend implementation. */
BLI_INLINE VKBatch *unwrap(GPUBatch *batch)
{
  VKBatch *vk_batch = static_cast<VKBatch *>(batch);
  return vk_batch;
}
} // namespace blender::gpu

View File

@@ -8,15 +8,81 @@
#include "GPU_batch.h"
#include "vk_batch.hh"
#include "vk_common.hh"
#include "vk_drawlist.hh"
#include "vk_index_buffer.hh"
#include "vk_vertex_buffer.hh"
namespace blender::gpu {
void VKDrawList::append(GPUBatch *batch, int instance_first, int instance_count)
VKDrawList::VKDrawList(int list_length)
: command_buffer_(
list_length * sizeof(VkDrawIndexedIndirectCommand), GPU_USAGE_STREAM, __func__),
length_(list_length)
{
GPU_batch_draw_advanced(batch, 0, 0, instance_first, instance_count);
command_buffer_.ensure_allocated();
}
void VKDrawList::submit() {}
/**
 * Append a new draw command for the given batch to the draw list.
 *
 * \param gpu_batch: Batch to record a command for. When it differs from the batch of the
 *        previously recorded commands, the pending commands are flushed first, as a single
 *        multi-draw call can only reference one batch.
 * \param instance_first: First instance to draw.
 * \param instance_count: Number of instances to draw.
 */
void VKDrawList::append(GPUBatch *gpu_batch, int instance_first, int instance_count)
{
  /* Check for different batch. When batch is different the previous commands should be flushed
   * to the gpu. */
  VKBatch *batch = unwrap(gpu_batch);
  if (batch_ != batch) {
    submit();
    batch_ = batch;
  }

  /* Record the new command. */
  const VKIndexBuffer *index_buffer = batch_->index_buffer_get();
  const bool is_indexed = index_buffer != nullptr;
  if (is_indexed) {
    VkDrawIndexedIndirectCommand &command = get_command<VkDrawIndexedIndirectCommand>();
    /* `firstIndex` is the offset into the index buffer; `vertexOffset` is the value added to
     * each fetched index. The previous code had these two assignments swapped. */
    command.firstIndex = index_buffer->index_start_get();
    command.vertexOffset = index_buffer->index_base_get();
    command.indexCount = index_buffer->index_len_get();
    command.firstInstance = instance_first;
    command.instanceCount = instance_count;
  }
  else {
    const VKVertexBuffer *vertex_buffer = batch_->vertex_buffer_get(0);
    if (vertex_buffer == nullptr) {
      /* Without vertex data there is nothing to draw; drop the command and reset the batch. */
      batch_ = nullptr;
      return;
    }
    VkDrawIndirectCommand &command = get_command<VkDrawIndirectCommand>();
    command.vertexCount = vertex_buffer->vertex_len;
    command.instanceCount = instance_count;
    command.firstVertex = 0;
    command.firstInstance = instance_first;
  }
  command_index_++;

  /* Submit commands when command buffer is full. */
  if (command_index_ == length_) {
    submit();
  }
}
/**
 * Flush all recorded commands of the active batch as a single multi-draw-indirect call.
 *
 * Afterwards `command_index_` and `batch_` are always reset so the list is ready to record
 * commands for a new batch.
 */
void VKDrawList::submit()
{
  const bool has_commands = batch_ != nullptr && command_index_ != 0;
  if (has_commands) {
    const bool is_indexed = batch_->index_buffer_get() != nullptr;
    const auto stride = is_indexed ? sizeof(VkDrawIndexedIndirectCommand) :
                                     sizeof(VkDrawIndirectCommand);
    /* Make the host-written commands visible to the device before drawing. */
    command_buffer_.buffer_get().flush();
    batch_->multi_draw_indirect(wrap(wrap(&command_buffer_)), command_index_, 0, stride);
  }
  command_index_ = 0;
  batch_ = nullptr;
}
} // namespace blender::gpu

View File

@@ -10,12 +10,68 @@
#include "gpu_drawlist_private.hh"
#include "vk_storage_buffer.hh"
namespace blender::gpu {
class VKBatch;

/**
 * Vulkan implementation of a GPU draw list.
 *
 * Bundles multiple indirect draw commands for the same batch and submits them in a single
 * multi-draw-indirect call, reducing per-draw validation overhead.
 */
class VKDrawList : public DrawList {
 private:
  /**
   * Batch from which the commands are being recorded.
   */
  VKBatch *batch_ = nullptr;

  /**
   * Storage buffer containing the commands.
   *
   * The storage buffer is host visible and new commands are directly added to the buffer,
   * reducing the need to copy the commands from an intermediate buffer to the GPU. The commands
   * are only written once and used once.
   *
   * The data can be used to record `VkDrawIndirectCommand`s or `VkDrawIndexedIndirectCommand`s.
   */
  VKStorageBuffer command_buffer_;

  /**
   * Maximum number of commands that can be recorded per batch. Commands will be flushed when
   * this number of commands are added.
   */
  const int length_;

  /**
   * Current number of recorded commands.
   */
  int command_index_ = 0;

 public:
  /* `explicit`: a draw list must never be created by implicit conversion from an int. */
  explicit VKDrawList(int list_length);

  /**
   * Append a new command for the given batch to the draw list.
   *
   * Will flush when the batch is different than the previous one or when the `command_buffer_`
   * is full.
   */
  void append(GPUBatch *batch, int instance_first, int instance_count) override;

  /**
   * Submit buffered commands to the GPU.
   *
   * NOTE: after calling this method the `command_index_` and the `batch_` are reset.
   */
  void submit() override;

 private:
  /**
   * Retrieve the command slot to write to. The returned memory is part of the mapped memory of
   * the `command_buffer_`.
   */
  template<typename CommandType> CommandType &get_command() const
  {
    return MutableSpan<CommandType>(
        static_cast<CommandType *>(command_buffer_.buffer_get().mapped_memory_get()),
        length_)[command_index_];
  }
};
} // namespace blender::gpu

View File

@@ -38,12 +38,14 @@ void VKStorageBuffer::ensure_allocated()
void VKStorageBuffer::allocate()
{
const bool is_host_visible = false;
buffer_.create(size_in_bytes_,
usage_,
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
is_host_visible);
const bool is_host_visible = ELEM(usage_, GPU_USAGE_STREAM);
VkBufferUsageFlags buffer_usage_flags = ELEM(usage_, GPU_USAGE_STREAM) ?
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT :
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
buffer_.create(size_in_bytes_, usage_, buffer_usage_flags, is_host_visible);
debug::object_label(buffer_.vk_handle(), name_);
}

View File

@@ -49,6 +49,11 @@ class VKStorageBuffer : public StorageBuf, public VKBindableResource {
void ensure_allocated();
  /** Read-only access to the underlying Vulkan buffer. */
  const VKBuffer &buffer_get() const
  {
    return buffer_;
  }
private:
void allocate();
};
@@ -57,5 +62,9 @@ BLI_INLINE VKStorageBuffer *unwrap(StorageBuf *storage_buffer)
{
return static_cast<VKStorageBuffer *>(storage_buffer);
}
/* Cast a Vulkan storage buffer back to its generic GPU module base type. */
BLI_INLINE StorageBuf *wrap(VKStorageBuffer *storage_buffer)
{
  StorageBuf *buf = static_cast<StorageBuf *>(storage_buffer);
  return buf;
}
} // namespace blender::gpu