Fix: Vulkan: Performance regression in text drawing
7becc38a3c introduced new text rendering. In that refactoring the
vertex buffer was replaced by a simpler storage buffer. However, the
refactoring dropped an optimization the Vulkan backend relies on:
transferring only the number of bytes actually used by the draw call.
This resulted in overly large data transfers. For example, the text
editor renders text one glyph at a time, but the storage buffer would
always upload the data for 1024 glyphs.
This PR allows the usage size of a storage buffer to be set, so that
data transfers are limited to the part of the buffer that is actually
in use.

The optimization was not added to the OpenGL backend, as drawing became
incorrect there.

The issue was detected while researching !146956. That PR will
implement better data streaming inside the Vulkan backend and also
needs to know the actual usage size of a buffer to detect which data
can be grouped together.
Pull Request: https://projects.blender.org/blender/blender/pulls/146958
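Condensed from the diff below, the new call pattern looks as follows;
`g_batch.glyph_data`, `g_batch.glyph_len` and `GlyphQuad` are the BLF
batch state from the patch:

/* Create the buffer once; stream usage hints that it is re-uploaded often. */
g_batch.glyph_buf = GPU_storagebuf_create_ex(
    sizeof(g_batch.glyph_data), nullptr, GPU_USAGE_STREAM, __func__);

/* Per draw: clamp the transfer to the filled prefix, then upload and bind. */
GPU_storagebuf_usage_size_set(g_batch.glyph_buf,
                              size_t(g_batch.glyph_len) * sizeof(GlyphQuad));
GPU_storagebuf_update(g_batch.glyph_buf, g_batch.glyph_data);
GPU_storagebuf_bind(g_batch.glyph_buf, 0);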
@@ -187,7 +187,8 @@ static ft_pix blf_unscaled_F26Dot6_to_pixels(FontBLF *font, const FT_Pos value)
  */
 static void blf_batch_draw_init()
 {
-  g_batch.glyph_buf = GPU_storagebuf_create(sizeof(g_batch.glyph_data));
+  g_batch.glyph_buf = GPU_storagebuf_create_ex(
+      sizeof(g_batch.glyph_data), nullptr, GPU_USAGE_STREAM, __func__);
   g_batch.glyph_len = 0;
   /* We render a quad as a triangle strip and instance it for each glyph. */
   g_batch.batch = GPU_batch_create_procedural(GPU_PRIM_TRI_STRIP, 4);
@@ -315,6 +316,7 @@ void blf_batch_draw()
   }

   blender::gpu::Texture *texture = blf_batch_cache_texture_load();
+  GPU_storagebuf_usage_size_set(g_batch.glyph_buf, size_t(g_batch.glyph_len) * sizeof(GlyphQuad));
   GPU_storagebuf_update(g_batch.glyph_buf, g_batch.glyph_data);
   GPU_storagebuf_bind(g_batch.glyph_buf, 0);
@@ -31,6 +31,12 @@ blender::gpu::StorageBuf *GPU_storagebuf_create_ex(size_t size,

 void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo);

+/**
+ * Limit the size of the storage buffer.
+ *
+ * Backends can optimize data transfers using the size that is actually used.
+ */
+void GPU_storagebuf_usage_size_set(blender::gpu::StorageBuf *ssbo, size_t size);
 void GPU_storagebuf_update(blender::gpu::StorageBuf *ssbo, const void *data);

 void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot);
@@ -29,7 +29,7 @@ namespace blender::gpu {

 StorageBuf::StorageBuf(size_t size, const char *name)
 {
-  size_in_bytes_ = size;
+  size_in_bytes_ = usage_size_in_bytes_ = size;
   STRNCPY(name_, name);
 }

@@ -38,6 +38,12 @@ StorageBuf::~StorageBuf()
   MEM_SAFE_FREE(data_);
 }

+void StorageBuf::usage_size_set(size_t usage_size)
+{
+  BLI_assert(usage_size <= size_in_bytes_);
+  usage_size_in_bytes_ = usage_size;
+}
+
 }  // namespace blender::gpu

 /** \} */
@@ -73,6 +79,11 @@ void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
   delete ssbo;
 }

+void GPU_storagebuf_usage_size_set(blender::gpu::StorageBuf *ssbo, size_t usage_size)
+{
+  ssbo->usage_size_set(usage_size);
+}
+
 void GPU_storagebuf_update(blender::gpu::StorageBuf *ssbo, const void *data)
 {
   ssbo->update(data);
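A minimal sketch of the contract enforced by `usage_size_set()`: the
usage size can shrink a transfer but may never exceed the allocation.
The buffer and element counts here are hypothetical:

blender::gpu::StorageBuf *ssbo = GPU_storagebuf_create_ex(
    1024 * sizeof(GlyphQuad), nullptr, GPU_USAGE_STREAM, __func__);

/* Valid: only 16 quads are filled, so only 16 are transferred. */
GPU_storagebuf_usage_size_set(ssbo, 16 * sizeof(GlyphQuad));

/* Invalid: exceeds the allocation, trips BLI_assert(usage_size <= size_in_bytes_). */
// GPU_storagebuf_usage_size_set(ssbo, 2048 * sizeof(GlyphQuad));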
@@ -29,6 +29,7 @@ class StorageBuf {
 protected:
   /** Data size in bytes. Doesn't need to match actual allocation size due to alignment rules. */
   size_t size_in_bytes_ = -1;
+  size_t usage_size_in_bytes_ = -1;
   /** Continuous memory block to copy to GPU. This data is owned by the StorageBuf. */
   void *data_ = nullptr;
   /** Debugging name */
@@ -37,7 +38,11 @@ class StorageBuf {
 public:
   StorageBuf(size_t size, const char *name);
   virtual ~StorageBuf();

+  void usage_size_set(size_t size);
+  size_t usage_size_get() const
+  {
+    return usage_size_in_bytes_;
+  }
   virtual void update(const void *data) = 0;
   virtual void bind(int slot) = 0;
   virtual void unbind() = 0;
@@ -235,7 +235,7 @@ void VKDescriptorSetUpdator::bind_storage_buffer_resource(
       VKStorageBuffer *storage_buffer = static_cast<VKStorageBuffer *>(elem.resource);
       storage_buffer->ensure_allocated();
       vk_buffer = storage_buffer->vk_handle();
-      vk_device_size = storage_buffer->size_in_bytes();
+      vk_device_size = storage_buffer->usage_size_get();
      vk_device_address = storage_buffer->device_address_get();
       break;
     }
@@ -36,7 +36,8 @@ void VKStorageBuffer::update(const void *data)
 {
   VKContext &context = *VKContext::get();
   ensure_allocated();
-  VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
+  VKStagingBuffer staging_buffer(
+      buffer_, VKStagingBuffer::Direction::HostToDevice, 0, usage_size_in_bytes_);
   VKBuffer &buffer = staging_buffer.host_buffer_get();
   if (buffer.is_allocated()) {
     buffer.update_immediately(data);
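Conceptually, passing `usage_size_in_bytes_` into the staging buffer
means the host-to-device copy covers only the used prefix of the
buffer. In plain Vulkan terms (an illustration, not Blender's code),
the clamped transfer boils down to:

/* Hypothetical plain-Vulkan sketch: copy only the bytes in use. */
VkBufferCopy region = {};
region.srcOffset = 0;     /* Start of the staging (host) buffer. */
region.dstOffset = 0;     /* Start of the device-local buffer. */
region.size = usage_size; /* Bytes actually used, not the full allocation. */
vkCmdCopyBuffer(command_buffer, staging_vk_buffer, device_vk_buffer, 1, &region);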