From a0370d2fe3d15a21d452b7900f5e6c1330bcafab Mon Sep 17 00:00:00 2001
From: Jeroen Bakker <jeroen@blender.org>
Date: Mon, 29 Sep 2025 15:55:31 +0200
Subject: [PATCH] Fix: Vulkan: Performance regression text drawing

7becc38a3cbbd658aff066a6d8b2c35a190ce632 introduced new text rendering.
In that refactoring the vertex buffer was replaced by a more shallow
storage buffer. However, the refactoring removed one optimization that
the Vulkan backend relies on: knowing the actual number of bytes used
by the draw call. This resulted in overly large data transfers.

For example, in the text editor the text is rendered one glyph at a
time, but the storage buffer would always upload the data for 1024
glyphs.

This PR allows the usage size of a storage buffer to be set, so that
data transfers are limited to the bytes that are actually used.

The usage size is not applied in the OpenGL backend, as doing so
resulted in incorrect drawing.

Issue detected during the research for !146956. That PR will implement
better data streaming inside the Vulkan backend and also needs to know
the actual usage size of the buffer to detect what data can be grouped
together.

Pull Request: https://projects.blender.org/blender/blender/pulls/146958
---
 source/blender/blenfont/intern/blf_font.cc          |  4 +++-
 source/blender/gpu/GPU_storage_buffer.hh            |  6 ++++++
 source/blender/gpu/intern/gpu_storage_buffer.cc     | 13 ++++++++++++-
 .../gpu/intern/gpu_storage_buffer_private.hh        |  7 ++++++-
 source/blender/gpu/vulkan/vk_descriptor_set.cc      |  2 +-
 source/blender/gpu/vulkan/vk_storage_buffer.cc      |  3 ++-
 6 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/source/blender/blenfont/intern/blf_font.cc b/source/blender/blenfont/intern/blf_font.cc
index bc89b8683fc..22348399c76 100644
--- a/source/blender/blenfont/intern/blf_font.cc
+++ b/source/blender/blenfont/intern/blf_font.cc
@@ -187,7 +187,8 @@ static ft_pix blf_unscaled_F26Dot6_to_pixels(FontBLF *font, const FT_Pos value)
  */
 static void blf_batch_draw_init()
 {
-  g_batch.glyph_buf = GPU_storagebuf_create(sizeof(g_batch.glyph_data));
+  g_batch.glyph_buf = GPU_storagebuf_create_ex(
+      sizeof(g_batch.glyph_data), nullptr, GPU_USAGE_STREAM, __func__);
   g_batch.glyph_len = 0;
   /* We render a quad as a triangle strip and instance it for each glyph. */
   g_batch.batch = GPU_batch_create_procedural(GPU_PRIM_TRI_STRIP, 4);
@@ -315,6 +316,7 @@ void blf_batch_draw()
   }
 
   blender::gpu::Texture *texture = blf_batch_cache_texture_load();
+  GPU_storagebuf_usage_size_set(g_batch.glyph_buf, size_t(g_batch.glyph_len) * sizeof(GlyphQuad));
   GPU_storagebuf_update(g_batch.glyph_buf, g_batch.glyph_data);
   GPU_storagebuf_bind(g_batch.glyph_buf, 0);
 
diff --git a/source/blender/gpu/GPU_storage_buffer.hh b/source/blender/gpu/GPU_storage_buffer.hh
index b06be3fae45..828977ed342 100644
--- a/source/blender/gpu/GPU_storage_buffer.hh
+++ b/source/blender/gpu/GPU_storage_buffer.hh
@@ -31,6 +31,12 @@ blender::gpu::StorageBuf *GPU_storagebuf_create_ex(size_t size,
 
 void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo);
 
+/**
+ * Set the number of bytes of the storage buffer that are actually in use.
+ *
+ * Backends can optimize data transfers using the size that is actually used.
+ */
+void GPU_storagebuf_usage_size_set(blender::gpu::StorageBuf *ssbo, size_t size);
 void GPU_storagebuf_update(blender::gpu::StorageBuf *ssbo, const void *data);
 
 void GPU_storagebuf_bind(blender::gpu::StorageBuf *ssbo, int slot);
diff --git a/source/blender/gpu/intern/gpu_storage_buffer.cc b/source/blender/gpu/intern/gpu_storage_buffer.cc
index e0336385a73..f56a5b6a933 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer.cc
+++ b/source/blender/gpu/intern/gpu_storage_buffer.cc
@@ -29,7 +29,7 @@ namespace blender::gpu {
 
 StorageBuf::StorageBuf(size_t size, const char *name)
 {
-  size_in_bytes_ = size;
+  size_in_bytes_ = usage_size_in_bytes_ = size;
   STRNCPY(name_, name);
 }
 
@@ -38,6 +38,12 @@ StorageBuf::~StorageBuf()
   MEM_SAFE_FREE(data_);
 }
 
+void StorageBuf::usage_size_set(size_t usage_size)
+{
+  BLI_assert(usage_size <= size_in_bytes_);
+  usage_size_in_bytes_ = usage_size;
+}
+
 }  // namespace blender::gpu
 
 /** \} */
@@ -73,6 +79,11 @@ void GPU_storagebuf_free(blender::gpu::StorageBuf *ssbo)
   delete ssbo;
 }
 
+void GPU_storagebuf_usage_size_set(blender::gpu::StorageBuf *ssbo, size_t usage_size)
+{
+  ssbo->usage_size_set(usage_size);
+}
+
 void GPU_storagebuf_update(blender::gpu::StorageBuf *ssbo, const void *data)
 {
   ssbo->update(data);
diff --git a/source/blender/gpu/intern/gpu_storage_buffer_private.hh b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
index c17eb6c0912..33e233ba1e1 100644
--- a/source/blender/gpu/intern/gpu_storage_buffer_private.hh
+++ b/source/blender/gpu/intern/gpu_storage_buffer_private.hh
@@ -29,6 +29,7 @@ class StorageBuf {
  protected:
   /** Data size in bytes. Doesn't need to match actual allocation size due to alignment rules. */
   size_t size_in_bytes_ = -1;
+  size_t usage_size_in_bytes_ = -1;
   /** Continuous memory block to copy to GPU. This data is owned by the StorageBuf. */
   void *data_ = nullptr;
   /** Debugging name */
@@ -37,7 +38,11 @@ class StorageBuf {
  public:
   StorageBuf(size_t size, const char *name);
   virtual ~StorageBuf();
-
+  void usage_size_set(size_t size);
+  size_t usage_size_get() const
+  {
+    return usage_size_in_bytes_;
+  }
   virtual void update(const void *data) = 0;
   virtual void bind(int slot) = 0;
   virtual void unbind() = 0;
diff --git a/source/blender/gpu/vulkan/vk_descriptor_set.cc b/source/blender/gpu/vulkan/vk_descriptor_set.cc
index edb8e815992..c393003c785 100644
--- a/source/blender/gpu/vulkan/vk_descriptor_set.cc
+++ b/source/blender/gpu/vulkan/vk_descriptor_set.cc
@@ -235,7 +235,7 @@ void VKDescriptorSetUpdator::bind_storage_buffer_resource(
       VKStorageBuffer *storage_buffer = static_cast<VKStorageBuffer *>(elem.resource);
       storage_buffer->ensure_allocated();
       vk_buffer = storage_buffer->vk_handle();
-      vk_device_size = storage_buffer->size_in_bytes();
+      vk_device_size = storage_buffer->usage_size_get();
       vk_device_address = storage_buffer->device_address_get();
       break;
     }
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.cc b/source/blender/gpu/vulkan/vk_storage_buffer.cc
index 40e3359bf26..6d15da270d0 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.cc
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.cc
@@ -36,7 +36,8 @@ void VKStorageBuffer::update(const void *data)
 {
   VKContext &context = *VKContext::get();
   ensure_allocated();
-  VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
+  VKStagingBuffer staging_buffer(
+      buffer_, VKStagingBuffer::Direction::HostToDevice, 0, usage_size_in_bytes_);
   VKBuffer &buffer = staging_buffer.host_buffer_get();
   if (buffer.is_allocated()) {
     buffer.update_immediately(data);