From 7becc38a3cbbd658aff066a6d8b2c35a190ce632 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= <fclem@noreply.localhost>
Date: Wed, 27 Aug 2025 14:52:19 +0200
Subject: [PATCH] GPU: Make text rendering not use instance buffer

Use SSBO loads instead.

Add a new `GlyphQuad` interface.

Note that this reduces the maximum glyph batch size (from 2048 to
128 glyphs), since the whole buffer is now uploaded on every draw.
This can be improved with partial updates later on if it causes a
significant performance regression.

The motivation for this is to remove the instance buffer from
the batch API.

Pull Request: https://projects.blender.org/blender/blender/pulls/145225
---
 source/blender/blenfont/intern/blf_font.cc    | 45 ++++---------------
 source/blender/blenfont/intern/blf_glyph.cc   | 20 ++++-----
 .../blenfont/intern/blf_internal_types.hh     | 13 +++---
 source/blender/gpu/GPU_shader_shared.hh       | 13 +++++-
 source/blender/gpu/GPU_vertex_buffer.hh       | 13 ++++--
 .../gpu/shaders/gpu_shader_text_vert.glsl     | 13 +++---
 .../gpu/shaders/infos/gpu_shader_text_info.hh |  7 +--
 7 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/source/blender/blenfont/intern/blf_font.cc b/source/blender/blenfont/intern/blf_font.cc
index 195b61fb986..67bb440fe96 100644
--- a/source/blender/blenfont/intern/blf_font.cc
+++ b/source/blender/blenfont/intern/blf_font.cc
@@ -187,40 +187,18 @@ static ft_pix blf_unscaled_F26Dot6_to_pixels(FontBLF *font, FT_Pos value)
  */
 static void blf_batch_draw_init()
 {
-  GPUVertFormat format = {0};
-  g_batch.pos_loc = GPU_vertformat_attr_add(
-      &format, "pos", blender::gpu::VertAttrType::SFLOAT_32_32_32_32);
-  g_batch.col_loc = GPU_vertformat_attr_add(
-      &format, "col", blender::gpu::VertAttrType::UNORM_8_8_8_8);
-  g_batch.offset_loc = GPU_vertformat_attr_add(
-      &format, "offset", blender::gpu::VertAttrType::SINT_32);
-  g_batch.glyph_size_loc = GPU_vertformat_attr_add(
-      &format, "glyph_size", blender::gpu::VertAttrType::SINT_32_32);
-  g_batch.glyph_flags_loc = GPU_vertformat_attr_add(
-      &format, "flags", blender::gpu::VertAttrType::UINT_32);
-
-  g_batch.verts = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_STREAM);
-  GPU_vertbuf_data_alloc(*g_batch.verts, BLF_BATCH_DRAW_LEN_MAX);
-
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.pos_loc, &g_batch.pos_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.col_loc, &g_batch.col_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.offset_loc, &g_batch.offset_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_size_loc, &g_batch.glyph_size_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_flags_loc, &g_batch.glyph_flags_step);
+  g_batch.glyph_buf = GPU_storagebuf_create(sizeof(g_batch.glyph_data));
   g_batch.glyph_len = 0;
-
-  /* A dummy VBO containing 4 points, attributes are not used. */
-  blender::gpu::VertBuf *vbo = GPU_vertbuf_create_with_format(format);
-  GPU_vertbuf_data_alloc(*vbo, 4);
-
   /* We render a quad as a triangle strip and instance it for each glyph. */
-  g_batch.batch = GPU_batch_create_ex(GPU_PRIM_TRI_STRIP, vbo, nullptr, GPU_BATCH_OWNS_VBO);
-  GPU_batch_instbuf_set(g_batch.batch, g_batch.verts, true);
+  g_batch.batch = GPU_batch_create_procedural(GPU_PRIM_TRI_STRIP, 4);
 }
 
 static void blf_batch_draw_exit()
 {
   GPU_BATCH_DISCARD_SAFE(g_batch.batch);
+  if (g_batch.glyph_buf) {
+    GPU_storagebuf_free(g_batch.glyph_buf);
+  }
 }
 
 void blf_batch_draw_begin(FontBLF *font)
@@ -337,8 +315,8 @@ void blf_batch_draw()
   }
 
   blender::gpu::Texture *texture = blf_batch_cache_texture_load();
-  GPU_vertbuf_data_len_set(*g_batch.verts, g_batch.glyph_len);
-  GPU_vertbuf_use(g_batch.verts); /* Send data. */
+  GPU_storagebuf_update(g_batch.glyph_buf, g_batch.glyph_data);
+  GPU_storagebuf_bind(g_batch.glyph_buf, 0);
 
   GPU_batch_program_set_builtin(g_batch.batch, GPU_SHADER_TEXT);
   GPU_batch_texture_bind(g_batch.batch, "glyph", texture);
@@ -348,18 +326,11 @@ void blf_batch_draw()
   int width_shift = 31 - bitscan_reverse_i(tex_width);
   GPU_batch_uniform_1i(g_batch.batch, "glyph_tex_width_mask", tex_width - 1);
   GPU_batch_uniform_1i(g_batch.batch, "glyph_tex_width_shift", width_shift);
-  GPU_batch_draw(g_batch.batch);
+  GPU_batch_draw_advanced(g_batch.batch, 0, 4, 0, g_batch.glyph_len);
 
   GPU_blend(GPU_BLEND_NONE);
 
   GPU_texture_unbind(texture);
-
-  /* Restart to 1st vertex data pointers. */
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.pos_loc, &g_batch.pos_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.col_loc, &g_batch.col_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.offset_loc, &g_batch.offset_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_size_loc, &g_batch.glyph_size_step);
-  GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_flags_loc, &g_batch.glyph_flags_step);
   g_batch.glyph_len = 0;
 }
 
diff --git a/source/blender/blenfont/intern/blf_glyph.cc b/source/blender/blenfont/intern/blf_glyph.cc
index 15bcd97d536..2db5e4b22f2 100644
--- a/source/blender/blenfont/intern/blf_glyph.cc
+++ b/source/blender/blenfont/intern/blf_glyph.cc
@@ -1465,20 +1465,18 @@ static void blf_texture_draw(const GlyphBLF *g,
                              const int x2,
                              const int y2)
 {
+  using namespace blender;
+  BLI_assert(size_t(g_batch.glyph_len) < ARRAY_SIZE(g_batch.glyph_data));
+  GlyphQuad &glyph_data = g_batch.glyph_data[g_batch.glyph_len++];
   /* One vertex per glyph, instancing expands it into a quad. */
-  copy_v4_fl4(static_cast<float *>(GPU_vertbuf_raw_step(&g_batch.pos_step)),
-              float(x1 + g_batch.ofs[0]),
-              float(y1 + g_batch.ofs[1]),
-              float(x2 + g_batch.ofs[0]),
-              float(y2 + g_batch.ofs[1]));
-  copy_v4_v4_uchar(static_cast<uchar *>(GPU_vertbuf_raw_step(&g_batch.col_step)), color);
-  copy_v2_v2_int(static_cast<int *>(GPU_vertbuf_raw_step(&g_batch.glyph_size_step)), g->dims);
-  *((int *)GPU_vertbuf_raw_step(&g_batch.offset_step)) = g->offset;
+  glyph_data.position = int4(
+      x1 + g_batch.ofs[0], y1 + g_batch.ofs[1], x2 + g_batch.ofs[0], y2 + g_batch.ofs[1]);
+  glyph_data.glyph_color = float4(UNPACK4(color)) / 255.0f;
+  glyph_data.glyph_size = int2(g->dims);
+  glyph_data.offset = g->offset;
   /* Glyph flags packs color channel count and shadow type. */
-  uint32_t flags = uint32_t(shadow) | (uint32_t(g->num_channels) << 4);
-  *((uint32_t *)GPU_vertbuf_raw_step(&g_batch.glyph_flags_step)) = flags;
+  glyph_data.flags = uint32_t(shadow) | (uint32_t(g->num_channels) << 4);
 
-  g_batch.glyph_len++;
   /* Flush cache if it's full. */
   if (g_batch.glyph_len == BLF_BATCH_DRAW_LEN_MAX) {
     blf_batch_draw();
diff --git a/source/blender/blenfont/intern/blf_internal_types.hh b/source/blender/blenfont/intern/blf_internal_types.hh
index af0ea2cc2a7..507429257e3 100644
--- a/source/blender/blenfont/intern/blf_internal_types.hh
+++ b/source/blender/blenfont/intern/blf_internal_types.hh
@@ -19,8 +19,9 @@
 #include "BLI_mutex.hh"
 #include "BLI_vector.hh"
 
+#include "GPU_shader_shared.hh"
+#include "GPU_storage_buffer.hh"
 #include "GPU_texture.hh"
-#include "GPU_vertex_buffer.hh"
 
 #include <ft2build.h>
 
@@ -99,7 +100,7 @@ inline ft_pix ft_pix_from_float(float v)
 
 /** \} */
 
-#define BLF_BATCH_DRAW_LEN_MAX 2048 /* in glyph */
+#define BLF_BATCH_DRAW_LEN_MAX 128 /* in glyph */
 
 /** Number of characters in #KerningCacheBLF.table. */
 #define KERNING_CACHE_TABLE_SIZE 128
@@ -111,16 +112,16 @@ struct BatchBLF {
   /** Can only batch glyph from the same font. */
   FontBLF *font;
   blender::gpu::Batch *batch;
-  blender::gpu::VertBuf *verts;
-  GPUVertBufRaw pos_step, col_step, offset_step, glyph_size_step, glyph_flags_step;
-  unsigned int pos_loc, col_loc, offset_loc, glyph_size_loc, glyph_flags_loc;
-  unsigned int glyph_len;
+  blender::gpu::StorageBuf *glyph_buf;
+  int glyph_len;
   /** Copy of `font->pos`. */
   int ofs[2];
   /** Previous call `modelmatrix`. */
   float mat[4][4];
   bool enabled, active, simple_shader;
   GlyphCacheBLF *glyph_cache;
+
+  GlyphQuad glyph_data[BLF_BATCH_DRAW_LEN_MAX];
 };
 
 extern BatchBLF g_batch;
diff --git a/source/blender/gpu/GPU_shader_shared.hh b/source/blender/gpu/GPU_shader_shared.hh
index c06c952f18a..5f068f72282 100644
--- a/source/blender/gpu/GPU_shader_shared.hh
+++ b/source/blender/gpu/GPU_shader_shared.hh
@@ -6,8 +6,9 @@
  * \ingroup gpu
  */
 
+#pragma once
+
 #ifndef USE_GPU_SHADER_CREATE_INFO
-#  pragma once
 
 #  include "GPU_shader_shared_utils.hh"
 
@@ -128,6 +129,16 @@ enum eGPUSeqFlags : uint32_t {
   GPU_SEQ_FLAG_ANY_HANDLE = GPU_SEQ_FLAG_SELECTED_LH | GPU_SEQ_FLAG_SELECTED_RH
 };
 
+/* Glyph for text rendering. */
+struct GlyphQuad {
+  int4 position;
+  float4 glyph_color; /* Cannot be named `color` because of Metal macros. */
+  int2 glyph_size;
+  int offset;
+  uint flags;
+};
+BLI_STATIC_ASSERT_ALIGN(GlyphQuad, 16)
+
 /* VSE per-strip data for timeline rendering. */
 struct SeqStripDrawData {
   /* Horizontal strip positions (1.0 is one frame). */
diff --git a/source/blender/gpu/GPU_vertex_buffer.hh b/source/blender/gpu/GPU_vertex_buffer.hh
index 2157b13ccd6..8d41d883bfd 100644
--- a/source/blender/gpu/GPU_vertex_buffer.hh
+++ b/source/blender/gpu/GPU_vertex_buffer.hh
@@ -118,15 +118,22 @@ class VertBuf {
   VertBuf();
   virtual ~VertBuf();
 
-  template<typename T> static VertBufPtr from_size(const int size)
+  template<typename FormatT>
+  static VertBufPtr from_size_with_format(const int size, GPUUsageType usage = GPU_USAGE_STATIC)
   {
     BLI_assert(size > 0);
-    VertBufPtr buf = VertBufPtr(GPU_vertbuf_create_with_format(GenericVertexFormat<T>::format()));
+    VertBufPtr buf = VertBufPtr(GPU_vertbuf_create_with_format_ex(FormatT::format(), usage));
     /* GPU formats needs to be aligned to 4 bytes. */
-    buf->allocate(ceil_to_multiple_u(size * sizeof(T), 4) / sizeof(GenericVertexFormat<T>));
+    buf->allocate(ceil_to_multiple_u(size * sizeof(FormatT), 4) / sizeof(FormatT));
     return buf;
   }
 
+  template<typename T>
+  static VertBufPtr from_size(const int size, GPUUsageType usage = GPU_USAGE_STATIC)
+  {
+    return from_size_with_format<GenericVertexFormat<T>>(size, usage);
+  }
+
   template<typename T> static VertBufPtr from_span(const Span<T> data)
   {
     BLI_assert(!data.is_empty());
diff --git a/source/blender/gpu/shaders/gpu_shader_text_vert.glsl b/source/blender/gpu/shaders/gpu_shader_text_vert.glsl
index ee420e93b72..087acb47765 100644
--- a/source/blender/gpu/shaders/gpu_shader_text_vert.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_text_vert.glsl
@@ -8,13 +8,15 @@ VERTEX_SHADER_CREATE_INFO(gpu_shader_text)
 
 void main()
 {
-  color_flat = col;
-  glyph_offset = offset;
-  glyph_dim = glyph_size;
-  glyph_flags = flags;
+  int glyph_index = gl_InstanceID;
+
+  color_flat = glyphs[glyph_index].glyph_color;
+  glyph_offset = glyphs[glyph_index].offset;
+  glyph_dim = glyphs[glyph_index].glyph_size;
+  glyph_flags = glyphs[glyph_index].flags;
 
   /* Depending on shadow outline / blur level, we might need to expand the quad. */
-  uint shadow_type = flags & 0xFu;
+  uint shadow_type = glyph_flags & 0xFu;
   int interp_size = shadow_type > 4 ? 2 : (shadow_type > 0 ? 1 : 0);
 
   /* Quad expansion using instanced rendering. */
@@ -22,6 +24,7 @@ void main()
   float y = float(gl_VertexID / 2);
   float2 quad = float2(x, y);
 
+  float4 pos = float4(glyphs[glyph_index].position);
   float2 interp_offset = float(interp_size) / abs(pos.zw - pos.xy);
   texCoord_interp = mix(-interp_offset, 1.0f + interp_offset, quad) * float2(glyph_dim) +
                     float2(0.5f);
diff --git a/source/blender/gpu/shaders/infos/gpu_shader_text_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_text_info.hh
index 7aa03f0f4b9..6d2ce594280 100644
--- a/source/blender/gpu/shaders/infos/gpu_shader_text_info.hh
+++ b/source/blender/gpu/shaders/infos/gpu_shader_text_info.hh
@@ -25,17 +25,14 @@ FLAT(int2, glyph_dim)
 GPU_SHADER_INTERFACE_END()
 
 GPU_SHADER_CREATE_INFO(gpu_shader_text)
-VERTEX_IN(0, float4, pos)
-VERTEX_IN(1, float4, col)
-VERTEX_IN(2, int2, glyph_size)
-VERTEX_IN(3, int, offset)
-VERTEX_IN(4, uint, flags)
 VERTEX_OUT(text_iface)
 FRAGMENT_OUT(0, float4, fragColor)
 PUSH_CONSTANT(float4x4, ModelViewProjectionMatrix)
 PUSH_CONSTANT(int, glyph_tex_width_mask)
 PUSH_CONSTANT(int, glyph_tex_width_shift)
 SAMPLER_FREQ(0, sampler2D, glyph, PASS)
+STORAGE_BUF(0, read, GlyphQuad, glyphs[])
+TYPEDEF_SOURCE("GPU_shader_shared.hh")
 VERTEX_SOURCE("gpu_shader_text_vert.glsl")
 FRAGMENT_SOURCE("gpu_shader_text_frag.glsl")
 ADDITIONAL_INFO(gpu_srgb_to_framebuffer_space) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END()