GPU: Make text rendering not use instance buffer

Use SSBO loads instead.

Add a new `GlyphQuad` interface.

Note that this reduces the maximum glyph batch size (`BLF_BATCH_DRAW_LEN_MAX`
goes from 2048 to 128), since the buffer is always fully uploaded.
This can be improved with partial updates later on if it causes
a significant performance regression.

The motivation for this is to remove the instance buffer from
the batch API.

Pull Request: https://projects.blender.org/blender/blender/pulls/145225
This commit is contained in:
Clément Foucault
2025-08-27 14:52:19 +02:00
parent 117e660491
commit 7becc38a3c
7 changed files with 56 additions and 68 deletions

View File

@@ -187,40 +187,18 @@ static ft_pix blf_unscaled_F26Dot6_to_pixels(FontBLF *font, FT_Pos value)
*/
static void blf_batch_draw_init()
{
/* Sets up the GPU objects stored in `g_batch` that glyph batching draws with.
 * NOTE(review): this listing shows the vertex-buffer / instance-buffer setup together with the
 * storage-buffer setup, and `g_batch.batch` is assigned twice below. Confirm which statements
 * belong to the final revision before relying on any of them. */

/* Per-glyph vertex format: `pos`, `col`, `offset`, `glyph_size` and `flags` attributes.
 * The returned attribute ids are kept in `g_batch`. */
GPUVertFormat format = {0};
g_batch.pos_loc = GPU_vertformat_attr_add(
&format, "pos", blender::gpu::VertAttrType::SFLOAT_32_32_32_32);
g_batch.col_loc = GPU_vertformat_attr_add(
&format, "col", blender::gpu::VertAttrType::UNORM_8_8_8_8);
g_batch.offset_loc = GPU_vertformat_attr_add(
&format, "offset", blender::gpu::VertAttrType::SINT_32);
g_batch.glyph_size_loc = GPU_vertformat_attr_add(
&format, "glyph_size", blender::gpu::VertAttrType::SINT_32_32);
g_batch.glyph_flags_loc = GPU_vertformat_attr_add(
&format, "flags", blender::gpu::VertAttrType::UINT_32);
/* Stream-usage vertex buffer allocated for `BLF_BATCH_DRAW_LEN_MAX` entries. */
g_batch.verts = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_STREAM);
GPU_vertbuf_data_alloc(*g_batch.verts, BLF_BATCH_DRAW_LEN_MAX);
/* Fetch one raw-data stepper per attribute into `g_batch`. */
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.pos_loc, &g_batch.pos_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.col_loc, &g_batch.col_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.offset_loc, &g_batch.offset_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_size_loc, &g_batch.glyph_size_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_flags_loc, &g_batch.glyph_flags_step);
/* Storage buffer sized to hold the whole `g_batch.glyph_data` array. */
g_batch.glyph_buf = GPU_storagebuf_create(sizeof(g_batch.glyph_data));
/* Start with an empty batch. */
g_batch.glyph_len = 0;
/* A dummy VBO containing 4 points, attributes are not used. */
blender::gpu::VertBuf *vbo = GPU_vertbuf_create_with_format(format);
GPU_vertbuf_data_alloc(*vbo, 4);
/* We render a quad as a triangle strip and instance it for each glyph. */
g_batch.batch = GPU_batch_create_ex(GPU_PRIM_TRI_STRIP, vbo, nullptr, GPU_BATCH_OWNS_VBO);
GPU_batch_instbuf_set(g_batch.batch, g_batch.verts, true);
/* Procedural triangle-strip batch of 4 vertices; no vertex buffer is passed to it. */
g_batch.batch = GPU_batch_create_procedural(GPU_PRIM_TRI_STRIP, 4);
}
/**
 * Release the GPU resources held by `g_batch`: the glyph batch and, when one was created,
 * the glyph storage buffer.
 */
static void blf_batch_draw_exit()
{
  GPU_BATCH_DISCARD_SAFE(g_batch.batch);
  if (g_batch.glyph_buf) {
    GPU_storagebuf_free(g_batch.glyph_buf);
    /* Clear the handle so a repeated call cannot free the same buffer twice. */
    g_batch.glyph_buf = nullptr;
  }
}
void blf_batch_draw_begin(FontBLF *font)
@@ -337,8 +315,8 @@ void blf_batch_draw()
}
blender::gpu::Texture *texture = blf_batch_cache_texture_load();
GPU_vertbuf_data_len_set(*g_batch.verts, g_batch.glyph_len);
GPU_vertbuf_use(g_batch.verts); /* Send data. */
GPU_storagebuf_update(g_batch.glyph_buf, g_batch.glyph_data);
GPU_storagebuf_bind(g_batch.glyph_buf, 0);
GPU_batch_program_set_builtin(g_batch.batch, GPU_SHADER_TEXT);
GPU_batch_texture_bind(g_batch.batch, "glyph", texture);
@@ -348,18 +326,11 @@ void blf_batch_draw()
int width_shift = 31 - bitscan_reverse_i(tex_width);
GPU_batch_uniform_1i(g_batch.batch, "glyph_tex_width_mask", tex_width - 1);
GPU_batch_uniform_1i(g_batch.batch, "glyph_tex_width_shift", width_shift);
GPU_batch_draw(g_batch.batch);
GPU_batch_draw_advanced(g_batch.batch, 0, 4, 0, g_batch.glyph_len);
GPU_blend(GPU_BLEND_NONE);
GPU_texture_unbind(texture);
/* Restart to 1st vertex data pointers. */
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.pos_loc, &g_batch.pos_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.col_loc, &g_batch.col_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.offset_loc, &g_batch.offset_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_size_loc, &g_batch.glyph_size_step);
GPU_vertbuf_attr_get_raw_data(g_batch.verts, g_batch.glyph_flags_loc, &g_batch.glyph_flags_step);
g_batch.glyph_len = 0;
}

View File

@@ -1465,20 +1465,18 @@ static void blf_texture_draw(const GlyphBLF *g,
const int x2,
const int y2)
{
using namespace blender;
BLI_assert(size_t(g_batch.glyph_len) < ARRAY_SIZE(g_batch.glyph_data));
GlyphQuad &glyph_data = g_batch.glyph_data[g_batch.glyph_len++];
/* One vertex per glyph, instancing expands it into a quad. */
copy_v4_fl4(static_cast<float *>(GPU_vertbuf_raw_step(&g_batch.pos_step)),
float(x1 + g_batch.ofs[0]),
float(y1 + g_batch.ofs[1]),
float(x2 + g_batch.ofs[0]),
float(y2 + g_batch.ofs[1]));
copy_v4_v4_uchar(static_cast<uchar *>(GPU_vertbuf_raw_step(&g_batch.col_step)), color);
copy_v2_v2_int(static_cast<int *>(GPU_vertbuf_raw_step(&g_batch.glyph_size_step)), g->dims);
*((int *)GPU_vertbuf_raw_step(&g_batch.offset_step)) = g->offset;
glyph_data.position = int4(
x1 + g_batch.ofs[0], y1 + g_batch.ofs[1], x2 + g_batch.ofs[0], y2 + g_batch.ofs[1]);
glyph_data.glyph_color = float4(UNPACK4(color)) / 255.0f;
glyph_data.glyph_size = int2(g->dims);
glyph_data.offset = g->offset;
/* Glyph flags packs color channel count and shadow type. */
uint32_t flags = uint32_t(shadow) | (uint32_t(g->num_channels) << 4);
*((uint32_t *)GPU_vertbuf_raw_step(&g_batch.glyph_flags_step)) = flags;
glyph_data.flags = uint32_t(shadow) | (uint32_t(g->num_channels) << 4);
g_batch.glyph_len++;
/* Flush cache if it's full. */
if (g_batch.glyph_len == BLF_BATCH_DRAW_LEN_MAX) {
blf_batch_draw();

View File

@@ -19,8 +19,9 @@
#include "BLI_mutex.hh"
#include "BLI_vector.hh"
#include "GPU_shader_shared.hh"
#include "GPU_storage_buffer.hh"
#include "GPU_texture.hh"
#include "GPU_vertex_buffer.hh"
#include <ft2build.h>
@@ -99,7 +100,7 @@ inline ft_pix ft_pix_from_float(float v)
/** \} */
/* Maximum number of glyphs accumulated in one batch: it sizes the per-batch glyph storage and
 * the batch is drawn as soon as this many glyphs are queued.
 * NOTE(review): both the previous value (2048) and the new value (128) appear in this listing. */
#define BLF_BATCH_DRAW_LEN_MAX 2048 /* In glyphs. */
#define BLF_BATCH_DRAW_LEN_MAX 128 /* In glyphs. */
/** Number of characters in #KerningCacheBLF.table. */
#define KERNING_CACHE_TABLE_SIZE 128
@@ -111,16 +112,16 @@ struct BatchBLF {
/** Can only batch glyph from the same font. */
FontBLF *font;
blender::gpu::Batch *batch;
blender::gpu::VertBuf *verts;
GPUVertBufRaw pos_step, col_step, offset_step, glyph_size_step, glyph_flags_step;
unsigned int pos_loc, col_loc, offset_loc, glyph_size_loc, glyph_flags_loc;
unsigned int glyph_len;
blender::gpu::StorageBuf *glyph_buf;
int glyph_len;
/** Copy of `font->pos`. */
int ofs[2];
/** Previous call `modelmatrix`. */
float mat[4][4];
bool enabled, active, simple_shader;
GlyphCacheBLF *glyph_cache;
GlyphQuad glyph_data[BLF_BATCH_DRAW_LEN_MAX];
};
extern BatchBLF g_batch;

View File

@@ -6,8 +6,9 @@
* \ingroup gpu
*/
#pragma once
#ifndef USE_GPU_SHADER_CREATE_INFO
# pragma once
# include "GPU_shader_shared_utils.hh"
@@ -128,6 +129,16 @@ enum eGPUSeqFlags : uint32_t {
GPU_SEQ_FLAG_ANY_HANDLE = GPU_SEQ_FLAG_SELECTED_LH | GPU_SEQ_FLAG_SELECTED_RH
};
/* Per-glyph data for text rendering. One element is written per glyph on the CPU side and
 * indexed by instance in the text vertex shader. */
struct GlyphQuad {
/* Quad corners stored as (x1, y1, x2, y2), with the batch offset already added. */
int4 position;
/* Glyph color with each channel divided by 255. */
float4 glyph_color; /* Cannot be named `color` because of Metal macros. */
/* Glyph dimensions, copied from the glyph's `dims`. */
int2 glyph_size;
/* Copied from the glyph's `offset`; presumably its location in the glyph texture (confirm). */
int offset;
/* Low 4 bits: shadow type. Bits 4 and up: number of color channels. */
uint flags;
};
/* Presumably checks that the struct size is a multiple of 16 bytes (confirm against the macro). */
BLI_STATIC_ASSERT_ALIGN(GlyphQuad, 16)
/* VSE per-strip data for timeline rendering. */
struct SeqStripDrawData {
/* Horizontal strip positions (1.0 is one frame). */

View File

@@ -118,15 +118,22 @@ class VertBuf {
VertBuf();
virtual ~VertBuf();
/* NOTE(review): the next line is a `from_size` signature that this listing shows directly above
 * the `from_size_with_format` signature; presumably only the latter owns the body below. The
 * body likewise shows two variants of the buffer creation and of the allocation. Confirm. */
template<typename T> static VertBufPtr from_size(const int size)
/* Create a vertex buffer whose vertex format is `FormatT::format()`, created with the given
 * `usage` and allocated for `size` elements of `FormatT`. `size` must be greater than zero. */
template<typename FormatT>
static VertBufPtr from_size_with_format(const int size, GPUUsageType usage = GPU_USAGE_STATIC)
{
BLI_assert(size > 0);
VertBufPtr buf = VertBufPtr(GPU_vertbuf_create_with_format(GenericVertexFormat<T>::format()));
VertBufPtr buf = VertBufPtr(GPU_vertbuf_create_with_format_ex(FormatT::format(), usage));
/* GPU formats need to be aligned to 4 bytes: round the byte size up to a multiple of 4 before
 * converting it back to an element count. */
buf->allocate(ceil_to_multiple_u(size * sizeof(T), 4) / sizeof(GenericVertexFormat<T>));
buf->allocate(ceil_to_multiple_u(size * sizeof(FormatT), 4) / sizeof(FormatT));
return buf;
}
/* Create a vertex buffer for `size` elements of `T`, using `GenericVertexFormat<T>` as the
 * format type. Forwards to `from_size_with_format`. */
template<typename T>
static VertBufPtr from_size(const int size, GPUUsageType usage = GPU_USAGE_STATIC)
{
  using ElementFormat = GenericVertexFormat<T>;
  return from_size_with_format<ElementFormat>(size, usage);
}
template<typename T> static VertBufPtr from_span(const Span<T> data)
{
BLI_assert(!data.is_empty());

View File

@@ -8,13 +8,15 @@ VERTEX_SHADER_CREATE_INFO(gpu_shader_text)
void main()
{
color_flat = col;
glyph_offset = offset;
glyph_dim = glyph_size;
glyph_flags = flags;
int glyph_index = gl_InstanceID;
color_flat = glyphs[glyph_index].glyph_color;
glyph_offset = glyphs[glyph_index].offset;
glyph_dim = glyphs[glyph_index].glyph_size;
glyph_flags = glyphs[glyph_index].flags;
/* Depending on shadow outline / blur level, we might need to expand the quad. */
uint shadow_type = flags & 0xFu;
uint shadow_type = glyph_flags & 0xFu;
int interp_size = shadow_type > 4 ? 2 : (shadow_type > 0 ? 1 : 0);
/* Quad expansion using instanced rendering. */
@@ -22,6 +24,7 @@ void main()
float y = float(gl_VertexID / 2);
float2 quad = float2(x, y);
float4 pos = float4(glyphs[glyph_index].position);
float2 interp_offset = float(interp_size) / abs(pos.zw - pos.xy);
texCoord_interp = mix(-interp_offset, 1.0f + interp_offset, quad) * float2(glyph_dim) +
float2(0.5f);

View File

@@ -25,17 +25,14 @@ FLAT(int2, glyph_dim)
GPU_SHADER_INTERFACE_END()
/* Create-info for the text shader `gpu_shader_text`.
 * NOTE(review): the `VERTEX_IN` entries below appear to be the per-glyph vertex attributes that
 * the `glyphs` storage buffer replaces; this listing shows both. Confirm which are current. */
GPU_SHADER_CREATE_INFO(gpu_shader_text)
VERTEX_IN(0, float4, pos)
VERTEX_IN(1, float4, col)
VERTEX_IN(2, int2, glyph_size)
VERTEX_IN(3, int, offset)
VERTEX_IN(4, uint, flags)
VERTEX_OUT(text_iface)
FRAGMENT_OUT(0, float4, fragColor)
PUSH_CONSTANT(float4x4, ModelViewProjectionMatrix)
PUSH_CONSTANT(int, glyph_tex_width_mask)
PUSH_CONSTANT(int, glyph_tex_width_shift)
SAMPLER_FREQ(0, sampler2D, glyph, PASS)
/* Read-only storage buffer at slot 0 holding one `GlyphQuad` per glyph. */
STORAGE_BUF(0, read, GlyphQuad, glyphs[])
/* Header that declares `GlyphQuad`. */
TYPEDEF_SOURCE("GPU_shader_shared.hh")
VERTEX_SOURCE("gpu_shader_text_vert.glsl")
FRAGMENT_SOURCE("gpu_shader_text_frag.glsl")
ADDITIONAL_INFO(gpu_srgb_to_framebuffer_space) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END()