From 4c655f076c7bae6845440cc59a54b1a4d640bf54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Wed, 11 Jun 2025 14:08:38 +0200 Subject: [PATCH] GPU: Shader: Parallel warmup of builtin shaders This reduces the time needed to get to the first pixel on screen by multithreading the builtin shader compilation. We avoid doing this if subprocess compilation is on as the overhead of potentially partially starting all subprocesses is far greater than the benefit of parallel compilation. For some reason, the compilation is much slower when done async for these shaders (on Metal ~200ms > ~1.2ms), so the saving might not be substantial. Mac M1: First frame 6s > 5s. Pull Request: https://projects.blender.org/blender/blender/pulls/139627 --- source/blender/gpu/GPU_shader_builtin.hh | 2 + source/blender/gpu/intern/gpu_init_exit.cc | 1 + .../blender/gpu/intern/gpu_shader_builtin.cc | 89 ++++++++++++++++--- 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/source/blender/gpu/GPU_shader_builtin.hh b/source/blender/gpu/GPU_shader_builtin.hh index cbb43a7bba3..43b9bc52333 100644 --- a/source/blender/gpu/GPU_shader_builtin.hh +++ b/source/blender/gpu/GPU_shader_builtin.hh @@ -156,4 +156,6 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader, eGPUShaderConfig sh_cfg); GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader); +void GPU_shader_builtin_warm_up(); + void GPU_shader_free_builtin_shaders(); diff --git a/source/blender/gpu/intern/gpu_init_exit.cc b/source/blender/gpu/intern/gpu_init_exit.cc index 1b54d290968..d866d3518d9 100644 --- a/source/blender/gpu/intern/gpu_init_exit.cc +++ b/source/blender/gpu/intern/gpu_init_exit.cc @@ -36,6 +36,7 @@ void GPU_init() gpu_shader_dependency_init(); gpu_shader_create_info_init(); + GPU_shader_builtin_warm_up(); GPU_pass_cache_init(); gpu_batch_init(); diff --git a/source/blender/gpu/intern/gpu_shader_builtin.cc b/source/blender/gpu/intern/gpu_shader_builtin.cc 
index 6c8ddce8b53..d9c3090aa80 100644 --- a/source/blender/gpu/intern/gpu_shader_builtin.cc +++ b/source/blender/gpu/intern/gpu_shader_builtin.cc @@ -8,12 +8,14 @@ #include "BLI_utildefines.h" +#include "GPU_capabilities.hh" #include "GPU_shader.hh" #include "gpu_shader_private.hh" /* Cache of built-in shaders (each is created on first use). */ -static GPUShader *builtin_shaders[GPU_SHADER_CFG_LEN][GPU_SHADER_BUILTIN_LEN] = {{nullptr}}; +static blender::gpu::StaticShader *builtin_shaders[GPU_SHADER_CFG_LEN][GPU_SHADER_BUILTIN_LEN] = { + {nullptr}}; static const char *builtin_shader_create_info_name(eGPUBuiltinShader shader) { @@ -145,7 +147,7 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader, # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Warray-bounds" #endif - GPUShader **sh_p = &builtin_shaders[sh_cfg][shader]; + blender::gpu::StaticShader **sh_p = &builtin_shaders[sh_cfg][shader]; #ifdef __GNUC__ # pragma GCC diagnostic pop #endif @@ -153,7 +155,8 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader, if (*sh_p == nullptr) { if (sh_cfg == GPU_SHADER_CFG_DEFAULT) { /* Common case. */ - *sh_p = GPU_shader_create_from_info_name(builtin_shader_create_info_name(shader)); + const char *info_name = builtin_shader_create_info_name(shader); + *sh_p = MEM_new(__func__, info_name); if (ELEM(shader, GPU_SHADER_3D_POLYLINE_CLIPPED_UNIFORM_COLOR, GPU_SHADER_3D_POLYLINE_UNIFORM_COLOR, @@ -162,17 +165,18 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader, { /* Set a default value for `lineSmooth`. * Ideally this value should be set by the caller. */ - GPU_shader_bind(*sh_p); - GPU_shader_uniform_1i(*sh_p, "lineSmooth", 1); + GPUShader *sh = (*sh_p)->get(); + GPU_shader_bind(sh); + GPU_shader_uniform_1i(sh, "lineSmooth", 1); /* WORKAROUND: See is_polyline declaration. 
*/ - blender::gpu::unwrap(*sh_p)->is_polyline = true; + blender::gpu::unwrap(sh)->is_polyline = true; } } else if (sh_cfg == GPU_SHADER_CFG_CLIPPED) { /* In rare cases geometry shaders calculate clipping themselves. */ const char *info_name_clipped = builtin_shader_create_info_name_clipped(shader); if (!blender::StringRefNull(info_name_clipped).is_empty()) { - *sh_p = GPU_shader_create_from_info_name(info_name_clipped); + *sh_p = MEM_new(__func__, info_name_clipped); } } else { @@ -180,7 +184,41 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader, } } - return *sh_p; + return (*sh_p)->get(); +} + +static void gpu_shader_warm_builtin_shader_async(eGPUBuiltinShader shader, eGPUShaderConfig sh_cfg) +{ + BLI_assert(shader < GPU_SHADER_BUILTIN_LEN); + BLI_assert(sh_cfg < GPU_SHADER_CFG_LEN); + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif + blender::gpu::StaticShader **sh_p = &builtin_shaders[sh_cfg][shader]; +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + + if (*sh_p == nullptr) { + if (sh_cfg == GPU_SHADER_CFG_DEFAULT) { + /* Common case. */ + const char *info_name = builtin_shader_create_info_name(shader); + *sh_p = MEM_new(__func__, info_name); + } + else if (sh_cfg == GPU_SHADER_CFG_CLIPPED) { + /* In rare cases geometry shaders calculate clipping themselves. 
*/ + const char *info_name_clipped = builtin_shader_create_info_name_clipped(shader); + if (!blender::StringRefNull(info_name_clipped).is_empty()) { + *sh_p = MEM_new(__func__, info_name_clipped); + } + } + else { + BLI_assert(0); + } + } + (*sh_p)->ensure_compile_async(); } GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader) @@ -188,16 +226,43 @@ GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader) return GPU_shader_get_builtin_shader_with_config(shader, GPU_SHADER_CFG_DEFAULT); } +void GPU_shader_builtin_warm_up() +{ + if (GPU_use_subprocess_compilation() && (GPU_backend_get_type() == GPU_BACKEND_OPENGL)) { + /* The overhead of creating the subprocesses at this exact moment can create bubbles during the + * startup process. It is usually fast enough on OpenGL that we can skip it. */ + return; + } + /* Ordered by first usage in default startup screen. + * Adding more to this list will delay the scheduling of engine shaders and increase time to + * first pixel. 
*/ + gpu_shader_warm_builtin_shader_async(GPU_SHADER_TEXT, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_BASE, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_UNIFORM_COLOR, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_POLYLINE_UNIFORM_COLOR, + GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_IMAGE_COLOR, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_NODE_SOCKET, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_BASE_INST, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR, + GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_IMAGE_DESATURATE_COLOR, + GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_POLYLINE_SMOOTH_COLOR, + GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_SHADOW, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_DIAG_STRIPES, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_IMAGE_RECT_COLOR, GPU_SHADER_CFG_DEFAULT); + gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_AREA_BORDERS, GPU_SHADER_CFG_DEFAULT); +} + void GPU_shader_free_builtin_shaders() { /* Make sure non is bound before deleting. */ GPU_shader_unbind(); for (int i = 0; i < GPU_SHADER_CFG_LEN; i++) { for (int j = 0; j < GPU_SHADER_BUILTIN_LEN; j++) { - if (builtin_shaders[i][j]) { - GPU_shader_free(builtin_shaders[i][j]); - builtin_shaders[i][j] = nullptr; - } + MEM_SAFE_DELETE(builtin_shaders[i][j]); } } }