GPU: Shader: Parallel warmup of builtin shaders

This reduces the time needed to get to the first pixel on screen by multithreading the builtin shader compilation. We avoid doing this if subprocess compilation is on as the overhead of potentially partially starting all subprocess is far greater than the benefit of paralllel compilation. For some reasons, the compilation is much slower when done async for these shaders (on Metal ~200ms > ~1.2ms), so the saving might not be substantial. Mac M1: First frame 6s > 5s. Pull Request: https://projects.blender.org/blender/blender/pulls/139627
2025-06-11 14:08:38 +02:00
parent abe7f8cb13
commit 4c655f076c
3 changed files with 80 additions and 12 deletions
--- a/source/blender/gpu/GPU_shader_builtin.hh
+++ b/source/blender/gpu/GPU_shader_builtin.hh
@@ -156,4 +156,6 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
                                                     eGPUShaderConfig sh_cfg);
 GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader);

+void GPU_shader_builtin_warm_up();
+
 void GPU_shader_free_builtin_shaders();
--- a/source/blender/gpu/intern/gpu_init_exit.cc
+++ b/source/blender/gpu/intern/gpu_init_exit.cc
@@ -36,6 +36,7 @@ void GPU_init()
  gpu_shader_dependency_init();
  gpu_shader_create_info_init();

+  GPU_shader_builtin_warm_up();
  GPU_pass_cache_init();

  gpu_batch_init();
--- a/source/blender/gpu/intern/gpu_shader_builtin.cc
+++ b/source/blender/gpu/intern/gpu_shader_builtin.cc
@@ -8,12 +8,14 @@

 #include "BLI_utildefines.h"

+#include "GPU_capabilities.hh"
 #include "GPU_shader.hh"

 #include "gpu_shader_private.hh"

 /* Cache of built-in shaders (each is created on first use). */
-static GPUShader *builtin_shaders[GPU_SHADER_CFG_LEN][GPU_SHADER_BUILTIN_LEN] = {{nullptr}};
+static blender::gpu::StaticShader *builtin_shaders[GPU_SHADER_CFG_LEN][GPU_SHADER_BUILTIN_LEN] = {
+    {nullptr}};

 static const char *builtin_shader_create_info_name(eGPUBuiltinShader shader)
 {
@@ -145,7 +147,7 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
-  GPUShader **sh_p = &builtin_shaders[sh_cfg][shader];
+  blender::gpu::StaticShader **sh_p = &builtin_shaders[sh_cfg][shader];
 #ifdef __GNUC__
 #  pragma GCC diagnostic pop
 #endif
@@ -153,7 +155,8 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
  if (*sh_p == nullptr) {
    if (sh_cfg == GPU_SHADER_CFG_DEFAULT) {
      /* Common case. */
-      *sh_p = GPU_shader_create_from_info_name(builtin_shader_create_info_name(shader));
+      const char *info_name = builtin_shader_create_info_name(shader);
+      *sh_p = MEM_new<blender::gpu::StaticShader>(__func__, info_name);
      if (ELEM(shader,
               GPU_SHADER_3D_POLYLINE_CLIPPED_UNIFORM_COLOR,
               GPU_SHADER_3D_POLYLINE_UNIFORM_COLOR,
@@ -162,17 +165,18 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
      {
        /* Set a default value for `lineSmooth`.
         * Ideally this value should be set by the caller. */
-        GPU_shader_bind(*sh_p);
-        GPU_shader_uniform_1i(*sh_p, "lineSmooth", 1);
+        GPUShader *sh = (*sh_p)->get();
+        GPU_shader_bind(sh);
+        GPU_shader_uniform_1i(sh, "lineSmooth", 1);
        /* WORKAROUND: See is_polyline declaration. */
-        blender::gpu::unwrap(*sh_p)->is_polyline = true;
+        blender::gpu::unwrap(sh)->is_polyline = true;
      }
    }
    else if (sh_cfg == GPU_SHADER_CFG_CLIPPED) {
      /* In rare cases geometry shaders calculate clipping themselves. */
      const char *info_name_clipped = builtin_shader_create_info_name_clipped(shader);
      if (!blender::StringRefNull(info_name_clipped).is_empty()) {
-        *sh_p = GPU_shader_create_from_info_name(info_name_clipped);
+        *sh_p = MEM_new<blender::gpu::StaticShader>(__func__, info_name_clipped);
      }
    }
    else {
@@ -180,7 +184,41 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
    }
  }

-  return *sh_p;
+  return (*sh_p)->get();
+}
+
+static void gpu_shader_warm_builtin_shader_async(eGPUBuiltinShader shader, eGPUShaderConfig sh_cfg)
+{
+  BLI_assert(shader < GPU_SHADER_BUILTIN_LEN);
+  BLI_assert(sh_cfg < GPU_SHADER_CFG_LEN);
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+  blender::gpu::StaticShader **sh_p = &builtin_shaders[sh_cfg][shader];
+#ifdef __GNUC__
+#  pragma GCC diagnostic pop
+#endif
+
+  if (*sh_p == nullptr) {
+    if (sh_cfg == GPU_SHADER_CFG_DEFAULT) {
+      /* Common case. */
+      const char *info_name = builtin_shader_create_info_name(shader);
+      *sh_p = MEM_new<blender::gpu::StaticShader>(__func__, info_name);
+    }
+    else if (sh_cfg == GPU_SHADER_CFG_CLIPPED) {
+      /* In rare cases geometry shaders calculate clipping themselves. */
+      const char *info_name_clipped = builtin_shader_create_info_name_clipped(shader);
+      if (!blender::StringRefNull(info_name_clipped).is_empty()) {
+        *sh_p = MEM_new<blender::gpu::StaticShader>(__func__, info_name_clipped);
+      }
+    }
+    else {
+      BLI_assert(0);
+    }
+  }
+  (*sh_p)->ensure_compile_async();
 }

 GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader)
@@ -188,16 +226,43 @@ GPUShader *GPU_shader_get_builtin_shader(eGPUBuiltinShader shader)
  return GPU_shader_get_builtin_shader_with_config(shader, GPU_SHADER_CFG_DEFAULT);
 }

+void GPU_shader_builtin_warm_up()
+{
+  if (GPU_use_subprocess_compilation() && (GPU_backend_get_type() == GPU_BACKEND_OPENGL)) {
+    /* The overhead of creating the subprocesses at this exact moment can create bubbles during the
+     * startup process. It is usually fast enough on OpenGL that we can skip it. */
+    return;
+  }
+  /* Ordered by first usage in default startup screen.
+   * Adding more to this list will delay the scheduling of engine shaders and increase time to
+   * first pixel. */
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_TEXT, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_BASE, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_UNIFORM_COLOR, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_POLYLINE_UNIFORM_COLOR,
+                                       GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_IMAGE_COLOR, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_NODE_SOCKET, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_BASE_INST, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR,
+                                       GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_IMAGE_DESATURATE_COLOR,
+                                       GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_3D_POLYLINE_SMOOTH_COLOR,
+                                       GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_WIDGET_SHADOW, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_DIAG_STRIPES, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_IMAGE_RECT_COLOR, GPU_SHADER_CFG_DEFAULT);
+  gpu_shader_warm_builtin_shader_async(GPU_SHADER_2D_AREA_BORDERS, GPU_SHADER_CFG_DEFAULT);
+}
+
 void GPU_shader_free_builtin_shaders()
 {
  /* Make sure non is bound before deleting. */
  GPU_shader_unbind();
  for (int i = 0; i < GPU_SHADER_CFG_LEN; i++) {
    for (int j = 0; j < GPU_SHADER_BUILTIN_LEN; j++) {
-      if (builtin_shaders[i][j]) {
-        GPU_shader_free(builtin_shaders[i][j]);
-        builtin_shaders[i][j] = nullptr;
-      }
+      MEM_SAFE_DELETE(builtin_shaders[i][j]);
    }
  }
 }