GPU: Make Shader Specialization Constant API Thread Safe

This allows multiple threads to request different specializations without locking the usage of all specialized shader programs while a new specialization is being compiled. The specialization constants are bundled in a structure that is passed to the `Shader::bind()` method. The structure is owned by the calling thread and is only used by `Shader::bind()`. Only the query for the specialized shader (map lookup) locks the shader usage. The variant compilation is now also locking, which ensures that multiple threads trying to compile the same variant never cause a race condition. Note that this removes the `is_dirty` optimization. It can be added back if this becomes a bottleneck in the future; otherwise, the performance impact is not noticeable. Pull Request: https://projects.blender.org/blender/blender/pulls/136991
2025-05-19 17:42:55 +02:00
parent cfb8aa24b2
commit caac241c84
32 changed files with 486 additions and 347 deletions
--- a/source/blender/draw/engines/eevee/eevee_film.cc
+++ b/source/blender/draw/engines/eevee/eevee_film.cc
@@ -641,6 +641,10 @@ void Film::end_sync()
  aovs_info.push_update();

  sync_mist();
+
+  inst_.manager->warm_shader_specialization(accumulate_ps_);
+  inst_.manager->warm_shader_specialization(copy_ps_);
+  inst_.manager->warm_shader_specialization(cryptomatte_post_ps_);
 }

 float2 Film::pixel_jitter_get() const
--- a/source/blender/draw/engines/eevee/eevee_pipeline.cc
+++ b/source/blender/draw/engines/eevee/eevee_pipeline.cc
@@ -676,17 +676,22 @@ void DeferredLayer::end_sync(bool is_first_pass,
        sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
        sub.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
        sub.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
+        const ShadowSceneData &shadow_scene = inst_.shadows.get_data();
+        auto set_specialization_constants =
+            [&](PassSimple::Sub &sub, GPUShader *sh, bool use_transmission) {
+              sub.specialize_constant(sh, "render_pass_shadow_id", rbuf_data.shadow_id);
+              sub.specialize_constant(sh, "use_split_indirect", use_split_indirect);
+              sub.specialize_constant(sh, "use_lightprobe_eval", use_lightprobe_eval);
+              sub.specialize_constant(sh, "use_transmission", use_transmission);
+              sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count);
+              sub.specialize_constant(sh, "shadow_ray_step_count", &shadow_scene.step_count);
+            };
        /* Submit the more costly ones first to avoid long tail in occupancy.
         * See page 78 of "SIGGRAPH 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */
+
        for (int i = min_ii(3, closure_count_) - 1; i >= 0; i--) {
          GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
-          sub.specialize_constant(sh, "render_pass_shadow_id", rbuf_data.shadow_id);
-          sub.specialize_constant(sh, "use_split_indirect", use_split_indirect);
-          sub.specialize_constant(sh, "use_lightprobe_eval", use_lightprobe_eval);
-          sub.specialize_constant(sh, "use_transmission", false);
-          const ShadowSceneData &shadow_scene = inst_.shadows.get_data();
-          sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count);
-          sub.specialize_constant(sh, "shadow_ray_step_count", &shadow_scene.step_count);
+          set_specialization_constants(sub, sh, false);
          sub.shader_set(sh);
          sub.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
          sub.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
@@ -709,7 +714,7 @@ void DeferredLayer::end_sync(bool is_first_pass,
          sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
          if (use_transmission) {
            /* Separate pass for transmission BSDF as their evaluation is quite costly. */
-            sub.specialize_constant(sh, "use_transmission", true);
+            set_specialization_constants(sub, sh, true);
            sub.shader_set(sh);
            sub.state_stencil(0x0u, (i + 1) | uint8_t(StencilBits::TRANSMISSION), compare_mask);
            sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
--- a/source/blender/draw/engines/eevee/eevee_raytrace.cc
+++ b/source/blender/draw/engines/eevee/eevee_raytrace.cc
@@ -330,6 +330,23 @@ void RayTraceModule::sync()
    pass.dispatch(horizon_denoise_dispatch_buf_);
    pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
  }
+  for (int i : IndexRange(3)) {
+    data_.closure_index = i;
+    inst_.manager->warm_shader_specialization(tile_classify_ps_);
+    inst_.manager->warm_shader_specialization(tile_compact_ps_);
+    inst_.manager->warm_shader_specialization(generate_ps_);
+    inst_.manager->warm_shader_specialization(trace_planar_ps_);
+    inst_.manager->warm_shader_specialization(trace_screen_ps_);
+    inst_.manager->warm_shader_specialization(trace_fallback_ps_);
+    inst_.manager->warm_shader_specialization(denoise_spatial_ps_);
+    inst_.manager->warm_shader_specialization(denoise_temporal_ps_);
+    inst_.manager->warm_shader_specialization(denoise_bilateral_ps_);
+    inst_.manager->warm_shader_specialization(horizon_schedule_ps_);
+    inst_.manager->warm_shader_specialization(horizon_setup_ps_);
+    inst_.manager->warm_shader_specialization(horizon_scan_ps_);
+    inst_.manager->warm_shader_specialization(horizon_denoise_ps_);
+    inst_.manager->warm_shader_specialization(horizon_resolve_ps_);
+  }
 }

 void RayTraceModule::debug_pass_sync() {}
--- a/source/blender/draw/engines/eevee/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee/eevee_shader.cc
@@ -139,14 +139,24 @@ bool ShaderModule::request_specializations(bool block_until_ready,
        Vector<ShaderSpecialization> specializations;
        for (int i = 0; i < 3; i++) {
          GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
+          int render_pass_shadow_id_index = GPU_shader_get_constant(sh, "render_pass_shadow_id");
+          int use_split_indirect_index = GPU_shader_get_constant(sh, "use_split_indirect");
+          int use_lightprobe_eval_index = GPU_shader_get_constant(sh, "use_lightprobe_eval");
+          int use_transmission_index = GPU_shader_get_constant(sh, "use_transmission");
+          int shadow_ray_count_index = GPU_shader_get_constant(sh, "shadow_ray_count");
+          int shadow_ray_step_count_index = GPU_shader_get_constant(sh, "shadow_ray_step_count");
+
+          gpu::shader::SpecializationConstants sp = GPU_shader_get_default_constant_state(sh);
+
          for (bool use_transmission : {false, true}) {
-            specializations.append({sh,
-                                    {{"render_pass_shadow_id", render_buffers_shadow_id},
-                                     {"use_split_indirect", use_split_indirect},
-                                     {"use_lightprobe_eval", use_lightprobe_eval},
-                                     {"use_transmission", use_transmission},
-                                     {"shadow_ray_count", shadow_ray_count},
-                                     {"shadow_ray_step_count", shadow_ray_step_count}}});
+            sp.set_value(render_pass_shadow_id_index, render_buffers_shadow_id);
+            sp.set_value(use_split_indirect_index, use_split_indirect);
+            sp.set_value(use_lightprobe_eval_index, use_lightprobe_eval);
+            sp.set_value(use_transmission_index, use_transmission);
+            sp.set_value(shadow_ray_count_index, shadow_ray_count);
+            sp.set_value(shadow_ray_step_count_index, shadow_ray_step_count);
+
+            specializations.append({sh, sp});
          }
        }

--- a/source/blender/draw/intern/draw_command.cc
+++ b/source/blender/draw/intern/draw_command.cc
@@ -45,9 +45,13 @@ static gpu::Batch *procedural_batch_get(GPUPrimType primitive)

 void ShaderBind::execute(RecordingState &state) const
 {
-  if (assign_if_different(state.shader, shader)) {
-    GPU_shader_bind(shader);
+  state.shader_use_specialization = !GPU_shader_get_default_constant_state(shader).is_empty();
+  if (assign_if_different(state.shader, shader) || state.shader_use_specialization) {
+    GPU_shader_bind(shader, state.specialization_constants_get());
  }
+  /* Signal that we can reload the default for a different specialization later on.
+   * However, we keep the specialization_constants state around for compute shaders. */
+  state.specialization_constants_in_use = false;
 }

 void FramebufferBind::execute() const
@@ -129,35 +133,40 @@ void PushConstant::execute(RecordingState &state) const
  }
 }

-void SpecializeConstant::execute() const
+void SpecializeConstant::execute(command::RecordingState &state) const
 {
  /* All specialization constants should exist as they are not optimized out like uniforms. */
  BLI_assert(location != -1);

+  if (state.specialization_constants_in_use == false) {
+    state.specialization_constants = GPU_shader_get_default_constant_state(this->shader);
+    state.specialization_constants_in_use = true;
+  }
+
  switch (type) {
    case SpecializeConstant::Type::IntValue:
-      GPU_shader_constant_int_ex(shader, location, int_value);
+      state.specialization_constants.set_value(location, int_value);
      break;
    case SpecializeConstant::Type::IntReference:
-      GPU_shader_constant_int_ex(shader, location, *int_ref);
+      state.specialization_constants.set_value(location, *int_ref);
      break;
    case SpecializeConstant::Type::UintValue:
-      GPU_shader_constant_uint_ex(shader, location, uint_value);
+      state.specialization_constants.set_value(location, uint_value);
      break;
    case SpecializeConstant::Type::UintReference:
-      GPU_shader_constant_uint_ex(shader, location, *uint_ref);
+      state.specialization_constants.set_value(location, *uint_ref);
      break;
    case SpecializeConstant::Type::FloatValue:
-      GPU_shader_constant_float_ex(shader, location, float_value);
+      state.specialization_constants.set_value(location, float_value);
      break;
    case SpecializeConstant::Type::FloatReference:
-      GPU_shader_constant_float_ex(shader, location, *float_ref);
+      state.specialization_constants.set_value(location, *float_ref);
      break;
    case SpecializeConstant::Type::BoolValue:
-      GPU_shader_constant_bool_ex(shader, location, bool_value);
+      state.specialization_constants.set_value(location, bool_value);
      break;
    case SpecializeConstant::Type::BoolReference:
-      GPU_shader_constant_bool_ex(shader, location, *bool_ref);
+      state.specialization_constants.set_value(location, *bool_ref);
      break;
  }
 }
@@ -177,6 +186,8 @@ void Draw::execute(RecordingState &state) const
    state.instance_offset += instance_len;
  }

+  GPU_shader_get_default_constant_state(state.shader).is_empty();
+
  if (is_primitive_expansion()) {
    /* Expanded draw-call. */
    IndexRange expanded_range = GPU_batch_draw_expanded_parameter_get(
@@ -194,13 +205,13 @@ void Draw::execute(RecordingState &state) const
    GPU_batch_bind_as_resources(batch, state.shader);

    gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand_prim_type));
-    GPU_batch_set_shader(gpu_batch, state.shader);
+    GPU_batch_set_shader(gpu_batch, state.shader, state.specialization_constants_get());
    GPU_batch_draw_advanced(
        gpu_batch, expanded_range.start(), expanded_range.size(), instance_first, instance_len);
  }
  else {
    /* Regular draw-call. */
-    GPU_batch_set_shader(batch, state.shader);
+    GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get());
    GPU_batch_draw_advanced(batch, vertex_first, vertex_len, instance_first, instance_len);
  }
 }
@@ -227,7 +238,7 @@ void DrawMulti::execute(RecordingState &state) const
        GPU_batch_resource_id_buf_set(batch, state.resource_id_buf);
      }

-      GPU_batch_set_shader(batch, state.shader);
+      GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get());

      constexpr intptr_t stride = sizeof(DrawCommand);
      /* We have 2 indirect command reserved per draw group. */
@@ -259,16 +270,18 @@ void DrawIndirect::execute(RecordingState &state) const
 void Dispatch::execute(RecordingState &state) const
 {
  if (is_reference) {
-    GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z);
+    GPU_compute_dispatch(
+        state.shader, size_ref->x, size_ref->y, size_ref->z, state.specialization_constants_get());
  }
  else {
-    GPU_compute_dispatch(state.shader, size.x, size.y, size.z);
+    GPU_compute_dispatch(
+        state.shader, size.x, size.y, size.z, state.specialization_constants_get());
  }
 }

 void DispatchIndirect::execute(RecordingState &state) const
 {
-  GPU_compute_dispatch_indirect(state.shader, *indirect_buf);
+  GPU_compute_dispatch_indirect(state.shader, *indirect_buf, state.specialization_constants_get());
 }

 void Barrier::execute() const
--- a/source/blender/draw/intern/draw_command.hh
+++ b/source/blender/draw/intern/draw_command.hh
@@ -44,6 +44,11 @@ class DrawMultiBuf;
 * Keep track of several states and avoid redundant state changes.
 */
 struct RecordingState {
+  gpu::shader::SpecializationConstants specialization_constants;
+  /* True if specialization_constants was set. */
+  bool specialization_constants_in_use = false;
+  /* True if the bound shader uses specialization. */
+  bool shader_use_specialization = false;
  GPUShader *shader = nullptr;
  bool front_facing = true;
  bool inverted_view = false;
@@ -77,6 +82,11 @@ struct RecordingState {
      GPU_uniformbuf_debug_unbind_all();
    }
  }
+
+  const gpu::shader::SpecializationConstants *specialization_constants_get()
+  {
+    return shader_use_specialization ? &specialization_constants : nullptr;
+  }
 };

 /** \} */
@@ -348,7 +358,7 @@ struct SpecializeConstant {
  SpecializeConstant(GPUShader *sh, int loc, const bool *val)
      : shader(sh), bool_ref(val), location(loc), type(Type::BoolReference){};

-  void execute() const;
+  void execute(RecordingState &state) const;
  std::string serialize() const;
 };

--- a/source/blender/draw/intern/draw_manager.cc
+++ b/source/blender/draw/intern/draw_manager.cc
@@ -251,6 +251,24 @@ void Manager::generate_commands(PassSimple &pass)
  pass.draw_commands_buf_.generate_commands(pass.headers_, pass.commands_, pass.sub_passes_);
 }

+void Manager::warm_shader_specialization(PassMain &pass)
+{
+  if (pass.is_empty()) {
+    return;
+  }
+  command::RecordingState state;
+  pass.warm_shader_specialization(state);
+}
+
+void Manager::warm_shader_specialization(PassSimple &pass)
+{
+  if (pass.is_empty()) {
+    return;
+  }
+  command::RecordingState state;
+  pass.warm_shader_specialization(state);
+}
+
 void Manager::submit_only(PassMain &pass, View &view)
 {
  if (pass.is_empty()) {
--- a/source/blender/draw/intern/draw_manager.hh
+++ b/source/blender/draw/intern/draw_manager.hh
@@ -246,6 +246,13 @@ class Manager {
   */
  void generate_commands(PassSimple &pass);

+  /**
+   * Make sure the shader specialization constants are already compiled.
+   * This avoids stalling the real submission call because of specialization.
+   */
+  void warm_shader_specialization(PassMain &pass);
+  void warm_shader_specialization(PassSimple &pass);
+
  /**
   * Submit a pass for drawing. All resource reference will be dereferenced and commands will be
   * sent to GPU. Visibility and command generation **must** have already been done explicitly
--- a/source/blender/draw/intern/draw_pass.hh
+++ b/source/blender/draw/intern/draw_pass.hh
@@ -467,6 +467,12 @@ class PassBase {
   */
  command::Undetermined &create_command(command::Type type);

+  /**
+   * Make sure the shader specialization constants are already compiled.
+   * This avoids stalling the real submission call because of specialization.
+   */
+  void warm_shader_specialization(command::RecordingState &state) const;
+
  void submit(command::RecordingState &state) const;

  bool has_generated_commands() const
@@ -679,6 +685,59 @@ template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name)
  return sub_passes_[index];
 }

+template<class T>
+void PassBase<T>::warm_shader_specialization(command::RecordingState &state) const
+{
+  GPU_debug_group_begin("warm_shader_specialization");
+
+  for (const command::Header &header : headers_) {
+    switch (header.type) {
+      default:
+      case Type::None:
+        break;
+      case Type::SubPass:
+        sub_passes_[header.index].warm_shader_specialization(state);
+        break;
+      case command::Type::FramebufferBind:
+        break;
+      case command::Type::SubPassTransition:
+        break;
+      case command::Type::ShaderBind:
+        commands_[header.index].shader_bind.execute(state);
+        break;
+      case command::Type::ResourceBind:
+        break;
+      case command::Type::PushConstant:
+        break;
+      case command::Type::SpecializeConstant:
+        commands_[header.index].specialize_constant.execute(state);
+        break;
+      case command::Type::Draw:
+        break;
+      case command::Type::DrawMulti:
+        break;
+      case command::Type::DrawIndirect:
+        break;
+      case command::Type::Dispatch:
+        break;
+      case command::Type::DispatchIndirect:
+        break;
+      case command::Type::Barrier:
+        break;
+      case command::Type::Clear:
+        break;
+      case command::Type::ClearMulti:
+        break;
+      case command::Type::StateSet:
+        break;
+      case command::Type::StencilSet:
+        break;
+    }
+  }
+
+  GPU_debug_group_end();
+}
+
 template<class T> void PassBase<T>::submit(command::RecordingState &state) const
 {
  if (headers_.is_empty()) {
@@ -711,7 +770,7 @@ template<class T> void PassBase<T>::submit(command::RecordingState &state) const
        commands_[header.index].push_constant.execute(state);
        break;
      case command::Type::SpecializeConstant:
-        commands_[header.index].specialize_constant.execute();
+        commands_[header.index].specialize_constant.execute(state);
        break;
      case command::Type::Draw:
        commands_[header.index].draw.execute(state);
--- a/source/blender/gpu/GPU_batch.hh
+++ b/source/blender/gpu/GPU_batch.hh
@@ -276,7 +276,10 @@ void GPU_batch_resource_id_buf_set(blender::gpu::Batch *batch, GPUStorageBuf *re
 * \note This need to be called first for the `GPU_batch_uniform_*` functions to work.
 */
 /* TODO(fclem): These should be removed and replaced by `GPU_shader_bind()`. */
-void GPU_batch_set_shader(blender::gpu::Batch *batch, GPUShader *shader);
+void GPU_batch_set_shader(
+    blender::gpu::Batch *batch,
+    GPUShader *shader,
+    const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);
 void GPU_batch_program_set_builtin(blender::gpu::Batch *batch, eGPUBuiltinShader shader_id);
 void GPU_batch_program_set_builtin_with_config(blender::gpu::Batch *batch,
                                               eGPUBuiltinShader shader_id,
--- a/source/blender/gpu/GPU_common_types.hh
+++ b/source/blender/gpu/GPU_common_types.hh
@@ -9,6 +9,7 @@
 #pragma once

 #include "BLI_string_ref.hh"
+#include "BLI_vector.hh"

 /**
 * Describes the load operation of a frame-buffer attachment at the start of a render pass.
@@ -220,4 +221,44 @@ struct SpecializationConstant {
  }
 };

+/**
+ * Specialization constants as a Struct-of-Arrays. Allow simpler comparison and reset.
+ * The backend is free to implement their support as they see fit.
+ */
+struct SpecializationConstants {
+  Vector<gpu::shader::Type, 8> types;
+  /* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret
+   * that however it wants (i.e: bind another shader instead). */
+  Vector<SpecializationConstant::Value, 8> values;
+
+  void set_value(int index, uint32_t value)
+  {
+    BLI_assert_msg(types[index] == Type::uint_t, "Mismatch between interface and constant type");
+    values[index].u = value;
+  }
+
+  void set_value(int index, int value)
+  {
+    BLI_assert_msg(types[index] == Type::int_t, "Mismatch between interface and constant type");
+    values[index].i = value;
+  }
+
+  void set_value(int index, float value)
+  {
+    BLI_assert_msg(types[index] == Type::float_t, "Mismatch between interface and constant type");
+    values[index].f = value;
+  }
+
+  void set_value(int index, bool value)
+  {
+    BLI_assert_msg(types[index] == Type::bool_t, "Mismatch between interface and constant type");
+    values[index].u = value ? 1 : 0;
+  }
+
+  bool is_empty() const
+  {
+    return types.is_empty();
+  }
+};
+
 }  // namespace blender::gpu::shader
--- a/source/blender/gpu/GPU_compute.hh
+++ b/source/blender/gpu/GPU_compute.hh
@@ -23,10 +23,12 @@
 * The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which
 * might be different in each of the 3 dimensions.
 */
-void GPU_compute_dispatch(GPUShader *shader,
-                          uint groups_x_len,
-                          uint groups_y_len,
-                          uint groups_z_len);
+void GPU_compute_dispatch(
+    GPUShader *shader,
+    uint groups_x_len,
+    uint groups_y_len,
+    uint groups_z_len,
+    const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);

 /**
 * Dispatch a compute shader task. The size of the dispatch is sourced from a \a indirect_buf
@@ -45,4 +47,7 @@ void GPU_compute_dispatch(GPUShader *shader,
 * The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which
 * might be different in each of the 3 dimensions.
 */
-void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf);
+void GPU_compute_dispatch_indirect(
+    GPUShader *shader,
+    GPUStorageBuf *indirect_buf,
+    const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);
--- a/source/blender/gpu/GPU_shader.hh
+++ b/source/blender/gpu/GPU_shader.hh
@@ -127,7 +127,9 @@ void GPU_shader_free(GPUShader *shader);
 * Uniform functions need to have the shader bound in order to work. (TODO: until we use
 * glProgramUniform)
 */
-void GPU_shader_bind(GPUShader *shader);
+void GPU_shader_bind(
+    GPUShader *shader,
+    const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);

 /**
 * Unbind the active shader.
@@ -234,21 +236,16 @@ bool GPU_shader_get_ssbo_input_info(const GPUShader *shader, int ssbo_location,
 * Otherwise, it will produce undefined behavior.
 * \{ */

-void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value);
-void GPU_shader_constant_uint_ex(GPUShader *sh, int location, unsigned int value);
-void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value);
-void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value);
-
-void GPU_shader_constant_int(GPUShader *sh, const char *name, int value);
-void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int value);
-void GPU_shader_constant_float(GPUShader *sh, const char *name, float value);
-void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value);
+/* Return the default constants.
+ * All constants available for this shader should fit the returned structure. */
+const blender::gpu::shader::SpecializationConstants &GPU_shader_get_default_constant_state(
+    GPUShader *sh);

 using SpecializationBatchHandle = int64_t;

 struct ShaderSpecialization {
  GPUShader *shader;
-  blender::Vector<blender::gpu::shader::SpecializationConstant> constants;
+  blender::gpu::shader::SpecializationConstants constants;
 };

 /**
--- a/source/blender/gpu/intern/gpu_batch.cc
+++ b/source/blender/gpu/intern/gpu_batch.cc
@@ -223,10 +223,12 @@ void GPU_batch_resource_id_buf_set(Batch *batch, GPUStorageBuf *resource_id_buf)
 *
 * \{ */

-void GPU_batch_set_shader(Batch *batch, GPUShader *shader)
+void GPU_batch_set_shader(Batch *batch,
+                          GPUShader *shader,
+                          const shader::SpecializationConstants *constants_state)
 {
  batch->shader = shader;
-  GPU_shader_bind(batch->shader);
+  GPU_shader_bind(batch->shader, constants_state);
 }

 static uint16_t bind_attribute_as_ssbo(const ShaderInterface *interface,
--- a/source/blender/gpu/intern/gpu_compute.cc
+++ b/source/blender/gpu/intern/gpu_compute.cc
@@ -13,19 +13,23 @@
 void GPU_compute_dispatch(GPUShader *shader,
                          uint groups_x_len,
                          uint groups_y_len,
-                          uint groups_z_len)
+                          uint groups_z_len,
+                          const blender::gpu::shader::SpecializationConstants *constants_state)
 {
  blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get();
-  GPU_shader_bind(shader);
+  GPU_shader_bind(shader, constants_state);
  gpu_backend.compute_dispatch(groups_x_len, groups_y_len, groups_z_len);
 }

-void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf_)
+void GPU_compute_dispatch_indirect(
+    GPUShader *shader,
+    GPUStorageBuf *indirect_buf_,
+    const blender::gpu::shader::SpecializationConstants *constants_state)
 {
  blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get();
  blender::gpu::StorageBuf *indirect_buf = reinterpret_cast<blender::gpu::StorageBuf *>(
      indirect_buf_);

-  GPU_shader_bind(shader);
+  GPU_shader_bind(shader, constants_state);
  gpu_backend.compute_dispatch_indirect(indirect_buf);
 }
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -402,23 +402,24 @@ void GPU_shader_cache_dir_clear_old()
 /** \name Binding
 * \{ */

-void GPU_shader_bind(GPUShader *gpu_shader)
+void GPU_shader_bind(GPUShader *gpu_shader, const shader::SpecializationConstants *constants_state)
 {
  Shader *shader = unwrap(gpu_shader);

+  BLI_assert_msg(constants_state != nullptr || shader->constants->is_empty(),
+                 "Shader requires specialization constants but none was passed");
+
  Context *ctx = Context::get();

  if (ctx->shader != shader) {
    ctx->shader = shader;
-    shader->bind();
+    shader->bind(constants_state);
    GPU_matrix_bind(gpu_shader);
    Shader::set_srgb_uniform(ctx, gpu_shader);
-    shader->constants.is_dirty = false;
  }
  else {
-    if (shader->constants.is_dirty) {
-      shader->bind();
-      shader->constants.is_dirty = false;
+    if (constants_state) {
+      shader->bind(constants_state);
    }
    if (ctx->shader_builtin_srgb_is_dirty) {
      Shader::set_srgb_uniform(ctx, gpu_shader);
@@ -493,57 +494,20 @@ void GPU_shader_warm_cache(GPUShader *shader, int limit)
 /** \name Assign specialization constants.
 * \{ */

+const shader::SpecializationConstants &GPU_shader_get_default_constant_state(GPUShader *sh)
+{
+  return *unwrap(sh)->constants;
+}
+
 void Shader::specialization_constants_init(const shader::ShaderCreateInfo &info)
 {
  using namespace shader;
+  shader::SpecializationConstants constants_tmp;
  for (const SpecializationConstant &sc : info.specialization_constants_) {
-    constants.types.append(sc.type);
-    constants.values.append(sc.value);
+    constants_tmp.types.append(sc.type);
+    constants_tmp.values.append(sc.value);
  }
-  constants.is_dirty = true;
-}
-
-void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value)
-{
-  Shader &shader = *unwrap(sh);
-  BLI_assert(shader.constants.types[location] == gpu::shader::Type::int_t);
-  shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].i, value);
-}
-void GPU_shader_constant_uint_ex(GPUShader *sh, int location, uint value)
-{
-  Shader &shader = *unwrap(sh);
-  BLI_assert(shader.constants.types[location] == gpu::shader::Type::uint_t);
-  shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].u, value);
-}
-void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value)
-{
-  Shader &shader = *unwrap(sh);
-  BLI_assert(shader.constants.types[location] == gpu::shader::Type::float_t);
-  shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].f, value);
-}
-void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value)
-{
-  Shader &shader = *unwrap(sh);
-  BLI_assert(shader.constants.types[location] == gpu::shader::Type::bool_t);
-  shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].u,
-                                                   uint32_t(value));
-}
-
-void GPU_shader_constant_int(GPUShader *sh, const char *name, int value)
-{
-  GPU_shader_constant_int_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
-}
-void GPU_shader_constant_uint(GPUShader *sh, const char *name, uint value)
-{
-  GPU_shader_constant_uint_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
-}
-void GPU_shader_constant_float(GPUShader *sh, const char *name, float value)
-{
-  GPU_shader_constant_float_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
-}
-void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
-{
-  GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
+  constants = std::make_unique<const shader::SpecializationConstants>(std::move(constants_tmp));
 }

 SpecializationBatchHandle GPU_shader_batch_specializations(
@@ -841,8 +805,10 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba
  }

  Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());
-  shader->init(info, is_batch_compilation);
+  /* Needs to be called before init as GL uses the default specialization constants state to insert
+   * default shader inside a map. */
  shader->specialization_constants_init(info);
+  shader->init(info, is_batch_compilation);

  shader->fragment_output_bits = 0;
  for (const shader::ShaderCreateInfo::FragOut &frag_out : info.fragment_outputs_) {
--- a/source/blender/gpu/intern/gpu_shader_private.hh
+++ b/source/blender/gpu/intern/gpu_shader_private.hh
@@ -47,24 +47,9 @@ class Shader {
  /** Bit-set indicating the frame-buffer color attachments that this shader writes to. */
  uint16_t fragment_output_bits = 0;

-  /**
-   * Specialization constants as a Struct-of-Arrays. Allow simpler comparison and reset.
-   * The backend is free to implement their support as they see fit.
-   */
-  struct Constants {
-    using Value = shader::SpecializationConstant::Value;
-    Vector<gpu::shader::Type> types;
-    /* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret
-     * that however it wants (i.e: bind another shader instead). */
-    Vector<Value> values;
-
-    /**
-     * OpenGL needs to know if a different program needs to be attached when constants are
-     * changed. Vulkan and Metal uses pipelines and don't have this issue. Attribute can be
-     * removed after the OpenGL backend has been phased out.
-     */
-    bool is_dirty;
-  } constants;
+  /* Default specialization constants state as defined inside ShaderCreateInfo.
+   * Should be considered as const after init(). */
+  std::unique_ptr<const shader::SpecializationConstants> constants;

  /* WORKAROUND: True if this shader is a polyline shader and needs an appropriate setup to render.
   * Eventually, in the future, we should modify the user code instead of relying on such hacks. */
@@ -98,7 +83,7 @@ class Shader {
   * See `GPU_shader_warm_cache(..)` in `GPU_shader.hh` for more information. */
  virtual void warm_cache(int limit) = 0;

-  virtual void bind() = 0;
+  virtual void bind(const shader::SpecializationConstants *constants_state) = 0;
  virtual void unbind() = 0;

  virtual void uniform_float(int location, int comp_len, int array_size, const float *data) = 0;
--- a/source/blender/gpu/metal/mtl_batch.hh
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -68,7 +68,6 @@ class MTLBatch : public Batch {

 private:
  MTLShader *active_shader_ = nullptr;
-  bool shader_in_use_ = false;
  MTLVertexDescriptorCache vao_cache = {this};

  /* Topology emulation. */
@@ -115,7 +114,6 @@ class MTLBatch : public Batch {
  }

 private:
-  void shader_bind();
  void draw_advanced(int v_first, int v_count, int i_first, int i_count);
  void draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset);
  int prepare_vertex_binding(MTLVertBuf *verts,
--- a/source/blender/gpu/metal/mtl_batch.mm
+++ b/source/blender/gpu/metal/mtl_batch.mm
@@ -34,28 +34,14 @@ namespace blender::gpu {
 * \{ */
 void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
 {
-  if (this->flag & GPU_BATCH_INVALID) {
-    this->shader_in_use_ = false;
-  }
  this->draw_advanced(v_first, v_count, i_first, i_count);
 }

 void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
 {
-  if (this->flag & GPU_BATCH_INVALID) {
-    this->shader_in_use_ = false;
-  }
  this->draw_advanced_indirect(indirect_buf, offset);
 }

-void MTLBatch::shader_bind()
-{
-  if (active_shader_ && active_shader_->is_valid()) {
-    active_shader_->bind();
-    shader_in_use_ = true;
-  }
-}
-
 void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx)
 {
  BLI_assert(ctx != nullptr);
@@ -417,9 +403,6 @@ id<MTLRenderCommandEncoder> MTLBatch::bind()
  /* Debug Check: Ensure Frame-buffer instance is not dirty. */
  BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty());

-  /* Bind Shader. */
-  this->shader_bind();
-
  /* GPU debug markers. */
  if (G.debug & G_DEBUG_GPU) {
    [rec pushDebugGroup:[NSString stringWithFormat:@"Draw Commands%@ (GPUShader: %s)",
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -838,6 +838,11 @@ class MTLContext : public Context {
  id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state);
  id<MTLSamplerState> get_default_sampler_state();

+  /* Active shader specialization constants state. */
+  shader::SpecializationConstants constants_state;
+
+  void specialization_constants_set(const shader::SpecializationConstants *constants_state);
+
  /* Metal Context pipeline state. */
  void pipeline_state_init();
  MTLShader *get_active_shader();
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -709,6 +709,13 @@ void MTLContext::free_dummy_resources()
  }
 }

+void MTLContext::specialization_constants_set(
+    const shader::SpecializationConstants *constants_state)
+{
+  this->constants_state = (constants_state != nullptr) ? *constants_state :
+                                                         shader::SpecializationConstants{};
+}
+
 /** \} */

 /* -------------------------------------------------------------------- */
@@ -2185,11 +2192,10 @@ const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state
  MTLShader *active_shader = this->pipeline_state.active_shader;

  /* Set descriptor to default shader constants . */
-  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(active_shader->constants.values);
+  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants_state.values);

  const MTLComputePipelineStateInstance *compute_pso_inst =
-      this->pipeline_state.active_shader->bake_compute_pipeline_state(this,
-                                                                      compute_pipeline_descriptor);
+      active_shader->bake_compute_pipeline_state(this, compute_pipeline_descriptor);

  if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) {
    MTL_LOG_WARNING("No valid compute PSO for compute dispatch!", );
--- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
+++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
@@ -173,10 +173,13 @@ struct MTLVertexDescriptor {
 };

 struct SpecializationStateDescriptor {
-  Vector<Shader::Constants::Value> values;
+  Vector<shader::SpecializationConstant::Value> values;

  SpecializationStateDescriptor() = default;
-  SpecializationStateDescriptor(Vector<Shader::Constants::Value> source) : values(source) {}
+  SpecializationStateDescriptor(Vector<shader::SpecializationConstant::Value> source)
+      : values(source)
+  {
+  }

  bool operator==(const SpecializationStateDescriptor &other) const
  {
@@ -187,7 +190,7 @@ struct SpecializationStateDescriptor {
  {
    uint64_t hash = values.size();
    uint seed = 0xFF;
-    for (const Shader::Constants::Value &value : values) {
+    for (const shader::SpecializationConstant::Value &value : values) {
      seed = seed << 1;
      hash ^= seed ^ value.u;
    }
@@ -343,7 +346,7 @@ struct MTLComputePipelineStateDescriptor {
  SpecializationStateDescriptor specialization_state;

  MTLComputePipelineStateDescriptor() = default;
-  MTLComputePipelineStateDescriptor(Vector<Shader::Constants::Value> values)
+  MTLComputePipelineStateDescriptor(Vector<shader::SpecializationConstant::Value> values)
  {
    specialization_state.values = values;
  }
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -282,7 +282,7 @@ class MTLShader : public Shader {
  std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override;
  std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override;

-  void bind() override;
+  void bind(const shader::SpecializationConstants *constants_state) override;
  void unbind() override;

  void uniform_float(int location, int comp_len, int array_size, const float *data) override;
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -452,7 +452,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
     * NOTE: This will compile the base unspecialized variant. */
    if (is_compute) {
      /* Set descriptor to default shader constants */
-      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);
+      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants->values);

      this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
    }
@@ -470,9 +470,12 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
 /** \name Shader Binding.
 * \{ */

-void MTLShader::bind()
+void MTLShader::bind(const shader::SpecializationConstants *constants_state)
 {
  MTLContext *ctx = MTLContext::get();
+  /* Copy constants state. */
+  ctx->specialization_constants_set(constants_state);
+
  if (interface == nullptr || !this->is_valid()) {
    MTL_LOG_WARNING(
        "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be "
@@ -763,11 +766,11 @@ void MTLShader::set_interface(MTLShaderInterface *interface)
 */
 static void populate_specialization_constant_values(
    MTLFunctionConstantValues *values,
-    const Shader::Constants &shader_constants,
+    const shader::SpecializationConstants &shader_constants,
    const SpecializationStateDescriptor &specialization_descriptor)
 {
  for (auto i : shader_constants.types.index_range()) {
-    const Shader::Constants::Value &value = specialization_descriptor.values[i];
+    const shader::SpecializationConstant::Value &value = specialization_descriptor.values[i];

    uint index = i + MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
    switch (shader_constants.types[i]) {
@@ -884,7 +887,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
      (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;

  /* Specialization configuration. */
-  pipeline_descriptor.specialization_state = {this->constants.values};
+  pipeline_descriptor.specialization_state = {ctx->constants_state.values};

  /* Bake pipeline state using global descriptor. */
  return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
@@ -929,7 +932,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(

    /* Custom function constant values: */
    populate_specialization_constant_values(
-        values, this->constants, pipeline_descriptor.specialization_state);
+        values, *this->constants, pipeline_descriptor.specialization_state);

    /* Prepare Vertex descriptor based on current pipeline vertex binding state. */
    MTLRenderPipelineDescriptor *desc = pso_descriptor_;
@@ -1380,7 +1383,7 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(

  /* Check if current PSO exists in the cache. */
  pso_cache_lock_.lock();
-  MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
+  MTLComputePipelineStateInstance *const *pso_lookup = compute_pso_cache_.lookup_ptr(
      compute_pipeline_descriptor);
  MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
  pso_cache_lock_.unlock();
@@ -1401,7 +1404,7 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(

    /* Custom function constant values: */
    populate_specialization_constant_values(
-        values, this->constants, compute_pipeline_descriptor.specialization_state);
+        values, *this->constants, compute_pipeline_descriptor.specialization_state);

    /* Offset the bind index for Uniform buffers such that they begin after the VBO
     * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
@@ -1576,17 +1579,8 @@ void MTLShaderCompiler::specialize_shader(ShaderSpecialization &specialization)
    return;
  }

-  Vector<Shader::Constants::Value> specialization_values(shader->interface->constant_len_);
-
-  for (const SpecializationConstant &constant : specialization.constants) {
-    const ShaderInput *input = shader->interface->constant_get(constant.name.c_str());
-    BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists");
-    specialization_values[input->location].u = constant.value.u;
-  }
-  shader->constants.is_dirty = true;
-
  /* Create descriptor using these specialization constants. */
-  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization_values);
+  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization.constants.values);

  MTLContext *metal_context = static_cast<MTLContext *>(Context::get());
  shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
--- a/source/blender/gpu/opengl/gl_shader.cc
+++ b/source/blender/gpu/opengl/gl_shader.cc
@@ -71,6 +71,14 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
  for (const SpecializationConstant &constant : info.specialization_constants_) {
    specialization_constant_names_.append(constant.name.c_str());
  }
+
+  /* NOTE: This is not thread-safe with regard to the specialization constants state access.
+   * The shader creation must be externally synchronized. */
+  main_program_ = &program_cache_.lookup_or_add_default(constants->values);
+  if (!main_program_->program_id) {
+    main_program_->program_id = glCreateProgram();
+    debug::object_label(GL_PROGRAM, main_program_->program_id, name);
+  }
 }

 /** \} */
@@ -622,15 +630,16 @@ std::string GLShader::resources_declare(const ShaderCreateInfo &info) const
  return ss.str();
 }

-std::string GLShader::constants_declare() const
+std::string GLShader::constants_declare(
+    const shader::SpecializationConstants &constants_state) const
 {
  std::stringstream ss;

  ss << "/* Specialization Constants. */\n";
-  for (int constant_index : IndexRange(constants.types.size())) {
+  for (int constant_index : IndexRange(constants_state.types.size())) {
    const StringRefNull name = specialization_constant_names_[constant_index];
-    gpu::shader::Type constant_type = constants.types[constant_index];
-    const SpecializationConstant::Value &value = constants.values[constant_index];
+    gpu::shader::Type constant_type = constants_state.types[constant_index];
+    const SpecializationConstant::Value &value = constants_state.values[constant_index];

    switch (constant_type) {
      case Type::int_t:
@@ -1189,14 +1198,14 @@ StringRefNull GLShader::glsl_patch_get(GLenum gl_stage)

 GLuint GLShader::create_shader_stage(GLenum gl_stage,
                                     MutableSpan<StringRefNull> sources,
-                                     GLSources &gl_sources)
+                                     GLSources &gl_sources,
+                                     const shader::SpecializationConstants &constants_state)
 {
  /* Patch the shader sources to include specialization constants. */
  std::string constants_source;
  Vector<StringRefNull> recreated_sources;
-  const bool has_specialization_constants = !constants.types.is_empty();
-  if (has_specialization_constants) {
-    constants_source = constants_declare();
+  if (has_specialization_constants()) {
+    constants_source = constants_declare(constants_state);
    if (sources.is_empty()) {
      recreated_sources = gl_sources.sources_get();
      sources = recreated_sources;
@@ -1291,40 +1300,38 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
 void GLShader::update_program_and_sources(GLSources &stage_sources,
                                          MutableSpan<StringRefNull> sources)
 {
-  const bool store_sources = !constants.types.is_empty() || async_compilation_;
+  const bool store_sources = has_specialization_constants() || async_compilation_;
  if (store_sources && stage_sources.is_empty()) {
    stage_sources = sources;
  }
-
-  init_program();
 }

 void GLShader::vertex_shader_from_glsl(MutableSpan<StringRefNull> sources)
 {
  update_program_and_sources(vertex_sources_, sources);
-  program_active_->vert_shader = this->create_shader_stage(
-      GL_VERTEX_SHADER, sources, vertex_sources_);
+  main_program_->vert_shader = create_shader_stage(
+      GL_VERTEX_SHADER, sources, vertex_sources_, *constants);
 }

 void GLShader::geometry_shader_from_glsl(MutableSpan<StringRefNull> sources)
 {
  update_program_and_sources(geometry_sources_, sources);
-  program_active_->geom_shader = this->create_shader_stage(
-      GL_GEOMETRY_SHADER, sources, geometry_sources_);
+  main_program_->geom_shader = create_shader_stage(
+      GL_GEOMETRY_SHADER, sources, geometry_sources_, *constants);
 }

 void GLShader::fragment_shader_from_glsl(MutableSpan<StringRefNull> sources)
 {
  update_program_and_sources(fragment_sources_, sources);
-  program_active_->frag_shader = this->create_shader_stage(
-      GL_FRAGMENT_SHADER, sources, fragment_sources_);
+  main_program_->frag_shader = create_shader_stage(
+      GL_FRAGMENT_SHADER, sources, fragment_sources_, *constants);
 }

 void GLShader::compute_shader_from_glsl(MutableSpan<StringRefNull> sources)
 {
  update_program_and_sources(compute_sources_, sources);
-  program_active_->compute_shader = this->create_shader_stage(
-      GL_COMPUTE_SHADER, sources, compute_sources_);
+  main_program_->compute_shader = create_shader_stage(
+      GL_COMPUTE_SHADER, sources, compute_sources_, *constants);
 }

 bool GLShader::finalize(const shader::ShaderCreateInfo *info)
@@ -1346,25 +1353,31 @@ bool GLShader::finalize(const shader::ShaderCreateInfo *info)
    return true;
  }

-  program_link();
+  main_program_->program_link(name);
  return post_finalize(info);
 }

 bool GLShader::post_finalize(const shader::ShaderCreateInfo *info)
 {
-  if (!check_link_status()) {
+  GLuint program_id = main_program_->program_id;
+  GLint status;
+  glGetProgramiv(program_id, GL_LINK_STATUS, &status);
+  if (!status) {
+    char log[5000];
+    glGetProgramInfoLog(program_id, sizeof(log), nullptr, log);
+    GLLogParser parser;
+    print_log({debug_source}, log, "Linking", true, &parser);
    return false;
  }

  /* Reset for specialization constants variations. */
  async_compilation_ = false;

-  GLuint program_id = program_get();
  if (info != nullptr) {
-    interface = new GLShaderInterface(program_id, *info);
+    interface = new GLShaderInterface(main_program_->program_id, *info);
  }
  else {
-    interface = new GLShaderInterface(program_id);
+    interface = new GLShaderInterface(main_program_->program_id);
  }

  return true;
@@ -1376,10 +1389,10 @@ bool GLShader::post_finalize(const shader::ShaderCreateInfo *info)
 /** \name Binding
 * \{ */

-void GLShader::bind()
+void GLShader::bind(const shader::SpecializationConstants *constants_state)
 {
-  GLuint program_id = program_get();
-  glUseProgram(program_id);
+  GLProgram &program = program_get(constants_state);
+  glUseProgram(program.program_id);
 }

 void GLShader::unbind()
@@ -1533,103 +1546,80 @@ GLShader::GLProgram::~GLProgram()
  glDeleteProgram(program_id);
 }

-void GLShader::program_link()
+void GLShader::GLProgram::program_link(StringRefNull shader_name)
 {
-  BLI_assert(program_active_ != nullptr);
-  if (program_active_->program_id == 0) {
-    program_active_->program_id = glCreateProgram();
-    debug::object_label(GL_PROGRAM, program_active_->program_id, name);
+  if (this->program_id == 0) {
+    this->program_id = glCreateProgram();
+    debug::object_label(GL_PROGRAM, this->program_id, shader_name.c_str());
  }

-  if (async_compilation_) {
-    return;
-  }
+  GLuint program_id = this->program_id;

-  GLuint program_id = program_active_->program_id;
-
-  if (program_active_->vert_shader) {
-    glAttachShader(program_id, program_active_->vert_shader);
+  if (this->vert_shader) {
+    glAttachShader(program_id, this->vert_shader);
  }
-  if (program_active_->geom_shader) {
-    glAttachShader(program_id, program_active_->geom_shader);
+  if (this->geom_shader) {
+    glAttachShader(program_id, this->geom_shader);
  }
-  if (program_active_->frag_shader) {
-    glAttachShader(program_id, program_active_->frag_shader);
+  if (this->frag_shader) {
+    glAttachShader(program_id, this->frag_shader);
  }
-  if (program_active_->compute_shader) {
-    glAttachShader(program_id, program_active_->compute_shader);
+  if (this->compute_shader) {
+    glAttachShader(program_id, this->compute_shader);
  }
  glLinkProgram(program_id);
 }

-bool GLShader::check_link_status()
+GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants *constants_state)
 {
-  GLuint program_id = program_active_->program_id;
-  GLint status;
-  glGetProgramiv(program_id, GL_LINK_STATUS, &status);
-  if (!status) {
-    char log[5000];
-    glGetProgramInfoLog(program_id, sizeof(log), nullptr, log);
-    GLLogParser parser;
-    print_log({debug_source}, log, "Linking", true, &parser);
+  BLI_assert(constants_state == nullptr || this->has_specialization_constants() == true);
+
+  if (constants_state == nullptr) {
+    /* Early exit for shaders that don't use specialization constants. */
+    BLI_assert(main_program_);
+    return *main_program_;
  }

-  return bool(status);
-}
+  program_cache_mutex_.lock();

-void GLShader::init_program()
-{
-  if (program_active_) {
-    return;
+  GLProgram &program = program_cache_.lookup_or_add_default(constants_state->values);
+
+  program_cache_mutex_.unlock();
+
+  /* Avoid two threads trying to specialize the same shader at the same time. */
+  std::scoped_lock lock(program.compilation_mutex);
+
+  if (program.program_id != 0) {
+    /* Specialization is already compiled. */
+    return program;
  }

-  program_active_ = &program_cache_.lookup_or_add_default(constants.values);
-  if (!program_active_->program_id) {
-    program_active_->program_id = glCreateProgram();
-    debug::object_label(GL_PROGRAM, program_active_->program_id, name);
+  if (!vertex_sources_.is_empty()) {
+    program.vert_shader = create_shader_stage(
+        GL_VERTEX_SHADER, {}, vertex_sources_, *constants_state);
  }
-}
-
-GLuint GLShader::program_get()
-{
-  if (constants.types.is_empty()) {
-    /* Early exit for shaders that doesn't use specialization constants. The active shader should
-     * already be setup. */
-    BLI_assert(program_active_ && program_active_->program_id);
-    return program_active_->program_id;
+  if (!geometry_sources_.is_empty()) {
+    program.geom_shader = create_shader_stage(
+        GL_GEOMETRY_SHADER, {}, geometry_sources_, *constants_state);
+  }
+  if (!fragment_sources_.is_empty()) {
+    program.frag_shader = create_shader_stage(
+        GL_FRAGMENT_SHADER, {}, fragment_sources_, *constants_state);
+  }
+  if (!compute_sources_.is_empty()) {
+    program.compute_shader = create_shader_stage(
+        GL_COMPUTE_SHADER, {}, compute_sources_, *constants_state);
  }

-  if (!constants.is_dirty) {
-    /* Early exit when constants didn't change since the last call. */
-    BLI_assert(program_active_ && program_active_->program_id);
-    return program_active_->program_id;
+  if (async_compilation_) {
+    program.program_id = glCreateProgram();
+    debug::object_label(GL_PROGRAM, program.program_id, name);
+    return program;
  }

-  program_active_ = &program_cache_.lookup_or_add_default(constants.values);
-  if (!program_active_->program_id) {
-    MutableSpan<StringRefNull> no_sources;
-    if (!vertex_sources_.is_empty()) {
-      program_active_->vert_shader = create_shader_stage(
-          GL_VERTEX_SHADER, no_sources, vertex_sources_);
-    }
-    if (!geometry_sources_.is_empty()) {
-      program_active_->geom_shader = create_shader_stage(
-          GL_GEOMETRY_SHADER, no_sources, geometry_sources_);
-    }
-    if (!fragment_sources_.is_empty()) {
-      program_active_->frag_shader = create_shader_stage(
-          GL_FRAGMENT_SHADER, no_sources, fragment_sources_);
-    }
-    if (!compute_sources_.is_empty()) {
-      program_active_->compute_shader = create_shader_stage(
-          GL_COMPUTE_SHADER, no_sources, compute_sources_);
-    }
+  program.program_link(name);

-    program_link();
-  }
-
-  constants.is_dirty = false;
-  return program_active_->program_id;
+  return program;
 }

 GLSourcesBaked GLShader::get_sources()
@@ -1840,7 +1830,12 @@ Shader *GLShaderCompiler::compile_shader(const shader::ShaderCreateInfo &info)

  GLCompilerWorker *worker = get_compiler_worker(sources);

-  if (!worker->load_program_binary(shader->program_active_->program_id) ||
+  /* This path is always called for the default shader compilation, not for specialization.
+   * Use the default constant template. */
+  const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state(
+      wrap(shader));
+
+  if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values).program_id) ||
      !shader->post_finalize(&info))
  {
    /* Compilation failed, try to compile it locally. */
@@ -1862,17 +1857,10 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization)
  static std::mutex mutex;

  GLShader *shader = static_cast<GLShader *>(unwrap(specialization.shader));
-  Vector<SpecializationConstant> &constants = specialization.constants;

  auto program_get = [&]() -> GLShader::GLProgram * {
-    for (const SpecializationConstant &constant : constants) {
-      const ShaderInput *input = shader->interface->constant_get(constant.name.c_str());
-      BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists");
-      shader->constants.values[input->location].u = constant.value.u;
-    }
-    shader->constants.is_dirty = true;
-    if (shader->program_cache_.contains(shader->constants.values)) {
-      return &shader->program_cache_.lookup(shader->constants.values);
+    if (shader->program_cache_.contains(specialization.constants.values)) {
+      return &shader->program_cache_.lookup(specialization.constants.values);
    }
    return nullptr;
  };
@@ -1882,7 +1870,6 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization)
    GLShader::GLProgram *program = program_get();
    glDeleteProgram(program->program_id);
    program->program_id = 0;
-    shader->constants.is_dirty = true;
  };

  GLSourcesBaked sources;
@@ -1896,7 +1883,7 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization)

    /** WORKAROUND: Set async_compilation to true, so only the sources are generated. */
    shader->async_compilation_ = true;
-    shader->program_get();
+    shader->program_get(&specialization.constants);
    shader->async_compilation_ = false;
    sources = shader->get_sources();

--- a/source/blender/gpu/opengl/gl_shader.hh
+++ b/source/blender/gpu/opengl/gl_shader.hh
@@ -81,6 +81,8 @@ class GLShader : public Shader {
    GLuint frag_shader = 0;
    GLuint compute_shader = 0;

+    std::mutex compilation_mutex;
+
    GLProgram() {}
    GLProgram(GLProgram &&other)
    {
@@ -96,16 +98,18 @@ class GLShader : public Shader {
      other.compute_shader = 0;
    }
    ~GLProgram();
+
+    void program_link(StringRefNull shader_name);
  };

  using GLProgramCacheKey = Vector<shader::SpecializationConstant::Value>;
+  /** Contains all specialized shader variants. */
  Map<GLProgramCacheKey, GLProgram> program_cache_;

-  /**
-   * Points to the active program. When binding a shader the active program is
-   * setup.
-   */
-  GLProgram *program_active_ = nullptr;
+  std::mutex program_cache_mutex_;
+
+  /** Main program instance. This is the default specialized variant that is first compiled. */
+  GLProgram *main_program_ = nullptr;

  /* When true, the shader generates its GLSources but it's not compiled.
   * (Used for batch compilation) */
@@ -123,29 +127,13 @@ class GLShader : public Shader {

  Vector<const char *> specialization_constant_names_;

-  /**
-   * Initialize an this instance.
-   *
-   * - Ensures that program_cache at least has a default GLProgram.
-   * - Ensures that active program is set.
-   * - Active GLProgram has a shader_program (at least in creation state).
-   * - Does nothing when instance was already initialized.
-   */
-  void init_program();
-
  void update_program_and_sources(GLSources &stage_sources, MutableSpan<StringRefNull> sources);

  /**
-   * Link the active program.
-   */
-  void program_link();
-  bool check_link_status();
-
-  /**
-   * Return a GLProgram program id that reflects the current state of shader.constants.values.
+   * Return a GLProgram that reflects the given `constants_state`.
   * The returned program_id is in linked state, or an error happened during linking.
   */
-  GLuint program_get();
+  GLShader::GLProgram &program_get(const shader::SpecializationConstants *constants_state);

  /** True if any shader failed to compile. */
  bool compilation_failed_ = false;
@@ -168,14 +156,14 @@ class GLShader : public Shader {
  void warm_cache(int /*limit*/) override{};

  std::string resources_declare(const shader::ShaderCreateInfo &info) const override;
-  std::string constants_declare() const;
+  std::string constants_declare(const shader::SpecializationConstants &constants_state) const;
  std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override;
  std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override;
  std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override;
  std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override;
  std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override;

-  void bind() override;
+  void bind(const shader::SpecializationConstants *constants_state) override;
  void unbind() override;

  void uniform_float(int location, int comp_len, int array_size, const float *data) override;
@@ -189,7 +177,7 @@ class GLShader : public Shader {
    if (!compute_sources_.is_empty()) {
      return true;
    }
-    return program_active_->compute_shader != 0;
+    return main_program_->compute_shader != 0;
  }

  GLSourcesBaked get_sources();
@@ -197,10 +185,16 @@ class GLShader : public Shader {
 private:
  StringRefNull glsl_patch_get(GLenum gl_stage);

+  bool has_specialization_constants() const
+  {
+    return constants->types.is_empty() == false;
+  }
+
  /** Create, compile and attach the shader stage to the shader program. */
  GLuint create_shader_stage(GLenum gl_stage,
                             MutableSpan<StringRefNull> sources,
-                             GLSources &gl_sources);
+                             GLSources &gl_sources,
+                             const shader::SpecializationConstants &constants_state);

  /**
   * \brief features available on newer implementation such as native barycentric coordinates
--- a/source/blender/gpu/tests/framebuffer_test.cc
+++ b/source/blender/gpu/tests/framebuffer_test.cc
@@ -314,6 +314,8 @@ static void test_framebuffer_multi_viewport()
  }
  MEM_freeN(read_data);

+  GPU_shader_unbind();
+
  GPU_framebuffer_free(framebuffer);
  GPU_texture_free(texture);
  GPU_shader_free(shader);
@@ -400,6 +402,8 @@ static void test_framebuffer_subpass_input()
  EXPECT_EQ(*read_data_b, 0xDEADC0DE);
  MEM_freeN(read_data_b);

+  GPU_shader_unbind();
+
  GPU_framebuffer_free(framebuffer);
  GPU_texture_free(texture_a);
  GPU_texture_free(texture_b);
--- a/source/blender/gpu/tests/specialization_constants_test.cc
+++ b/source/blender/gpu/tests/specialization_constants_test.cc
@@ -14,8 +14,6 @@
 #include "GPU_storage_buffer.hh"
 #include "GPU_vertex_format.hh"

-#include "BLI_math_vector.hh"
-#include "BLI_utility_mixins.hh"
 #include "BLI_vector.hh"

 #include "gpu_shader_create_info.hh"
@@ -44,26 +42,24 @@ struct ShaderSpecializationConst {

    GPU_storagebuf_bind(ssbo, GPU_shader_get_ssbo_binding(shader, "data_out"));

-    /* Expect defaults. */
-    float_in = 2;
-    uint_in = 3;
-    int_in = 4;
-    bool_in = true;
-
-    this->validate();
-
    /* Test values. */
    float_in = 52;
    uint_in = 324;
    int_in = 455;
    bool_in = false;

-    GPU_shader_constant_float(shader, "float_in", float_in);
-    GPU_shader_constant_uint(shader, "uint_in", uint_in);
-    GPU_shader_constant_int(shader, "int_in", int_in);
-    GPU_shader_constant_bool(shader, "bool_in", bool_in);
+    int float_in_loc = GPU_shader_get_constant(shader, "float_in");
+    int uint_in_loc = GPU_shader_get_constant(shader, "uint_in");
+    int int_in_loc = GPU_shader_get_constant(shader, "int_in");
+    int bool_in_loc = GPU_shader_get_constant(shader, "bool_in");

-    this->validate();
+    shader::SpecializationConstants constants = GPU_shader_get_default_constant_state(shader);
+    constants.set_value(float_in_loc, float_in);
+    constants.set_value(uint_in_loc, uint_in);
+    constants.set_value(int_in_loc, int_in);
+    constants.set_value(bool_in_loc, bool_in);
+
+    this->validate(constants);

    GPU_render_end();
  }
@@ -94,7 +90,7 @@ struct ShaderSpecializationConst {
    EXPECT_NE(shader, nullptr);
  }

-  void validate()
+  void validate(shader::SpecializationConstants &constants)
  {
    if (is_graphic) {
      GPUFrameBuffer *fb = GPU_framebuffer_create("test_fb");
@@ -108,14 +104,14 @@ struct ShaderSpecializationConst {

      GPU_vertbuf_data_alloc(*verts, 1);
      Batch *batch = GPU_batch_create_ex(GPU_PRIM_POINTS, verts, nullptr, GPU_BATCH_OWNS_VBO);
-      GPU_batch_set_shader(batch, shader);
+      GPU_batch_set_shader(batch, shader, &constants);
      GPU_batch_draw_advanced(batch, 0, 1, 0, 1);
      GPU_batch_discard(batch);

      GPU_framebuffer_free(fb);
    }
    else {
-      GPU_compute_dispatch(shader, 1, 1, 1);
+      GPU_compute_dispatch(shader, 1, 1, 1, &constants);
    }

    GPU_finish();
--- a/source/blender/gpu/vulkan/vk_context.cc
+++ b/source/blender/gpu/vulkan/vk_context.cc
@@ -276,16 +276,17 @@ void VKContext::update_pipeline_data(GPUPrimType primitive,
 {
  VKShader &vk_shader = unwrap(*shader);
  VKFrameBuffer &framebuffer = *active_framebuffer_get();
-  update_pipeline_data(
-      vk_shader,
-      vk_shader.ensure_and_get_graphics_pipeline(primitive, vao, state_manager_get(), framebuffer),
-      r_pipeline_data);
+  update_pipeline_data(vk_shader,
+                       vk_shader.ensure_and_get_graphics_pipeline(
+                           primitive, vao, state_manager_get(), framebuffer, constants_state_),
+                       r_pipeline_data);
 }

 void VKContext::update_pipeline_data(render_graph::VKPipelineData &r_pipeline_data)
 {
  VKShader &vk_shader = unwrap(*shader);
-  update_pipeline_data(vk_shader, vk_shader.ensure_and_get_compute_pipeline(), r_pipeline_data);
+  const VkPipeline vk_pipeline = vk_shader.ensure_and_get_compute_pipeline(constants_state_);
+  update_pipeline_data(vk_shader, vk_pipeline, r_pipeline_data);
 }

 void VKContext::update_pipeline_data(VKShader &vk_shader,
@@ -401,6 +402,13 @@ void VKContext::swap_buffers_post_handler()
  sync_backbuffer(true);
 }

+void VKContext::specialization_constants_set(
+    const shader::SpecializationConstants *constants_state)
+{
+  /* Keep a private copy; a null pointer resets to a default-constructed state. */
+  constants_state_ = constants_state ? *constants_state : shader::SpecializationConstants{};
+}
+
 /** \} */

 /* -------------------------------------------------------------------- */
--- a/source/blender/gpu/vulkan/vk_context.hh
+++ b/source/blender/gpu/vulkan/vk_context.hh
@@ -52,6 +52,9 @@ class VKContext : public Context, NonCopyable {
  std::optional<std::reference_wrapper<VKThreadData>> thread_data_;
  std::optional<std::reference_wrapper<render_graph::VKRenderGraph>> render_graph_;

+  /* Specialization constants copied in by #specialization_constants_set for pipeline lookup. */
+  shader::SpecializationConstants constants_state_;
+
 public:
  VKDiscardPool discard_pool;

@@ -135,6 +138,8 @@ class VKContext : public Context, NonCopyable {
  static void openxr_acquire_framebuffer_image_callback(GHOST_VulkanOpenXRData *data);
  static void openxr_release_framebuffer_image_callback(GHOST_VulkanOpenXRData *data);

+  void specialization_constants_set(const shader::SpecializationConstants *constants_state);
+
 private:
  void swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &data);
  void swap_buffers_post_handler();
--- a/source/blender/gpu/vulkan/vk_shader.cc
+++ b/source/blender/gpu/vulkan/vk_shader.cc
@@ -645,7 +645,9 @@ bool VKShader::finalize_post()
   * step for graphical shaders.
   */
  if (result && is_compute_shader_) {
-    ensure_and_get_compute_pipeline();
+    /* This is only done for the first shader compilation (not specialization).
+     * Give the default constants. */
+    ensure_and_get_compute_pipeline(*constants);
  }
  return result;
 }
@@ -732,8 +734,12 @@ bool VKShader::finalize_descriptor_set_layouts(VKDevice &vk_device,
  return vk_descriptor_set_layout_ != VK_NULL_HANDLE;
 }

-void VKShader::bind()
+void VKShader::bind(const shader::SpecializationConstants *constants_state)
 {
-  /* Intentionally empty. Binding of the pipeline are done just before drawing/dispatching.
-   * See #VKPipeline.update_and_bind */
+  /* Only record the specialization constants: the context keeps its own copy of
+   * `constants_state` (see #VKContext::specialization_constants_set).
+   * Binding of the pipeline is done just before drawing/dispatching.
+   * See #VKPipeline.update_and_bind */
+  VKContext *ctx = VKContext::get();
+  ctx->specialization_constants_set(constants_state);
 }
@@ -1294,7 +1300,8 @@ bool VKShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info

 /** \} */

-VkPipeline VKShader::ensure_and_get_compute_pipeline()
+VkPipeline VKShader::ensure_and_get_compute_pipeline(
+    const shader::SpecializationConstants &constants_state)
 {
  BLI_assert(is_compute_shader_);
  BLI_assert(compute_module.vk_shader_module != VK_NULL_HANDLE);
@@ -1302,12 +1309,12 @@ VkPipeline VKShader::ensure_and_get_compute_pipeline()

  /* Early exit when no specialization constants are used and the vk_pipeline_base_ is already
   * valid. This would handle most cases. */
-  if (constants.values.is_empty() && vk_pipeline_base_ != VK_NULL_HANDLE) {
+  if (constants_state.values.is_empty() && vk_pipeline_base_ != VK_NULL_HANDLE) {
    return vk_pipeline_base_;
  }

  VKComputeInfo compute_info = {};
-  compute_info.specialization_constants.extend(constants.values);
+  compute_info.specialization_constants.extend(constants_state.values);
  compute_info.vk_shader_module = compute_module.vk_shader_module;
  compute_info.vk_pipeline_layout = vk_pipeline_layout;

@@ -1325,7 +1332,8 @@ VkPipeline VKShader::ensure_and_get_compute_pipeline()
 VkPipeline VKShader::ensure_and_get_graphics_pipeline(GPUPrimType primitive,
                                                      VKVertexAttributeObject &vao,
                                                      VKStateManager &state_manager,
-                                                      VKFrameBuffer &framebuffer)
+                                                      VKFrameBuffer &framebuffer,
+                                                      SpecializationConstants &constants_state)
 {
  BLI_assert(!is_compute_shader_);
  BLI_assert_msg(
@@ -1336,7 +1344,7 @@ VkPipeline VKShader::ensure_and_get_graphics_pipeline(GPUPrimType primitive,

  /* TODO: Graphics info should be cached in VKContext and only the changes should be applied. */
  VKGraphicsInfo graphics_info = {};
-  graphics_info.specialization_constants.extend(constants.values);
+  graphics_info.specialization_constants.extend(constants_state.values);
  graphics_info.vk_pipeline_layout = vk_pipeline_layout;

  graphics_info.vertex_in.vk_topology = to_vk_primitive_topology(primitive);
--- a/source/blender/gpu/vulkan/vk_shader.hh
+++ b/source/blender/gpu/vulkan/vk_shader.hh
@@ -76,7 +76,7 @@ class VKShader : public Shader {
  bool is_ready() const;
  void warm_cache(int limit) override;

-  void bind() override;
+  void bind(const shader::SpecializationConstants *constants_state) override;
  void unbind() override;

  void uniform_float(int location, int comp_len, int array_size, const float *data) override;
@@ -89,11 +89,13 @@ class VKShader : public Shader {
  std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override;
  std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override;

-  VkPipeline ensure_and_get_compute_pipeline();
+  VkPipeline ensure_and_get_compute_pipeline(
+      const shader::SpecializationConstants &constants_state);
  VkPipeline ensure_and_get_graphics_pipeline(GPUPrimType primitive,
                                              VKVertexAttributeObject &vao,
                                              VKStateManager &state_manager,
-                                              VKFrameBuffer &framebuffer);
+                                              VKFrameBuffer &framebuffer,
+                                              shader::SpecializationConstants &constants_state);

  const VKShaderInterface &interface_get() const;