Refactor: GPU: GPUMaterial & GPUPass compilation

Cleanup and simplification of GPUMaterial and GPUPass compilation.
See #133674 for details/goals.

- Remove the `draw_manager_shader` thread.
  Deferred compilation is now handled by the `gpu::ShaderCompiler`
  through the batch compilation API.
  Batch management is handled by the `GPUPassCache`.
- Simplify `GPUMaterial` status tracking so it just queries the
  `GPUPass` status.
- Split the `GPUPass` and the `GPUCodegen` code.
- Replace the (broken) `GPU_material_recalc_flag_get` with the new
  `GPU_pass_compilation_timestamp` (see the sketch after this list).
- Add `GPU_pass_cache_wait_for_all` and `GPU_shader_batch_wait_for_all`,
  and remove the busy waits from EEVEE.
- Remove many unused functions, properties, includes...
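
For context, a minimal sketch (not part of the diff) of the new update-detection and wait pattern, distilled from the EEVEE hunks further down. The `GPU_*` functions and the `gpu_pass_*_update_` members are the ones appearing in this commit; the wrapper functions are illustrative only and the surrounding sync logic is omitted.

#include "GPU_material.hh"
#include "GPU_pass.hh"

/* Mirrors the members added to MaterialModule in this commit. */
static uint64_t gpu_pass_last_update_ = 0;
static uint64_t gpu_pass_next_update_ = 0;

static void begin_sync()
{
  /* Snapshot the global pass compilation counter once per sync. Any pass
   * compiled before this point has a timestamp <= gpu_pass_last_update_. */
  uint64_t next_update = GPU_pass_global_compilation_count();
  gpu_pass_last_update_ = gpu_pass_next_update_;
  gpu_pass_next_update_ = next_update;
}

static bool material_pass_updated(GPUMaterial *gpumat)
{
  /* Unlike the old recalc flag, reading the timestamp consumes no state, so a
   * GPUMaterial shared across viewports reports the update to every viewport. */
  return GPU_material_compilation_timestamp(gpumat) > gpu_pass_last_update_;
}

static void wait_for_deferred_shaders()
{
  /* Blocks until the GPUPassCache has finished all queued batch compilations,
   * replacing the previous BLI_time_sleep_ms(50) busy-wait loops in EEVEE. */
  GPU_pass_cache_wait_for_all();
}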

Pull Request: https://projects.blender.org/blender/blender/pulls/135637
Miguel Pozo
2025-05-22 17:53:22 +02:00
parent 9222daacb7
commit e6638d6e5e
29 changed files with 1283 additions and 2111 deletions


@@ -44,8 +44,6 @@ ShaderOperation::ShaderOperation(Context &context,
{
material_ = GPU_material_from_callbacks(
GPU_MAT_COMPOSITOR, &construct_material, &generate_code, this);
GPU_material_status_set(material_, GPU_MAT_QUEUED);
GPU_material_compile(material_);
}
ShaderOperation::~ShaderOperation()


@@ -75,7 +75,6 @@ set(SRC
intern/draw_gpu_context.cc
intern/draw_hair.cc
intern/draw_manager.cc
intern/draw_manager_shader.cc
intern/draw_manager_text.cc
intern/draw_pbvh.cc
intern/draw_pointcloud.cc


@@ -180,9 +180,6 @@ void DRW_system_gpu_render_context_disable(void *re_system_gpu_context);
void DRW_blender_gpu_render_context_enable(void *re_gpu_context);
void DRW_blender_gpu_render_context_disable(void *re_gpu_context);
void DRW_deferred_shader_remove(GPUMaterial *mat);
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat);
DRWData *DRW_viewport_data_create();
void DRW_viewport_data_free(DRWData *drw_data);


@@ -25,6 +25,7 @@
#include "ED_screen.hh"
#include "ED_view3d.hh"
#include "GPU_context.hh"
#include "GPU_pass.hh"
#include "IMB_imbuf_types.hh"
#include "RE_pipeline.h"
@@ -484,9 +485,12 @@ void Instance::render_sample()
if (!is_viewport() && sampling.do_render_sync()) {
render_sync();
while (materials.queued_shaders_count > 0) {
/* Leave some time for shaders to compile. */
BLI_time_sleep_ms(50);
/** WORKAROUND: Re-sync to check if all shaders are already compiled. */
GPU_pass_cache_wait_for_all();
/** WORKAROUND: Re-sync now that all shaders are compiled. */
/* This may need to happen more than once, since actual materials may require more passes
* (eg. volume ones) than the fallback material used for queued passes. */
/* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the
* same step can cause mismatching `has_motion` values between sync. */
render_sync();
}
}
@@ -824,10 +828,13 @@ void Instance::light_bake_irradiance(
custom_pipeline_wrapper([&]() {
this->render_sync();
while (materials.queued_shaders_count > 0) {
/* Leave some time for shaders to compile. */
BLI_time_sleep_ms(50);
/** WORKAROUND: Re-sync to check if all shaders are already compiled. */
this->render_sync();
GPU_pass_cache_wait_for_all();
/** WORKAROUND: Re-sync now that all shaders are compiled. */
/* This may need to happen more than once, since actual materials may require more passes
* (eg. volume ones) than the fallback material used for queued passes. */
/* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the
* same step can cause mismatching `has_motion` values between sync. */
render_sync();
}
/* Sampling module needs to be initialized to computing lighting. */
sampling.init(probe);


@@ -312,7 +312,7 @@ void LookdevModule::sync_pass(PassSimple &pass,
const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_CULL_BACK;
GPUMaterial *gpumat = inst_.shaders.material_shader_get(
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, MAT_PROBE_NONE);
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, false, inst_.materials.default_surface);
pass.state_set(state);
pass.material_set(*inst_.manager, gpumat);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);


@@ -119,6 +119,12 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
bke::node_set_active(*ntree, *output);
}
{
default_surface = reinterpret_cast<::Material *>(BKE_id_copy_ex(
nullptr, &BKE_material_default_surface()->id, nullptr, LIB_ID_COPY_LOCALIZE));
default_volume = reinterpret_cast<::Material *>(BKE_id_copy_ex(
nullptr, &BKE_material_default_volume()->id, nullptr, LIB_ID_COPY_LOCALIZE));
}
{
error_mat_ = BKE_id_new_nomain<::Material>("EEVEE default error");
bNodeTree *ntree = bke::node_tree_add_tree_embedded(
@@ -146,6 +152,8 @@ MaterialModule::~MaterialModule()
{
BKE_id_free(nullptr, metallic_mat);
BKE_id_free(nullptr, diffuse_mat);
BKE_id_free(nullptr, default_surface);
BKE_id_free(nullptr, default_volume);
BKE_id_free(nullptr, error_mat_);
}
@@ -154,6 +162,10 @@ void MaterialModule::begin_sync()
queued_shaders_count = 0;
queued_optimize_shaders_count = 0;
uint64_t next_update = GPU_pass_global_compilation_count();
gpu_pass_last_update_ = gpu_pass_next_update_;
gpu_pass_next_update_ = next_update;
material_map_.clear();
shader_map_.clear();
}
@@ -174,11 +186,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
use_deferred_compilation = false;
}
const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL);
::Material *default_mat = is_volume ? default_volume : default_surface;
MaterialPass matpass = MaterialPass();
matpass.gpumat = inst_.shaders.material_shader_get(
blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation);
blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation, default_mat);
const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL);
const bool is_forward = ELEM(pipeline_type,
MAT_PIPE_FORWARD,
MAT_PIPE_PREPASS_FORWARD,
@@ -196,12 +210,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
}
case GPU_MAT_QUEUED:
queued_shaders_count++;
matpass.gpumat = inst_.shaders.material_default_shader_get(pipeline_type, geometry_type);
matpass.gpumat = inst_.shaders.material_shader_get(
default_mat, default_mat->nodetree, pipeline_type, geometry_type, false, nullptr);
break;
case GPU_MAT_FAILED:
default:
matpass.gpumat = inst_.shaders.material_shader_get(
error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false);
error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false, nullptr);
break;
}
/* Returned material should be ready to be drawn. */
@@ -211,11 +226,9 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
const bool is_transparent = GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT);
if (inst_.is_viewport() && use_deferred_compilation &&
GPU_material_recalc_flag_get(matpass.gpumat))
{
/* TODO(Miguel Pozo): This is broken, it consumes the flag,
* but GPUMats can be shared across viewports. */
bool pass_updated = GPU_material_compilation_timestamp(matpass.gpumat) > gpu_pass_last_update_;
if (inst_.is_viewport() && use_deferred_compilation && pass_updated) {
inst_.sampling.reset();
const bool has_displacement = GPU_material_has_displacement_output(matpass.gpumat) &&


@@ -352,6 +352,8 @@ class MaterialModule {
public:
::Material *diffuse_mat;
::Material *metallic_mat;
::Material *default_surface;
::Material *default_volume;
int64_t queued_shaders_count = 0;
int64_t queued_optimize_shaders_count = 0;
@@ -368,6 +370,9 @@ class MaterialModule {
::Material *error_mat_;
uint64_t gpu_pass_last_update_ = 0;
uint64_t gpu_pass_next_update_ = 0;
public:
MaterialModule(Instance &inst);
~MaterialModule();


@@ -12,6 +12,7 @@
#include "GPU_capabilities.hh"
#include "BKE_material.hh"
#include "DNA_world_types.h"
#include "gpu_shader_create_info.hh"
@@ -916,17 +917,25 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
}
}
struct CallbackThunk {
ShaderModule *shader_module;
::Material *default_mat;
};
/* WATCH: This can be called from another thread! Needs to not touch the shader module in any
* thread unsafe manner. */
static void codegen_callback(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen)
static void codegen_callback(void *void_thunk, GPUMaterial *mat, GPUCodegenOutput *codegen)
{
reinterpret_cast<ShaderModule *>(thunk)->material_create_info_amend(mat, codegen);
CallbackThunk *thunk = static_cast<CallbackThunk *>(void_thunk);
thunk->shader_module->material_create_info_amend(mat, codegen);
}
static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat)
static GPUPass *pass_replacement_cb(void *void_thunk, GPUMaterial *mat)
{
using namespace blender::gpu::shader;
CallbackThunk *thunk = static_cast<CallbackThunk *>(void_thunk);
const ::Material *blender_mat = GPU_material_get_material(mat);
uint64_t shader_uuid = GPU_material_uuid_get(mat);
@@ -963,100 +972,66 @@ static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat)
(is_prepass && (!has_vertex_displacement && !has_transparency &&
!has_raytraced_transmission));
if (can_use_default) {
GPUMaterial *mat = reinterpret_cast<ShaderModule *>(thunk)->material_default_shader_get(
pipeline_type, geometry_type);
GPUMaterial *mat = thunk->shader_module->material_shader_get(thunk->default_mat,
thunk->default_mat->nodetree,
pipeline_type,
geometry_type,
false,
nullptr);
return GPU_material_get_pass(mat);
}
return nullptr;
}
GPUMaterial *ShaderModule::material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
::Material *blender_mat = (is_volume) ? BKE_material_default_volume() :
BKE_material_default_surface();
return material_shader_get(
blender_mat, blender_mat->nodetree, pipeline_type, geometry_type, false);
}
GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool deferred_compilation)
bool deferred_compilation,
::Material *default_mat)
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
eMaterialDisplacement displacement_type = to_displacement_type(blender_mat->displacement_method);
eMaterialThickness thickness_type = to_thickness_type(blender_mat->thickness_mode);
uint64_t shader_uuid = shader_uuid_from_material_type(
pipeline_type, geometry_type, displacement_type, thickness_type, blender_mat->blend_flag);
bool is_default_material = ELEM(
blender_mat, BKE_material_default_surface(), BKE_material_default_volume());
bool is_default_material = default_mat == nullptr;
BLI_assert(blender_mat != default_mat);
GPUMaterial *mat = DRW_shader_from_material(blender_mat,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
deferred_compilation,
codegen_callback,
this,
is_default_material ? nullptr : pass_replacement_cb);
CallbackThunk thunk = {this, default_mat};
return mat;
return GPU_material_from_nodetree(blender_mat,
nodetree,
&blender_mat->gpumaterial,
blender_mat->id.name,
GPU_MAT_EEVEE,
shader_uuid,
deferred_compilation,
codegen_callback,
&thunk,
is_default_material ? nullptr : pass_replacement_cb);
}
GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type)
eMaterialPipeline pipeline_type,
bool deferred_compilation)
{
bool is_volume = (pipeline_type == MAT_PIPE_VOLUME_MATERIAL);
bool defer_compilation = is_volume;
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, MAT_GEOM_WORLD);
return DRW_shader_from_world(blender_world,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
defer_compilation,
codegen_callback,
this);
}
CallbackThunk thunk = {this, nullptr};
GPUMaterial *ShaderModule::material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
GPUMaterial *gpumat = GPU_material_from_nodetree(nullptr,
nullptr,
nodetree,
&materials,
name,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
false,
codegen_callback,
this);
GPU_material_status_set(gpumat, GPU_MAT_CREATED);
GPU_material_compile(gpumat);
/* Queue deferred material optimization. */
DRW_shader_queue_optimize_material(gpumat);
return gpumat;
return GPU_material_from_nodetree(nullptr,
nodetree,
&blender_world->gpumaterial,
blender_world->id.name,
GPU_MAT_EEVEE,
shader_uuid,
deferred_compilation,
codegen_callback,
&thunk);
}
/** \} */


@@ -235,26 +235,16 @@ class ShaderModule {
bool use_lightprobe_eval);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
GPUMaterial *material_shader_get(::Material *blender_mat,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool deferred_compilation);
bool deferred_compilation,
::Material *default_mat);
GPUMaterial *world_shader_get(::World *blender_world,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type);
/**
* Variation to compile a material only with a `nodetree`. Caller needs to maintain the list of
* materials and call GPU_material_free on it to update the material.
*/
GPUMaterial *material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
eMaterialPipeline pipeline_type,
bool deferred_compilation);
void material_create_info_amend(GPUMaterial *mat, GPUCodegenOutput *codegen);


@@ -148,7 +148,7 @@ void World::sync()
inst_.sampling.reset();
}
GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED);
GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED, false);
inst_.manager->register_layer_attributes(gpumat);
@@ -169,7 +169,8 @@ void World::sync_volume(const WorldHandle &world_handle)
/* Only the scene world nodetree can have volume shader. */
if (world && world->nodetree && world->use_nodes) {
gpumat = inst_.shaders.world_shader_get(world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL);
gpumat = inst_.shaders.world_shader_get(
world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL, !inst_.is_image_render);
}
bool had_volume = has_volume_;


@@ -123,31 +123,6 @@ struct DrawEngine {
};
};
/* Shaders */
/** IMPORTANT: Modify the currently bound context. */
void DRW_shader_init();
void DRW_shader_exit();
GPUMaterial *DRW_shader_from_world(World *wo,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk);
GPUMaterial *DRW_shader_from_material(
Material *ma,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
void DRW_shader_queue_optimize_material(GPUMaterial *mat);
/* Viewport. */
/**


@@ -150,17 +150,9 @@ void DRW_gpu_context_create()
viewport_context = MEM_new<ContextShared>(__func__);
preview_context = MEM_new<ContextShared>(__func__);
{
/** IMPORTANT: Very delicate context handling. Changing the order of context creation makes it
* crash in background mode on windows (see #136270). */
/* Setup compilation context. Called first as it changes the active GPUContext. */
DRW_shader_init();
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(preview_context->system_gpu_context_);
}
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(preview_context->system_gpu_context_);
/* Activate the window's context if any. */
wm_window_reset_drawable();
@@ -172,7 +164,6 @@ void DRW_gpu_context_destroy()
if (viewport_context == nullptr) {
return;
}
DRW_shader_exit();
DRW_submission_mutex_exit();
MEM_SAFE_DELETE(viewport_context);


@@ -1,435 +0,0 @@
/* SPDX-FileCopyrightText: 2016 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup draw
*/
#include "DNA_material_types.h"
#include "DNA_world_types.h"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "DEG_depsgraph_query.hh"
#include "GPU_capabilities.hh"
#include "GPU_material.hh"
#include "GPU_state.hh"
#include "WM_api.hh"
#include "draw_context_private.hh"
#include <atomic>
#include <condition_variable>
#include <mutex>
extern "C" char datatoc_gpu_shader_depth_only_frag_glsl[];
extern "C" char datatoc_common_fullscreen_vert_glsl[];
using namespace blender;
/* -------------------------------------------------------------------- */
/** \name Deferred Compilation (DRW_deferred)
*
* Since compiling shader can take a long time, we do it in a non blocking
* manner in another thread.
*
* \{ */
struct DRWShaderCompiler {
/** Default compilation queue. */
Vector<GPUMaterial *> queue;
/** Optimization queue. */
Vector<GPUMaterial *> optimize_queue;
std::mutex queue_mutex;
std::condition_variable queue_cv;
void *system_gpu_context;
GPUContext *blender_gpu_context;
std::atomic<bool> stop;
};
/** NOTE: While the `BLI_threads` API requires a List,
* we only create a single thread at application startup and delete it at exit. */
static ListBase &compilation_threadpool()
{
static ListBase compilation_threadpool_ = {};
return compilation_threadpool_;
}
static DRWShaderCompiler &compiler_data()
{
static DRWShaderCompiler compiler_data_ = {};
return compiler_data_;
}
static void *drw_deferred_shader_compilation_exec(void * /*unused*/)
{
using namespace blender;
void *system_gpu_context = compiler_data().system_gpu_context;
GPUContext *blender_gpu_context = compiler_data().blender_gpu_context;
BLI_assert(system_gpu_context != nullptr);
BLI_assert(blender_gpu_context != nullptr);
GPU_render_begin();
WM_system_gpu_context_activate(system_gpu_context);
GPU_context_active_set(blender_gpu_context);
const bool use_parallel_compilation = GPU_use_parallel_compilation();
Vector<GPUMaterial *> async_mats;
while (true) {
if (compiler_data().stop) {
break;
}
compiler_data().queue_mutex.lock();
/* Pop last because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
GPUMaterial *mat = compiler_data().queue.is_empty() ? nullptr :
compiler_data().queue.pop_last();
if (mat) {
/* Avoid another thread freeing the material mid compilation. */
GPU_material_acquire(mat);
}
compiler_data().queue_mutex.unlock();
if (mat) {
/* We have a new material that must be compiled,
* we either compile it directly or add it to the async compilation list. */
if (use_parallel_compilation) {
GPU_material_async_compile(mat);
async_mats.append(mat);
}
else {
GPU_material_compile(mat);
GPU_material_release(mat);
}
}
else if (!async_mats.is_empty()) {
/* (only if use_parallel_compilation == true)
* Keep querying the requested materials until all of them are ready. */
async_mats.remove_if([](GPUMaterial *mat) {
if (GPU_material_async_try_finalize(mat)) {
GPU_material_release(mat);
return true;
}
return false;
});
}
else {
/* Check for Material Optimization job once there are no more
* shaders to compile. */
compiler_data().queue_mutex.lock();
/* Pop last because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
GPUMaterial *optimize_mat = compiler_data().optimize_queue.is_empty() ?
nullptr :
compiler_data().optimize_queue.pop_last();
if (optimize_mat) {
/* Avoid another thread freeing the material during optimization. */
GPU_material_acquire(optimize_mat);
}
compiler_data().queue_mutex.unlock();
if (optimize_mat) {
/* Compile optimized material shader. */
GPU_material_optimize(optimize_mat);
GPU_material_release(optimize_mat);
}
else {
/* No more materials to optimize, or shaders to compile. */
std::unique_lock lock(compiler_data().queue_mutex);
compiler_data().queue_cv.wait(lock);
}
}
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
GPU_flush();
}
}
/* We have to wait until all the requested batches are ready,
* even if compiler_data().stop is true. */
while (!async_mats.is_empty()) {
async_mats.remove_if([](GPUMaterial *mat) {
if (GPU_material_async_try_finalize(mat)) {
GPU_material_release(mat);
return true;
}
return false;
});
}
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(system_gpu_context);
GPU_render_end();
return nullptr;
}
void DRW_shader_init()
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
static bool initialized = false;
if (initialized) {
BLI_assert_unreachable();
return;
}
initialized = true;
compiler_data().stop = false;
compiler_data().system_gpu_context = WM_system_gpu_context_create();
compiler_data().blender_gpu_context = GPU_context_create(nullptr,
compiler_data().system_gpu_context);
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(compiler_data().system_gpu_context);
BLI_threadpool_init(&compilation_threadpool(), drw_deferred_shader_compilation_exec, 1);
BLI_threadpool_insert(&compilation_threadpool(), nullptr);
}
void DRW_shader_exit()
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
compiler_data().stop = true;
compiler_data().queue_cv.notify_one();
BLI_threadpool_end(&compilation_threadpool());
/* Revert the queued state for the materials that has not been compiled.
* Note that this is not strictly needed since this function is called at program exit. */
{
std::scoped_lock queue_lock(compiler_data().queue_mutex);
while (!compiler_data().queue.is_empty()) {
GPU_material_status_set(compiler_data().queue.pop_last(), GPU_MAT_CREATED);
}
while (!compiler_data().optimize_queue.is_empty()) {
GPU_material_optimization_status_set(compiler_data().optimize_queue.pop_last(),
GPU_MAT_OPTIMIZATION_READY);
}
}
WM_system_gpu_context_activate(compiler_data().system_gpu_context);
GPU_context_active_set(compiler_data().blender_gpu_context);
GPU_context_discard(compiler_data().blender_gpu_context);
WM_system_gpu_context_dispose(compiler_data().system_gpu_context);
}
/**
* Append either shader compilation or optimization job to deferred queue.
* We keep two separate queue's to ensure core compilations always complete before optimization.
*/
static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
{
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Add to either compilation or optimization queue. */
if (is_optimization_job) {
BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
compiler_data().optimize_queue.append(mat);
}
else {
GPU_material_status_set(mat, GPU_MAT_QUEUED);
compiler_data().queue.append(mat);
}
compiler_data().queue_cv.notify_one();
}
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
{
if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
return;
}
if (GPU_use_main_context_workaround()) {
deferred = false;
}
if (!deferred) {
DRW_deferred_shader_remove(mat);
/* Shaders could already be compiling. Have to wait for compilation to finish. */
while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
BLI_time_sleep_ms(20);
}
if (GPU_material_status(mat) == GPU_MAT_CREATED) {
GPU_material_compile(mat);
}
return;
}
/* Don't add material to the queue twice. */
if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
return;
}
/* Add deferred shader compilation to queue. */
drw_deferred_queue_append(mat, false);
}
void DRW_deferred_shader_remove(GPUMaterial *mat)
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Search for compilation job in queue. */
if (compiler_data().queue.contains(mat)) {
compiler_data().queue.remove_first_occurrence_and_reorder(mat);
GPU_material_status_set(mat, GPU_MAT_CREATED);
}
/* Search for optimization job in queue. */
if (compiler_data().optimize_queue.contains(mat)) {
compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
}
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Search for optimization job in queue. */
if (compiler_data().optimize_queue.contains(mat)) {
compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \{ */
GPUMaterial *DRW_shader_from_world(World *wo,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk)
{
Scene *scene = DEG_get_original(drw_get().scene);
GPUMaterial *mat = GPU_material_from_nodetree(scene,
nullptr,
ntree,
&wo->gpumaterial,
wo->id.name,
engine,
shader_id,
is_volume_shader,
false,
callback,
thunk);
if (DRW_context_get()->is_image_render()) {
/* Do not deferred if doing render. */
deferred = false;
}
drw_deferred_shader_add(mat, deferred);
DRW_shader_queue_optimize_material(mat);
return mat;
}
GPUMaterial *DRW_shader_from_material(Material *ma,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
Scene *scene = DEG_get_original(drw_get().scene);
GPUMaterial *mat = GPU_material_from_nodetree(scene,
ma,
ntree,
&ma->gpumaterial,
ma->id.name,
engine,
shader_id,
is_volume_shader,
false,
callback,
thunk,
pass_replacement_cb);
drw_deferred_shader_add(mat, deferred);
DRW_shader_queue_optimize_material(mat);
return mat;
}
void DRW_shader_queue_optimize_material(GPUMaterial *mat)
{
/* Do not perform deferred optimization if performing render.
* De-queue any queued optimization jobs. */
if (DRW_context_get()->is_image_render()) {
if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
/* Remove from pending optimization job queue. */
DRW_deferred_shader_optimize_remove(mat);
/* If optimization job had already started, wait for it to complete. */
while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
BLI_time_sleep_ms(20);
}
}
return;
}
/* We do not need to perform optimization on the material if it is already compiled or in the
* optimization queue. If optimization is not required, the status will be flagged as
* `GPU_MAT_OPTIMIZATION_SKIP`.
* We can also skip cases which have already been queued up. */
if (ELEM(GPU_material_optimization_status(mat),
GPU_MAT_OPTIMIZATION_SKIP,
GPU_MAT_OPTIMIZATION_SUCCESS,
GPU_MAT_OPTIMIZATION_QUEUED))
{
return;
}
/* Only queue optimization once the original shader has been successfully compiled. */
if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
return;
}
/* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
* recompilation for shaders which are being actively modified. */
if (!GPU_material_optimization_ready(mat)) {
return;
}
/* Add deferred shader compilation to queue. */
drw_deferred_queue_append(mat, true);
}
/** \} */


@@ -50,6 +50,7 @@
#include "GPU_debug.hh"
#include "GPU_index_buffer.hh"
#include "GPU_material.hh"
#include "GPU_pass.hh"
#include "DRW_gpu_wrapper.hh"
@@ -59,8 +60,6 @@
#include "draw_shader_shared.hh"
#include "draw_state.hh"
#include "intern/gpu_codegen.hh"
#include <cstdint>
#include <sstream>


@@ -64,7 +64,6 @@
#include "GPU_framebuffer.hh"
#include "GPU_immediate.hh"
#include "GPU_immediate_util.hh"
#include "GPU_material.hh"
#include "GPU_matrix.hh"
#include "GPU_state.hh"
#include "GPU_viewport.hh"
@@ -1683,7 +1682,6 @@ void view3d_main_region_draw(const bContext *C, ARegion *region)
DRW_cache_free_old_subdiv();
DRW_cache_free_old_batches(bmain);
BKE_image_free_old_gputextures(bmain);
GPU_pass_cache_garbage_collect();
/* No depth test for drawing action zones afterwards. */
GPU_depth_test(GPU_DEPTH_NONE);


@@ -77,6 +77,7 @@ set(SRC
intern/gpu_material.cc
intern/gpu_matrix.cc
intern/gpu_node_graph.cc
intern/gpu_pass.cc
intern/gpu_platform.cc
intern/gpu_query.cc
intern/gpu_select.cc
@@ -118,6 +119,7 @@ set(SRC
GPU_init_exit.hh
GPU_material.hh
GPU_matrix.hh
GPU_pass.hh
GPU_platform.hh
GPU_platform_backend_enum.h
GPU_primitive.hh


@@ -33,31 +33,28 @@ struct Scene;
struct bNode;
struct bNodeTree;
/* Functions to create GPU Materials nodes. */
/**
* High level functions to create and use GPU materials.
*/
enum eGPUType {
/* Keep in sync with GPU_DATATYPE_STR */
/* The value indicates the number of elements in each type */
GPU_NONE = 0,
GPU_FLOAT = 1,
GPU_VEC2 = 2,
GPU_VEC3 = 3,
GPU_VEC4 = 4,
GPU_MAT3 = 9,
GPU_MAT4 = 16,
GPU_MAX_CONSTANT_DATA = GPU_MAT4,
enum eGPUMaterialEngine {
GPU_MAT_EEVEE,
GPU_MAT_COMPOSITOR,
GPU_MAT_ENGINE_MAX,
};
/* Values not in GPU_DATATYPE_STR */
GPU_TEX1D_ARRAY = 1001,
GPU_TEX2D = 1002,
GPU_TEX2D_ARRAY = 1003,
GPU_TEX3D = 1004,
enum eGPUMaterialStatus {
GPU_MAT_FAILED = 0,
GPU_MAT_QUEUED,
GPU_MAT_SUCCESS,
};
/* GLSL Struct types */
GPU_CLOSURE = 1007,
/* Opengl Attributes */
GPU_ATTR = 3001,
/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
* plan to perform optimization on a given material. */
enum eGPUMaterialOptimizationStatus {
GPU_MAT_OPTIMIZATION_SKIP = 0,
GPU_MAT_OPTIMIZATION_QUEUED,
GPU_MAT_OPTIMIZATION_SUCCESS,
};
enum eGPUMaterialFlag {
@@ -84,12 +81,193 @@ enum eGPUMaterialFlag {
/* Tells the render engine the material was just compiled or updated. */
GPU_MATFLAG_UPDATED = (1 << 29),
};
ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_UPDATED);
/* HACK(fclem) Tells the environment texture node to not bail out if empty. */
GPU_MATFLAG_LOOKDEV_HACK = (1 << 30),
using GPUCodegenCallbackFn = void (*)(void *thunk,
GPUMaterial *mat,
struct GPUCodegenOutput *codegen);
/**
* Should return an already compiled pass if it's functionally equivalent to the one being
* compiled.
*/
using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat);
/** WARNING: gpumaterials thread safety must be ensured by the caller. */
GPUMaterial *GPU_material_from_nodetree(
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool deferred_compilation,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and
* linking the necessary GPU material nodes. */
using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material);
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
* The given thunk will be passed as the first parameter of each callback. */
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
void GPU_materials_free(Main *bmain);
GPUPass *GPU_material_get_pass(GPUMaterial *material);
/** Return the most optimal shader configuration for the given material. */
GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
/**
* Return can be null if it's a world material.
*/
Material *GPU_material_get_material(GPUMaterial *material);
/**
* Return true if the material compilation has not yet begin or begin.
*/
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
/**
* Return status for asynchronous optimization jobs.
*/
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat);
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
*
* \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`).
*/
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs);
bool GPU_material_has_surface_output(GPUMaterial *mat);
bool GPU_material_has_volume_output(GPUMaterial *mat);
bool GPU_material_has_displacement_output(GPUMaterial *mat);
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag);
uint64_t GPU_material_uuid_get(GPUMaterial *mat);
struct GPULayerAttr {
GPULayerAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68]. */
uint32_t hash_code;
/* Helper fields used by code generation. */
int users;
};
ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_LOOKDEV_HACK);
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material);
/* Requested Material Attributes and Textures */
enum eGPUType {
/* Keep in sync with GPU_DATATYPE_STR */
/* The value indicates the number of elements in each type */
GPU_NONE = 0,
GPU_FLOAT = 1,
GPU_VEC2 = 2,
GPU_VEC3 = 3,
GPU_VEC4 = 4,
GPU_MAT3 = 9,
GPU_MAT4 = 16,
GPU_MAX_CONSTANT_DATA = GPU_MAT4,
/* Values not in GPU_DATATYPE_STR */
GPU_TEX1D_ARRAY = 1001,
GPU_TEX2D = 1002,
GPU_TEX2D_ARRAY = 1003,
GPU_TEX3D = 1004,
/* GLSL Struct types */
GPU_CLOSURE = 1007,
/* Opengl Attributes */
GPU_ATTR = 3001,
};
enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
};
struct GPUMaterialAttribute {
GPUMaterialAttribute *next, *prev;
int type; /* eCustomDataType */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */
eGPUType gputype;
eGPUDefaultValue default_value; /* Only for volumes attributes. */
int id;
int users;
/**
* If true, the corresponding attribute is the specified default color attribute on the mesh,
* if it exists. In that case the type and name data can vary per geometry, so it will not be
* valid here.
*/
bool is_default_color;
/**
* If true, the attribute is the length of hair particles and curves.
*/
bool is_hair_length;
};
struct GPUMaterialTexture {
GPUMaterialTexture *next, *prev;
Image *ima;
ImageUser iuser;
bool iuser_available;
GPUTexture **colorband;
GPUTexture **sky;
char sampler_name[32]; /* Name of sampler in GLSL. */
char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */
int users;
GPUSamplerState sampler_state;
};
ListBase GPU_material_attributes(const GPUMaterial *material);
ListBase GPU_material_textures(GPUMaterial *material);
struct GPUUniformAttr {
GPUUniformAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68] + use_dupli. */
uint32_t hash_code;
bool use_dupli;
/* Helper fields used by code generation. */
short id;
int users;
};
struct GPUUniformAttrList {
ListBase list; /* GPUUniformAttr */
/* List length and hash code precomputed for fast lookup and comparison. */
unsigned int count, hash_code;
};
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material);
/* Functions to create GPU Materials nodes. */
/* TODO: Move to its own header. */
struct GPUNodeStack {
eGPUType type;
@@ -101,27 +279,6 @@ struct GPUNodeStack {
bool end;
};
enum eGPUMaterialStatus {
GPU_MAT_FAILED = 0,
GPU_MAT_CREATED,
GPU_MAT_QUEUED,
GPU_MAT_SUCCESS,
};
/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
* plan to perform optimization on a given material. */
enum eGPUMaterialOptimizationStatus {
GPU_MAT_OPTIMIZATION_SKIP = 0,
GPU_MAT_OPTIMIZATION_READY,
GPU_MAT_OPTIMIZATION_QUEUED,
GPU_MAT_OPTIMIZATION_SUCCESS,
};
enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
};
struct GPUCodegenOutput {
std::string attr_load;
/* Node-tree functions calls. */
@@ -135,13 +292,6 @@ struct GPUCodegenOutput {
GPUShaderCreateInfo *create_info;
};
using GPUCodegenCallbackFn = void (*)(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen);
/**
* Should return an already compiled pass if it's functionally equivalent to the one being
* compiled.
*/
using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat);
GPUNodeLink *GPU_constant(const float *num);
GPUNodeLink *GPU_uniform(const float *num);
GPUNodeLink *GPU_attribute(GPUMaterial *mat, eCustomDataType type, const char *name);
@@ -217,203 +367,9 @@ char *GPU_material_split_sub_function(GPUMaterial *material,
eGPUType return_type,
GPUNodeLink **link);
/**
* High level functions to create and use GPU materials.
*/
enum eGPUMaterialEngine {
GPU_MAT_EEVEE_LEGACY = 0,
GPU_MAT_EEVEE,
GPU_MAT_COMPOSITOR,
};
GPUMaterial *GPU_material_from_nodetree(
Scene *scene,
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
void GPU_material_compile(GPUMaterial *mat);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
void GPU_material_async_compile(GPUMaterial *mat);
/** Returns true if the material have finished its compilation. */
bool GPU_material_async_try_finalize(GPUMaterial *mat);
void GPU_material_acquire(GPUMaterial *mat);
void GPU_material_release(GPUMaterial *mat);
void GPU_materials_free(Main *bmain);
Scene *GPU_material_scene(GPUMaterial *material);
GPUPass *GPU_material_get_pass(GPUMaterial *material);
/** Return the most optimal shader configuration for the given material. */
GPUShader *GPU_material_get_shader(GPUMaterial *material);
/** Return the base un-optimized shader. */
GPUShader *GPU_material_get_shader_base(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
/**
* Material Optimization.
* \note Compiles optimal version of shader graph, populating mat->optimized_pass.
* This operation should always be deferred until existing compilations have completed.
* Default un-optimized materials will still exist for interactive material editing performance.
*/
void GPU_material_optimize(GPUMaterial *mat);
/**
* Return can be null if it's a world material.
*/
Material *GPU_material_get_material(GPUMaterial *material);
/**
* Return true if the material compilation has not yet begin or begin.
*/
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
/**
* Return status for asynchronous optimization jobs.
*/
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
bool GPU_material_optimization_ready(GPUMaterial *mat);
/**
* Store reference to a similar default material for asynchronous PSO cache warming.
*
* This function expects `material` to have not yet been compiled and for `default_material` to be
* ready. When compiling `material` as part of an asynchronous shader compilation job, use existing
* PSO descriptors from `default_material`'s shader to also compile PSOs for this new material
* asynchronously, rather than at runtime.
*
* The default_material `options` should match this new materials options in order
* for PSO descriptors to match those needed by the new `material`.
*
* NOTE: `default_material` must exist when `GPU_material_compile(..)` is called for
* `material`.
*
* See `GPU_shader_warm_cache(..)` for more information.
*/
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material);
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
*
* \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`).
*/
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs);
bool GPU_material_has_surface_output(GPUMaterial *mat);
bool GPU_material_has_volume_output(GPUMaterial *mat);
bool GPU_material_has_displacement_output(GPUMaterial *mat);
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag);
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag);
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat);
bool GPU_material_recalc_flag_get(GPUMaterial *mat);
uint64_t GPU_material_uuid_get(GPUMaterial *mat);
void GPU_pass_cache_init();
void GPU_pass_cache_garbage_collect();
void GPU_pass_cache_free();
/* Requested Material Attributes and Textures */
struct GPUMaterialAttribute {
GPUMaterialAttribute *next, *prev;
int type; /* eCustomDataType */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */
eGPUType gputype;
eGPUDefaultValue default_value; /* Only for volumes attributes. */
int id;
int users;
/**
* If true, the corresponding attribute is the specified default color attribute on the mesh,
* if it exists. In that case the type and name data can vary per geometry, so it will not be
* valid here.
*/
bool is_default_color;
/**
* If true, the attribute is the length of hair particles and curves.
*/
bool is_hair_length;
};
struct GPUMaterialTexture {
GPUMaterialTexture *next, *prev;
Image *ima;
ImageUser iuser;
bool iuser_available;
GPUTexture **colorband;
GPUTexture **sky;
char sampler_name[32]; /* Name of sampler in GLSL. */
char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */
int users;
GPUSamplerState sampler_state;
};
ListBase GPU_material_attributes(const GPUMaterial *material);
ListBase GPU_material_textures(GPUMaterial *material);
struct GPUUniformAttr {
GPUUniformAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68] + use_dupli. */
uint32_t hash_code;
bool use_dupli;
/* Helper fields used by code generation. */
short id;
int users;
};
struct GPUUniformAttrList {
ListBase list; /* GPUUniformAttr */
/* List length and hash code precomputed for fast lookup and comparison. */
unsigned int count, hash_code;
};
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material);
GHash *GPU_uniform_attr_list_hash_new(const char *info);
void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src);
void GPU_uniform_attr_list_free(GPUUniformAttrList *set);
struct GPULayerAttr {
GPULayerAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68]. */
uint32_t hash_code;
/* Helper fields used by code generation. */
int users;
};
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material);
/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and
* linking the necessary GPU material nodes. */
using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material);
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
* The given thunk will be passed as the first parameter of each callback. */
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk);


@@ -0,0 +1,48 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Generate and cache shaders generated from the intermediate node graph.
*/
#pragma once
#include "GPU_material.hh"
#include "GPU_shader.hh"
struct GPUNodeGraph;
struct GPUPass;
enum eGPUPassStatus {
GPU_PASS_FAILED = 0,
GPU_PASS_QUEUED,
GPU_PASS_SUCCESS,
};
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
const char *debug_name,
eGPUMaterialEngine engine,
bool deferred_compilation,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph);
eGPUPassStatus GPU_pass_status(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
void GPU_pass_ensure_its_ready(GPUPass *pass);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
uint64_t GPU_pass_global_compilation_count();
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass);
void GPU_pass_cache_init();
void GPU_pass_cache_update();
void GPU_pass_cache_wait_for_all();
void GPU_pass_cache_free();


@@ -105,6 +105,10 @@ blender::Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle);
* WARNING: The handle will be invalidated by this call.
*/
void GPU_shader_batch_cancel(BatchHandle &handle);
/**
* Wait until all the requested batches have been compiled.
*/
void GPU_shader_batch_wait_for_all();
/** \} */


@@ -10,184 +10,30 @@
#include "MEM_guardedalloc.h"
#include "DNA_customdata_types.h"
#include "DNA_material_types.h"
#include "BLI_ghash.h"
#include "BLI_hash_mm2a.hh"
#include "BLI_link_utils.h"
#include "BLI_listbase.h"
#include "BLI_span.hh"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "BKE_cryptomatte.hh"
#include "BKE_material.hh"
#include "IMB_colormanagement.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_uniform_buffer.hh"
#include "GPU_vertex_format.hh"
#include "BLI_sys_types.h" /* for intptr_t support */
#include "BLI_vector.hh"
#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_dependency_private.hh"
#include <cstdarg>
#include <cstring>
#include <sstream>
#include <string>
using namespace blender;
using namespace blender::gpu::shader;
/**
* IMPORTANT: Never add external reference. The GPUMaterial used to create the GPUPass (and its
* GPUCodegenCreateInfo) can be free before actually compiling. This happens if there is an update
* before deferred compilation happens and the GPUPass gets picked up by another GPUMaterial
* (because of GPUPass reuse).
*/
struct GPUCodegenCreateInfo : ShaderCreateInfo {
struct NameBuffer {
using NameEntry = std::array<char, 32>;
/** Duplicate attribute names to avoid reference the GPUNodeGraph directly. */
char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
char var_names[16][8];
blender::Vector<std::unique_ptr<NameEntry>, 16> sampler_names;
/* Returns the appended name memory location */
const char *append_sampler_name(const char name[32])
{
auto index = sampler_names.size();
sampler_names.append(std::make_unique<NameEntry>());
char *name_buffer = sampler_names[index]->data();
memcpy(name_buffer, name, 32);
return name_buffer;
}
};
/** Optional generated interface. */
StageInterfaceInfo *interface_generated = nullptr;
/** Optional name buffer containing names referenced by StringRefNull. */
NameBuffer name_buffer;
GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){};
~GPUCodegenCreateInfo()
{
delete interface_generated;
};
};
struct GPUPass {
GPUPass *next = nullptr;
GPUShader *shader = nullptr;
GPUCodegenCreateInfo *create_info = nullptr;
/** Orphaned GPUPasses gets freed by the garbage collector. */
uint refcount = 0;
/** The last time the refcount was greater than 0. */
int gc_timestamp = 0;
/** The engine type this pass is compiled for. */
eGPUMaterialEngine engine = GPU_MAT_EEVEE_LEGACY;
/** Identity hash generated from all GLSL code. */
uint32_t hash = 0;
/** Did we already tried to compile the attached GPUShader. */
bool compiled = false;
/** If this pass is already being_compiled (A GPUPass can be shared by multiple GPUMaterials). */
bool compilation_requested = false;
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
* during pass code generation. */
bool should_optimize = false;
/** Whether pass is in the GPUPass cache. */
bool cached = false;
/** Protects pass shader from being created from multiple threads at the same time. */
ThreadMutex shader_creation_mutex = {};
BatchHandle async_compilation_handle = {};
};
/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
*
* Internal shader cache: This prevent the shader recompilation / stall when
* using undo/redo AND also allows for GPUPass reuse if the Shader code is the
* same for 2 different Materials. Unused GPUPasses are free by Garbage collection.
* \{ */
/* Only use one linklist that contains the GPUPasses grouped by hash. */
static GPUPass *pass_cache = nullptr;
static SpinLock pass_cache_spin;
/* Search by hash only. Return first pass with the same hash.
* There is hash collision if (pass->next && pass->next->hash == hash) */
static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash)
{
BLI_spin_lock(&pass_cache_spin);
/* Could be optimized with a Lookup table. */
for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
if (pass->hash == hash && pass->engine == engine) {
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return nullptr;
}
static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
pass->cached = true;
if (node != nullptr) {
/* Add after the first pass having the same hash. */
pass->next = node->next;
node->next = pass;
}
else {
/* No other pass have same hash, just prepend to the list. */
BLI_LINKS_PREPEND(pass_cache, pass);
}
BLI_spin_unlock(&pass_cache_spin);
}
/* Check all possible passes with the same hash. */
static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
GPUShaderCreateInfo *info,
uint32_t hash)
{
eGPUMaterialEngine engine = pass->engine;
BLI_spin_lock(&pass_cache_spin);
for (; pass && (pass->hash == hash); pass = pass->next) {
if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info) &&
pass->engine == engine)
{
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return nullptr;
}
static bool gpu_pass_is_valid(const GPUPass *pass)
{
/* Shader is not null if compilation is successful. */
return (pass->compiled == false || pass->shader != nullptr);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Type > string conversion
* \{ */
@@ -234,12 +80,12 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
}
/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
static std::ostream &operator<<(std::ostream &stream, const blender::Span<float> &span)
static std::ostream &operator<<(std::ostream &stream, const Span<float> &span)
{
stream << (eGPUType)span.size() << "(";
/* Use uint representation to allow exact same bit pattern even if NaN. This is
* because we can pass UINTs as floats for constants. */
const blender::Span<uint32_t> uint_span = span.cast<uint32_t>();
const Span<uint32_t> uint_span = span.cast<uint32_t>();
for (const uint32_t &element : uint_span) {
char formatted_float[32];
SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", element);
@@ -257,84 +103,57 @@ struct GPUConstant : public GPUInput {};
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
{
stream << blender::Span<float>(input->vec, input->type);
stream << Span<float>(input->vec, input->type);
return stream;
}
namespace blender::gpu::shader {
/* Needed to use the << operators from nested namespaces. :(
* https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution */
using ::operator<<;
} // namespace blender::gpu::shader
/** \} */
/* -------------------------------------------------------------------- */
/** \name GLSL code generation
* \{ */
class GPUCodegen {
public:
GPUMaterial &mat;
GPUNodeGraph &graph;
GPUCodegenOutput output = {};
GPUCodegenCreateInfo *create_info = nullptr;
const char *GPUCodegenCreateInfo::NameBuffer::append_sampler_name(const char name[32])
{
auto index = sampler_names.size();
sampler_names.append(std::make_unique<NameEntry>());
char *name_buffer = sampler_names[index]->data();
memcpy(name_buffer, name, 32);
return name_buffer;
}
private:
uint32_t hash_ = 0;
BLI_HashMurmur2A hm2a_;
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
GPUCodegen::GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name)
: mat(*mat_), graph(*graph_)
{
BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
create_info = MEM_new<GPUCodegenCreateInfo>(__func__, debug_name);
output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(create_info));
}
/** Cache parameters for complexity heuristic. */
uint nodes_total_ = 0;
uint textures_total_ = 0;
uint uniforms_total_ = 0;
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
{
BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
create_info = new GPUCodegenCreateInfo("codegen");
output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(create_info));
}
~GPUCodegen()
{
MEM_SAFE_FREE(cryptomatte_input_);
delete create_info;
BLI_freelistN(&ubo_inputs_);
};
void generate_graphs();
void generate_cryptomatte();
void generate_uniform_buffer();
void generate_attribs();
void generate_resources();
void generate_library();
uint32_t hash_get() const
{
return hash_;
}
/* Heuristic determined during pass codegen for whether a
* more optimal variant of this material should be compiled. */
bool should_optimize_heuristic() const
{
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
* the baseline is met. */
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
return do_optimize;
}
private:
void set_unique_ids();
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
std::string graph_serialize(eGPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
std::string graph_serialize(eGPUNodeTag tree_tag);
GPUCodegen::~GPUCodegen()
{
MEM_SAFE_FREE(cryptomatte_input_);
MEM_delete(create_info);
BLI_freelistN(&ubo_inputs_);
};
bool GPUCodegen::should_optimize_heuristic() const
{
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
* the baseline is met. */
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
return do_optimize;
}
void GPUCodegen::generate_attribs()
{
if (BLI_listbase_is_empty(&graph.attributes)) {
@@ -344,7 +163,7 @@ void GPUCodegen::generate_attribs()
GPUCodegenCreateInfo &info = *create_info;
info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs");
info.interface_generated = MEM_new<StageInterfaceInfo>(__func__, "codegen_iface", "var_attrs");
StageInterfaceInfo &iface = *info.interface_generated;
info.vertex_out(iface);
@@ -360,8 +179,8 @@ void GPUCodegen::generate_attribs()
STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);
blender::StringRefNull attr_name = info.name_buffer.attr_names[slot];
blender::StringRefNull var_name = info.name_buffer.var_names[slot];
StringRefNull attr_name = info.name_buffer.attr_names[slot];
StringRefNull var_name = info.name_buffer.var_names[slot];
eGPUType input_type, iface_type;
@@ -470,7 +289,7 @@ void GPUCodegen::generate_library()
GPUCodegenCreateInfo &info = *create_info;
void *value;
blender::Vector<std::string> source_files;
Vector<std::string> source_files;
/* Iterate over libraries. We need to keep this struct intact in case it is required for the
* optimization pass. The first pass just collects the keys from the GSET, given items in a GSET
@@ -543,7 +362,7 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
if (from == GPU_VEC4 && to == GPU_FLOAT) {
float coefficients[3];
IMB_colormanagement_get_luminance_coefficients(coefficients);
eval_ss << ", " << blender::Span<float>(coefficients, 3);
eval_ss << ", " << Span<float>(coefficients, 3);
}
eval_ss << ")";
@@ -628,8 +447,8 @@ void GPUCodegen::generate_cryptomatte()
float material_hash = 0.0f;
Material *material = GPU_material_get_material(&mat);
if (material) {
blender::bke::cryptomatte::CryptomatteHash hash(
material->id.name + 2, BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
bke::cryptomatte::CryptomatteHash hash(material->id.name + 2,
BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
material_hash = hash.float_encoded();
}
cryptomatte_input_->vec[0] = material_hash;
@@ -711,355 +530,3 @@ void GPUCodegen::generate_graphs()
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUPass
* \{ */
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
/* If Optimize flag is passed in, we are generating an optimized
* variant of the GPUMaterial's GPUPass. */
if (optimize_graph) {
gpu_node_graph_optimize(graph);
}
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
GPUCodegen codegen(material, graph);
codegen.generate_graphs();
codegen.generate_cryptomatte();
GPUPass *pass_hash = nullptr;
if (!optimize_graph) {
/* The optimized version of the shader should not re-generate a UBO.
* The UBO will not be used for this variant. */
codegen.generate_uniform_buffer();
/** Cache lookup: Reuse shaders already compiled.
* NOTE: We only perform cache look-up for non-optimized shader
* graphs, as baked constant data among other optimizations will generate too many
* shader source permutations, with minimal re-usability. */
pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get());
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
   * there is no way to detect a collision currently. Some advocated to only use a bigger hash. */
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
if (!gpu_pass_is_valid(pass_hash)) {
/* Shader has already been created but failed to compile. */
return nullptr;
}
/* No collision, just return the pass. */
BLI_spin_lock(&pass_cache_spin);
pass_hash->refcount += 1;
BLI_spin_unlock(&pass_cache_spin);
return pass_hash;
}
}
/* Either the shader is not compiled or there is a hash collision...
* continue generating the shader strings. */
codegen.generate_attribs();
codegen.generate_resources();
codegen.generate_library();
/* Make engine add its own code and implement the generated functions. */
finalize_source_cb(thunk, material, &codegen.output);
GPUPass *pass = nullptr;
if (pass_hash) {
/* Cache lookup: Reuse shaders already compiled. */
pass = gpu_pass_cache_resolve_collision(
pass_hash, codegen.output.create_info, codegen.hash_get());
}
if (pass) {
/* Cache hit. Reuse the same GPUPass and GPUShader. */
if (!gpu_pass_is_valid(pass)) {
/* Shader has already been created but failed to compile. */
return nullptr;
}
BLI_spin_lock(&pass_cache_spin);
pass->refcount += 1;
BLI_spin_unlock(&pass_cache_spin);
}
else {
/* We still create a pass even if shader compilation
* fails to avoid trying to compile again and again. */
pass = MEM_new<GPUPass>("GPUPass");
pass->shader = nullptr;
pass->refcount = 1;
pass->create_info = codegen.create_info;
/* Finalize before adding the pass to the cache, to prevent race conditions. */
pass->create_info->finalize();
pass->engine = engine;
pass->hash = codegen.hash_get();
pass->compiled = false;
pass->compilation_requested = false;
pass->cached = false;
/* Only flag pass optimization hint if this is the first generated pass for a material.
* Optimized passes cannot be optimized further, even if the heuristic is still not
* favorable. */
pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
pass->async_compilation_handle = -1;
BLI_mutex_init(&pass->shader_creation_mutex);
codegen.create_info = nullptr;
/* Only insert non-optimized graphs into cache.
* Optimized graphs will continuously be recompiled with new unique source during material
     * editing, causing the cache to fill up quickly with materials offering minimal
* re-use. */
if (!optimize_graph) {
gpu_pass_cache_insert_after(pass_hash, pass);
}
}
return pass;
}
bool GPU_pass_should_optimize(GPUPass *pass)
{
/* Returns optimization heuristic prepared during
* initial codegen.
* NOTE: Optimization currently limited to Metal backend as repeated compilations required for
* material specialization cause impactful CPU stalls on OpenGL platforms. */
return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compilation
* \{ */
static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader)
{
int num_samplers = 0;
for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) {
if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) {
num_samplers += 1;
}
}
}
return num_samplers;
}
static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
{
if (shader == nullptr) {
return false;
}
/* NOTE: The only drawback of this method is that it will count a sampler
* used in the fragment shader and only declared (but not used) in the vertex
   * shader as used by both. But this corner case does not occur in practice for now. */
int active_samplers_len = count_active_texture_sampler(pass, shader);
/* Validate against opengl limit. */
if ((active_samplers_len > GPU_max_textures_frag()) ||
(active_samplers_len > GPU_max_textures_vert()))
{
return false;
}
if (pass->create_info->geometry_source_.is_empty() == false) {
if (active_samplers_len > GPU_max_textures_geom()) {
return false;
}
}
return (active_samplers_len * 3 <= GPU_max_textures());
}
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
{
if (!pass->compilation_requested) {
pass->compilation_requested = true;
pass->create_info->name_ = shname;
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
return info;
}
return nullptr;
}
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
{
bool success = true;
if (!pass->compiled) {
    /* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
* We need to make sure to count active samplers to avoid undefined behavior. */
if (!gpu_pass_shader_validate(pass, shader)) {
success = false;
if (shader != nullptr) {
fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
GPU_shader_free(shader);
shader = nullptr;
}
}
pass->shader = shader;
pass->compiled = true;
}
return success;
}
void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
if (pass->async_compilation_handle == -1) {
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
pass->async_compilation_handle = GPU_shader_batch_create_from_infos({info});
}
else {
/* The pass has been already compiled synchronously. */
BLI_assert(pass->compiled);
pass->async_compilation_handle = 0;
}
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
}
bool GPU_pass_async_compilation_try_finalize(GPUPass *pass)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
BLI_assert(pass->async_compilation_handle != -1);
if (pass->async_compilation_handle) {
if (GPU_shader_batch_is_ready(pass->async_compilation_handle)) {
GPU_pass_finalize_compilation(
pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
}
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
return pass->async_compilation_handle == 0;
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
bool success = true;
if (pass->async_compilation_handle > 0) {
/* We're trying to compile this pass synchronously, but there's a pending asynchronous
* compilation already started. */
success = GPU_pass_finalize_compilation(
pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
}
else if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
GPUShader *shader = GPU_shader_create_from_info(info);
success = GPU_pass_finalize_compilation(pass, shader);
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
return success;
}
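/* A minimal usage sketch of the asynchronous path above, for illustration only (the function
 * name and polling loop are hypothetical; real callers poll from their update step): */
static GPUShader *example_async_compile_poll(GPUPass *pass)
{
  GPU_pass_begin_async_compilation(pass, "example_pass");
  while (!GPU_pass_async_compilation_try_finalize(pass)) {
    /* Compilation still running in the shader batch; do other work and poll again later. */
  }
  /* Finalized regardless of success; a null shader means compilation failed. */
  return GPU_pass_shader_get(pass);
}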
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
static void gpu_pass_free(GPUPass *pass)
{
BLI_assert(pass->refcount == 0);
BLI_mutex_end(&pass->shader_creation_mutex);
if (pass->shader) {
GPU_shader_free(pass->shader);
}
delete pass->create_info;
MEM_delete(pass);
}
void GPU_pass_acquire(GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
BLI_assert(pass->refcount > 0);
pass->refcount++;
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_release(GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
BLI_assert(pass->refcount > 0);
pass->refcount--;
/* Un-cached passes will not be filtered by garbage collection, so release here. */
if (pass->refcount == 0 && !pass->cached) {
gpu_pass_free(pass);
}
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_cache_garbage_collect()
{
const int shadercollectrate = 60; /* hardcoded for now. */
int ctime = int(BLI_time_now_seconds());
BLI_spin_lock(&pass_cache_spin);
GPUPass *next, **prev_pass = &pass_cache;
for (GPUPass *pass = pass_cache; pass; pass = next) {
next = pass->next;
if (pass->refcount > 0) {
pass->gc_timestamp = ctime;
}
else if (pass->gc_timestamp + shadercollectrate < ctime) {
/* Remove from list */
*prev_pass = next;
gpu_pass_free(pass);
continue;
}
prev_pass = &pass->next;
}
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_cache_init()
{
BLI_spin_init(&pass_cache_spin);
}
void GPU_pass_cache_free()
{
BLI_spin_lock(&pass_cache_spin);
while (pass_cache) {
GPUPass *next = pass_cache->next;
gpu_pass_free(pass_cache);
pass_cache = next;
}
BLI_spin_unlock(&pass_cache_spin);
BLI_spin_end(&pass_cache_spin);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Module
* \{ */
void gpu_codegen_init() {}
void gpu_codegen_exit()
{
BKE_material_defaults_free_gpu();
GPU_shader_free_builtin_shaders();
}
/** \} */

View File

@@ -10,39 +10,97 @@
#pragma once
#include "BLI_hash_mm2a.hh"
#include "BLI_listbase.h"
#include "BLI_vector.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_vertex_format.hh"
#include "gpu_node_graph.hh"
#include "gpu_shader_create_info.hh"
struct GPUNodeGraph;
#include <sstream>
#include <string>
struct GPUPass;
namespace blender::gpu::shader {
/* Pass */
struct GPUCodegenCreateInfo : ShaderCreateInfo {
struct NameBuffer {
using NameEntry = std::array<char, 32>;
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
  /** Duplicate attribute names to avoid referencing the GPUNodeGraph directly. */
char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
char var_names[16][8];
Vector<std::unique_ptr<NameEntry>, 16> sampler_names;
/* Custom pass compilation. */
/* Returns the appended name memory location */
const char *append_sampler_name(const char name[32]);
};
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname);
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader);
/** Optional generated interface. */
StageInterfaceInfo *interface_generated = nullptr;
/** Optional name buffer containing names referenced by StringRefNull. */
NameBuffer name_buffer;
/** Copy of the GPUMaterial name, to prevent dangling pointers. */
std::string info_name_;
void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname);
/** NOTE: Unlike the non-async version, this one returns true when compilation has finalized,
 * regardless of whether it succeeded or not.
* To check for success, see if `GPU_pass_shader_get() != nullptr`. */
bool GPU_pass_async_compilation_try_finalize(GPUPass *pass);
GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name), info_name_(name)
{
/* Base class is always initialized first, so we need to update the name_ pointer here. */
name_ = info_name_.c_str();
};
~GPUCodegenCreateInfo()
{
MEM_delete(interface_generated);
}
};
/* Module */
class GPUCodegen {
public:
GPUMaterial &mat;
GPUNodeGraph &graph;
GPUCodegenOutput output = {};
GPUCodegenCreateInfo *create_info = nullptr;
void gpu_codegen_init();
void gpu_codegen_exit();
private:
uint32_t hash_ = 0;
BLI_HashMurmur2A hm2a_;
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
/** Cache parameters for complexity heuristic. */
uint nodes_total_ = 0;
uint textures_total_ = 0;
uint uniforms_total_ = 0;
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name);
~GPUCodegen();
void generate_graphs();
void generate_cryptomatte();
void generate_uniform_buffer();
void generate_attribs();
void generate_resources();
void generate_library();
uint32_t hash_get() const
{
return hash_;
}
/* Heuristic determined during pass codegen for whether a
* more optimal variant of this material should be compiled. */
bool should_optimize_heuristic() const;
private:
void set_unique_ids();
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
std::string graph_serialize(eGPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
std::string graph_serialize(eGPUNodeTag tree_tag);
};
} // namespace blender::gpu::shader

View File

@@ -27,6 +27,7 @@
#include "GPU_context.hh"
#include "GPU_batch.hh"
#include "GPU_pass.hh"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "gpu_matrix_private.hh"
@@ -328,6 +329,8 @@ void GPU_render_step(bool force_resource_release)
backend->render_step(force_resource_release);
printf_begin(active_ctx);
}
GPU_pass_cache_update();
}
/** \} */

View File

@@ -6,10 +6,12 @@
* \ingroup gpu
*/
#include "GPU_init_exit.hh" /* interface */
#include "GPU_batch.hh"
#include "BKE_material.hh"
#include "GPU_batch.hh"
#include "GPU_init_exit.hh" /* interface */
#include "GPU_pass.hh"
#include "intern/gpu_codegen.hh"
#include "intern/gpu_private.hh"
#include "intern/gpu_shader_create_info_private.hh"
#include "intern/gpu_shader_dependency_private.hh"
@@ -34,7 +36,7 @@ void GPU_init()
gpu_shader_dependency_init();
gpu_shader_create_info_init();
gpu_codegen_init();
GPU_pass_cache_init();
gpu_batch_init();
}
@@ -43,7 +45,10 @@ void GPU_exit()
{
gpu_batch_exit();
gpu_codegen_exit();
GPU_pass_cache_free();
BKE_material_defaults_free_gpu();
GPU_shader_free_builtin_shaders();
gpu_backend_delete_resources();

View File

@@ -29,33 +29,24 @@
#include "NOD_shader.h"
#include "GPU_material.hh"
#include "GPU_pass.hh"
#include "GPU_shader.hh"
#include "GPU_texture.hh"
#include "GPU_uniform_buffer.hh"
#include "DRW_engine.hh"
#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "atomic_ops.h"
static void gpu_material_ramp_texture_build(GPUMaterial *mat);
static void gpu_material_sky_texture_build(GPUMaterial *mat);
/* Structs */
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
/**
* Whether the optimized variant of the GPUPass should be created asynchronously.
* Usage of this depends on whether there are possible threading challenges of doing so.
* Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
* compilation, though this option exists in case any potential scenarios for material graph
 * optimization cause a slowdown on the main thread.
*
* NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
* this flag controls whether shader node graph source serialization happens on the compilation
* worker thread as well. */
#define ASYNC_OPTIMIZED_PASS_CREATION 0
struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
@@ -68,82 +59,354 @@ struct GPUSkyBuilder {
struct GPUMaterial {
/* Contains #GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same node-tree topology). */
GPUPass *pass;
* Can be shared between materials sharing same node-tree topology. */
GPUPass *pass = nullptr;
/* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
* This shader variant bakes dynamic uniform data as constant. This variant will not use
* the ubo, and instead bake constants directly into the shader source. */
GPUPass *optimized_pass;
/* Optimization status.
* We also use this status to determine whether this material should be considered for
* optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
* `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
* `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
* performance to do so, based on the heuristic.
*/
eGPUMaterialOptimizationStatus optimization_status;
double creation_time;
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
struct DeferredOptimizePass {
GPUCodegenCallbackFn callback;
void *thunk;
} DeferredOptimizePass;
struct DeferredOptimizePass optimize_pass_info;
#endif
GPUPass *optimized_pass = nullptr;
/** UBOs for this material parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
eGPUMaterialStatus status;
/** Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag;
/** The engine type this material is compiled for. */
/* UBOs for this material parameters. */
GPUUniformBuf *ubo = nullptr;
/* Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag = GPU_MATFLAG_UPDATED;
/* The engine type this material is compiled for. */
eGPUMaterialEngine engine;
/* Identify shader variations (shadow, probe, world background...) */
uint64_t uuid;
uint64_t uuid = 0;
/* Number of generated function. */
int generated_function_len;
/** Object type for attribute fetching. */
bool is_volume_shader;
int generated_function_len = 0;
/** DEPRECATED Currently only used for deferred compilation. */
Scene *scene;
/** Source material, might be null. */
Material *ma;
/** 1D Texture array containing all color bands. */
GPUTexture *coba_tex;
/** Builder for coba_tex. */
GPUColorBandBuilder *coba_builder;
/** 2D Texture array containing all sky textures. */
GPUTexture *sky_tex;
/** Builder for sky_tex. */
GPUSkyBuilder *sky_builder;
/* Source material, might be null. */
Material *source_material = nullptr;
/* 1D Texture array containing all color bands. */
GPUTexture *coba_tex = nullptr;
/* Builder for coba_tex. */
GPUColorBandBuilder *coba_builder = nullptr;
/* 2D Texture array containing all sky textures. */
GPUTexture *sky_tex = nullptr;
/* Builder for sky_tex. */
GPUSkyBuilder *sky_builder = nullptr;
/* Low level node graph(s). Also contains resources needed by the material. */
GPUNodeGraph graph;
GPUNodeGraph graph = {};
/** Default material reference used for PSO cache warming. Default materials may perform
* different operations, but the permutation will frequently share the same input PSO
* descriptors. This enables asynchronous PSO compilation as part of the deferred compilation
   * pass, reducing runtime stuttering and improving responsiveness while compiling materials. */
GPUMaterial *default_mat;
bool has_surface_output = false;
bool has_volume_output = false;
bool has_displacement_output = false;
/** DEPRECATED: To remove. */
bool has_surface_output;
bool has_volume_output;
bool has_displacement_output;
std::string name;
uint32_t refcount;
GPUMaterial(eGPUMaterialEngine engine) : engine(engine)
{
graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
};
bool do_batch_compilation;
~GPUMaterial()
{
gpu_node_graph_free(&graph);
#ifndef NDEBUG
char name[64];
#else
char name[16];
#endif
if (optimized_pass != nullptr) {
GPU_pass_release(optimized_pass);
}
if (pass != nullptr) {
GPU_pass_release(pass);
}
if (ubo != nullptr) {
GPU_uniformbuf_free(ubo);
}
if (coba_builder != nullptr) {
MEM_freeN(coba_builder);
}
if (coba_tex != nullptr) {
GPU_texture_free(coba_tex);
}
if (sky_tex != nullptr) {
GPU_texture_free(sky_tex);
}
}
};
/* Functions */
/* Public API */
GPUMaterial *GPU_material_from_nodetree(Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool deferred_compilation,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid && mat->engine == engine) {
if (!deferred_compilation) {
GPU_pass_ensure_its_ready(mat->pass);
}
return mat;
}
}
GPUMaterial *mat = MEM_new<GPUMaterial>(__func__, engine);
mat->source_material = ma;
mat->uuid = shader_uuid;
mat->name = name;
/* Localize tree to create links for reroute and mute. */
bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr);
ntreeGPUMaterialNodes(localtree, mat);
gpu_material_ramp_texture_build(mat);
gpu_material_sky_texture_build(mat);
/* Use default material pass when possible. */
if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) {
mat->pass = default_pass;
GPU_pass_acquire(mat->pass);
/** WORKAROUND:
* The node tree code is never executed in default replaced passes,
* but the GPU validation will still complain if the node tree UBO is not bound.
* So we create a dummy UBO with (at least) the size of the default material one (192 bytes).
* We allocate 256 bytes to leave some room for future changes. */
mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO");
}
else {
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(
mat, &mat->graph, mat->name.c_str(), engine, deferred_compilation, callback, thunk, false);
}
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(mat->pass)) {
mat->optimized_pass = GPU_generate_pass(
mat, &mat->graph, mat->name.c_str(), engine, true, callback, thunk, true);
}
gpu_node_graph_free_nodes(&mat->graph);
/* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */
blender::bke::node_tree_free_local_tree(localtree);
BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */
MEM_freeN(localtree);
/* Note that even if building the shader fails in some way, we want to keep
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing. */
LinkData *link = MEM_callocN<LinkData>("GPUMaterialLink");
link->data = mat;
BLI_addtail(gpumaterials, link);
return mat;
}
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
/* Allocate a new material and its material graph. */
GPUMaterial *material = MEM_new<GPUMaterial>(__func__, engine);
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
/* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(material,
&material->graph,
__func__,
engine,
false,
generate_code_function_cb,
thunk,
false);
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(material->pass)) {
material->optimized_pass = GPU_generate_pass(material,
&material->graph,
__func__,
engine,
true,
generate_code_function_cb,
thunk,
true);
}
gpu_node_graph_free_nodes(&material->graph);
return material;
}
void GPU_material_free_single(GPUMaterial *material)
{
MEM_delete(material);
}
void GPU_material_free(ListBase *gpumaterial)
{
LISTBASE_FOREACH (LinkData *, link, gpumaterial) {
GPUMaterial *material = static_cast<GPUMaterial *>(link->data);
GPU_material_free_single(material);
}
BLI_freelistN(gpumaterial);
}
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
GPU_material_free(&ma->gpumaterial);
}
LISTBASE_FOREACH (World *, wo, &bmain->worlds) {
GPU_material_free(&wo->gpumaterial);
}
BKE_material_defaults_free_gpu();
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name.c_str();
}
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
{
return mat->uuid;
}
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->source_material;
}
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
/* If an optimized pass variant is available, and optimization is
* flagged as complete, we use this one instead. */
return GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS ?
material->optimized_pass :
material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
return GPU_pass_shader_get(GPU_material_get_pass(material));
}
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
switch (GPU_pass_status(mat->pass)) {
case GPU_PASS_SUCCESS:
return GPU_MAT_SUCCESS;
case GPU_PASS_QUEUED:
return GPU_MAT_QUEUED;
default:
return GPU_MAT_FAILED;
}
}
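/* For illustration, a sketch of how a caller can consume the simplified status tracking
 * (only the GPU_material_* calls are real; the function and fallback are hypothetical): */
static GPUShader *example_resolve_material_shader(GPUMaterial *mat, GPUShader *fallback)
{
  if (GPU_material_status(mat) == GPU_MAT_SUCCESS) {
    return GPU_material_get_shader(mat);
  }
  /* Still queued (or failed): keep drawing with a fallback shader. */
  return fallback;
}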
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
if (!GPU_pass_should_optimize(mat->pass)) {
return GPU_MAT_OPTIMIZATION_SKIP;
}
switch (GPU_pass_status(mat->optimized_pass)) {
case GPU_PASS_SUCCESS:
return GPU_MAT_OPTIMIZATION_SUCCESS;
case GPU_PASS_QUEUED:
return GPU_MAT_OPTIMIZATION_QUEUED;
default:
BLI_assert_unreachable();
return GPU_MAT_OPTIMIZATION_SKIP;
}
}
uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat)
{
return GPU_pass_compilation_timestamp(mat->pass);
}
bool GPU_material_has_surface_output(GPUMaterial *mat)
{
return mat->has_surface_output;
}
bool GPU_material_has_volume_output(GPUMaterial *mat)
{
return mat->has_volume_output;
}
bool GPU_material_has_displacement_output(GPUMaterial *mat)
{
return mat->has_displacement_output;
}
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag)
{
return (mat->flag & flag) != 0;
}
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
{
return mat->flag;
}
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag)
{
if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) {
/* Tag material using multiple glossy BSDF as using clear coat. */
mat->flag |= GPU_MATFLAG_COAT;
}
mat->flag |= flag;
}
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name.c_str());
}
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
{
return material->ubo;
}
ListBase GPU_material_attributes(const GPUMaterial *material)
{
return material->graph.attributes;
}
ListBase GPU_material_textures(GPUMaterial *material)
{
return material->graph.textures;
}
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material)
{
const GPUUniformAttrList *attrs = &material->graph.uniform_attrs;
return attrs->count > 0 ? attrs : nullptr;
}
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
{
const ListBase *attrs = &material->graph.layer_attrs;
return !BLI_listbase_is_empty(attrs) ? attrs : nullptr;
}
GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material)
{
return &material->graph;
}
/* Resources */
GPUTexture **gpu_material_sky_texture_layer_set(
GPUMaterial *mat, int width, int height, const float *pixels, float *row)
@@ -243,119 +506,7 @@ static void gpu_material_sky_texture_build(GPUMaterial *mat)
mat->sky_builder = nullptr;
}
void GPU_material_free_single(GPUMaterial *material)
{
bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0;
if (!do_free) {
return;
}
gpu_node_graph_free(&material->graph);
if (material->optimized_pass != nullptr) {
GPU_pass_release(material->optimized_pass);
}
if (material->pass != nullptr) {
GPU_pass_release(material->pass);
}
if (material->ubo != nullptr) {
GPU_uniformbuf_free(material->ubo);
}
if (material->coba_builder != nullptr) {
MEM_freeN(material->coba_builder);
}
if (material->coba_tex != nullptr) {
GPU_texture_free(material->coba_tex);
}
if (material->sky_tex != nullptr) {
GPU_texture_free(material->sky_tex);
}
MEM_freeN(material);
}
void GPU_material_free(ListBase *gpumaterial)
{
LISTBASE_FOREACH (LinkData *, link, gpumaterial) {
GPUMaterial *material = static_cast<GPUMaterial *>(link->data);
DRW_deferred_shader_remove(material);
GPU_material_free_single(material);
}
BLI_freelistN(gpumaterial);
}
Scene *GPU_material_scene(GPUMaterial *material)
{
return material->scene;
}
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
/* If an optimized pass variant is available, and optimization is
* flagged as complete, we use this one instead. */
return ((GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
material->optimized_pass :
material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
/* If an optimized material shader variant is available, and optimization is
* flagged as complete, we use this one instead. */
GPUShader *shader = ((GPU_material_optimization_status(material) ==
GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
GPU_pass_shader_get(material->optimized_pass) :
nullptr;
return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : nullptr);
}
GPUShader *GPU_material_get_shader_base(GPUMaterial *material)
{
return (material->pass) ? GPU_pass_shader_get(material->pass) : nullptr;
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name;
}
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->ma;
}
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
{
return material->ubo;
}
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name);
}
ListBase GPU_material_attributes(const GPUMaterial *material)
{
return material->graph.attributes;
}
ListBase GPU_material_textures(GPUMaterial *material)
{
return material->graph.textures;
}
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material)
{
const GPUUniformAttrList *attrs = &material->graph.uniform_attrs;
return attrs->count > 0 ? attrs : nullptr;
}
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
{
const ListBase *attrs = &material->graph.layer_attrs;
return !BLI_listbase_is_empty(attrs) ? attrs : nullptr;
}
/* Code generation */
void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link)
{
@@ -430,476 +581,3 @@ char *GPU_material_split_sub_function(GPUMaterial *material,
return func_link->name;
}
GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material)
{
return &material->graph;
}
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
return mat->status;
}
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
{
mat->status = status;
}
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
return mat->optimization_status;
}
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
{
mat->optimization_status = status;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
/* Reset creation timer to delay optimization pass. */
mat->creation_time = BLI_time_now_seconds();
}
}
bool GPU_material_optimization_ready(GPUMaterial *mat)
{
/* Timer threshold before optimizations will be queued.
* When materials are frequently being modified, optimization
* can incur CPU overhead from excessive compilation.
*
* As the optimization is entirely asynchronous, it is still beneficial
* to do this quickly to avoid build-up and improve runtime performance.
* The threshold just prevents compilations being queued frame after frame. */
const double optimization_time_threshold_s = 1.2;
return ((BLI_time_now_seconds() - mat->creation_time) >= optimization_time_threshold_s);
}
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material)
{
if (material != default_material) {
material->default_mat = default_material;
}
}
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
{
return mat->has_surface_output;
}
bool GPU_material_has_volume_output(GPUMaterial *mat)
{
return mat->has_volume_output;
}
bool GPU_material_has_displacement_output(GPUMaterial *mat)
{
return mat->has_displacement_output;
}
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag)
{
if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) {
/* Tag material using multiple glossy BSDF as using clear coat. */
mat->flag |= GPU_MATFLAG_COAT;
}
mat->flag |= flag;
}
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag)
{
return (mat->flag & flag) != 0;
}
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
{
return mat->flag;
}
bool GPU_material_recalc_flag_get(GPUMaterial *mat)
{
/* NOTE: Consumes the flags. */
bool updated = (mat->flag & GPU_MATFLAG_UPDATED) != 0;
mat->flag &= ~GPU_MATFLAG_UPDATED;
return updated;
}
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
{
return mat->uuid;
}
GPUMaterial *GPU_material_from_nodetree(Scene *scene,
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid && mat->engine == engine) {
return mat;
}
}
GPUMaterial *mat = MEM_callocN<GPUMaterial>("GPUMaterial");
mat->ma = ma;
mat->scene = scene;
mat->engine = engine;
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
mat->default_mat = nullptr;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
mat->refcount = 1;
STRNCPY(mat->name, name);
if (is_lookdev) {
mat->flag |= GPU_MATFLAG_LOOKDEV_HACK;
}
/* Localize tree to create links for reroute and mute. */
bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr);
ntreeGPUMaterialNodes(localtree, mat);
gpu_material_ramp_texture_build(mat);
gpu_material_sky_texture_build(mat);
/* Use default material pass when possible. */
if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) {
mat->pass = default_pass;
GPU_pass_acquire(mat->pass);
/** WORKAROUND:
* The node tree code is never executed in default replaced passes,
* but the GPU validation will still complain if the node tree UBO is not bound.
* So we create a dummy UBO with (at least) the size of the default material one (192 bytes).
* We allocate 256 bytes to leave some room for future changes. */
mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO");
}
else {
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false);
}
if (mat->pass == nullptr) {
/* We had a cache hit and the shader has already failed to compile. */
mat->status = GPU_MAT_FAILED;
gpu_node_graph_free(&mat->graph);
}
else {
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(mat->pass)) {
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
gpu_node_graph_free_nodes(&mat->graph);
}
}
/* Generate optimized pass. */
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
mat->optimized_pass = nullptr;
mat->optimize_pass_info.callback = callback;
mat->optimize_pass_info.thunk = thunk;
#else
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true);
if (mat->optimized_pass == nullptr) {
/* Failed to create optimized pass. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
else {
GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
if (optimized_sh != nullptr) {
/* Optimized shader already available. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
}
#endif
}
}
/* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */
blender::bke::node_tree_free_local_tree(localtree);
BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */
MEM_freeN(localtree);
/* Note that even if building the shader fails in some way, we still keep
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing. */
LinkData *link = MEM_callocN<LinkData>("GPUMaterialLink");
link->data = mat;
BLI_addtail(gpumaterials, link);
return mat;
}
void GPU_material_acquire(GPUMaterial *mat)
{
atomic_add_and_fetch_uint32(&mat->refcount, 1);
}
void GPU_material_release(GPUMaterial *mat)
{
GPU_material_free_single(mat);
}
static void gpu_material_finalize(GPUMaterial *mat, bool success)
{
mat->flag |= GPU_MATFLAG_UPDATED;
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread using default material as source.
* GPU_shader_warm_cache(..) performs the API-specific PSO compilation using the assigned
* parent shader's cached PSO descriptors as an input.
*
* This is only applied if the given material has a specified default reference
* material available, and the default material is already compiled.
*
* As PSOs do not always match for default shaders, we limit warming for PSO
* configurations to ensure compile time remains fast, as these first
* entries will be the most commonly used PSOs. As not all PSOs are necessarily
* required immediately, this limit should remain low (1-3 at most). */
if (!ELEM(mat->default_mat, nullptr, mat)) {
if (mat->default_mat->pass != nullptr) {
GPUShader *parent_sh = GPU_pass_shader_get(mat->default_mat->pass);
if (parent_sh) {
/* Skip warming if cached pass is identical to the default material. */
if (mat->default_mat->pass != mat->pass && parent_sh != sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, 1);
}
}
}
}
/* Flag success. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph nodes if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&mat->graph);
}
}
else {
mat->status = GPU_MAT_FAILED;
}
}
else {
mat->status = GPU_MAT_FAILED;
GPU_pass_release(mat->pass);
mat->pass = nullptr;
gpu_node_graph_free(&mat->graph);
}
}
void GPU_material_compile(GPUMaterial *mat)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
gpu_material_finalize(mat, success);
}
void GPU_material_async_compile(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
#ifndef NDEBUG
const char *name = mat->name;
#else
const char *name = __func__;
#endif
GPU_pass_begin_async_compilation(mat->pass, name);
}
bool GPU_material_async_try_finalize(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
if (GPU_pass_async_compilation_try_finalize(mat->pass)) {
gpu_material_finalize(mat, GPU_pass_shader_get(mat->pass) != nullptr);
return true;
}
return false;
}
void GPU_material_optimize(GPUMaterial *mat)
{
/* If shader is flagged for skipping optimization or has already been successfully
* optimized, skip. */
if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
return;
}
/* If original shader has not been fully compiled, we are not
* ready to perform optimization. */
if (mat->status != GPU_MAT_SUCCESS) {
/* Reset optimization status. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
return;
}
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
/* If the optimized pass is not valid, first generate optimized pass.
* NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
* used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
* optimal, as these do not benefit from caching, due to baked constants. However, this could
 * be a cause for concern in certain cases. */
if (!mat->optimized_pass) {
mat->optimized_pass = GPU_generate_pass(mat,
&mat->graph,
mat->engine,
mat->optimize_pass_info.callback,
mat->optimize_pass_info.thunk,
true);
BLI_assert(mat->optimized_pass);
}
#else
if (!mat->optimized_pass) {
/* Optimized pass has not been created, skip future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
return;
}
#endif
bool success;
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->optimized_pass, mat->name);
#else
success = GPU_pass_compile(mat->optimized_pass, __func__);
#endif
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread for optimized materials.
* This setup assigns the original unoptimized shader as a "parent" shader
* for the optimized version. This then allows the associated GPU backend to
* compile PSOs within this asynchronous pass, using the identical PSO descriptors of the
* parent shader.
*
* This eliminates all run-time stuttering associated with material optimization and ensures
     * realtime material editing and animation remain seamless, while retaining optimal realtime
* performance. */
GPUShader *parent_sh = GPU_pass_shader_get(mat->pass);
if (parent_sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, -1);
}
/* Mark as complete. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
else {
/* Optimized pass failed to compile. Disable any future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
}
else {
/* Optimization pass generation failed. Disable future attempts to optimize. */
GPU_pass_release(mat->optimized_pass);
mat->optimized_pass = nullptr;
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
/* Release node graph as no longer needed. */
gpu_node_graph_free_nodes(&mat->graph);
}
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
GPU_material_free(&ma->gpumaterial);
}
LISTBASE_FOREACH (World *, wo, &bmain->worlds) {
GPU_material_free(&wo->gpumaterial);
}
BKE_material_defaults_free_gpu();
}
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
/* Allocate a new material and its material graph, and initialize its reference count. */
GPUMaterial *material = MEM_callocN<GPUMaterial>("GPUMaterial");
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
material->optimized_pass = nullptr;
material->default_mat = nullptr;
material->engine = engine;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
/* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(
material, &material->graph, material->engine, generate_code_function_cb, thunk, false);
material->optimized_pass = nullptr;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == nullptr) {
material->status = GPU_MAT_FAILED;
gpu_node_graph_free(&material->graph);
return material;
}
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != nullptr) {
material->status = GPU_MAT_SUCCESS;
if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&material->graph);
}
return material;
}
/* The material was created successfully but still needs to be compiled. */
material->status = GPU_MAT_CREATED;
return material;
}

View File

@@ -0,0 +1,417 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Convert material node-trees to GLSL.
*/
#include "MEM_guardedalloc.h"
#include "BLI_map.hh"
#include "BLI_span.hh"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_pass.hh"
#include "GPU_vertex_format.hh"
#include "gpu_codegen.hh"
#include <mutex>
#include <string>
using namespace blender;
using namespace blender::gpu::shader;
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info);
/* -------------------------------------------------------------------- */
/** \name GPUPass
* \{ */
struct GPUPass {
static inline std::atomic<uint64_t> compilation_counts = 0;
GPUCodegenCreateInfo *create_info = nullptr;
BatchHandle compilation_handle = 0;
std::atomic<GPUShader *> shader = nullptr;
std::atomic<eGPUPassStatus> status = GPU_PASS_QUEUED;
  /* Orphaned GPUPasses get freed by the garbage collector. */
std::atomic<int> refcount = 1;
/* The last time the refcount was greater than 0. */
double gc_timestamp = 0.0f;
uint64_t compilation_timestamp = 0;
/** Hint that an optimized variant of this pass should be created.
* Based on a complexity heuristic from pass code generation. */
bool should_optimize = false;
bool is_optimization_pass = false;
GPUPass(GPUCodegenCreateInfo *info,
bool deferred_compilation,
bool is_optimization_pass,
bool should_optimize)
: create_info(info),
should_optimize(should_optimize),
is_optimization_pass(is_optimization_pass)
{
BLI_assert(!is_optimization_pass || !should_optimize);
if (is_optimization_pass && deferred_compilation) {
      /* Defer until all non-optimization passes are compiled. */
return;
}
GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
if (deferred_compilation) {
compilation_handle = GPU_shader_batch_create_from_infos(
Span<GPUShaderCreateInfo *>(&base_info, 1));
}
else {
shader = GPU_shader_create_from_info(base_info);
finalize_compilation();
}
}
~GPUPass()
{
if (compilation_handle) {
GPU_shader_batch_cancel(compilation_handle);
}
else {
BLI_assert(create_info == nullptr || (is_optimization_pass && status == GPU_PASS_QUEUED));
}
MEM_delete(create_info);
GPU_SHADER_FREE_SAFE(shader);
}
void finalize_compilation()
{
BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once.");
if (compilation_handle) {
shader = GPU_shader_batch_finalize(compilation_handle).first();
}
compilation_timestamp = ++compilation_counts;
if (!shader && !gpu_pass_validate(create_info)) {
fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
}
status = shader ? GPU_PASS_SUCCESS : GPU_PASS_FAILED;
MEM_delete(create_info);
create_info = nullptr;
}
void update(double timestamp)
{
update_compilation();
update_gc_timestamp(timestamp);
}
void update_compilation()
{
if (compilation_handle) {
if (GPU_shader_batch_is_ready(compilation_handle)) {
finalize_compilation();
}
}
else if (status == GPU_PASS_QUEUED && refcount > 0) {
BLI_assert(is_optimization_pass);
GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
compilation_handle = GPU_shader_batch_create_from_infos(
Span<GPUShaderCreateInfo *>(&base_info, 1));
}
}
void update_gc_timestamp(double timestamp)
{
if (refcount != 0 || gc_timestamp == 0.0f) {
gc_timestamp = timestamp;
}
}
bool should_gc(int gc_collect_rate, double timestamp)
{
BLI_assert(gc_timestamp != 0.0f);
return !compilation_handle && status != GPU_PASS_FAILED &&
(timestamp - gc_timestamp) >= gc_collect_rate;
}
};
eGPUPassStatus GPU_pass_status(GPUPass *pass)
{
return pass->status;
}
bool GPU_pass_should_optimize(GPUPass *pass)
{
/* Returns optimization heuristic prepared during
* initial codegen.
* NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvements for
* other backends. */
return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
#if 0
/* Returns optimization heuristic prepared during initial codegen.
* NOTE: Optimization limited to parallel compilation as it causes CPU stalls otherwise. */
return pass->should_optimize && GPU_use_parallel_compilation();
#endif
}
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
void GPU_pass_acquire(GPUPass *pass)
{
int previous_refcount = pass->refcount++;
UNUSED_VARS_NDEBUG(previous_refcount);
BLI_assert(previous_refcount > 0);
}
void GPU_pass_release(GPUPass *pass)
{
int previous_refcount = pass->refcount--;
UNUSED_VARS_NDEBUG(previous_refcount);
BLI_assert(previous_refcount > 0);
}
uint64_t GPU_pass_global_compilation_count()
{
return GPUPass::compilation_counts;
}
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
{
return pass->compilation_timestamp;
}
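/* For illustration, a sketch of how an engine might use the compilation timestamp to detect
 * that a pass was recompiled since it was last seen (the function and the `last_seen` storage
 * are hypothetical, e.g. kept per material by the caller): */
static bool example_pass_changed_since(GPUPass *pass, uint64_t &last_seen)
{
  const uint64_t timestamp = GPU_pass_compilation_timestamp(pass);
  const bool changed = (timestamp != last_seen);
  last_seen = timestamp;
  return changed;
}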
/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
*
 * Internal shader cache: This prevents shader recompilation / stalls when
 * using undo/redo, and also allows GPUPass reuse if the shader code is the
 * same for two different materials. Unused GPUPasses are freed by garbage collection.
* \{ */
class GPUPassCache {
  /* Number of seconds with 0 users required before garbage collecting a pass. */
static constexpr float gc_collect_rate_ = 60.0f;
/* Number of seconds without base compilations required before starting to compile optimization
* passes.*/
static constexpr float optimization_delay_ = 10.0f;
double last_base_compilation_timestamp_ = -1.0;
Map<uint32_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];
std::mutex mutex_;
public:
void add(eGPUMaterialEngine engine,
GPUCodegen &codegen,
bool deferred_compilation,
bool is_optimization_pass)
{
std::lock_guard lock(mutex_);
passes_[engine][is_optimization_pass].add(
codegen.hash_get(),
std::make_unique<GPUPass>(codegen.create_info,
deferred_compilation,
is_optimization_pass,
codegen.should_optimize_heuristic()));
};
GPUPass *get(eGPUMaterialEngine engine,
size_t hash,
bool allow_deferred,
bool is_optimization_pass)
{
std::lock_guard lock(mutex_);
std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
pass->get()->finalize_compilation();
}
return pass ? pass->get() : nullptr;
}
void update()
{
std::lock_guard lock(mutex_);
double timestamp = BLI_time_now_seconds();
bool base_passes_ready = true;
/* Base Passes. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
pass->update(timestamp);
if (pass->status == GPU_PASS_QUEUED) {
base_passes_ready = false;
}
}
engine_passes[false].remove_if(
[&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
}
/* Optimization Passes GC. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
pass->update_gc_timestamp(timestamp);
}
engine_passes[true].remove_if(
/* TODO: Use lower rate for optimization passes? */
[&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
}
if (!base_passes_ready) {
last_base_compilation_timestamp_ = timestamp;
return;
}
if ((timestamp - last_base_compilation_timestamp_) < optimization_delay_) {
return;
}
/* Optimization Passes Compilation. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
pass->update_compilation();
}
}
}
std::mutex &get_mutex()
{
return mutex_;
}
};
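/* With the defaults above: optimization passes only start compiling once no base pass has been
 * pending for `optimization_delay_` (10) seconds, and a finalized, non-failed pass that stays at
 * zero users for `gc_collect_rate_` (60) seconds is garbage collected on a later update(). */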
static GPUPassCache *g_cache = nullptr;
void GPU_pass_ensure_its_ready(GPUPass *pass)
{
if (pass->status == GPU_PASS_QUEUED) {
std::lock_guard lock(g_cache->get_mutex());
if (pass->status == GPU_PASS_QUEUED) {
pass->finalize_compilation();
}
}
}
void GPU_pass_cache_init()
{
g_cache = MEM_new<GPUPassCache>(__func__);
}
void GPU_pass_cache_update()
{
g_cache->update();
}
void GPU_pass_cache_wait_for_all()
{
GPU_shader_batch_wait_for_all();
g_cache->update();
}
void GPU_pass_cache_free()
{
MEM_SAFE_DELETE(g_cache);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compilation
* \{ */
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
{
int samplers_len = 0;
for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
samplers_len++;
}
}
/* Validate against GPU limit. */
if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
return false;
}
return (samplers_len * 2 <= GPU_max_textures());
}
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
const char *debug_name,
eGPUMaterialEngine engine,
bool deferred_compilation,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
/* If Optimize flag is passed in, we are generating an optimized
* variant of the GPUMaterial's GPUPass. */
if (optimize_graph) {
gpu_node_graph_optimize(graph);
}
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
GPUCodegen codegen(material, graph, debug_name);
codegen.generate_graphs();
codegen.generate_cryptomatte();
GPUPass *pass = nullptr;
if (!optimize_graph) {
/* The optimized version of the shader should not re-generate a UBO.
* The UBO will not be used for this variant. */
codegen.generate_uniform_buffer();
}
/* Cache lookup: Reuse shaders already compiled. */
pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
if (pass) {
pass->refcount++;
return pass;
}
/* The shader is not compiled, continue generating the shader strings. */
codegen.generate_attribs();
codegen.generate_resources();
codegen.generate_library();
/* Make engine add its own code and implement the generated functions. */
finalize_source_cb(thunk, material, &codegen.output);
codegen.create_info->finalize();
g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
codegen.create_info = nullptr;
return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
}
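/* For illustration, a sketch of a typical caller of the new signature (the callback, thunk and
 * debug name are whatever the engine passes in; see GPU_material_from_nodetree() for the real
 * call sites): */
static void example_generate_and_poll(GPUMaterial *material,
                                      GPUNodeGraph *graph,
                                      eGPUMaterialEngine engine,
                                      GPUCodegenCallbackFn engine_cb,
                                      void *engine_thunk)
{
  /* Deferred: the pass is queued through the shader batch API and finalized later by
   * GPU_pass_cache_update() (called from GPU_render_step()). */
  GPUPass *pass = GPU_generate_pass(material,
                                    graph,
                                    "example_pass",
                                    engine,
                                    /*deferred_compilation=*/true,
                                    engine_cb,
                                    engine_thunk,
                                    /*optimize_graph=*/false);

  if (GPU_pass_status(pass) == GPU_PASS_SUCCESS) {
    GPUShader *shader = GPU_pass_shader_get(pass);
    (void)shader;
  }
  /* Drop the reference when the material no longer needs the pass. */
  GPU_pass_release(pass);
}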
/** \} */

View File

@@ -387,6 +387,11 @@ void GPU_shader_batch_cancel(BatchHandle &handle)
GPUBackend::get()->get_compiler()->batch_cancel(handle);
}
void GPU_shader_batch_wait_for_all()
{
GPUBackend::get()->get_compiler()->wait_for_all();
}
void GPU_shader_compile_static()
{
printf("Compiling all static GPU shaders. This process takes a while.\n");
@@ -1029,6 +1034,7 @@ bool ShaderCompiler::batch_is_ready(BatchHandle handle)
Vector<Shader *> ShaderCompiler::batch_finalize(BatchHandle &handle)
{
std::unique_lock lock(mutex_);
/* TODO: Move to be first on the queue. */
compilation_finished_notification_.wait(lock,
[&]() { return batches_.lookup(handle)->is_ready(); });
@@ -1116,6 +1122,24 @@ void ShaderCompiler::run_thread()
}
}
void ShaderCompiler::wait_for_all()
{
std::unique_lock lock(mutex_);
compilation_finished_notification_.wait(lock, [&]() {
if (!compilation_queue_.empty()) {
return false;
}
for (Batch *batch : batches_.values()) {
if (!batch->is_ready()) {
return false;
}
}
return true;
});
}
/** \} */
} // namespace blender::gpu

View File

@@ -223,6 +223,8 @@ class ShaderCompiler {
SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
bool specialization_batch_is_ready(SpecializationBatchHandle &handle);
void wait_for_all();
};
enum class Severity {

View File

@@ -56,9 +56,7 @@ static int node_shader_gpu_tex_environment(GPUMaterial *mat,
GPUNodeLink *outalpha;
/* HACK(@fclem): For lookdev mode: do not compile an empty environment and just create an empty
* texture entry point. We manually bind to it after #DRW_shgroup_add_material_resources(). */
if (!ima && !GPU_material_flag_get(mat, GPU_MATFLAG_LOOKDEV_HACK)) {
if (!ima) {
return GPU_stack_link(mat, node, "node_tex_environment_empty", in, out);
}

View File

@@ -105,7 +105,7 @@
#include "GPU_context.hh"
#include "GPU_init_exit.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "COM_compositor.hh"
@@ -161,8 +161,6 @@ void WM_init_gpu()
GPU_init();
GPU_pass_cache_init();
if (G.debug & G_DEBUG_GPU_COMPILE_SHADERS) {
GPU_shader_compile_static();
}
@@ -645,7 +643,6 @@ void WM_exit_ex(bContext *C, const bool do_python_exit, const bool do_user_exit_
if (gpu_is_init) {
DRW_gpu_context_enable_ex(false);
UI_exit();
GPU_pass_cache_free();
GPU_shader_cache_dir_clear_old();
GPU_exit();
DRW_gpu_context_disable_ex(false);