Draw: Batch shader compilation for image render

Enable deferred parallel batch compilation for image renders.
This replaces the use of the `WM_job` system with a regular thread,
since `WM_job` requires access to the main context,
which is not available from the render thread.
This also simplifies the system: a single compilation thread is now
created at application startup and deleted at exit.
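
The heart of the change is the worker model: instead of spawning a `WM_job` per
compilation request, a single long-lived thread drains a mutex-protected queue
and sleeps on a condition variable when the queue is empty. Below is a minimal
sketch of that pattern in standard C++. The names (`Material`,
`compile_material`, `queue_append`, `compiler_exit`) are hypothetical
stand-ins, and the sketch folds the patch's explicit stop-flag check and
re-loop into a single predicate wait; it is not the Blender implementation.

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>

struct Material {}; /* Hypothetical stand-in for GPUMaterial. */
static void compile_material(Material * /*mat*/) { /* Expensive shader build. */ }

struct Compiler {
  std::vector<Material *> queue;
  std::mutex queue_mutex;
  std::condition_variable queue_cv;
  bool stop = false; /* Only accessed with `queue_mutex` held. */
};

static Compiler &compiler_data()
{
  static Compiler data;
  return data;
}

/* Worker body: a single thread that lives for the whole application. */
static void compilation_worker()
{
  for (;;) {
    Material *mat = nullptr;
    {
      std::unique_lock lock(compiler_data().queue_mutex);
      /* Sleep until there is work or shutdown is requested. The predicate
       * guards against spurious and missed wakeups. */
      compiler_data().queue_cv.wait(
          lock, [] { return compiler_data().stop || !compiler_data().queue.empty(); });
      if (compiler_data().stop) {
        return;
      }
      /* Pop the tail; the producer side removes canceled entries itself. */
      mat = compiler_data().queue.back();
      compiler_data().queue.pop_back();
    }
    /* Compile outside the lock so producers are never blocked. */
    compile_material(mat);
  }
}

/* Producer side: append a request, then wake the worker. */
static void queue_append(Material *mat)
{
  {
    std::scoped_lock lock(compiler_data().queue_mutex);
    compiler_data().queue.push_back(mat);
  }
  compiler_data().queue_cv.notify_one();
}

/* Shutdown: raise the flag under the lock, notify, and join. */
static void compiler_exit(std::thread &worker)
{
  {
    std::scoped_lock lock(compiler_data().queue_mutex);
    compiler_data().stop = true;
  }
  compiler_data().queue_cv.notify_one();
  worker.join();
}

The actual patch keeps two queues (plain compilations always drain before
optimization passes), hands the worker its own GPU context, and routes thread
creation through `BLI_threadpool_init()` so it integrates with the existing
`BLI_threads` API.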

Pull Request: https://projects.blender.org/blender/blender/pulls/125005
Miguel Pozo
2024-09-06 18:13:43 +02:00
parent 2952498724
commit eab640e044
5 changed files with 142 additions and 182 deletions


@@ -517,6 +517,14 @@ void Instance::render_frame(RenderEngine *engine, RenderLayer *render_layer, con
 {
   /* TODO: Break on RE_engine_test_break(engine) */
   while (!sampling.finished()) {
+    if (materials.queued_shaders_count > 0) {
+      /* Leave some time for shaders to compile. */
+      BLI_time_sleep_ms(50);
+      /** WORKAROUND: Re-sync to check if all shaders are already compiled. */
+      this->render_sync();
+      continue;
+    }
     this->render_sample();
     if ((sampling.sample_index() == 1) || ((sampling.sample_index() % 25) == 0) ||


@@ -167,7 +167,7 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
                                  blender_mat->nodetree :
                                  default_surface_ntree_.nodetree_get(blender_mat);
-  bool use_deferred_compilation = inst_.is_viewport();
+  bool use_deferred_compilation = inst_.is_viewport() || GPU_use_parallel_compilation();
   MaterialPass matpass = MaterialPass();
   matpass.gpumat = inst_.shaders.material_shader_get(


@@ -260,6 +260,8 @@ void DRW_texture_free(GPUTexture *tex);
   } while (0)

 /* Shaders */
+void DRW_shader_init();
+void DRW_shader_exit();
 GPUMaterial *DRW_shader_from_world(World *wo,
                                    bNodeTree *ntree,


@@ -3255,7 +3255,9 @@ void DRW_gpu_context_create()
   WM_system_gpu_context_activate(DST.system_gpu_context);
   /* Be sure to create blender_gpu_context too. */
   DST.blender_gpu_context = GPU_context_create(nullptr, DST.system_gpu_context);
-  /* So we activate the window's one afterwards. */
+  /* Setup compilation context. */
+  DRW_shader_init();
+  /* Activate the window's context afterwards. */
   wm_window_reset_drawable();
 }
@@ -3263,6 +3265,7 @@ void DRW_gpu_context_destroy()
 {
   BLI_assert(BLI_thread_is_main());
   if (DST.system_gpu_context != nullptr) {
+    DRW_shader_exit();
     WM_system_gpu_context_activate(DST.system_gpu_context);
     GPU_context_active_set(DST.blender_gpu_context);
     GPU_context_discard(DST.blender_gpu_context);


@@ -34,11 +34,17 @@
 #include "draw_manager_c.hh"

+#include <atomic>
+#include <condition_variable>
+#include <mutex>

 extern "C" char datatoc_gpu_shader_depth_only_frag_glsl[];
 extern "C" char datatoc_common_fullscreen_vert_glsl[];

-#define USE_DEFERRED_COMPILATION 1
+using namespace blender;

 /* -------------------------------------------------------------------- */
 /** \name Deferred Compilation (DRW_deferred)
  *
@@ -49,59 +55,63 @@ extern "C" char datatoc_common_fullscreen_vert_glsl[];
 struct DRWShaderCompiler {
   /** Default compilation queue. */
-  ListBase queue; /* GPUMaterial */
-  SpinLock list_lock;
+  Vector<GPUMaterial *> queue;
   /** Optimization queue. */
-  ListBase optimize_queue; /* GPUMaterial */
+  Vector<GPUMaterial *> optimize_queue;
+  std::mutex queue_mutex;
+  std::condition_variable queue_cv;

   void *system_gpu_context;
   GPUContext *blender_gpu_context;
-  bool own_context;
+  std::atomic<bool> stop;
 };

-static void drw_deferred_shader_compilation_exec(void *custom_data,
-                                                 wmJobWorkerStatus *worker_status)
+/** NOTE: While the `BLI_threads` API requires a List,
+ * we only create a single thread at application startup and delete it at exit. */
+static ListBase &compilation_threadpool()
+{
+  static ListBase compilation_threadpool_ = {};
+  return compilation_threadpool_;
+}
+
+static DRWShaderCompiler &compiler_data()
+{
+  static DRWShaderCompiler compiler_data_ = {};
+  return compiler_data_;
+}
+
+static void *drw_deferred_shader_compilation_exec(void *)
 {
-  using namespace blender;
-  GPU_render_begin();
-  DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
-  void *system_gpu_context = comp->system_gpu_context;
-  GPUContext *blender_gpu_context = comp->blender_gpu_context;
+  void *system_gpu_context = compiler_data().system_gpu_context;
+  GPUContext *blender_gpu_context = compiler_data().blender_gpu_context;
   BLI_assert(system_gpu_context != nullptr);
   BLI_assert(blender_gpu_context != nullptr);

-  const bool use_main_context_workaround = GPU_use_main_context_workaround();
-  if (use_main_context_workaround) {
-    BLI_assert(system_gpu_context == DST.system_gpu_context);
-    GPU_context_main_lock();
-  }
+  const bool use_parallel_compilation = GPU_use_parallel_compilation();

+  GPU_render_begin();
   WM_system_gpu_context_activate(system_gpu_context);
   GPU_context_active_set(blender_gpu_context);

-  const bool use_parallel_compilation = GPU_use_parallel_compilation();
   Vector<GPUMaterial *> async_mats;

   while (true) {
-    if (worker_status->stop) {
+    if (compiler_data().stop) {
       break;
     }

-    BLI_spin_lock(&comp->list_lock);
-    /* Pop tail because it will be less likely to lock the main thread
+    compiler_data().queue_mutex.lock();
+    /* Pop last because it will be less likely to lock the main thread
      * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-    LinkData *link = (LinkData *)BLI_poptail(&comp->queue);
-    GPUMaterial *mat = link ? (GPUMaterial *)link->data : nullptr;
+    GPUMaterial *mat = compiler_data().queue.is_empty() ? nullptr :
+                                                          compiler_data().queue.pop_last();
     if (mat) {
       /* Avoid another thread freeing the material mid compilation. */
       GPU_material_acquire(mat);
-      MEM_freeN(link);
     }
-    BLI_spin_unlock(&comp->list_lock);
+    compiler_data().queue_mutex.unlock();

     if (mat) {
       /* We have a new material that must be compiled,
@@ -129,26 +139,27 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     else {
       /* Check for Material Optimization job once there are no more
        * shaders to compile. */
-      BLI_spin_lock(&comp->list_lock);
-      /* Pop tail because it will be less likely to lock the main thread
+      compiler_data().queue_mutex.lock();
+      /* Pop last because it will be less likely to lock the main thread
        * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
-      LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
-      GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : nullptr;
+      GPUMaterial *optimize_mat = compiler_data().optimize_queue.is_empty() ?
+                                      nullptr :
+                                      compiler_data().optimize_queue.pop_last();
       if (optimize_mat) {
         /* Avoid another thread freeing the material during optimization. */
         GPU_material_acquire(optimize_mat);
       }
-      BLI_spin_unlock(&comp->list_lock);
+      compiler_data().queue_mutex.unlock();

       if (optimize_mat) {
         /* Compile optimized material shader. */
         GPU_material_optimize(optimize_mat);
         GPU_material_release(optimize_mat);
-        MEM_freeN(link);
       }
       else {
         /* No more materials to optimize, or shaders to compile. */
-        break;
+        std::unique_lock lock(compiler_data().queue_mutex);
+        compiler_data().queue_cv.wait(lock);
       }
     }
@@ -158,7 +169,7 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
   }

   /* We have to wait until all the requested batches are ready,
-   * even if worker_status->stop is true. */
+   * even if compiler_data().stop is true. */
   while (!async_mats.is_empty()) {
     async_mats.remove_if([](GPUMaterial *mat) {
       if (GPU_material_async_try_finalize(mat)) {
@@ -171,118 +182,88 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
   GPU_context_active_set(nullptr);
   WM_system_gpu_context_release(system_gpu_context);
-  if (use_main_context_workaround) {
-    GPU_context_main_unlock();
-  }
   GPU_render_end();
+  return nullptr;
 }

-static void drw_deferred_shader_compilation_free(void *custom_data)
+void DRW_shader_init()
 {
-  DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
-  BLI_spin_lock(&comp->list_lock);
-  LISTBASE_FOREACH (LinkData *, link, &comp->queue) {
-    GPU_material_status_set(static_cast<GPUMaterial *>(link->data), GPU_MAT_CREATED);
-  }
-  LISTBASE_FOREACH (LinkData *, link, &comp->optimize_queue) {
-    GPU_material_optimization_status_set(static_cast<GPUMaterial *>(link->data),
-                                         GPU_MAT_OPTIMIZATION_READY);
-  }
-  BLI_freelistN(&comp->queue);
-  BLI_freelistN(&comp->optimize_queue);
-  BLI_spin_unlock(&comp->list_lock);
-
-  if (comp->own_context) {
-    /* Only destroy if the job owns the context. */
-    WM_system_gpu_context_activate(comp->system_gpu_context);
-    GPU_context_active_set(comp->blender_gpu_context);
-    GPU_context_discard(comp->blender_gpu_context);
-    WM_system_gpu_context_dispose(comp->system_gpu_context);
-    wm_window_reset_drawable();
-  }
-  MEM_freeN(comp);
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  static bool initialized = false;
+  if (initialized) {
+    BLI_assert_unreachable();
+    return;
+  }
+  initialized = true;
+
+  compiler_data().stop = false;
+
+  compiler_data().system_gpu_context = WM_system_gpu_context_create();
+  compiler_data().blender_gpu_context = GPU_context_create(nullptr,
+                                                           compiler_data().system_gpu_context);
+  GPU_context_active_set(nullptr);
+  WM_system_gpu_context_activate(DST.system_gpu_context);
+  GPU_context_active_set(DST.blender_gpu_context);
+
+  BLI_threadpool_init(&compilation_threadpool(), drw_deferred_shader_compilation_exec, 1);
+  BLI_threadpool_insert(&compilation_threadpool(), nullptr);
 }

+void DRW_shader_exit()
+{
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  compiler_data().stop = true;
+  compiler_data().queue_cv.notify_one();
+  BLI_threadpool_end(&compilation_threadpool());
+
+  /* Revert the queued state for the materials that have not been compiled.
+   * Note that this is not strictly needed since this function is called at program exit. */
+  {
+    std::scoped_lock queue_lock(compiler_data().queue_mutex);
+    while (!compiler_data().queue.is_empty()) {
+      GPU_material_status_set(compiler_data().queue.pop_last(), GPU_MAT_CREATED);
+    }
+    while (!compiler_data().optimize_queue.is_empty()) {
+      GPU_material_optimization_status_set(compiler_data().optimize_queue.pop_last(),
+                                           GPU_MAT_OPTIMIZATION_READY);
+    }
+  }
+
+  WM_system_gpu_context_activate(compiler_data().system_gpu_context);
+  GPU_context_active_set(compiler_data().blender_gpu_context);
+  GPU_context_discard(compiler_data().blender_gpu_context);
+  WM_system_gpu_context_dispose(compiler_data().system_gpu_context);
+}

 /**
- * Append either shader compilation or optimization job to deferred queue and
- * ensure shader compilation worker is active.
+ * Append either shader compilation or optimization job to deferred queue.
  * We keep two separate queue's to ensure core compilations always complete before optimization.
  */
 static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
 {
-  const bool use_main_context = GPU_use_main_context_workaround();
-  const bool job_own_context = !use_main_context;
-  BLI_assert(DST.draw_ctx.evil_C);
-  wmWindowManager *wm = CTX_wm_manager(DST.draw_ctx.evil_C);
-  wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C);
-
-  /* Get the running job or a new one if none is running. Can only have one job per type & owner.
-   */
-  wmJob *wm_job = WM_jobs_get(
-      wm, win, wm, "Shaders Compilation", eWM_JobFlag(0), WM_JOB_TYPE_SHADER_COMPILATION);
-
-  DRWShaderCompiler *old_comp = (DRWShaderCompiler *)WM_jobs_customdata_get(wm_job);
-
-  DRWShaderCompiler *comp = static_cast<DRWShaderCompiler *>(
-      MEM_callocN(sizeof(DRWShaderCompiler), "DRWShaderCompiler"));
-  BLI_spin_init(&comp->list_lock);
-
-  if (old_comp) {
-    BLI_spin_lock(&old_comp->list_lock);
-    BLI_movelisttolist(&comp->queue, &old_comp->queue);
-    BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
-    BLI_spin_unlock(&old_comp->list_lock);
-    /* Do not recreate context, just pass ownership. */
-    if (old_comp->system_gpu_context) {
-      comp->system_gpu_context = old_comp->system_gpu_context;
-      comp->blender_gpu_context = old_comp->blender_gpu_context;
-      old_comp->own_context = false;
-      comp->own_context = job_own_context;
-    }
-  }
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);

   /* Add to either compilation or optimization queue. */
   if (is_optimization_job) {
     BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
     GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->optimize_queue, node);
+    compiler_data().optimize_queue.append(mat);
   }
   else {
     GPU_material_status_set(mat, GPU_MAT_QUEUED);
-    LinkData *node = BLI_genericNodeN(mat);
-    BLI_addtail(&comp->queue, node);
+    compiler_data().queue.append(mat);
   }

-  /* Create only one context. */
-  if (comp->system_gpu_context == nullptr) {
-    if (use_main_context) {
-      comp->system_gpu_context = DST.system_gpu_context;
-      comp->blender_gpu_context = DST.blender_gpu_context;
-    }
-    else {
-      comp->system_gpu_context = WM_system_gpu_context_create();
-      comp->blender_gpu_context = GPU_context_create(nullptr, comp->system_gpu_context);
-      GPU_context_active_set(nullptr);
-      WM_system_gpu_context_activate(DST.system_gpu_context);
-      GPU_context_active_set(DST.blender_gpu_context);
-    }
-    comp->own_context = job_own_context;
-  }
-
-  WM_jobs_customdata_set(wm_job, comp, drw_deferred_shader_compilation_free);
-  WM_jobs_timer(wm_job, 0.1, NC_MATERIAL | ND_SHADING_DRAW, 0);
-  WM_jobs_delay_start(wm_job, 0.1);
-  WM_jobs_callbacks(wm_job, drw_deferred_shader_compilation_exec, nullptr, nullptr, nullptr);
-
-  G.is_break = false;
-
-  WM_jobs_start(wm, wm_job);
+  compiler_data().queue_cv.notify_one();
 }

 static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
@@ -291,16 +272,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
     return;
   }

-  /* Do not defer the compilation if we are rendering for image.
-   * deferred rendering is only possible when `evil_C` is available */
-  if (DST.draw_ctx.evil_C == nullptr || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
-    deferred = false;
-  }
-
-  /* Avoid crashes with RenderDoc on Windows + Nvidia. */
-  if (G.debug & G_DEBUG_GPU_RENDERDOC &&
-      GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL))
-  {
+  if (GPU_use_main_context_workaround()) {
     deferred = false;
   }
@@ -363,59 +335,39 @@ static void drw_register_shader_vlattrs(GPUMaterial *mat)
 void DRW_deferred_shader_remove(GPUMaterial *mat)
 {
-  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
-    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
-      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
-          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
-      if (comp != nullptr) {
-        BLI_spin_lock(&comp->list_lock);
-
-        /* Search for compilation job in queue. */
-        LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data));
-        if (link) {
-          BLI_remlink(&comp->queue, link);
-          GPU_material_status_set(static_cast<GPUMaterial *>(link->data), GPU_MAT_CREATED);
-        }
-        MEM_SAFE_FREE(link);
-
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(static_cast<GPUMaterial *>(opti_link->data),
-                                               GPU_MAT_OPTIMIZATION_READY);
-        }
-        BLI_spin_unlock(&comp->list_lock);
-        MEM_SAFE_FREE(opti_link);
-      }
-    }
-  }
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);
+
+  /* Search for compilation job in queue. */
+  if (compiler_data().queue.contains(mat)) {
+    compiler_data().queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_status_set(mat, GPU_MAT_CREATED);
+  }
+
+  /* Search for optimization job in queue. */
+  if (compiler_data().optimize_queue.contains(mat)) {
+    compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+  }
 }

 void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
 {
-  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
-    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
-      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
-          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
-      if (comp != nullptr) {
-        BLI_spin_lock(&comp->list_lock);
-        /* Search for optimization job in queue. */
-        LinkData *opti_link = (LinkData *)BLI_findptr(
-            &comp->optimize_queue, mat, offsetof(LinkData, data));
-        if (opti_link) {
-          BLI_remlink(&comp->optimize_queue, opti_link);
-          GPU_material_optimization_status_set(static_cast<GPUMaterial *>(opti_link->data),
-                                               GPU_MAT_OPTIMIZATION_READY);
-        }
-        BLI_spin_unlock(&comp->list_lock);
-        MEM_SAFE_FREE(opti_link);
-      }
-    }
-  }
+  if (GPU_use_main_context_workaround()) {
+    /* Deferred compilation is not supported. */
+    return;
+  }
+
+  std::scoped_lock queue_lock(compiler_data().queue_mutex);
+
+  /* Search for optimization job in queue. */
+  if (compiler_data().optimize_queue.contains(mat)) {
+    compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
+    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+  }
 }
@@ -485,11 +437,6 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
   drw_register_shader_vlattrs(mat);

-  if (DRW_state_is_image_render()) {
-    /* Do not deferred if doing render. */
-    deferred = false;
-  }
-
   drw_deferred_shader_add(mat, deferred);
   DRW_shader_queue_optimize_material(mat);
   return mat;