From fca4a16975959df055525912cefb0da429763615 Mon Sep 17 00:00:00 2001
From: Miguel Pozo
Date: Tue, 10 Jun 2025 17:24:18 +0200
Subject: [PATCH] Fix: GPU: GPUWorker lock

Prevent race conditions caused by calling `GPUWorker::wake_up` when the
worker is not waiting. Found to be an issue in #139627, since `wake_up`
is likely to be called before the thread has fully started.

Pull Request: https://projects.blender.org/blender/blender/pulls/139842
---
 source/blender/gpu/GPU_worker.hh              | 23 ++++--
 source/blender/gpu/intern/gpu_shader.cc       | 73 ++++++++++---------
 .../blender/gpu/intern/gpu_shader_private.hh  |  3 +-
 source/blender/gpu/intern/gpu_worker.cc       | 33 ++++++---
 4 files changed, 82 insertions(+), 50 deletions(-)

diff --git a/source/blender/gpu/GPU_worker.hh b/source/blender/gpu/GPU_worker.hh
index 2c7d01f7f38..76e04d526a0 100644
--- a/source/blender/gpu/GPU_worker.hh
+++ b/source/blender/gpu/GPU_worker.hh
@@ -23,8 +23,8 @@ class GPUWorker {
  private:
   Vector<std::unique_ptr<std::thread>> threads_;
   std::condition_variable condition_var_;
-  std::mutex mutex_;
-  std::atomic<bool> terminate_ = false;
+  std::mutex &mutex_;
+  bool terminate_ = false;
 
  public:
   enum class ContextType {
@@ -37,9 +37,20 @@ class GPUWorker {
   /**
    * \param threads_count: Number of threads to span.
    * \param context_type: The type of context each thread uses.
-   * \param run_cb: The callback function that will be called by a thread on `wake_up()`.
+   * \param mutex: Mutex used when trying to acquire the next work
+   * (and reused internally for termination).
+   * \param pop_work: The callback function that will be called to acquire the next work,
+   * should return a void pointer.
+   * NOTE: The mutex is locked when this function is called.
+   * \param do_work: The callback function that will be called for each acquired work
+   * (passed as a void pointer).
+   * NOTE: The mutex is unlocked when this function is called.
    */
-  GPUWorker(uint32_t threads_count, ContextType context_type, std::function<void()> run_cb);
+  GPUWorker(uint32_t threads_count,
+            ContextType context_type,
+            std::mutex &mutex,
+            std::function<void *()> pop_work,
+            std::function<void(void *)> do_work);
   ~GPUWorker();
 
   /* Wake up a single thread. */
@@ -49,7 +60,9 @@ class GPUWorker {
   }
 
  private:
-  void run(std::shared_ptr<GPUSecondaryContext> context, std::function<void()> run_cb);
+  void run(std::shared_ptr<GPUSecondaryContext> context,
+           std::function<void *()> pop_work,
+           std::function<void(void *)> do_work);
 };
 
 }  // namespace blender::gpu
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index f083461441e..1fdc120420b 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -967,7 +967,11 @@ ShaderCompiler::ShaderCompiler(uint32_t threads_count,
 
   if (!GPU_use_main_context_workaround()) {
     compilation_worker_ = std::make_unique<GPUWorker>(
-        threads_count, context_type, [this]() { this->run_thread(); });
+        threads_count,
+        context_type,
+        mutex_,
+        [this]() -> void * { return this->pop_work(); },
+        [this](void *work) { this->do_work(work); });
   }
 }
 
@@ -1097,42 +1101,43 @@ bool ShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
   return handle == 0;
 }
 
-void ShaderCompiler::run_thread()
+void *ShaderCompiler::pop_work()
 {
-  while (true) {
-    Batch *batch;
-    int shader_index;
-    {
-      std::lock_guard lock(mutex_);
+  /* NOTE: Already under mutex lock when GPUWorker calls this function.
+   */
 
-      if (compilation_queue_.is_empty()) {
-        return;
-      }
-
-      ParallelWork work = compilation_queue_.pop();
-      batch = work.batch;
-      shader_index = work.shader_index;
-    }
-
-    /* Compile */
-    if (!batch->is_specialization_batch()) {
-      batch->shaders[shader_index] = compile_shader(*batch->infos[shader_index]);
-    }
-    else {
-      specialize_shader(batch->specializations[shader_index]);
-    }
-
-    {
-      std::lock_guard lock(mutex_);
-      batch->pending_compilations--;
-      if (batch->is_ready() && batch->is_cancelled) {
-        batch->free_shaders();
-        MEM_delete(batch);
-      }
-    }
-
-    compilation_finished_notification_.notify_all();
+  if (compilation_queue_.is_empty()) {
+    return nullptr;
   }
+
+  ParallelWork work = compilation_queue_.pop();
+  return MEM_new<ParallelWork>(__func__, work);
+}
+
+void ShaderCompiler::do_work(void *work_payload)
+{
+  ParallelWork *work = reinterpret_cast<ParallelWork *>(work_payload);
+  Batch *batch = work->batch;
+  int shader_index = work->shader_index;
+  MEM_delete(work);
+
+  /* Compile */
+  if (!batch->is_specialization_batch()) {
+    batch->shaders[shader_index] = compile_shader(*batch->infos[shader_index]);
+  }
+  else {
+    specialize_shader(batch->specializations[shader_index]);
+  }
+
+  {
+    std::lock_guard lock(mutex_);
+    batch->pending_compilations--;
+    if (batch->is_ready() && batch->is_cancelled) {
+      batch->free_shaders();
+      MEM_delete(batch);
+    }
+  }
+
+  compilation_finished_notification_.notify_all();
 }
 
 void ShaderCompiler::wait_for_all()
diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh
index 55c5335cf5f..ca7cac59559 100644
--- a/source/blender/gpu/intern/gpu_shader_private.hh
+++ b/source/blender/gpu/intern/gpu_shader_private.hh
@@ -265,7 +265,8 @@ class ShaderCompiler {
 
   bool support_specializations_;
 
-  void run_thread();
+  void *pop_work();
+  void do_work(void *work_payload);
 
   BatchHandle next_batch_handle_ = 1;
diff --git a/source/blender/gpu/intern/gpu_worker.cc b/source/blender/gpu/intern/gpu_worker.cc
index a38b5c8774e..940c0f38b88 100644
--- a/source/blender/gpu/intern/gpu_worker.cc
+++ b/source/blender/gpu/intern/gpu_worker.cc
@@ -8,43 +8,56 @@ namespace blender::gpu {
 
 GPUWorker::GPUWorker(uint32_t threads_count,
                      ContextType context_type,
-                     std::function<void()> run_cb)
+                     std::mutex &mutex,
+                     std::function<void *()> pop_work,
+                     std::function<void(void *)> do_work)
+    : mutex_(mutex)
 {
   for (int i : IndexRange(threads_count)) {
     UNUSED_VARS(i);
     std::shared_ptr<GPUSecondaryContext> thread_context =
         context_type == ContextType::PerThread ? std::make_shared<GPUSecondaryContext>() :
                                                  nullptr;
-    threads_.append(std::make_unique<std::thread>([=]() { this->run(thread_context, run_cb); }));
+    threads_.append(
+        std::make_unique<std::thread>([=]() { this->run(thread_context, pop_work, do_work); }));
   }
 }
 
 GPUWorker::~GPUWorker()
 {
-  terminate_ = true;
+  {
+    std::unique_lock lock(mutex_);
+    terminate_ = true;
+  }
   condition_var_.notify_all();
   for (std::unique_ptr<std::thread> &thread : threads_) {
     thread->join();
   }
 }
 
-void GPUWorker::run(std::shared_ptr<GPUSecondaryContext> context, std::function<void()> run_cb)
+void GPUWorker::run(std::shared_ptr<GPUSecondaryContext> context,
+                    std::function<void *()> pop_work,
+                    std::function<void(void *)> do_work)
 {
   if (context) {
     context->activate();
   }
 
+  std::unique_lock lock(mutex_);
+
   /* Loop until we get the terminate signal. */
   while (!terminate_) {
-    {
-      /* Wait until wake_up() */
-      std::unique_lock lock(mutex_);
+    void *work = pop_work();
+    if (!work) {
       condition_var_.wait(lock);
-    }
-    if (terminate_) {
+      if (terminate_) {
+        break;
+      }
       continue;
     }
-    run_cb();
+    lock.unlock();
+    do_work(work);
+    lock.lock();
   }
 }