From 74224b25a57bb459deb0d5f59404a49d6c15e82c Mon Sep 17 00:00:00 2001
From: Miguel Pozo <pragma37@gmail.com>
Date: Wed, 5 Jun 2024 18:45:57 +0200
Subject: [PATCH] GPU: Add GPU_shader_batch_create_from_infos

This is the first commit of the several required to support
subprocess-based parallel compilation on OpenGL.
This provides the base API and implementation, and exposes the max
subprocesses setting on the UI, but it's not used by any code yet.

More information and the rest of the code can be found in #121925.

This one includes:
- A new `GPU_shader_batch` API that allows requesting the compilation
  of multiple shaders at once, allowing GPU backends to compile them in
  parallel and asynchronously without blocking the Blender UI.
- A virtual `ShaderCompiler` class that backends can use to add their
  own implementation.
- A `ShaderCompilerGeneric` class that implements synchronous/blocking
  compilation of batches for backends that don't have their own
  implementation yet.
- A `GLShaderCompiler` that supports parallel compilation using
  subprocesses.
- A new `BLI_subprocess` API, including IPC (required for the
  `GLShaderCompiler` implementation).
- The implementation of the subprocess program in
  `GPU_compilation_subprocess`.
- A new `Max Shader Compilation Subprocesses` option in
  `Preferences > System > Memory & Limits` to enable parallel shader
  compilation and the max number of subprocesses to allocate (each
  subprocess has a relatively high memory footprint).

Implementation Overview:
There's a single `GLShaderCompiler` shared by all OpenGL contexts.
This class stores a pool of up to `GCaps.max_parallel_compilations`
subprocesses that can be used for compilation.
Each subprocess has a shared memory pool used for sending the shader
source code from the main Blender process and for receiving the already
compiled shader binary from the subprocess. This is synchronized using
a series of shared semaphores.
The subprocesses maintain a shader cache on disk inside a
`BLENDER_SHADER_CACHE` folder at the OS temporary folder.
Shaders that fail to compile are compiled again locally to get proper
error reports.
Hung subprocesses are currently detected using a timeout of 30s.

Pull Request: https://projects.blender.org/blender/blender/pulls/122232
---
 release/datafiles/userdef/userdef_default.c   |   1 +
 scripts/startup/bl_ui/space_userpref.py       |   7 +
 source/blender/blenlib/BLI_subprocess.hh      | 157 +++++++
 source/blender/blenlib/CMakeLists.txt         |   2 +
 .../blender/blenlib/intern/BLI_subprocess.cc  | 420 ++++++++++++++++++
 source/blender/gpu/CMakeLists.txt             |   7 +
 source/blender/gpu/GPU_capabilities.hh        |   2 +
 .../blender/gpu/GPU_compilation_subprocess.hh |  13 +
 source/blender/gpu/GPU_shader.hh              |  25 ++
 source/blender/gpu/intern/gpu_capabilities.cc |   5 +
 .../gpu/intern/gpu_capabilities_private.hh    |   2 +
 .../blender/gpu/intern/gpu_context_private.hh |   2 +
 source/blender/gpu/intern/gpu_shader.cc       | 315 +++++++------
 .../blender/gpu/intern/gpu_shader_private.hh  |  42 +-
 source/blender/gpu/metal/mtl_context.mm       |   4 +
 source/blender/gpu/metal/mtl_shader.hh        |   2 +-
 source/blender/gpu/opengl/gl_backend.cc       |  10 +
 source/blender/gpu/opengl/gl_backend.hh       |   7 +
 .../gpu/opengl/gl_compilation_subprocess.cc   | 222 +++++++++
 .../gpu/opengl/gl_compilation_subprocess.hh   |  31 ++
 source/blender/gpu/opengl/gl_context.cc       |   2 +
 source/blender/gpu/opengl/gl_shader.cc        | 324 +++++++++++++-
 source/blender/gpu/opengl/gl_shader.hh        |  93 +++-
 source/blender/gpu/vulkan/vk_context.cc       |   4 +
 source/blender/gpu/vulkan/vk_shader.cc        |   2 +-
 source/blender/gpu/vulkan/vk_shader.hh        |   2 +-
 source/blender/makesdna/DNA_userdef_types.h   |   5 +-
 source/blender/makesrna/intern/rna_userdef.cc |   9 +
 source/creator/creator.cc                     |  10 +
 29 files changed, 1582 insertions(+), 145 deletions(-)
 create mode 100644 source/blender/blenlib/BLI_subprocess.hh
 create mode 100644 source/blender/blenlib/intern/BLI_subprocess.cc
 create mode 100644 source/blender/gpu/GPU_compilation_subprocess.hh
 create mode 100644 source/blender/gpu/opengl/gl_compilation_subprocess.cc
 create mode 100644 source/blender/gpu/opengl/gl_compilation_subprocess.hh

diff --git a/release/datafiles/userdef/userdef_default.c b/release/datafiles/userdef/userdef_default.c
index 030a5b2ae67..d634a1c4dae 100644
--- a/release/datafiles/userdef/userdef_default.c
+++ b/release/datafiles/userdef/userdef_default.c
@@ -114,6 +114,7 @@ const UserDef U_default = {
 #else
     .gpu_backend = GPU_BACKEND_OPENGL,
 #endif
+    .max_shader_compilation_subprocesses = 0,
 
     /** Initialized by: #BKE_studiolight_default. */
     .light_param = {{0}},
diff --git a/scripts/startup/bl_ui/space_userpref.py b/scripts/startup/bl_ui/space_userpref.py
index 3053b0477a0..8671cab04c5 100644
--- a/scripts/startup/bl_ui/space_userpref.py
+++ b/scripts/startup/bl_ui/space_userpref.py
@@ -746,6 +746,8 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel):
     bl_label = "Memory & Limits"
 
     def draw_centered(self, context, layout):
+        import sys
+
         prefs = context.preferences
         system = prefs.system
         edit = prefs.edit
@@ -772,6 +774,11 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel):
         col.prop(system, "vbo_time_out", text="VBO Time Out")
         col.prop(system, "vbo_collection_rate", text="Garbage Collection Rate")
 
+        if sys.platform != "darwin":
+            layout.separator()
+            col = layout.column()
+            col.prop(system, "max_shader_compilation_subprocesses")
+
 
 class USERPREF_PT_system_video_sequencer(SystemPanel, CenterAlignMixIn, Panel):
     bl_label = "Video Sequencer"
diff --git a/source/blender/blenlib/BLI_subprocess.hh b/source/blender/blenlib/BLI_subprocess.hh
new file mode 100644
index 00000000000..15cf2a2a715
--- /dev/null
+++ b/source/blender/blenlib/BLI_subprocess.hh
@@ -0,0 +1,157 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * API for subprocess creation and inter-process communication.
+ * NOTE: The use of subprocesses is generally discouraged.
+ * It should only be used for parallelizing workloads that can only happen on a per-process level
+ * due to OS or driver limitations.
+ * WARNING: The Subprocess API is only supported on Windows and Linux.
+ * Its use should always be inside `#if BLI_SUBPROCESS_SUPPORT` preprocessor directives.
+ */
+
+#if defined(_WIN32) || defined(__linux__)
+#  define BLI_SUBPROCESS_SUPPORT 1
+#else
+#  define BLI_SUBPROCESS_SUPPORT 0
+#endif
+
+#if BLI_SUBPROCESS_SUPPORT
+
+#  include "BLI_span.hh"
+#  include "BLI_string_ref.hh"
+#  include "BLI_sys_types.h"
+#  include "BLI_utility_mixins.hh"
+#  include <string>
+
+#  ifdef _WIN32
+typedef void *HANDLE;
+#  else
+#    include <semaphore.h>
+#  endif
+
+namespace blender {
+
+/**
+ * Creates a subprocess of the current Blender executable.
+ * WARNING: This class doesn't handle subprocess destruction.
+ * On Windows, subprocesses are closed automatically when the parent process finishes.
+ * On Linux, subprocesses become children of init or systemd when the parent process finishes.
+ */
+class BlenderSubprocess : NonCopyable {
+ private:
+#  ifdef _WIN32
+  HANDLE handle_ = nullptr;
+#  else
+  pid_t pid_ = 0;
+#  endif
+ public:
+  ~BlenderSubprocess();
+
+  /**
+   * Create a subprocess and pass the arguments to the main function.
+   * NOTE: The subprocess path is not passed as `argv[0]`.
+   * `args` only support alpha-numeric characters, underscores and hyphen-minus as a safety
+   * measure.
+   * WARNING: This function shouldn't be called again after it succeeds.
+   */
+  bool create(Span<StringRefNull> args);
+  /**
+   * Checks if the subprocess is still running.
+   * It always returns false if creation failed.
+   * It doesn't detect hung subprocesses.
+   */
+  bool is_running();
+};
+
+/**
+ * Creates or gets access to a block of memory that can be read and written by more than one
+ * process.
+ * WARNING: It doesn't have any built-in safety measure to prevent concurrent writes or
+ * read/writes. Synchronization should be handled with SharedSemaphores.
+ */
+class SharedMemory : NonCopyable {
+ private:
+  std::string name_;
+#  ifdef _WIN32
+  HANDLE handle_;
+#  else
+  int handle_;
+#  endif
+  void *data_;
+  size_t data_size_;
+  bool is_owner_;
+
+ public:
+  /**
+   * WARNING: The name should be a unique identifier across all processes (including
+   * multiple Blender instances). You should include the PID of the "owner" process in the name to
+   * prevent name collisions.
+   * `is_owner` should only be true for the first process that creates a SharedMemory with a given
+   * name.
+   * On Linux, the memory will become invalid across all processes after the owner destructor has
+   * run (Windows uses reference counting).
+   */
+  SharedMemory(std::string name, size_t size, bool is_owner);
+  ~SharedMemory();
+
+  /**
+   * Get a pointer to the shared memory block.
+   * WARNING: It can be null if creation failed, or invalid if the owner destructor has run.
+   * */
+  void *get_data()
+  {
+    return data_;
+  }
+
+  size_t get_size()
+  {
+    return data_size_;
+  }
+};
+
+/**
+ * Creates or gets access to a semaphore that can be used across multiple processes.
+ */
+class SharedSemaphore : NonCopyable {
+ private:
+  std::string name_;
+#  if defined(_WIN32)
+  HANDLE handle_;
+#  else
+  sem_t *handle_;
+#  endif
+  bool is_owner_;
+
+ public:
+  /**
+   * WARNING: The name should be a unique identifier across all processes (including
+   * multiple Blender instances). You should include the PID of the "owner" process in the name to
+   * prevent name collisions.
+   * `is_owner` should only be true for the last process that needs to read it (It's ok if the
+   * creator is not the owner).
+   * On Linux, the semaphore will become invalid across all processes after the owner destructor
+   * has run (Windows uses reference counting).
+   */
+  SharedSemaphore(std::string name, bool is_owner);
+  ~SharedSemaphore();
+
+  /* Increment the semaphore value. */
+  void increment();
+  /* Decrement the semaphore value (Blocks until the semaphore value is greater than 0). */
+  void decrement();
+  /**
+   * Try to decrement the semaphore value. Returns true on success.
+   * (Blocks until the semaphore value is greater than 0 or the wait time runs out).
+   */
+  bool try_decrement(int wait_ms = 0);
+};
+
+}  // namespace blender
+
+#endif
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 59cef66f814..76605bd9602 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -40,6 +40,7 @@ set(SRC
   intern/BLI_memiter.c
   intern/BLI_mempool.c
   intern/BLI_mmap.c
+  intern/BLI_subprocess.cc
   intern/BLI_timer.c
   intern/DLRB_tree.c
   intern/array_store.cc
@@ -365,6 +366,7 @@ set(SRC
   BLI_string_utils.hh
   BLI_struct_equality_utils.hh
   BLI_sub_frame.hh
+  BLI_subprocess.hh
   BLI_sys_types.h
   BLI_system.h
   BLI_task.h
diff --git a/source/blender/blenlib/intern/BLI_subprocess.cc b/source/blender/blenlib/intern/BLI_subprocess.cc
new file mode 100644
index 00000000000..bfb0dabbc5a
--- /dev/null
+++ b/source/blender/blenlib/intern/BLI_subprocess.cc
@@ -0,0 +1,420 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_subprocess.hh"
+
+#if BLI_SUBPROCESS_SUPPORT
+
+/* Based on https://github.com/jarikomppa/ipc (Unlicense) */
+
+#  include "BLI_assert.h"
+#  include "BLI_path_util.h"
+#  include "BLI_string_utf8.h"
+#  include <iostream>
+
+namespace blender {
+
+/* Only alpha-numerics, '_' and '-' are valid. NOTE: `isalnum` is UB for negative `char`. */
+static bool check_arguments_are_valid(Span<StringRefNull> args)
+{
+  for (StringRefNull arg : args) {
+    for (const char c : arg) {
+      if (!std::isalnum(static_cast<unsigned char>(c)) && !ELEM(c, '_', '-')) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace blender
+
+#  ifdef _WIN32
+
+#    define WIN32_LEAN_AND_MEAN
+#    include <comdef.h>
+#    include <windows.h>
+
+namespace blender {
+
+static void print_last_error(const char *function, const char *msg)
+{
+  DWORD error_code = GetLastError();
+  std::cerr << "ERROR (" << error_code << "): " << function << " : " << msg << std::endl;
+}
+
+static void check(bool result, const char *function, const char *msg)
+{
+  if (!result) {
+    print_last_error(function, msg);
+    BLI_assert(false);
+  }
+}
+
+#    define CHECK(result) check((result), __func__, #result)
+#    undef ERROR /* Defined in wingdi.h */
+#    define ERROR(msg) check(false, __func__, msg)
+
+bool BlenderSubprocess::create(Span<StringRefNull> args)
+{
+  BLI_assert(handle_ == nullptr);
+
+  if (!check_arguments_are_valid(args)) {
+    BLI_assert(false);
+    return false;
+  }
+
+  wchar_t path[FILE_MAX];
+  if (!GetModuleFileNameW(nullptr, path, FILE_MAX)) {
+    ERROR("GetModuleFileNameW");
+    return false;
+  }
+
+  std::string args_str;
+  for (StringRefNull arg : args) {
+    args_str += arg + " ";
+  }
+
+  const int length_wc = MultiByteToWideChar(
+      CP_UTF8, 0, args_str.c_str(), args_str.length(), nullptr, 0);
+  std::wstring w_args(length_wc, 0);
+  CHECK(MultiByteToWideChar(
+      CP_UTF8, 0, args_str.c_str(), args_str.length(), w_args.data(), length_wc));
+
+  STARTUPINFOW startup_info = {0};
+  startup_info.cb = sizeof(startup_info);
+  PROCESS_INFORMATION process_info = {0};
+  if (!CreateProcessW(path,
+                      /** Use data() since lpCommandLine must be mutable. */
+                      w_args.data(),
+                      nullptr,
+                      nullptr,
+                      false,
+                      0,
+                      nullptr,
+                      nullptr,
+                      &startup_info,
+                      &process_info))
+  {
+    ERROR("CreateProcessW");
+    return false;
+  }
+
+  handle_ = process_info.hProcess;
+  CHECK(CloseHandle(process_info.hThread));
+
+  return true;
+}
+
+BlenderSubprocess::~BlenderSubprocess()
+{
+  if (handle_) {
+    CHECK(CloseHandle(handle_));
+  }
+}
+
+bool BlenderSubprocess::is_running()
+{
+  if (!handle_) {
+    return false;
+  }
+
+  DWORD exit_code = 0;
+  if (GetExitCodeProcess(handle_, &exit_code)) {
+    return exit_code == STILL_ACTIVE;
+  }
+
+  ERROR("GetExitCodeProcess");
+  /* Assume the process is still running. */
+  return true;
+}
+
+SharedMemory::SharedMemory(std::string name, size_t size, bool is_owner)
+    : name_(name), is_owner_(is_owner)
+{
+  if (is_owner) {
+    handle_ = CreateFileMappingA(
+        INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0, size, name.c_str());
+    CHECK(handle_ /*Create*/);
+  }
+  else {
+    handle_ = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, name.c_str());
+    CHECK(handle_ /*Open*/);
+  }
+
+  if (handle_) {
+    data_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, size);
+    CHECK(data_);
+  }
+  else {
+    data_ = nullptr;
+  }
+
+  data_size_ = data_ ? size : 0;
+}
+
+SharedMemory::~SharedMemory()
+{
+  if (data_) {
+    CHECK(UnmapViewOfFile(data_));
+  }
+  if (handle_) {
+    CHECK(CloseHandle(handle_));
+  }
+}
+
+SharedSemaphore::SharedSemaphore(std::string name, bool is_owner)
+    : name_(name), is_owner_(is_owner)
+{
+  handle_ = CreateSemaphoreA(nullptr, 0, 1, name.c_str());
+  CHECK(handle_);
+}
+
+SharedSemaphore::~SharedSemaphore()
+{
+  if (handle_) {
+    CHECK(CloseHandle(handle_));
+  }
+}
+
+void SharedSemaphore::increment()
+{
+  CHECK(ReleaseSemaphore(handle_, 1, nullptr));
+}
+
+void SharedSemaphore::decrement()
+{
+  CHECK(WaitForSingleObject(handle_, INFINITE) != WAIT_FAILED);
+}
+
+bool SharedSemaphore::try_decrement(int wait_ms)
+{
+  DWORD result = WaitForSingleObject(handle_, wait_ms);
+  CHECK(result != WAIT_FAILED);
+  return result == WAIT_OBJECT_0;
+}
+
+}  // namespace blender
+
+#  elif defined(__linux__)
+
+#    include "BLI_time.h"
+#    include "BLI_vector.hh"
+#    include <fcntl.h>
+#    include <linux/limits.h>
+#    include <stdlib.h>
+#    include <sys/mman.h>
+#    include <sys/stat.h>
+#    include <unistd.h>
+#    include <wait.h>
+
+namespace blender {
+
+static void print_last_error(const char *function, const char *msg)
+{
+  int error_code = errno;
+  std::string error_msg = "ERROR (" + std::to_string(error_code) + "): " + function + " : " + msg;
+  perror(error_msg.c_str());
+}
+
+static void check(int result, const char *function, const char *msg)
+{
+  if (result == -1) {
+    print_last_error(function, msg);
+    BLI_assert(false);
+  }
+}
+
+#    define CHECK(result) check((result), __func__, #result)
+#    define ERROR(msg) check(-1, __func__, msg)
+
+bool BlenderSubprocess::create(Span<StringRefNull> args)
+{
+  if (!check_arguments_are_valid(args)) {
+    BLI_assert(false);
+    return false;
+  }
+
+  char path[PATH_MAX + 1];
+  /* `readlink` returns a signed `ssize_t`: -1 on error, otherwise the number of bytes written. */
+  const ssize_t len = readlink("/proc/self/exe", path, PATH_MAX);
+  if (len == -1) {
+    ERROR("readlink");
+    return false;
+  }
+  /* readlink doesn't append a null terminator. */
+  path[len] = '\0';
+
+  Vector<char *> char_args;
+  for (StringRefNull arg : args) {
+    char_args.append((char *)arg.data());
+  }
+  char_args.append(nullptr);
+
+  pid_ = fork();
+  if (pid_ == -1) {
+    ERROR("fork");
+    return false;
+  }
+  else if (pid_ > 0) {
+    return true;
+  }
+
+  /* Child process initialization. */
+  execv(path, char_args.data());
+
+  ERROR("execv");
+  exit(errno);
+
+  return false;
+}
+
+BlenderSubprocess::~BlenderSubprocess() {}
+
+bool BlenderSubprocess::is_running()
+{
+  /* `pid_` is 0 until `create` forks and -1 after a failed fork or once the child was reaped.
+   * Never pass those to `waitpid`: 0 would wait on any child of the process group. */
+  if (pid_ <= 0) {
+    return false;
+  }
+  pid_t result = waitpid(pid_, nullptr, WNOHANG);
+  CHECK(result);
+  if (result == pid_) {
+    pid_ = -1;
+    return false;
+  }
+
+  return true;
+}
+
+SharedMemory::SharedMemory(std::string name, size_t size, bool is_owner)
+    : name_(name), is_owner_(is_owner)
+{
+  constexpr mode_t user_mode = S_IRUSR | S_IWUSR;
+  if (is_owner) {
+    handle_ = shm_open(name.c_str(), O_CREAT | O_EXCL | O_RDWR, user_mode);
+    CHECK(handle_);
+    if (handle_ != -1) {
+      if (ftruncate(handle_, size) == -1) {
+        ERROR("ftruncate");
+        CHECK(close(handle_));
+        handle_ = -1;
+      }
+    }
+  }
+  else {
+    handle_ = shm_open(name.c_str(), O_RDWR, user_mode);
+    CHECK(handle_);
+  }
+
+  if (handle_ != -1) {
+    data_ = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, handle_, 0);
+    if (data_ == MAP_FAILED) {
+      ERROR("mmap");
+      data_ = nullptr;
+    }
+    /* File descriptor can close after mmap. */
+    CHECK(close(handle_));
+  }
+  else {
+    data_ = nullptr;
+  }
+
+  data_size_ = data_ ? size : 0;
+}
+
+SharedMemory::~SharedMemory()
+{
+  if (data_) {
+    CHECK(munmap(data_, data_size_));
+    if (is_owner_) {
+      CHECK(shm_unlink(name_.c_str()));
+    }
+  }
+}
+
+SharedSemaphore::SharedSemaphore(std::string name, bool is_owner)
+    : name_(name), is_owner_(is_owner)
+{
+  handle_ = sem_open(name.c_str(), O_CREAT, S_IRUSR | S_IWUSR, 0);
+  if (handle_ == SEM_FAILED) { /* The POSIX failure value, it's not required to be null. */
+    ERROR("sem_open");
+    handle_ = nullptr; /* The destructor only closes non-null handles. */
+  }
+}
+
+SharedSemaphore::~SharedSemaphore()
+{
+  if (handle_) {
+    CHECK(sem_close(handle_));
+    if (is_owner_) {
+      CHECK(sem_unlink(name_.c_str()));
+    }
+  }
+}
+
+void SharedSemaphore::increment()
+{
+  CHECK(sem_post(handle_));
+}
+
+void SharedSemaphore::decrement()
+{
+  while (true) {
+    int result = sem_wait(handle_);
+    if (result == 0) {
+      return;
+    }
+    else if (errno != EINTR) {
+      ERROR("sem_wait");
+      return;
+    }
+    /* Try again if interrupted by handler. */
+  }
+}
+
+bool SharedSemaphore::try_decrement(int wait_ms)
+{
+  if (wait_ms == 0) {
+    if (sem_trywait(handle_) == 0) {
+      return true;
+    }
+    else if (errno == EINVAL) {
+      ERROR("sem_trywait");
+    }
+    return false;
+  }
+
+  timespec time;
+  if (clock_gettime(CLOCK_REALTIME, &time) == -1) {
+    ERROR("clock_gettime");
+    BLI_time_sleep_ms(wait_ms);
+    return try_decrement(0);
+  }
+
+  /* Build the absolute deadline: 1ms is 1e6ns, and `tv_nsec` has to stay below 1e9 (otherwise
+   * `sem_timedwait` fails with `EINVAL`), so the overflow is carried into `tv_sec`. */
+  const int64_t nsec = int64_t(time.tv_nsec) + int64_t(wait_ms % 1000) * 1000000;
+  time.tv_sec += wait_ms / 1000 + nsec / 1000000000;
+  time.tv_nsec = nsec % 1000000000;
+  while (true) {
+    if (sem_timedwait(handle_, &time) == 0) {
+      return true;
+    }
+    else if (errno != EINTR) {
+      if (errno != ETIMEDOUT) {
+        ERROR("sem_timedwait");
+      }
+      return false;
+    }
+    /* Try again if interrupted by handler. */
+  }
+}
+
+}  // namespace blender
+
+#  endif
+
+#endif
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 199beddb9f5..7d47d898ff7 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -88,6 +88,7 @@ set(SRC
   GPU_capabilities.hh
   GPU_common.hh
   GPU_common_types.hh
+  GPU_compilation_subprocess.hh
   GPU_compute.hh
   GPU_context.hh
   GPU_debug.hh
@@ -150,6 +151,7 @@ set(OPENGL_SRC
 
   opengl/gl_backend.cc
   opengl/gl_batch.cc
+  opengl/gl_compilation_subprocess.cc
   opengl/gl_compute.cc
   opengl/gl_context.cc
   opengl/gl_debug.cc
@@ -171,6 +173,7 @@ set(OPENGL_SRC
 
   opengl/gl_backend.hh
   opengl/gl_batch.hh
+  opengl/gl_compilation_subprocess.hh
   opengl/gl_compute.hh
   opengl/gl_context.hh
   opengl/gl_debug.hh
@@ -841,6 +844,10 @@ target_link_libraries(bf_gpu PUBLIC
   bf_gpu_shaders
 )
 
+if(WITH_OPENGL_BACKEND AND UNIX AND NOT APPLE)  # macOS has no librt (and no subprocess support).
+  target_link_libraries(bf_gpu PUBLIC rt)
+endif()
+
 if(WITH_OPENCOLORIO)
   target_link_libraries(bf_gpu PUBLIC bf_ocio_shaders)
 endif()
diff --git a/source/blender/gpu/GPU_capabilities.hh b/source/blender/gpu/GPU_capabilities.hh
index d8df48b896c..844f1b196af 100644
--- a/source/blender/gpu/GPU_capabilities.hh
+++ b/source/blender/gpu/GPU_capabilities.hh
@@ -40,6 +40,8 @@ const char *GPU_extension_get(int i);
 
 int GPU_texture_size_with_limit(int res);
 
+bool GPU_use_parallel_compilation();
+
 bool GPU_mip_render_workaround();
 bool GPU_depth_blitting_workaround();
 bool GPU_use_main_context_workaround();
diff --git a/source/blender/gpu/GPU_compilation_subprocess.hh b/source/blender/gpu/GPU_compilation_subprocess.hh
new file mode 100644
index 00000000000..aa8040ec39a
--- /dev/null
+++ b/source/blender/gpu/GPU_compilation_subprocess.hh
@@ -0,0 +1,13 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_subprocess.hh"
+
+#if defined(WITH_OPENGL_BACKEND) && BLI_SUBPROCESS_SUPPORT
+/* NOTE: `BLI_SUBPROCESS_SUPPORT` is always defined (0 or 1), so its value must be tested. */
+void GPU_compilation_subprocess_run(const char *subprocess_name);
+
+#endif
diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh
index 557134ed0de..f11594665b3 100644
--- a/source/blender/gpu/GPU_shader.hh
+++ b/source/blender/gpu/GPU_shader.hh
@@ -10,6 +10,8 @@
 
 #pragma once
 
+#include "BLI_span.hh"
+#include "BLI_vector.hh"
 #include "GPU_shader_builtin.hh"
 
 namespace blender::gpu {
@@ -59,6 +61,29 @@ const GPUShaderCreateInfo *GPU_shader_create_info_get(const char *info_name);
  */
 bool GPU_shader_create_info_check_error(const GPUShaderCreateInfo *_info, char r_error[128]);
 
+using BatchHandle = int64_t;
+/**
+ * Request the creation of multiple shaders at once, allowing the backend to use multithreaded
+ * compilation. Returns a handle that can be used to poll if all shaders have been compiled, and to
+ * retrieve the compiled shaders.
+ * NOTE: This function is asynchronous on OpenGL, but it's blocking on Vulkan and Metal.
+ * WARNING: The GPUShaderCreateInfo pointers should be valid until `GPU_shader_batch_finalize` has
+ * returned.
+ */
+BatchHandle GPU_shader_batch_create_from_infos(blender::Span<const GPUShaderCreateInfo *> infos);
+/**
+ * Returns true if all the shaders from the batch have finished their compilation.
+ */
+bool GPU_shader_batch_is_ready(BatchHandle handle);
+/**
+ * Retrieve the compiled shaders, in the same order as the `GPUShaderCreateInfo`s.
+ * If the compilation has not finished yet, this call will block the thread until all the shaders
+ * are ready.
+ * Shaders with compilation errors are returned as null pointers.
+ * WARNING: The handle will be invalidated by this call, you can't request the same batch twice.
+ */
+blender::Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle);
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc
index ea416ca44f5..a4d743c1453 100644
--- a/source/blender/gpu/intern/gpu_capabilities.cc
+++ b/source/blender/gpu/intern/gpu_capabilities.cc
@@ -131,6 +131,11 @@ int GPU_max_samplers()
   return GCaps.max_samplers;
 }
 
+bool GPU_use_parallel_compilation()
+{
+  return GCaps.max_parallel_compilations > 0;
+}
+
 bool GPU_mip_render_workaround()
 {
   return GCaps.mip_render_workaround;
diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh
index e373bcd3039..293dc7f3131 100644
--- a/source/blender/gpu/intern/gpu_capabilities_private.hh
+++ b/source/blender/gpu/intern/gpu_capabilities_private.hh
@@ -51,6 +51,8 @@ struct GPUCapabilities {
   bool texture_view_support = true;
   bool stencil_export_support = false;
 
+  int max_parallel_compilations = 0;
+
   /* OpenGL related workarounds. */
   bool mip_render_workaround = false;
   bool depth_blitting_workaround = false;
diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh
index 1ba2e47b295..208d672c877 100644
--- a/source/blender/gpu/intern/gpu_context_private.hh
+++ b/source/blender/gpu/intern/gpu_context_private.hh
@@ -35,6 +35,8 @@ class Context {
   StateManager *state_manager = nullptr;
   Immediate *imm = nullptr;
 
+  ShaderCompiler *compiler = nullptr;
+
   /**
    * All 4 window frame-buffers.
    * None of them are valid in an off-screen context.
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index de58d473ed0..574a7bbefe0 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -290,130 +290,7 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
 {
   using namespace blender::gpu::shader;
   const ShaderCreateInfo &info = *reinterpret_cast<const ShaderCreateInfo *>(_info);
-
-  const_cast<ShaderCreateInfo &>(info).finalize();
-
-  GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);
-
-  const std::string error = info.check_error();
-  if (!error.empty()) {
-    std::cerr << error.c_str() << "\n";
-    BLI_assert(false);
-  }
-
-  Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());
-  shader->init(info);
-  shader->specialization_constants_init(info);
-
-  std::string defines = shader->defines_declare(info);
-  std::string resources = shader->resources_declare(info);
-
-  if (info.legacy_resource_location_ == false) {
-    defines += "#define USE_GPU_SHADER_CREATE_INFO\n";
-  }
-
-  Vector<const char *> typedefs;
-  if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) {
-    typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str());
-  }
-  if (!info.typedef_source_generated.empty()) {
-    typedefs.append(info.typedef_source_generated.c_str());
-  }
-  for (auto filename : info.typedef_sources_) {
-    typedefs.append(gpu_shader_dependency_get_source(filename).c_str());
-  }
-
-  if (!info.vertex_source_.is_empty()) {
-    auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_);
-    std::string interface = shader->vertex_interface_declare(info);
-
-    Vector<const char *> sources;
-    standard_defines(sources);
-    sources.append("#define GPU_VERTEX_SHADER\n");
-    if (!info.geometry_source_.is_empty()) {
-      sources.append("#define USE_GEOMETRY_SHADER\n");
-    }
-    sources.append(defines.c_str());
-    sources.extend(typedefs);
-    sources.append(resources.c_str());
-    sources.append(interface.c_str());
-    sources.extend(code);
-    sources.extend(info.dependencies_generated);
-    sources.append(info.vertex_source_generated.c_str());
-
-    shader->vertex_shader_from_glsl(sources);
-  }
-
-  if (!info.fragment_source_.is_empty()) {
-    auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_);
-    std::string interface = shader->fragment_interface_declare(info);
-
-    Vector<const char *> sources;
-    standard_defines(sources);
-    sources.append("#define GPU_FRAGMENT_SHADER\n");
-    if (!info.geometry_source_.is_empty()) {
-      sources.append("#define USE_GEOMETRY_SHADER\n");
-    }
-    sources.append(defines.c_str());
-    sources.extend(typedefs);
-    sources.append(resources.c_str());
-    sources.append(interface.c_str());
-    sources.extend(code);
-    sources.extend(info.dependencies_generated);
-    sources.append(info.fragment_source_generated.c_str());
-
-    shader->fragment_shader_from_glsl(sources);
-  }
-
-  if (!info.geometry_source_.is_empty()) {
-    auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_);
-    std::string layout = shader->geometry_layout_declare(info);
-    std::string interface = shader->geometry_interface_declare(info);
-
-    Vector<const char *> sources;
-    standard_defines(sources);
-    sources.append("#define GPU_GEOMETRY_SHADER\n");
-    sources.append(defines.c_str());
-    sources.extend(typedefs);
-    sources.append(resources.c_str());
-    sources.append(layout.c_str());
-    sources.append(interface.c_str());
-    sources.append(info.geometry_source_generated.c_str());
-    sources.extend(code);
-
-    shader->geometry_shader_from_glsl(sources);
-  }
-
-  if (!info.compute_source_.is_empty()) {
-    auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_);
-    std::string layout = shader->compute_layout_declare(info);
-
-    Vector<const char *> sources;
-    standard_defines(sources);
-    sources.append("#define GPU_COMPUTE_SHADER\n");
-    sources.append(defines.c_str());
-    sources.extend(typedefs);
-    sources.append(resources.c_str());
-    sources.append(layout.c_str());
-    sources.extend(code);
-    sources.extend(info.dependencies_generated);
-    sources.append(info.compute_source_generated.c_str());
-
-    shader->compute_shader_from_glsl(sources);
-  }
-
-  if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) {
-    shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_);
-  }
-
-  if (!shader->finalize(&info)) {
-    delete shader;
-    GPU_debug_group_end();
-    return nullptr;
-  }
-
-  GPU_debug_group_end();
-  return wrap(shader);
+  return wrap(Context::get()->compiler->compile(info, false));
 }
 
 GPUShader *GPU_shader_create_from_python(const char *vertcode,
@@ -450,6 +327,25 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode,
   return sh;
 }
 
+BatchHandle GPU_shader_batch_create_from_infos(Span<const GPUShaderCreateInfo *> infos)
+{
+  using namespace blender::gpu::shader;
+  Span<const ShaderCreateInfo *> &infos_ = reinterpret_cast<Span<const ShaderCreateInfo *> &>(
+      infos);
+  return Context::get()->compiler->batch_compile(infos_);
+}
+
+bool GPU_shader_batch_is_ready(BatchHandle handle)
+{
+  return Context::get()->compiler->batch_is_ready(handle);
+}
+
+Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle)
+{
+  Vector<Shader *> result = Context::get()->compiler->batch_finalize(handle);
+  return reinterpret_cast<Vector<GPUShader *> &>(result);
+}
+
 void GPU_shader_compile_static()
 {
   printf("Compiling all static GPU shaders. This process takes a while.\n");
@@ -880,4 +776,175 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear)
 
 /** \} */
 
+/* -------------------------------------------------------------------- */
+/** \name ShaderCompiler
+ * \{ */
+
+Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
+{
+  using namespace blender::gpu::shader;
+  const_cast<ShaderCreateInfo &>(info).finalize(); /* Mutates `info` despite the const ref. */
+  /* Assemble per-stage GLSL from `info` into a new backend shader; nullptr if finalize fails. */
+  GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);
+
+  const std::string error = info.check_error();
+  if (!error.empty()) {
+    std::cerr << error.c_str() << "\n";
+    BLI_assert(false); /* No early return: the code below still runs if this does not abort. */
+  }
+  /* Backend shader object that receives the source lists built below. */
+  Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());
+  shader->init(info, is_batch_compilation);
+  shader->specialization_constants_init(info);
+
+  std::string defines = shader->defines_declare(info);
+  std::string resources = shader->resources_declare(info);
+
+  if (info.legacy_resource_location_ == false) {
+    defines += "#define USE_GPU_SHADER_CREATE_INFO\n";
+  }
+  /* Shared type definitions, inserted into the source list of every stage. */
+  Vector<const char *> typedefs;
+  if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) {
+    typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str());
+  }
+  if (!info.typedef_source_generated.empty()) {
+    typedefs.append(info.typedef_source_generated.c_str());
+  }
+  for (auto filename : info.typedef_sources_) {
+    typedefs.append(gpu_shader_dependency_get_source(filename).c_str());
+  }
+  /* Vertex stage. */
+  if (!info.vertex_source_.is_empty()) {
+    auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_);
+    std::string interface = shader->vertex_interface_declare(info);
+
+    Vector<const char *> sources;
+    standard_defines(sources);
+    sources.append("#define GPU_VERTEX_SHADER\n");
+    if (!info.geometry_source_.is_empty()) {
+      sources.append("#define USE_GEOMETRY_SHADER\n");
+    }
+    sources.append(defines.c_str());
+    sources.extend(typedefs);
+    sources.append(resources.c_str());
+    sources.append(interface.c_str());
+    sources.extend(code);
+    sources.extend(info.dependencies_generated);
+    sources.append(info.vertex_source_generated.c_str());
+
+    shader->vertex_shader_from_glsl(sources);
+  }
+  /* Fragment stage. */
+  if (!info.fragment_source_.is_empty()) {
+    auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_);
+    std::string interface = shader->fragment_interface_declare(info);
+
+    Vector<const char *> sources;
+    standard_defines(sources);
+    sources.append("#define GPU_FRAGMENT_SHADER\n");
+    if (!info.geometry_source_.is_empty()) {
+      sources.append("#define USE_GEOMETRY_SHADER\n");
+    }
+    sources.append(defines.c_str());
+    sources.extend(typedefs);
+    sources.append(resources.c_str());
+    sources.append(interface.c_str());
+    sources.extend(code);
+    sources.extend(info.dependencies_generated);
+    sources.append(info.fragment_source_generated.c_str());
+
+    shader->fragment_shader_from_glsl(sources);
+  }
+  /* Geometry stage: generated source precedes the resolved code, no generated dependencies. */
+  if (!info.geometry_source_.is_empty()) {
+    auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_);
+    std::string layout = shader->geometry_layout_declare(info);
+    std::string interface = shader->geometry_interface_declare(info);
+
+    Vector<const char *> sources;
+    standard_defines(sources);
+    sources.append("#define GPU_GEOMETRY_SHADER\n");
+    sources.append(defines.c_str());
+    sources.extend(typedefs);
+    sources.append(resources.c_str());
+    sources.append(layout.c_str());
+    sources.append(interface.c_str());
+    sources.append(info.geometry_source_generated.c_str());
+    sources.extend(code);
+
+    shader->geometry_shader_from_glsl(sources);
+  }
+  /* Compute stage. */
+  if (!info.compute_source_.is_empty()) {
+    auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_);
+    std::string layout = shader->compute_layout_declare(info);
+
+    Vector<const char *> sources;
+    standard_defines(sources);
+    sources.append("#define GPU_COMPUTE_SHADER\n");
+    sources.append(defines.c_str());
+    sources.extend(typedefs);
+    sources.append(resources.c_str());
+    sources.append(layout.c_str());
+    sources.extend(code);
+    sources.extend(info.dependencies_generated);
+    sources.append(info.compute_source_generated.c_str());
+
+    shader->compute_shader_from_glsl(sources);
+  }
+
+  if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) {
+    shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_);
+  }
+  /* Backend finalization; on failure the shader is destroyed and nullptr is returned. */
+  if (!shader->finalize(&info)) {
+    delete shader;
+    GPU_debug_group_end();
+    return nullptr;
+  }
+
+  GPU_debug_group_end();
+  return shader;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name ShaderCompilerGeneric
+ * \{ */
+
+ShaderCompilerGeneric::~ShaderCompilerGeneric()
+{
+  /* Every batch added by `batch_compile` must have been removed by `batch_finalize`. */
+  BLI_assert(batches.is_empty());
+}
+
+BatchHandle ShaderCompilerGeneric::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
+{
+  BatchHandle handle = next_batch_handle++;
+  batches.add(handle, {{}, infos, true}); /* {shaders, infos, is_ready}: ready right away. */
+  Batch &batch = batches.lookup(handle);
+  batch.shaders.reserve(infos.size());
+  for (const shader::ShaderCreateInfo *info : infos) {
+    batch.shaders.append(compile(*info, true)); /* Appends nullptr when `compile` fails. */
+  }
+  return handle;
+}
+
+bool ShaderCompilerGeneric::batch_is_ready(BatchHandle handle)
+{
+  /* Batches are flagged as ready on creation: `batch_compile` compiles them before returning. */
+  return batches.lookup(handle).is_ready;
+}
+
+Vector<Shader *> ShaderCompilerGeneric::batch_finalize(BatchHandle &handle)
+{
+  Vector<Shader *> shaders = batches.pop(handle).shaders; /* Removes the batch from the map. */
+  handle = 0; /* Invalidate the caller's handle, the batch no longer exists. */
+  return shaders;
+}
+
+/** \} */
+
 }  // namespace blender::gpu
diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh
index ac99f8551d3..23d7cad9ba7 100644
--- a/source/blender/gpu/intern/gpu_shader_private.hh
+++ b/source/blender/gpu/intern/gpu_shader_private.hh
@@ -17,6 +17,7 @@
 
 #include "BLI_map.hh"
 
+#include <mutex>
 #include <string>
 
 namespace blender {
@@ -77,7 +78,9 @@ class Shader {
   Shader(const char *name);
   virtual ~Shader();
 
-  virtual void init(const shader::ShaderCreateInfo &info) = 0;
+  /* `is_batch_compilation` is true when the shader is being compiled as part of a
+   * `GPU_shader_batch`. Backends that use the `ShaderCompilerGeneric` can ignore it. */
+  virtual void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) = 0;
 
   virtual void vertex_shader_from_glsl(MutableSpan<const char *> sources) = 0;
   virtual void geometry_shader_from_glsl(MutableSpan<const char *> sources) = 0;
@@ -160,6 +163,43 @@ static inline const Shader *unwrap(const GPUShader *vert)
   return reinterpret_cast<const Shader *>(vert);
 }
 
+class ShaderCompiler {
+ protected:
+  struct Sources {
+    std::string vert;
+    std::string geom;
+    std::string frag;
+    std::string comp;
+  };
+
+ public:
+  virtual ~ShaderCompiler() = default; /* Contexts `delete` compilers through this base type. */
+  Shader *compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation);
+  virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) = 0;
+  virtual bool batch_is_ready(BatchHandle handle) = 0;
+  virtual Vector<Shader *> batch_finalize(BatchHandle &handle) = 0;
+};
+
+/* Fallback compiler for backends without a dedicated implementation (Vulkan and Metal):
+ * every batch is compiled synchronously inside `batch_compile`. */
+class ShaderCompilerGeneric : public ShaderCompiler {
+ private:
+  struct Batch {
+    Vector<Shader *> shaders;
+    Vector<const shader::ShaderCreateInfo *> infos;
+    bool is_ready = false;
+  };
+  BatchHandle next_batch_handle = 1;
+  Map<BatchHandle, Batch> batches;
+
+ public:
+  ~ShaderCompilerGeneric();
+
+  BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
+  bool batch_is_ready(BatchHandle handle) override;
+  Vector<Shader *> batch_finalize(BatchHandle &handle) override;
+};
+
 enum class Severity {
   Unknown,
   Warning,
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 0bb7dbf6806..b4d4362ae3e 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -267,6 +267,8 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context)
 
   /* Initialize samplers. */
   this->sampler_state_cache_init();
+
+  compiler = new ShaderCompilerGeneric();
 }
 
 MTLContext::~MTLContext()
@@ -369,6 +371,8 @@ MTLContext::~MTLContext()
   if (this->device) {
     [this->device release];
   }
+
+  delete compiler;
 }
 
 void MTLContext::begin_frame()
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index 3a9cf2089bc..8d815724989 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -277,7 +277,7 @@ class MTLShader : public Shader {
             NSString *fragment_function_name_);
   ~MTLShader();
 
-  void init(const shader::ShaderCreateInfo & /*info*/) override {}
+  void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {}
 
   /* Assign GLSL source. */
   void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc
index f5b2f1e0f5a..7f1efc3ec29 100644
--- a/source/blender/gpu/opengl/gl_backend.cc
+++ b/source/blender/gpu/opengl/gl_backend.cc
@@ -10,6 +10,9 @@
 #if defined(WIN32)
 #  include "BLI_winstuff.h"
 #endif
+#include "BLI_subprocess.hh"
+#include "BLI_threads.h"
+#include "DNA_userdef_types.h"
 
 #include "gpu_capabilities_private.hh"
 #include "gpu_platform_private.hh"
@@ -594,6 +597,13 @@ void GLBackend::capabilities_init()
 
   detect_workarounds();
 
+#if BLI_SUBPROCESS_SUPPORT
+  GCaps.max_parallel_compilations = std::min(int(U.max_shader_compilation_subprocesses),
+                                             BLI_system_thread_count());
+#else
+  GCaps.max_parallel_compilations = 0;
+#endif
+
   /* Disable this feature entirely when not debugging. */
   if ((G.debug & G_DEBUG_GPU) == 0) {
     GLContext::debug_layer_support = false;
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 5d7f1189b44..4635de1d402 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -39,6 +39,8 @@ class GLBackend : public GPUBackend {
   renderdoc::api::Renderdoc renderdoc_;
 #endif
 
+  GLShaderCompiler compiler_;
+
  public:
   GLBackend()
   {
@@ -64,6 +66,11 @@ class GLBackend : public GPUBackend {
     return static_cast<GLBackend *>(GPUBackend::get());
   }
 
+  GLShaderCompiler *get_compiler()
+  {
+    return &compiler_; /* Member of the backend; `GLContext` stores this pointer. */
+  }
+
   void samplers_update() override
   {
     GLTexture::samplers_update();
diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.cc b/source/blender/gpu/opengl/gl_compilation_subprocess.cc
new file mode 100644
index 00000000000..5cdf88f804d
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_compilation_subprocess.cc
@@ -0,0 +1,222 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gl_compilation_subprocess.hh"
+
+#if BLI_SUBPROCESS_SUPPORT
+
+#  include "BKE_appdir.hh"
+#  include "BLI_fileops.hh"
+#  include "BLI_hash.hh"
+#  include "BLI_path_util.h"
+#  include "CLG_log.h"
+#  include "GHOST_C-api.h"
+#  include "GPU_context.hh"
+#  include "GPU_init_exit.hh"
+#  include <epoxy/gl.h>
+#  include <iostream>
+#  include <string>
+
+#  ifndef _WIN32
+#    include <unistd.h>
+#  endif
+
+namespace blender::gpu {
+
+class SubprocessShader {
+  GLuint vert_ = 0;
+  GLuint frag_ = 0;
+  GLuint program_ = 0;
+  bool success_ = false;
+
+ public:
+  SubprocessShader(const char *vert_src, const char *frag_src)
+  {
+    GLint status;
+    /* Vertex stage. Any early return below leaves `success_` false. */
+    vert_ = glCreateShader(GL_VERTEX_SHADER);
+    glShaderSource(vert_, 1, &vert_src, nullptr);
+    glCompileShader(vert_);
+    glGetShaderiv(vert_, GL_COMPILE_STATUS, &status);
+    if (!status) {
+      return;
+    }
+    /* Fragment stage. */
+    frag_ = glCreateShader(GL_FRAGMENT_SHADER);
+    glShaderSource(frag_, 1, &frag_src, nullptr);
+    glCompileShader(frag_);
+    glGetShaderiv(frag_, GL_COMPILE_STATUS, &status);
+    if (!status) {
+      return;
+    }
+    /* Link both stages into the program whose binary `get_binary` retrieves. */
+    program_ = glCreateProgram();
+    glAttachShader(program_, vert_);
+    glAttachShader(program_, frag_);
+    glLinkProgram(program_);
+    glGetProgramiv(program_, GL_LINK_STATUS, &status);
+    if (!status) {
+      return;
+    }
+    success_ = true;
+  }
+
+  ~SubprocessShader()
+  {
+    glDeleteShader(vert_);
+    glDeleteShader(frag_);
+    glDeleteProgram(program_);
+  }
+  /* Write the header (and binary, on success) into `memory`; size 0 means no usable binary. */
+  ShaderBinaryHeader *get_binary(void *memory)
+  {
+    ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(memory);
+    bin->format = 0;
+    bin->size = 0;
+    if (success_) {
+      GLint length = 0;
+      glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &length);
+      /* Publish the size only when the binary fits, so oversized programs read as failures. */
+      if (length + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) {
+        glGetProgramBinary(program_, length, nullptr, &bin->format, &bin->data_start);
+        bin->size = length;
+      }
+    }
+    return bin;
+  }
+};
+
+/* Check that the driver accepts the binary stored in `binary` (a `ShaderBinaryHeader`). */
+static bool validate_binary(void *binary)
+{
+  ShaderBinaryHeader *bin = reinterpret_cast<ShaderBinaryHeader *>(binary);
+  GLuint program = glCreateProgram();
+  glProgramBinary(program, bin->format, &bin->data_start, bin->size);
+  GLint status = GL_FALSE; /* Stays false if the query below fails without writing it. */
+  glGetProgramiv(program, GL_LINK_STATUS, &status);
+  glDeleteProgram(program);
+  return status != GL_FALSE;
+}
+
+}  // namespace blender::gpu
+
+void GPU_compilation_subprocess_run(const char *subprocess_name)
+{
+  using namespace blender;
+  using namespace blender::gpu;
+  /* Entry point of a compilation subprocess; `subprocess_name` names its shared IPC objects. */
+#  ifndef _WIN32
+  /** NOTE: Technically, the parent process could have crashed before this. */
+  pid_t ppid = getppid();
+#  endif
+
+  CLG_init();
+
+  std::string name = subprocess_name;
+  SharedMemory shared_mem(name, compilation_subprocess_shared_memory_size, false);
+  if (!shared_mem.get_data()) {
+    std::cerr << "Compilation Subprocess: Failed to open shared memory " << subprocess_name
+              << "\n";
+    return;
+  }
+  SharedSemaphore start_semaphore(name + "_START", true);
+  SharedSemaphore end_semaphore(name + "_END", true);
+  SharedSemaphore close_semaphore(name + "_CLOSE", true);
+  /* Background GHOST system with an OpenGL context to compile with. */
+  GHOST_SystemHandle ghost_system = GHOST_CreateSystemBackground();
+  BLI_assert(ghost_system);
+  GHOST_GPUSettings gpu_settings = {0};
+  gpu_settings.context_type = GHOST_kDrawingContextTypeOpenGL;
+  GHOST_ContextHandle ghost_context = GHOST_CreateGPUContext(ghost_system, gpu_settings);
+  if (ghost_context == nullptr) {
+    std::cerr << "Compilation Subprocess: Failed to initialize GHOST context for "
+              << subprocess_name << "\n";
+    GHOST_DisposeSystem(ghost_system);
+    return;
+  }
+  GHOST_ActivateGPUContext(ghost_context);
+  GPUContext *gpu_context = GPU_context_create(nullptr, ghost_context);
+  GPU_init();
+  /* On-disk binary cache, located inside the temporary base directory. */
+  BKE_tempdir_init(nullptr);
+  std::string cache_dir = std::string(BKE_tempdir_base()) + "BLENDER_SHADER_CACHE" + SEP_STR;
+  BLI_dir_create_recursive(cache_dir.c_str());
+
+  while (true) {
+    /* Process events to avoid crashes on Wayland.
+     * See https://bugreports.qt.io/browse/QTBUG-81504 */
+    GHOST_ProcessEvents(ghost_system, false);
+
+#  ifdef _WIN32
+    start_semaphore.decrement();
+#  else
+    bool lost_parent = false;
+    while (!lost_parent && !start_semaphore.try_decrement(1000)) {
+      lost_parent = getppid() != ppid;
+    }
+    if (lost_parent) {
+      std::cerr << "Compilation Subprocess: Lost parent process\n";
+      break;
+    }
+#  endif
+    /* A raised CLOSE semaphore is the request to leave the loop and shut down. */
+    if (close_semaphore.try_decrement()) {
+      break;
+    }
+
+    const char *shaders = reinterpret_cast<const char *>(shared_mem.get_data());
+    /* Copy the sources out of the pool: reading a cached binary below overwrites them. */
+    const std::string vert_src = shaders;
+    const std::string frag_src = shaders + vert_src.size() + 1;
+
+    DefaultHash<StringRefNull> hasher;
+    uint64_t vert_hash = hasher(vert_src.c_str());
+    uint64_t frag_hash = hasher(frag_src.c_str());
+    std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash);
+    std::string cache_path = cache_dir + SEP_STR + hash_str;
+
+    /* TODO: This should lock the files? */
+    if (BLI_exists(cache_path.c_str())) {
+      /* Read cached binary. */
+      fstream file(cache_path, std::ios::binary | std::ios::in | std::ios::ate);
+      std::streamsize size = file.tellg();
+      if (size <= compilation_subprocess_shared_memory_size) {
+        file.seekg(0, std::ios::beg);
+        file.read(reinterpret_cast<char *>(shared_mem.get_data()), size);
+        /* Ensure it's valid. */
+        if (validate_binary(shared_mem.get_data())) {
+          end_semaphore.increment();
+          continue;
+        }
+        else {
+          std::cout << "Compilation Subprocess: Failed to load cached shader binary " << hash_str
+                    << "\n";
+        }
+      }
+      else {
+        /* This should never happen, since shaders larger than the pool size should be discarded
+         * and compiled in the main Blender process. */
+        std::cerr << "Compilation Subprocess: Wrong size for cached shader binary " << hash_str
+                  << "\n";
+        BLI_assert_unreachable();
+      }
+    }
+    /* Cache miss or unusable cache entry: compile from the copied sources and publish it. */
+    SubprocessShader shader(vert_src.c_str(), frag_src.c_str());
+    ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data());
+
+    end_semaphore.increment();
+    /* NOTE(review): the pool is read after END was raised; the parent may already reuse it. */
+    fstream file(cache_path, std::ios::binary | std::ios::out);
+    file.write(reinterpret_cast<char *>(shared_mem.get_data()),
+               binary->size + offsetof(ShaderBinaryHeader, data_start));
+  }
+
+  GPU_exit();
+  GPU_context_discard(gpu_context);
+  GHOST_DisposeGPUContext(ghost_system, ghost_context);
+  GHOST_DisposeSystem(ghost_system);
+}
+
+#endif
diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.hh b/source/blender/gpu/opengl/gl_compilation_subprocess.hh
new file mode 100644
index 00000000000..022e731f51c
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_compilation_subprocess.hh
@@ -0,0 +1,31 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "GPU_compilation_subprocess.hh"
+
+#if BLI_SUBPROCESS_SUPPORT
+
+#  include "BLI_sys_types.h"
+
+namespace blender::gpu {
+
+/* The size of the memory pools shared by Blender and the compilation subprocesses. */
+constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */
+
+struct ShaderBinaryHeader {
+  /* Size in bytes of the shader binary data (0 when there is no usable binary). */
+  int32_t size;
+  /* Magic number that identifies the format of this shader binary (Driver-defined).
+   * This (and size) is set to 0 when the shader has failed to compile. */
+  uint32_t format;
+  /* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the
+   * shader binary data; the remaining `size - 1` bytes follow it in the pool. */
+  uint8_t data_start;
+};
+
+}  // namespace blender::gpu
+
+#endif
diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc
index de95e643e80..cbf3ce9d885 100644
--- a/source/blender/gpu/opengl/gl_context.cc
+++ b/source/blender/gpu/opengl/gl_context.cc
@@ -84,6 +84,8 @@ GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list
   active_fb = back_left;
   static_cast<GLStateManager *>(state_manager)->active_fb = static_cast<GLFrameBuffer *>(
       active_fb);
+
+  compiler = GLBackend::get()->get_compiler();
 }
 
 GLContext::~GLContext()
diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc
index 4e3a7cd426c..15d1718d200 100644
--- a/source/blender/gpu/opengl/gl_shader.cc
+++ b/source/blender/gpu/opengl/gl_shader.cc
@@ -8,22 +8,34 @@
 
 #include <iomanip>
 
+#include "BKE_appdir.hh"
 #include "BKE_global.hh"
 
 #include "BLI_string.h"
+#include "BLI_time.h"
 #include "BLI_vector.hh"
 
+#include "BLI_system.h"
+#include BLI_SYSTEM_PID_H
+
 #include "GPU_capabilities.hh"
 #include "GPU_platform.hh"
+#include "gpu_capabilities_private.hh"
 #include "gpu_shader_dependency_private.hh"
 
 #include "gl_debug.hh"
 #include "gl_vertex_buffer.hh"
 
+#include "gl_compilation_subprocess.hh"
 #include "gl_shader.hh"
 #include "gl_shader_interface.hh"
 
 #include <sstream>
+#include <stdio.h>
+#ifdef WIN32
+#  define popen _popen
+#  define pclose _pclose
+#endif
 
 using namespace blender;
 using namespace blender::gpu;
@@ -51,8 +63,10 @@ GLShader::~GLShader()
 #endif
 }
 
-void GLShader::init(const shader::ShaderCreateInfo &info)
+void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
 {
+  async_compilation_ = is_batch_compilation;
+
   /* Extract the constants names from info and store them locally. */
   for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
     specialization_constant_names_.append(constant.name.c_str());
@@ -1093,14 +1107,8 @@ const char *GLShader::glsl_patch_get(GLenum gl_stage)
 
 GLuint GLShader::create_shader_stage(GLenum gl_stage,
                                      MutableSpan<const char *> sources,
-                                     const GLSources &gl_sources)
+                                     GLSources &gl_sources)
 {
-  GLuint shader = glCreateShader(gl_stage);
-  if (shader == 0) {
-    fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
-    return 0;
-  }
-
   /* Patch the shader sources to include specialization constants. */
   std::string constants_source;
   Vector<const char *> recreated_sources;
@@ -1117,6 +1125,12 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
   sources[SOURCES_INDEX_VERSION] = glsl_patch_get(gl_stage);
   sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS] = constants_source.c_str();
 
+  if (async_compilation_) {
+    gl_sources[SOURCES_INDEX_VERSION].source = std::string(sources[SOURCES_INDEX_VERSION]);
+    gl_sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS].source = std::string(
+        sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS]);
+  }
+
   if (DEBUG_LOG_SHADER_SRC_ON_ERROR) {
     /* Store the generated source for printing in case the link fails. */
     StringRefNull source_type;
@@ -1141,6 +1155,17 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
     }
   }
 
+  if (async_compilation_) {
+    /* Only build the sources. */
+    return 0;
+  }
+
+  GLuint shader = glCreateShader(gl_stage);
+  if (shader == 0) {
+    fprintf(stderr, "GLShader: Error: Could not create shader object.\n");
+    return 0;
+  }
+
   glShaderSource(shader, sources.size(), sources.data(), nullptr);
   glCompileShader(shader);
 
@@ -1180,8 +1205,8 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage,
 void GLShader::update_program_and_sources(GLSources &stage_sources,
                                           MutableSpan<const char *> sources)
 {
-  const bool has_specialization_constants = !constants.types.is_empty();
-  if (has_specialization_constants && stage_sources.is_empty()) {
+  const bool store_sources = !constants.types.is_empty() || async_compilation_;
+  if (store_sources && stage_sources.is_empty()) {
     stage_sources = sources;
   }
 
@@ -1231,10 +1256,23 @@ bool GLShader::finalize(const shader::ShaderCreateInfo *info)
     geometry_shader_from_glsl(sources);
   }
 
-  if (!program_link()) {
+  if (async_compilation_) {
+    return true;
+  }
+
+  program_link();
+  return post_finalize(info);
+}
+
+bool GLShader::post_finalize(const shader::ShaderCreateInfo *info)
+{
+  if (!check_link_status()) {
     return false;
   }
 
+  /* Reset for specialization constants variations. */
+  async_compilation_ = false;
+
   GLuint program_id = program_get();
   if (info != nullptr && info->legacy_resource_location_ == false) {
     interface = new GLShaderInterface(program_id, *info);
@@ -1450,13 +1488,18 @@ GLShader::GLProgram::~GLProgram()
   glDeleteProgram(program_id);
 }
 
-bool GLShader::program_link()
+void GLShader::program_link()
 {
   BLI_assert(program_active_ != nullptr);
   if (program_active_->program_id == 0) {
     program_active_->program_id = glCreateProgram();
     debug::object_label(GL_PROGRAM, program_active_->program_id, name);
   }
+
+  if (async_compilation_) {
+    return;
+  }
+
   GLuint program_id = program_active_->program_id;
 
   if (program_active_->vert_shader) {
@@ -1472,7 +1515,11 @@ bool GLShader::program_link()
     glAttachShader(program_id, program_active_->compute_shader);
   }
   glLinkProgram(program_id);
+}
 
+bool GLShader::check_link_status()
+{
+  GLuint program_id = program_active_->program_id;
   GLint status;
   glGetProgramiv(program_id, GL_LINK_STATUS, &status);
   if (!status) {
@@ -1542,3 +1589,256 @@ GLuint GLShader::program_get()
 }
 
 /** \} */
+
+#if BLI_SUBPROCESS_SUPPORT
+
+/* -------------------------------------------------------------------- */
+/** \name Compiler workers
+ * \{ */
+
+GLCompilerWorker::GLCompilerWorker()
+{
+  static size_t pipe_id = 0;
+  pipe_id++;
+  /* Name unique to this process (PID) and worker (counter), shared by all the IPC objects. */
+  std::string name = "BLENDER_SHADER_COMPILER_" + std::to_string(getpid()) + "_" +
+                     std::to_string(pipe_id);
+  /* Presumably opened on the other side by `GPU_compilation_subprocess_run` (same suffixes). */
+  shared_mem_ = std::make_unique<SharedMemory>(
+      name, compilation_subprocess_shared_memory_size, true);
+  start_semaphore_ = std::make_unique<SharedSemaphore>(name + "_START", false);
+  end_semaphore_ = std::make_unique<SharedSemaphore>(name + "_END", false);
+  close_semaphore_ = std::make_unique<SharedSemaphore>(name + "_CLOSE", false);
+  /* Create the subprocess, passing it the base name of the IPC objects. */
+  subprocess_.create({"--compilation-subprocess", name.c_str()});
+}
+
+GLCompilerWorker::~GLCompilerWorker()
+{
+  close_semaphore_->increment(); /* Checked by the subprocess after each START signal. */
+  /* Flag start so the subprocess can reach the close semaphore. */
+  start_semaphore_->increment();
+}
+
+void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag)
+{
+  BLI_assert(state_ == AVAILABLE);
+  /* Pool layout: vertex source, NUL, fragment source, NUL. */
+  char *pool = (char *)shared_mem_->get_data();
+  strcpy(pool, vert.c_str());
+  strcpy(pool + vert.size() + 1, frag.c_str());
+  start_semaphore_->increment();
+
+  state_ = COMPILATION_REQUESTED;
+  compilation_start = BLI_time_now_seconds();
+}
+
+bool GLCompilerWorker::is_ready()
+{
+  BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));
+
+  /* The END semaphore is taken at most once per compilation: once it has been taken,
+   * the READY state is what remembers that the result is available. */
+  const bool already_ready = (state_ == COMPILATION_READY);
+  if (!already_ready && end_semaphore_->try_decrement()) {
+    state_ = COMPILATION_READY;
+  }
+
+  return state_ == COMPILATION_READY;
+}
+
+bool GLCompilerWorker::is_lost()
+{
+  /* Lost means: the process is not running anymore, or it exceeded the timeout (hung). */
+  const float max_timeout_seconds = 30.0f;
+  const bool timed_out = (BLI_time_now_seconds() - compilation_start) > max_timeout_seconds;
+  return !subprocess_.is_running() || timed_out;
+}
+
+bool GLCompilerWorker::load_program_binary(GLint program)
+{
+  BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY));
+  if (state_ == COMPILATION_REQUESTED) {
+    /* The END signal has not been consumed yet (see `is_ready`), take it now. */
+    end_semaphore_->decrement();
+  }
+  state_ = COMPILATION_FINISHED;
+  ShaderBinaryHeader *binary = (ShaderBinaryHeader *)shared_mem_->get_data();
+  /* A size of 0 flags a failed compilation. Also reject sizes that can't fit in the pool, using
+   * the same bound the subprocess applies when writing, so a corrupt header is never trusted. */
+  const bool fits = binary->size + sizeof(ShaderBinaryHeader) <
+                    compilation_subprocess_shared_memory_size;
+  if (binary->size <= 0 || !fits) {
+    return false;
+  }
+  glProgramBinary(program, binary->format, &binary->data_start, binary->size);
+  return true;
+}
+
+void GLCompilerWorker::release()
+{
+  state_ = AVAILABLE; /* Lets `get_compiler_worker` pick this worker again. */
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name GLShaderCompiler
+ * \{ */
+
+GLShaderCompiler::~GLShaderCompiler()
+{
+  BLI_assert(batches.is_empty()); /* All requested batches must have been finalized. */
+  /* Destroying a worker raises the semaphores that tell its subprocess to close. */
+  for (GLCompilerWorker *worker : workers_) {
+    delete worker;
+  }
+}
+
+GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag)
+{
+  GLCompilerWorker *worker = nullptr; /* An idle worker if any, else a new one if allowed. */
+  for (GLCompilerWorker *candidate : workers_) {
+    if (candidate->state_ == GLCompilerWorker::AVAILABLE) {
+      worker = candidate;
+      break;
+    }
+  }
+  if (worker == nullptr && workers_.size() < GCaps.max_parallel_compilations) {
+    worker = new GLCompilerWorker();
+    workers_.append(worker);
+  }
+  if (worker != nullptr) {
+    worker->compile(vert, frag);
+  }
+  return worker; /* Null when every worker is busy and the limit is reached. */
+}
+
+bool GLShaderCompiler::worker_is_lost(GLCompilerWorker *&worker)
+{
+  if (!worker->is_lost()) {
+    return false;
+  }
+  std::cerr << "ERROR: Compilation subprocess lost\n";
+  workers_.remove_first_occurrence_and_reorder(worker);
+  delete worker;
+  worker = nullptr; /* Passed by reference: also clears the caller's pointer. */
+  return true;
+}
+
+BatchHandle GLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
+{
+  BLI_assert(GPU_use_parallel_compilation());
+  /* Only vertex + fragment shaders go to the subprocess workers, the rest compiles locally. */
+  std::scoped_lock lock(mutex_);
+  BatchHandle handle = next_batch_handle++;
+  batches.add(handle, {});
+  Batch &batch = batches.lookup(handle);
+  batch.items.reserve(infos.size());
+  batch.is_ready = false;
+  for (const shader::ShaderCreateInfo *info : infos) {
+    const_cast<ShaderCreateInfo *>(info)->finalize();
+    CompilationWork item = {};
+    item.info = info;
+    item.do_async_compilation = !info->vertex_source_.is_empty() &&
+                                !info->fragment_source_.is_empty() &&
+                                info->compute_source_.is_empty() &&
+                                info->geometry_source_.is_empty();
+    if (item.do_async_compilation) {
+      item.shader = static_cast<GLShader *>(compile(*info, true));
+      for (const char *src : item.shader->vertex_sources_.sources_get()) {
+        item.vertex_src.append(src);
+      }
+      for (const char *src : item.shader->fragment_sources_.sources_get()) {
+        item.fragment_src.append(src);
+      }
+      /* The worker writes both sources NUL-terminated into the pool: count the terminators. */
+      size_t required_size = item.vertex_src.size() + item.fragment_src.size() + 2;
+      if (required_size <= compilation_subprocess_shared_memory_size) {
+        item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
+      }
+      else {
+        delete item.shader;
+        item.shader = nullptr;
+        item.do_async_compilation = false;
+      }
+    }
+    batch.items.append(item);
+  }
+  return handle;
+}
+
+bool GLShaderCompiler::batch_is_ready(BatchHandle handle)
+{
+  std::scoped_lock lock(mutex_);
+  Batch &batch = batches.lookup(handle);
+  if (batch.is_ready) {
+    return true;
+  }
+  /* Assume ready; reset to false below by any item that is still pending after this pass. */
+  batch.is_ready = true;
+  for (CompilationWork &item : batch.items) {
+    if (item.is_ready) {
+      continue;
+    }
+
+    if (!item.do_async_compilation) {
+      /* Compile it locally. `item.shader` is nullptr if that compilation fails. */
+      item.shader = static_cast<GLShader *>(compile(*item.info, false));
+      item.is_ready = true;
+      continue;
+    }
+
+    if (!item.worker) {
+      /* Try to acquire an available worker (stays null if none can be used right now). */
+      item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str());
+    }
+    else if (item.worker->is_ready()) {
+      /* Retrieve the binary compiled by the worker. */
+      if (!item.worker->load_program_binary(item.shader->program_active_->program_id) ||
+          !item.shader->post_finalize(item.info))
+      {
+        /* Compilation failed: the item is compiled locally on the next call. */
+        delete item.shader;
+        item.shader = nullptr;
+        item.do_async_compilation = false;
+      }
+      else {
+        item.is_ready = true;
+      }
+      item.worker->release();
+      item.worker = nullptr;
+    }
+    else if (worker_is_lost(item.worker)) {
+      /* We lost the worker (already deleted and nulled): compile locally on the next call. */
+      delete item.shader;
+      item.shader = nullptr;
+      item.do_async_compilation = false;
+    }
+
+    if (!item.is_ready) {
+      batch.is_ready = false;
+    }
+  }
+
+  return batch.is_ready;
+}
+
+Vector<Shader *> GLShaderCompiler::batch_finalize(BatchHandle &handle)
+{
+  while (!batch_is_ready(handle)) { /* Each poll takes and releases `mutex_` on its own. */
+    BLI_time_sleep_ms(1);
+  }
+  std::scoped_lock lock(mutex_);
+  Batch finished = batches.pop(handle);
+  Vector<Shader *> shaders;
+  for (CompilationWork &work : finished.items) {
+    shaders.append(work.shader);
+  }
+  handle = 0; /* Invalidate the caller's handle: the batch no longer exists. */
+  return shaders;
+}
+
+/** \} */
+
+#endif
diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh
index d4b7c618bb3..6240896d7d8 100644
--- a/source/blender/gpu/opengl/gl_shader.hh
+++ b/source/blender/gpu/opengl/gl_shader.hh
@@ -13,10 +13,14 @@
 #include <epoxy/gl.h>
 
 #include "BLI_map.hh"
+#include "BLI_subprocess.hh"
+#include "BLI_utility_mixins.hh"
 
 #include "gpu_shader_create_info.hh"
 #include "gpu_shader_private.hh"
 
+#include <functional>
+
 namespace blender::gpu {
 
 /**
@@ -48,6 +52,7 @@ class GLSources : public Vector<GLSource> {
 class GLShader : public Shader {
   friend shader::ShaderCreateInfo;
   friend shader::StageInterfaceInfo;
+  friend class GLShaderCompiler;
 
  private:
   struct GLProgram {
@@ -85,6 +90,8 @@ class GLShader : public Shader {
    */
   GLProgram *program_active_ = nullptr;
 
+  bool async_compilation_ = false;
+
   /**
    * When the shader uses Specialization Constants these attribute contains the sources to
    * rebuild shader stages. When Specialization Constants aren't used they are empty to
@@ -112,7 +119,8 @@ class GLShader : public Shader {
   /**
    * Link the active program.
    */
-  bool program_link();
+  void program_link();
+  bool check_link_status();
 
   /**
    * Return a GLProgram program id that reflects the current state of shader.constants.values.
@@ -131,7 +139,7 @@ class GLShader : public Shader {
   GLShader(const char *name);
   ~GLShader();
 
-  void init(const shader::ShaderCreateInfo &info) override;
+  void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override;
 
   /** Return true on success. */
   void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
@@ -139,6 +147,7 @@ class GLShader : public Shader {
   void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
   void compute_shader_from_glsl(MutableSpan<const char *> sources) override;
   bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
+  bool post_finalize(const shader::ShaderCreateInfo *info = nullptr);
   void warm_cache(int /*limit*/) override{};
 
   std::string resources_declare(const shader::ShaderCreateInfo &info) const override;
@@ -191,7 +200,7 @@ class GLShader : public Shader {
   /** Create, compile and attach the shader stage to the shader program. */
   GLuint create_shader_stage(GLenum gl_stage,
                              MutableSpan<const char *> sources,
-                             const GLSources &gl_sources);
+                             GLSources &gl_sources);
 
   /**
    * \brief features available on newer implementation such as native barycentric coordinates
@@ -204,6 +213,84 @@ class GLShader : public Shader {
   MEM_CXX_CLASS_ALLOC_FUNCS("GLShader");
 };
 
+#if BLI_SUBPROCESS_SUPPORT
+
+class GLCompilerWorker { /* A compilation subprocess and its IPC objects. */
+  friend class GLShaderCompiler; /* Everything below is private: only the compiler uses it. */
+
+ private:
+  BlenderSubprocess subprocess_;
+  std::unique_ptr<SharedMemory> shared_mem_; /* Shader sources in, program binary out. */
+  std::unique_ptr<SharedSemaphore> start_semaphore_; /* Semaphores synchronize the exchange. */
+  std::unique_ptr<SharedSemaphore> end_semaphore_;
+  std::unique_ptr<SharedSemaphore> close_semaphore_;
+  enum eState {
+    /* The worker has been acquired and the compilation has been requested. */
+    COMPILATION_REQUESTED,
+    /* The shader binary result is ready to be read. */
+    COMPILATION_READY,
+    /* The binary result has been loaded into a program and the worker can be released. */
+    COMPILATION_FINISHED,
+    /* The worker is not currently in use and can be acquired. */
+    AVAILABLE
+  };
+  eState state_ = AVAILABLE; /* A new worker starts out available. */
+  double compilation_start = 0; /* Presumably the request time used by is_lost(); confirm. */
+
+  GLCompilerWorker();
+  ~GLCompilerWorker();
+
+  void compile(StringRefNull vert, StringRefNull frag); /* Request a vert/frag compilation. */
+  bool is_ready(); /* Whether the compiled binary can be read. */
+  bool load_program_binary(GLint program); /* Callers treat `false` as a failed compilation. */
+  void release(); /* Called once the result was consumed (see batch_is_ready). */
+
+  /* Check if the process may have closed, crashed or hung. */
+  bool is_lost();
+};
+
+class GLShaderCompiler : public ShaderCompiler { /* Compiles batches via worker subprocesses. */
+ private:
+  std::mutex mutex_; /* Locked around `batches` access in batch_finalize(). */
+  Vector<GLCompilerWorker *> workers_; /* Pool of subprocess workers. */
+
+  struct CompilationWork { /* State of a single shader inside a batch. */
+    GLCompilerWorker *worker = nullptr; /* Worker currently assigned, null when none. */
+    GLShader *shader = nullptr;
+    const shader::ShaderCreateInfo *info = nullptr;
+    bool do_async_compilation = false; /* When false the shader is compiled locally. */
+
+    std::string vertex_src; /* Sources handed to the worker on acquisition. */
+    std::string fragment_src;
+
+    bool is_ready = false; /* Set once `shader` holds the final result. */
+  };
+
+  struct Batch {
+    Vector<CompilationWork> items;
+    bool is_ready = false; /* Cached result: true once every item is ready. */
+  };
+
+  BatchHandle next_batch_handle = 1; /* Starts at 1: batch_finalize() resets handles to 0. */
+  Map<BatchHandle, Batch> batches;
+
+  GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag);
+  bool worker_is_lost(GLCompilerWorker *&worker); /* NOTE(review): may reset `worker`; confirm. */
+
+ public:
+  ~GLShaderCompiler();
+
+  BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
+  bool batch_is_ready(BatchHandle handle) override; /* Non-blocking poll of a batch. */
+  Vector<Shader *> batch_finalize(BatchHandle &handle) override; /* Blocks; zeroes `handle`. */
+};
+
+#else
+
+class GLShaderCompiler : public ShaderCompilerGeneric {}; /* Fallback without subprocesses. */
+
+#endif
+
 class GLLogParser : public GPULogParser {
  public:
   const char *parse_line(const char *source_combined,
diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc
index 5ebf7ac404e..76ca1fccfc0 100644
--- a/source/blender/gpu/vulkan/vk_context.cc
+++ b/source/blender/gpu/vulkan/vk_context.cc
@@ -36,6 +36,8 @@ VKContext::VKContext(void *ghost_window,
   VKFrameBuffer *framebuffer = new VKFrameBuffer("back_left");
   back_left = framebuffer;
   active_fb = framebuffer;
+
+  compiler = new ShaderCompilerGeneric();
 }
 
 VKContext::~VKContext()
@@ -51,6 +53,8 @@ VKContext::~VKContext()
 
   delete imm;
   imm = nullptr;
+
+  delete compiler;
 }
 
 void VKContext::sync_backbuffer()
diff --git a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc
index 71123336fdc..9c5f2737844 100644
--- a/source/blender/gpu/vulkan/vk_shader.cc
+++ b/source/blender/gpu/vulkan/vk_shader.cc
@@ -568,7 +568,7 @@ VKShader::VKShader(const char *name) : Shader(name)
   context_ = VKContext::get();
 }
 
-void VKShader::init(const shader::ShaderCreateInfo &info)
+void VKShader::init(const shader::ShaderCreateInfo &info, bool /*is_batch_compilation*/)
 {
   VKShaderInterface *vk_interface = new VKShaderInterface();
   vk_interface->init(info);
diff --git a/source/blender/gpu/vulkan/vk_shader.hh b/source/blender/gpu/vulkan/vk_shader.hh
index 286941cc993..f096f83e751 100644
--- a/source/blender/gpu/vulkan/vk_shader.hh
+++ b/source/blender/gpu/vulkan/vk_shader.hh
@@ -49,7 +49,7 @@ class VKShader : public Shader {
   VKShader(const char *name);
   virtual ~VKShader();
 
-  void init(const shader::ShaderCreateInfo &info) override;
+  void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override;
 
   void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
   void geometry_shader_from_glsl(MutableSpan<const char *> sources) override;
diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h
index 8b670da63d3..e1ecea0a938 100644
--- a/source/blender/makesdna/DNA_userdef_types.h
+++ b/source/blender/makesdna/DNA_userdef_types.h
@@ -979,11 +979,12 @@ typedef struct UserDef {
   /** #eGPUBackendType */
   short gpu_backend;
 
+  /** Max number of parallel shader compilation subprocesses. */
+  short max_shader_compilation_subprocesses;
+
   /** Number of samples for FPS display calculations. */
   short playback_fps_samples;
 
-  char _pad7[2];
-
   /** Private, defaults to 20 for 72 DPI setting. */
   short widget_unit;
   short anisotropic_filter;
diff --git a/source/blender/makesrna/intern/rna_userdef.cc b/source/blender/makesrna/intern/rna_userdef.cc
index 7288f5ee8a6..8168ea5b47d 100644
--- a/source/blender/makesrna/intern/rna_userdef.cc
+++ b/source/blender/makesrna/intern/rna_userdef.cc
@@ -6196,6 +6196,15 @@ static void rna_def_userdef_system(BlenderRNA *brna)
       "GPU Backend",
       "GPU backend to use (requires restarting Blender for changes to take effect)");
 
+  prop = RNA_def_property(srna, "max_shader_compilation_subprocesses", PROP_INT, PROP_NONE);
+  RNA_def_property_range(prop, 0, INT16_MAX); /* The backing DNA field is a `short`. */
+  RNA_def_property_ui_text(prop,
+                           "Max Shader Compilation Subprocesses",
+                           "Max number of parallel shader compilation subprocesses, "
+                           "clamped at the max threads supported by the CPU "
+                           "(requires restarting Blender for changes to take effect). "
+                           "Setting it to 0 disables subprocess shader compilation");
+
   /* Network. */
 
   prop = RNA_def_property(srna, "use_online_access", PROP_BOOLEAN, PROP_NONE);
diff --git a/source/creator/creator.cc b/source/creator/creator.cc
index 6dcdf98ca40..ff2a9c5031e 100644
--- a/source/creator/creator.cc
+++ b/source/creator/creator.cc
@@ -71,6 +71,8 @@
 
 #include "RNA_define.hh"
 
+#include "GPU_compilation_subprocess.hh"
+
 #ifdef WITH_FREESTYLE
 #  include "FRS_freestyle.h"
 #endif
@@ -328,6 +330,14 @@ int main(int argc,
 #  endif /* USE_WIN32_UNICODE_ARGS */
 #endif   /* WIN32 */
 
+#if defined(WITH_OPENGL_BACKEND) && BLI_SUBPROCESS_SUPPORT
+  if (strcmp(argv[0], "--compilation-subprocess") == 0) { /* The flag is read from argv[0]. */
+    BLI_assert(argc == 2);
+    GPU_compilation_subprocess_run(argv[1]);
+    return 0; /* Subprocess mode never continues into the regular start-up. */
+  }
+#endif /* WITH_OPENGL_BACKEND && BLI_SUBPROCESS_SUPPORT (tested by value, as in gl_shader.hh). */
+
   /* NOTE: Special exception for guarded allocator type switch:
    *       we need to perform switch from lock-free to fully
    *       guarded allocator before any allocation happened.