diff --git a/release/datafiles/userdef/userdef_default.c b/release/datafiles/userdef/userdef_default.c index 030a5b2ae67..d634a1c4dae 100644 --- a/release/datafiles/userdef/userdef_default.c +++ b/release/datafiles/userdef/userdef_default.c @@ -114,6 +114,7 @@ const UserDef U_default = { #else .gpu_backend = GPU_BACKEND_OPENGL, #endif + .max_shader_compilation_subprocesses = 0, /** Initialized by: #BKE_studiolight_default. */ .light_param = {{0}}, diff --git a/scripts/startup/bl_ui/space_userpref.py b/scripts/startup/bl_ui/space_userpref.py index 3053b0477a0..8671cab04c5 100644 --- a/scripts/startup/bl_ui/space_userpref.py +++ b/scripts/startup/bl_ui/space_userpref.py @@ -746,6 +746,8 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel): bl_label = "Memory & Limits" def draw_centered(self, context, layout): + import sys + prefs = context.preferences system = prefs.system edit = prefs.edit @@ -772,6 +774,11 @@ class USERPREF_PT_system_memory(SystemPanel, CenterAlignMixIn, Panel): col.prop(system, "vbo_time_out", text="VBO Time Out") col.prop(system, "vbo_collection_rate", text="Garbage Collection Rate") + if sys.platform != "darwin": + layout.separator() + col = layout.column() + col.prop(system, "max_shader_compilation_subprocesses") + class USERPREF_PT_system_video_sequencer(SystemPanel, CenterAlignMixIn, Panel): bl_label = "Video Sequencer" diff --git a/source/blender/blenlib/BLI_subprocess.hh b/source/blender/blenlib/BLI_subprocess.hh new file mode 100644 index 00000000000..15cf2a2a715 --- /dev/null +++ b/source/blender/blenlib/BLI_subprocess.hh @@ -0,0 +1,157 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +/** \file + * \ingroup bli + * + * API for subprocess creation and inter-process communication. + * NOTE: The use of subprocesses is generally discouraged. 
+ * It should only be used for parallelizing workloads that can only happen on a per-process level + * due to OS or driver limitations. + * WARNING: The Subprocess API is only supported on Windows and Linux. + * Its use should always be inside `#if BLI_SUBPROCESS_SUPPORT` preprocessor directives. + */ + +#if defined(_WIN32) || defined(__linux__) +# define BLI_SUBPROCESS_SUPPORT 1 +#else +# define BLI_SUBPROCESS_SUPPORT 0 +#endif + +#if BLI_SUBPROCESS_SUPPORT + +# include "BLI_span.hh" +# include "BLI_string_ref.hh" +# include "BLI_sys_types.h" +# include "BLI_utility_mixins.hh" +# include + +# ifdef _WIN32 +typedef void *HANDLE; +# else +# include +# endif + +namespace blender { + +/** + * Creates a subprocess of the current Blender executable. + * WARNING: This class doesn't handle subprocess destruction. + * On Windows, subprocesses are closed automatically when the parent process finishes. + * On Linux, subprocesses become children of init or systemd when the parent process finishes. + */ +class BlenderSubprocess : NonCopyable { + private: +# ifdef _WIN32 + HANDLE handle_ = nullptr; +# else + pid_t pid_ = 0; +# endif + public: + ~BlenderSubprocess(); + + /** + * Create a subprocess and pass the arguments to the main function. + * NOTE: The subprocess path is not passed as `argv[0]`. + * `args` only support alpha-numeric characters, underscores and hyphen-minus as a safety + * measure. + * WARNING: This function shouldn't be called again after it succeeds. + */ + bool create(Span args); + /** + * Checks if the subprocess is still running. + * It always returns false if creation failed. + * It doesn't detects hanged subprocesses. + */ + bool is_running(); +}; + +/** + * Creates or gets access to a block of memory that can be read and written by more than once + * process. + * WARNING: It doesn't have any built-in safety measure to prevent concurrent writes or + * read/writes. Synchronization should be handled with SharedSemaphores. 
+ */ +class SharedMemory : NonCopyable { + private: + std::string name_; +# ifdef _WIN32 + HANDLE handle_; +# else + int handle_; +# endif + void *data_; + size_t data_size_; + bool is_owner_; + + public: + /** + * WARNING: The name should be unique a unique identifier accross all processes (including + * multiple Blender instances). You should include the PID of the "owner" process in the name to + * prevent name collisions. + * `is_owner` should only be true for the first process that creates a SharedMemory with a given + * name. + * On Linux, the memory will become invalid across all processes after the owner destructor has + * run (Windows uses reference counting). + */ + SharedMemory(std::string name, size_t size, bool is_owner); + ~SharedMemory(); + + /** + * Get a pointer to the shared memory block. + * WARNING: It can be null if creation failed, or invalid if the owner destructor has run. + * */ + void *get_data() + { + return data_; + } + + size_t get_size() + { + return data_size_; + } +}; + +/** + * Creates or get access to a semaphore that can be used accros multiple processes. + */ +class SharedSemaphore : NonCopyable { + private: + std::string name_; +# if defined(_WIN32) + HANDLE handle_; +# else + sem_t *handle_; +# endif + bool is_owner_; + + public: + /** + * WARNING: The name should be unique a unique identifier accross all processes (including + * multiple Blender instances). You should include the PID of the "owner" process in the name to + * prevent name collisions. + * `is_owner` should only be true for the last process that needs to read it (It's ok if the + * creator is not the owner). + * On Linux, the semaphore will become invalid across all processes after the owner destructor + * has run (Windows uses reference counting). + */ + SharedSemaphore(std::string name, bool is_owner); + ~SharedSemaphore(); + + /* Increment the semaphore value. 
*/ + void increment(); + /* Decrement the semaphore value (Blocks until the semaphore value is greater than 0). */ + void decrement(); + /** + * Try to decrement the semaphore value. Returns true on success. + * (Blocks until the semaphore value is greater than 0 or the wait time runs out). + */ + bool try_decrement(int wait_ms = 0); +}; + +} // namespace blender + +#endif diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 59cef66f814..76605bd9602 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -40,6 +40,7 @@ set(SRC intern/BLI_memiter.c intern/BLI_mempool.c intern/BLI_mmap.c + intern/BLI_subprocess.cc intern/BLI_timer.c intern/DLRB_tree.c intern/array_store.cc @@ -365,6 +366,7 @@ set(SRC BLI_string_utils.hh BLI_struct_equality_utils.hh BLI_sub_frame.hh + BLI_subprocess.hh BLI_sys_types.h BLI_system.h BLI_task.h diff --git a/source/blender/blenlib/intern/BLI_subprocess.cc b/source/blender/blenlib/intern/BLI_subprocess.cc new file mode 100644 index 00000000000..bfb0dabbc5a --- /dev/null +++ b/source/blender/blenlib/intern/BLI_subprocess.cc @@ -0,0 +1,420 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_subprocess.hh" + +#if BLI_SUBPROCESS_SUPPORT + +/* Based on https://github.com/jarikomppa/ipc (Unlicense) */ + +# include "BLI_assert.h" +# include "BLI_path_util.h" +# include "BLI_string_utf8.h" +# include + +namespace blender { + +static bool check_arguments_are_valid(Span args) +{ + for (StringRefNull arg : args) { + for (const char c : arg) { + if (!std::isalnum(c) && !ELEM(c, '_', '-')) { + return false; + } + } + } + + return true; +} + +} // namespace blender + +# ifdef _WIN32 + +# define WIN32_LEAN_AND_MEAN +# include +# include + +namespace blender { + +static void print_last_error(const char *function, const char *msg) +{ + DWORD error_code = GetLastError(); + std::cerr << "ERROR (" << 
error_code << "): " << function << " : " << msg << std::endl; +} + +static void check(bool result, const char *function, const char *msg) +{ + if (!result) { + print_last_error(function, msg); + BLI_assert(false); + } +} + +# define CHECK(result) check((result), __func__, #result) +# undef ERROR /* Defined in wingdi.h */ +# define ERROR(msg) check(false, __func__, msg) + +bool BlenderSubprocess::create(Span args) +{ + BLI_assert(handle_ == nullptr); + + if (!check_arguments_are_valid(args)) { + BLI_assert(false); + return false; + } + + wchar_t path[FILE_MAX]; + if (!GetModuleFileNameW(nullptr, path, FILE_MAX)) { + ERROR("GetModuleFileNameW"); + return false; + } + + std::string args_str; + for (StringRefNull arg : args) { + args_str += arg + " "; + } + + const int length_wc = MultiByteToWideChar( + CP_UTF8, 0, args_str.c_str(), args_str.length(), nullptr, 0); + std::wstring w_args(length_wc, 0); + CHECK(MultiByteToWideChar( + CP_UTF8, 0, args_str.c_str(), args_str.length(), w_args.data(), length_wc)); + + STARTUPINFOW startup_info = {0}; + startup_info.cb = sizeof(startup_info); + PROCESS_INFORMATION process_info = {0}; + if (!CreateProcessW(path, + /** Use data() since lpCommandLine must be mutable. */ + w_args.data(), + nullptr, + nullptr, + false, + 0, + nullptr, + nullptr, + &startup_info, + &process_info)) + { + ERROR("CreateProcessW"); + return false; + } + + handle_ = process_info.hProcess; + CHECK(CloseHandle(process_info.hThread)); + + return true; +} + +BlenderSubprocess::~BlenderSubprocess() +{ + if (handle_) { + CHECK(CloseHandle(handle_)); + } +} + +bool BlenderSubprocess::is_running() +{ + if (!handle_) { + return false; + } + + DWORD exit_code = 0; + if (GetExitCodeProcess(handle_, &exit_code)) { + return exit_code == STILL_ACTIVE; + } + + ERROR("GetExitCodeProcess"); + /* Assume the process is still running. 
/**
 * Print the current `errno` to `stderr` as `ERROR (<code>): <function> : <msg>: <description>`.
 *
 * \param function: Name of the function that detected the error.
 * \param msg: Short description of the failed operation.
 */
static void print_last_error(const char *function, const char *msg)
{
  /* Read `errno` before doing anything else, library calls are allowed to modify it. */
  const int error_code = errno;
  const std::string error_msg = "ERROR (" + std::to_string(error_code) + "): " + function +
                                " : " + msg;
  /* `perror` formats whatever `errno` holds when it is called. Restore the captured value in
   * case building the message above changed it, so the code and its description agree. */
  errno = error_code;
  perror(error_msg.c_str());
}
CHECK(result) check((result), __func__, #result) +# define ERROR(msg) check(-1, __func__, msg) + +bool BlenderSubprocess::create(Span args) +{ + if (!check_arguments_are_valid(args)) { + BLI_assert(false); + return false; + } + + char path[PATH_MAX + 1]; + size_t len = readlink("/proc/self/exe", path, PATH_MAX); + if (len == -1) { + ERROR("readlink"); + return false; + } + /* readlink doesn't append a null terminator. */ + path[len] = '\0'; + + Vector char_args; + for (StringRefNull arg : args) { + char_args.append((char *)arg.data()); + } + char_args.append(nullptr); + + pid_ = fork(); + + if (pid_ == -1) { + ERROR("fork"); + return false; + } + else if (pid_ > 0) { + return true; + } + + /* Child process initialization. */ + execv(path, char_args.data()); + + ERROR("execv"); + exit(errno); + + return false; +} + +BlenderSubprocess::~BlenderSubprocess() {} + +bool BlenderSubprocess::is_running() +{ + if (pid_ == -1) { + return false; + } + + pid_t result = waitpid(pid_, nullptr, WNOHANG); + CHECK(result); + + if (result == pid_) { + pid_ = -1; + return false; + } + + return true; +} + +SharedMemory::SharedMemory(std::string name, size_t size, bool is_owner) + : name_(name), is_owner_(is_owner) +{ + constexpr mode_t user_mode = S_IRUSR | S_IWUSR; + if (is_owner) { + handle_ = shm_open(name.c_str(), O_CREAT | O_EXCL | O_RDWR, user_mode); + CHECK(handle_); + if (handle_ != -1) { + if (ftruncate(handle_, size) == -1) { + ERROR("ftruncate"); + CHECK(close(handle_)); + handle_ = -1; + } + } + } + else { + handle_ = shm_open(name.c_str(), O_RDWR, user_mode); + CHECK(handle_); + } + + if (handle_ != -1) { + data_ = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, handle_, 0); + if (data_ == MAP_FAILED) { + ERROR("mmap"); + data_ = nullptr; + } + /* File descriptor can close after mmap. */ + CHECK(close(handle_)); + } + else { + data_ = nullptr; + } + + data_size_ = data_ ? 
size : 0; +} + +SharedMemory::~SharedMemory() +{ + if (data_) { + CHECK(munmap(data_, data_size_)); + if (is_owner_) { + CHECK(shm_unlink(name_.c_str())); + } + } +} + +SharedSemaphore::SharedSemaphore(std::string name, bool is_owner) + : name_(name), is_owner_(is_owner) +{ + constexpr mode_t user_mode = S_IRUSR | S_IWUSR; + handle_ = sem_open(name.c_str(), O_CREAT, user_mode, 0); + if (!handle_) { + ERROR("sem_open"); + } +} + +SharedSemaphore::~SharedSemaphore() +{ + if (handle_) { + CHECK(sem_close(handle_)); + if (is_owner_) { + CHECK(sem_unlink(name_.c_str())); + } + } +} + +void SharedSemaphore::increment() +{ + CHECK(sem_post(handle_)); +} + +void SharedSemaphore::decrement() +{ + while (true) { + int result = sem_wait(handle_); + if (result == 0) { + return; + } + else if (errno != EINTR) { + ERROR("sem_wait"); + return; + } + /* Try again if interrupted by handler. */ + } +} + +bool SharedSemaphore::try_decrement(int wait_ms) +{ + if (wait_ms == 0) { + int result = sem_trywait(handle_); + if (result == 0) { + return true; + } + else if (errno == EINVAL) { + ERROR("sem_trywait"); + } + return false; + } + + timespec time; + if (clock_gettime(CLOCK_REALTIME, &time) == -1) { + ERROR("clock_gettime"); + BLI_time_sleep_ms(wait_ms); + return try_decrement(0); + } + + time.tv_sec += wait_ms / 1000; + time.tv_nsec += (wait_ms % 1000) * 10e6; + + while (true) { + int result = sem_timedwait(handle_, &time); + if (result == 0) { + return true; + } + else if (errno != EINTR) { + if (errno != ETIMEDOUT) { + ERROR("sem_timedwait"); + } + return false; + } + /* Try again if interrupted by handler. 
*/ + } +} + +} // namespace blender + +# endif + +#endif diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 199beddb9f5..7d47d898ff7 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -88,6 +88,7 @@ set(SRC GPU_capabilities.hh GPU_common.hh GPU_common_types.hh + GPU_compilation_subprocess.hh GPU_compute.hh GPU_context.hh GPU_debug.hh @@ -150,6 +151,7 @@ set(OPENGL_SRC opengl/gl_backend.cc opengl/gl_batch.cc + opengl/gl_compilation_subprocess.cc opengl/gl_compute.cc opengl/gl_context.cc opengl/gl_debug.cc @@ -171,6 +173,7 @@ set(OPENGL_SRC opengl/gl_backend.hh opengl/gl_batch.hh + opengl/gl_compilation_subprocess.hh opengl/gl_compute.hh opengl/gl_context.hh opengl/gl_debug.hh @@ -841,6 +844,10 @@ target_link_libraries(bf_gpu PUBLIC bf_gpu_shaders ) +if(WITH_OPENGL_BACKEND AND UNIX) +target_link_libraries(bf_gpu PUBLIC rt) +endif() + if(WITH_OPENCOLORIO) target_link_libraries(bf_gpu PUBLIC bf_ocio_shaders) endif() diff --git a/source/blender/gpu/GPU_capabilities.hh b/source/blender/gpu/GPU_capabilities.hh index d8df48b896c..844f1b196af 100644 --- a/source/blender/gpu/GPU_capabilities.hh +++ b/source/blender/gpu/GPU_capabilities.hh @@ -40,6 +40,8 @@ const char *GPU_extension_get(int i); int GPU_texture_size_with_limit(int res); +bool GPU_use_parallel_compilation(); + bool GPU_mip_render_workaround(); bool GPU_depth_blitting_workaround(); bool GPU_use_main_context_workaround(); diff --git a/source/blender/gpu/GPU_compilation_subprocess.hh b/source/blender/gpu/GPU_compilation_subprocess.hh new file mode 100644 index 00000000000..aa8040ec39a --- /dev/null +++ b/source/blender/gpu/GPU_compilation_subprocess.hh @@ -0,0 +1,13 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include "BLI_subprocess.hh" + +#if defined(WITH_OPENGL_BACKEND) && defined(BLI_SUBPROCESS_SUPPORT) + +void GPU_compilation_subprocess_run(const char 
*subprocess_name); + +#endif diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index 557134ed0de..f11594665b3 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -10,6 +10,8 @@ #pragma once +#include "BLI_span.hh" +#include "BLI_vector.hh" #include "GPU_shader_builtin.hh" namespace blender::gpu { @@ -59,6 +61,29 @@ const GPUShaderCreateInfo *GPU_shader_create_info_get(const char *info_name); */ bool GPU_shader_create_info_check_error(const GPUShaderCreateInfo *_info, char r_error[128]); +using BatchHandle = int64_t; +/** + * Request the creation of multiple shaders at once, allowing the backend to use multithreaded + * compilation. Returns a handle that can be used to poll if all shaders have been compiled, and to + * retrieve the compiled shaders. + * NOTE: This function is asynchronous on OpenGL, but it's blocking on Vulkan and Metal. + * WARNING: The GPUShaderCreateInfo pointers should be valid until `GPU_shader_batch_finalize` has + * returned. + */ +BatchHandle GPU_shader_batch_create_from_infos(blender::Span infos); +/** + * Returns true if all the shaders from the batch have finished their compilation. + */ +bool GPU_shader_batch_is_ready(BatchHandle handle); +/** + * Retrieve the compiled shaders, in the same order as the `GPUShaderCreateInfo`s. + * If the compilation has not finished yet, this call will block the thread until all the shaders + * are ready. + * Shaders with compilation errors are returned as null pointers. + * WARNING: The handle will be invalidated by this call, you can't request the same batch twice. 
+ */ +blender::Vector GPU_shader_batch_finalize(BatchHandle &handle); + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index ea416ca44f5..a4d743c1453 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -131,6 +131,11 @@ int GPU_max_samplers() return GCaps.max_samplers; } +bool GPU_use_parallel_compilation() +{ + return GCaps.max_parallel_compilations > 0; +} + bool GPU_mip_render_workaround() { return GCaps.mip_render_workaround; diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index e373bcd3039..293dc7f3131 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -51,6 +51,8 @@ struct GPUCapabilities { bool texture_view_support = true; bool stencil_export_support = false; + int max_parallel_compilations = 0; + /* OpenGL related workarounds. */ bool mip_render_workaround = false; bool depth_blitting_workaround = false; diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index 1ba2e47b295..208d672c877 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -35,6 +35,8 @@ class Context { StateManager *state_manager = nullptr; Immediate *imm = nullptr; + ShaderCompiler *compiler = nullptr; + /** * All 4 window frame-buffers. * None of them are valid in an off-screen context. 
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index de58d473ed0..574a7bbefe0 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -290,130 +290,7 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info) { using namespace blender::gpu::shader; const ShaderCreateInfo &info = *reinterpret_cast(_info); - - const_cast(info).finalize(); - - GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP); - - const std::string error = info.check_error(); - if (!error.empty()) { - std::cerr << error.c_str() << "\n"; - BLI_assert(false); - } - - Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str()); - shader->init(info); - shader->specialization_constants_init(info); - - std::string defines = shader->defines_declare(info); - std::string resources = shader->resources_declare(info); - - if (info.legacy_resource_location_ == false) { - defines += "#define USE_GPU_SHADER_CREATE_INFO\n"; - } - - Vector typedefs; - if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) { - typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str()); - } - if (!info.typedef_source_generated.empty()) { - typedefs.append(info.typedef_source_generated.c_str()); - } - for (auto filename : info.typedef_sources_) { - typedefs.append(gpu_shader_dependency_get_source(filename).c_str()); - } - - if (!info.vertex_source_.is_empty()) { - auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_); - std::string interface = shader->vertex_interface_declare(info); - - Vector sources; - standard_defines(sources); - sources.append("#define GPU_VERTEX_SHADER\n"); - if (!info.geometry_source_.is_empty()) { - sources.append("#define USE_GEOMETRY_SHADER\n"); - } - sources.append(defines.c_str()); - sources.extend(typedefs); - sources.append(resources.c_str()); - sources.append(interface.c_str()); - sources.extend(code); - 
sources.extend(info.dependencies_generated); - sources.append(info.vertex_source_generated.c_str()); - - shader->vertex_shader_from_glsl(sources); - } - - if (!info.fragment_source_.is_empty()) { - auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_); - std::string interface = shader->fragment_interface_declare(info); - - Vector sources; - standard_defines(sources); - sources.append("#define GPU_FRAGMENT_SHADER\n"); - if (!info.geometry_source_.is_empty()) { - sources.append("#define USE_GEOMETRY_SHADER\n"); - } - sources.append(defines.c_str()); - sources.extend(typedefs); - sources.append(resources.c_str()); - sources.append(interface.c_str()); - sources.extend(code); - sources.extend(info.dependencies_generated); - sources.append(info.fragment_source_generated.c_str()); - - shader->fragment_shader_from_glsl(sources); - } - - if (!info.geometry_source_.is_empty()) { - auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_); - std::string layout = shader->geometry_layout_declare(info); - std::string interface = shader->geometry_interface_declare(info); - - Vector sources; - standard_defines(sources); - sources.append("#define GPU_GEOMETRY_SHADER\n"); - sources.append(defines.c_str()); - sources.extend(typedefs); - sources.append(resources.c_str()); - sources.append(layout.c_str()); - sources.append(interface.c_str()); - sources.append(info.geometry_source_generated.c_str()); - sources.extend(code); - - shader->geometry_shader_from_glsl(sources); - } - - if (!info.compute_source_.is_empty()) { - auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_); - std::string layout = shader->compute_layout_declare(info); - - Vector sources; - standard_defines(sources); - sources.append("#define GPU_COMPUTE_SHADER\n"); - sources.append(defines.c_str()); - sources.extend(typedefs); - sources.append(resources.c_str()); - sources.append(layout.c_str()); - sources.extend(code); - 
sources.extend(info.dependencies_generated); - sources.append(info.compute_source_generated.c_str()); - - shader->compute_shader_from_glsl(sources); - } - - if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) { - shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_); - } - - if (!shader->finalize(&info)) { - delete shader; - GPU_debug_group_end(); - return nullptr; - } - - GPU_debug_group_end(); - return wrap(shader); + return wrap(Context::get()->compiler->compile(info, false)); } GPUShader *GPU_shader_create_from_python(const char *vertcode, @@ -450,6 +327,25 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode, return sh; } +BatchHandle GPU_shader_batch_create_from_infos(Span infos) +{ + using namespace blender::gpu::shader; + Span &infos_ = reinterpret_cast &>( + infos); + return Context::get()->compiler->batch_compile(infos_); +} + +bool GPU_shader_batch_is_ready(BatchHandle handle) +{ + return Context::get()->compiler->batch_is_ready(handle); +} + +Vector GPU_shader_batch_finalize(BatchHandle &handle) +{ + Vector result = Context::get()->compiler->batch_finalize(handle); + return reinterpret_cast &>(result); +} + void GPU_shader_compile_static() { printf("Compiling all static GPU shaders. 
This process takes a while.\n"); @@ -880,4 +776,175 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name ShaderCompiler + * \{ */ + +Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation) +{ + using namespace blender::gpu::shader; + const_cast(info).finalize(); + + GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP); + + const std::string error = info.check_error(); + if (!error.empty()) { + std::cerr << error.c_str() << "\n"; + BLI_assert(false); + } + + Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str()); + shader->init(info, is_batch_compilation); + shader->specialization_constants_init(info); + + std::string defines = shader->defines_declare(info); + std::string resources = shader->resources_declare(info); + + if (info.legacy_resource_location_ == false) { + defines += "#define USE_GPU_SHADER_CREATE_INFO\n"; + } + + Vector typedefs; + if (!info.typedef_sources_.is_empty() || !info.typedef_source_generated.empty()) { + typedefs.append(gpu_shader_dependency_get_source("GPU_shader_shared_utils.hh").c_str()); + } + if (!info.typedef_source_generated.empty()) { + typedefs.append(info.typedef_source_generated.c_str()); + } + for (auto filename : info.typedef_sources_) { + typedefs.append(gpu_shader_dependency_get_source(filename).c_str()); + } + + if (!info.vertex_source_.is_empty()) { + auto code = gpu_shader_dependency_get_resolved_source(info.vertex_source_); + std::string interface = shader->vertex_interface_declare(info); + + Vector sources; + standard_defines(sources); + sources.append("#define GPU_VERTEX_SHADER\n"); + if (!info.geometry_source_.is_empty()) { + sources.append("#define USE_GEOMETRY_SHADER\n"); + } + sources.append(defines.c_str()); + sources.extend(typedefs); + sources.append(resources.c_str()); + sources.append(interface.c_str()); + sources.extend(code); + 
sources.extend(info.dependencies_generated); + sources.append(info.vertex_source_generated.c_str()); + + shader->vertex_shader_from_glsl(sources); + } + + if (!info.fragment_source_.is_empty()) { + auto code = gpu_shader_dependency_get_resolved_source(info.fragment_source_); + std::string interface = shader->fragment_interface_declare(info); + + Vector sources; + standard_defines(sources); + sources.append("#define GPU_FRAGMENT_SHADER\n"); + if (!info.geometry_source_.is_empty()) { + sources.append("#define USE_GEOMETRY_SHADER\n"); + } + sources.append(defines.c_str()); + sources.extend(typedefs); + sources.append(resources.c_str()); + sources.append(interface.c_str()); + sources.extend(code); + sources.extend(info.dependencies_generated); + sources.append(info.fragment_source_generated.c_str()); + + shader->fragment_shader_from_glsl(sources); + } + + if (!info.geometry_source_.is_empty()) { + auto code = gpu_shader_dependency_get_resolved_source(info.geometry_source_); + std::string layout = shader->geometry_layout_declare(info); + std::string interface = shader->geometry_interface_declare(info); + + Vector sources; + standard_defines(sources); + sources.append("#define GPU_GEOMETRY_SHADER\n"); + sources.append(defines.c_str()); + sources.extend(typedefs); + sources.append(resources.c_str()); + sources.append(layout.c_str()); + sources.append(interface.c_str()); + sources.append(info.geometry_source_generated.c_str()); + sources.extend(code); + + shader->geometry_shader_from_glsl(sources); + } + + if (!info.compute_source_.is_empty()) { + auto code = gpu_shader_dependency_get_resolved_source(info.compute_source_); + std::string layout = shader->compute_layout_declare(info); + + Vector sources; + standard_defines(sources); + sources.append("#define GPU_COMPUTE_SHADER\n"); + sources.append(defines.c_str()); + sources.extend(typedefs); + sources.append(resources.c_str()); + sources.append(layout.c_str()); + sources.extend(code); + 
sources.extend(info.dependencies_generated); + sources.append(info.compute_source_generated.c_str()); + + shader->compute_shader_from_glsl(sources); + } + + if (info.tf_type_ != GPU_SHADER_TFB_NONE && info.tf_names_.size() > 0) { + shader->transform_feedback_names_set(info.tf_names_.as_span(), info.tf_type_); + } + + if (!shader->finalize(&info)) { + delete shader; + GPU_debug_group_end(); + return nullptr; + } + + GPU_debug_group_end(); + return shader; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ShaderCompilerGeneric + * \{ */ + +ShaderCompilerGeneric::~ShaderCompilerGeneric() +{ + /* Ensure all the requested batches have been retrieved. */ + BLI_assert(batches.is_empty()); +} + +BatchHandle ShaderCompilerGeneric::batch_compile(Span &infos) +{ + BatchHandle handle = next_batch_handle++; + batches.add(handle, {{}, infos, true}); + Batch &batch = batches.lookup(handle); + batch.shaders.reserve(infos.size()); + for (const shader::ShaderCreateInfo *info : infos) { + batch.shaders.append(compile(*info, true)); + } + return handle; +} + +bool ShaderCompilerGeneric::batch_is_ready(BatchHandle handle) +{ + bool is_ready = batches.lookup(handle).is_ready; + return is_ready; +} + +Vector ShaderCompilerGeneric::batch_finalize(BatchHandle &handle) +{ + Vector shaders = batches.pop(handle).shaders; + handle = 0; + return shaders; +} + +/** \} */ + } // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index ac99f8551d3..23d7cad9ba7 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -17,6 +17,7 @@ #include "BLI_map.hh" +#include #include namespace blender { @@ -77,7 +78,9 @@ class Shader { Shader(const char *name); virtual ~Shader(); - virtual void init(const shader::ShaderCreateInfo &info) = 0; + /* `is_batch_compilation` is true when the shader is being compiled as 
part of a + * `GPU_shader_batch`. Backends that use the `ShaderCompilerGeneric` can ignore it. */ + virtual void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) = 0; virtual void vertex_shader_from_glsl(MutableSpan sources) = 0; virtual void geometry_shader_from_glsl(MutableSpan sources) = 0; @@ -160,6 +163,43 @@ static inline const Shader *unwrap(const GPUShader *vert) return reinterpret_cast(vert); } +class ShaderCompiler { + protected: + struct Sources { + std::string vert; + std::string geom; + std::string frag; + std::string comp; + }; + + public: + Shader *compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation); + + virtual BatchHandle batch_compile(Span &infos) = 0; + virtual bool batch_is_ready(BatchHandle handle) = 0; + virtual Vector batch_finalize(BatchHandle &handle) = 0; +}; + +/* Generic (fully synchronous) implementation for backends that don't implement their own + * ShaderCompiler. Used by Vulkan and Metal. */ +class ShaderCompilerGeneric : public ShaderCompiler { + private: + struct Batch { + Vector shaders; + Vector infos; + bool is_ready = false; + }; + BatchHandle next_batch_handle = 1; + Map batches; + + public: + ~ShaderCompilerGeneric(); + + virtual BatchHandle batch_compile(Span &infos) override; + virtual bool batch_is_ready(BatchHandle handle) override; + virtual Vector batch_finalize(BatchHandle &handle) override; +}; + enum class Severity { Unknown, Warning, diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 0bb7dbf6806..b4d4362ae3e 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -267,6 +267,8 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context) /* Initialize samplers. 
*/ this->sampler_state_cache_init(); + + compiler = new ShaderCompilerGeneric(); } MTLContext::~MTLContext() @@ -369,6 +371,8 @@ MTLContext::~MTLContext() if (this->device) { [this->device release]; } + + delete compiler; } void MTLContext::begin_frame() diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 3a9cf2089bc..8d815724989 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -277,7 +277,7 @@ class MTLShader : public Shader { NSString *fragment_function_name_); ~MTLShader(); - void init(const shader::ShaderCreateInfo & /*info*/) override {} + void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {} /* Assign GLSL source. */ void vertex_shader_from_glsl(MutableSpan sources) override; diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index f5b2f1e0f5a..7f1efc3ec29 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -10,6 +10,9 @@ #if defined(WIN32) # include "BLI_winstuff.h" #endif +#include "BLI_subprocess.hh" +#include "BLI_threads.h" +#include "DNA_userdef_types.h" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -594,6 +597,13 @@ void GLBackend::capabilities_init() detect_workarounds(); +#if BLI_SUBPROCESS_SUPPORT + GCaps.max_parallel_compilations = std::min(int(U.max_shader_compilation_subprocesses), + BLI_system_thread_count()); +#else + GCaps.max_parallel_compilations = 0; +#endif + /* Disable this feature entirely when not debugging. 
*/ if ((G.debug & G_DEBUG_GPU) == 0) { GLContext::debug_layer_support = false; diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index 5d7f1189b44..4635de1d402 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -39,6 +39,8 @@ class GLBackend : public GPUBackend { renderdoc::api::Renderdoc renderdoc_; #endif + GLShaderCompiler compiler_; + public: GLBackend() { @@ -64,6 +66,11 @@ class GLBackend : public GPUBackend { return static_cast(GPUBackend::get()); } + GLShaderCompiler *get_compiler() + { + return &compiler_; + } + void samplers_update() override { GLTexture::samplers_update(); diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.cc b/source/blender/gpu/opengl/gl_compilation_subprocess.cc new file mode 100644 index 00000000000..5cdf88f804d --- /dev/null +++ b/source/blender/gpu/opengl/gl_compilation_subprocess.cc @@ -0,0 +1,222 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gl_compilation_subprocess.hh" + +#if BLI_SUBPROCESS_SUPPORT + +# include "BKE_appdir.hh" +# include "BLI_fileops.hh" +# include "BLI_hash.hh" +# include "BLI_path_util.h" +# include "CLG_log.h" +# include "GHOST_C-api.h" +# include "GPU_context.hh" +# include "GPU_init_exit.hh" +# include +# include +# include + +# ifndef _WIN32 +# include +# endif + +namespace blender::gpu { + +class SubprocessShader { + GLuint vert_ = 0; + GLuint frag_ = 0; + GLuint program_ = 0; + bool success_ = false; + + public: + SubprocessShader(const char *vert_src, const char *frag_src) + { + GLint status; + + vert_ = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vert_, 1, &vert_src, nullptr); + glCompileShader(vert_); + glGetShaderiv(vert_, GL_COMPILE_STATUS, &status); + if (!status) { + return; + } + + frag_ = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(frag_, 1, &frag_src, nullptr); + glCompileShader(frag_); + 
glGetShaderiv(frag_, GL_COMPILE_STATUS, &status); + if (!status) { + return; + } + + program_ = glCreateProgram(); + glAttachShader(program_, vert_); + glAttachShader(program_, frag_); + glLinkProgram(program_); + glGetProgramiv(program_, GL_LINK_STATUS, &status); + if (!status) { + return; + } + + success_ = true; + } + + ~SubprocessShader() + { + glDeleteShader(vert_); + glDeleteShader(frag_); + glDeleteProgram(program_); + } + /* Fill the header in `memory`; `size` is only set when the binary fits and was written. */ + ShaderBinaryHeader *get_binary(void *memory) + { + ShaderBinaryHeader *bin = reinterpret_cast(memory); + bin->format = 0; + bin->size = 0; + GLint len = 0; + if (success_) { + glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &len); + if (size_t(len) + sizeof(ShaderBinaryHeader) < compilation_subprocess_shared_memory_size) { + bin->size = len; + glGetProgramBinary(program_, len, nullptr, &bin->format, &bin->data_start); + } + } + return bin; + } +}; + +/* Check if the binary is valid and can be loaded by the driver. */ +static bool validate_binary(void *binary) +{ + ShaderBinaryHeader *bin = reinterpret_cast(binary); + GLuint program = glCreateProgram(); + glProgramBinary(program, bin->format, &bin->data_start, bin->size); + GLint status = GL_FALSE; + glGetProgramiv(program, GL_LINK_STATUS, &status); + glDeleteProgram(program); + return status; +} + +} // namespace blender::gpu + +void GPU_compilation_subprocess_run(const char *subprocess_name) +{ + using namespace blender; + using namespace blender::gpu; + +# ifndef _WIN32 + /** NOTE: Technically, the parent process could have crashed before this.
*/ + pid_t ppid = getppid(); +# endif + + CLG_init(); + + std::string name = subprocess_name; + SharedMemory shared_mem(name, compilation_subprocess_shared_memory_size, false); + if (!shared_mem.get_data()) { + std::cerr << "Compilation Subprocess: Failed to open shared memory " << subprocess_name + << "\n"; + return; + } + SharedSemaphore start_semaphore(name + "_START", true); + SharedSemaphore end_semaphore(name + "_END", true); + SharedSemaphore close_semaphore(name + "_CLOSE", true); + + GHOST_SystemHandle ghost_system = GHOST_CreateSystemBackground(); + BLI_assert(ghost_system); + GHOST_GPUSettings gpu_settings = {0}; + gpu_settings.context_type = GHOST_kDrawingContextTypeOpenGL; + GHOST_ContextHandle ghost_context = GHOST_CreateGPUContext(ghost_system, gpu_settings); + if (ghost_context == nullptr) { + std::cerr << "Compilation Subprocess: Failed to initialize GHOST context for " + << subprocess_name << "\n"; + GHOST_DisposeSystem(ghost_system); + return; + } + GHOST_ActivateGPUContext(ghost_context); + GPUContext *gpu_context = GPU_context_create(nullptr, ghost_context); + GPU_init(); + + BKE_tempdir_init(nullptr); + std::string cache_dir = std::string(BKE_tempdir_base()) + "BLENDER_SHADER_CACHE" + SEP_STR; + BLI_dir_create_recursive(cache_dir.c_str()); + + while (true) { + /* Process events to avoid crashes on Wayland. 
+ * See https://bugreports.qt.io/browse/QTBUG-81504 */ + GHOST_ProcessEvents(ghost_system, false); + +# ifdef _WIN32 + start_semaphore.decrement(); +# else + bool lost_parent = false; + while (!lost_parent && !start_semaphore.try_decrement(1000)) { + lost_parent = getppid() != ppid; + } + if (lost_parent) { + std::cerr << "Compilation Subprocess: Lost parent process\n"; + break; + } +# endif + + if (close_semaphore.try_decrement()) { + break; + } + + const char *shaders = reinterpret_cast(shared_mem.get_data()); + + const char *vert_src = shaders; + const char *frag_src = shaders + strlen(shaders) + 1; + + DefaultHash hasher; + uint64_t vert_hash = hasher(vert_src); + uint64_t frag_hash = hasher(frag_src); + std::string hash_str = std::to_string(vert_hash) + "_" + std::to_string(frag_hash); + std::string cache_path = cache_dir + SEP_STR + hash_str; + + /* TODO: This should lock the files? */ + if (BLI_exists(cache_path.c_str())) { + /* Read cached binary. */ + fstream file(cache_path, std::ios::binary | std::ios::in | std::ios::ate); + std::streamsize size = file.tellg(); + if (size <= compilation_subprocess_shared_memory_size) { + file.seekg(0, std::ios::beg); + file.read(reinterpret_cast(shared_mem.get_data()), size); + /* Ensure it's valid. */ + if (validate_binary(shared_mem.get_data())) { + end_semaphore.increment(); + continue; + } + else { + std::cout << "Compilation Subprocess: Failed to load cached shader binary " << hash_str + << "\n"; + } + } + else { + /* This should never happen, since shaders larger than the pool size should be discarded + * and compiled in the main Blender process. 
*/ + std::cerr << "Compilation Subprocess: Wrong size for cached shader binary " << hash_str + << "\n"; + BLI_assert_unreachable(); + } + } + + SubprocessShader shader(vert_src, frag_src); + ShaderBinaryHeader *binary = shader.get_binary(shared_mem.get_data()); + + end_semaphore.increment(); + + fstream file(cache_path, std::ios::binary | std::ios::out); + file.write(reinterpret_cast(shared_mem.get_data()), + binary->size + offsetof(ShaderBinaryHeader, data_start)); + } + + GPU_exit(); + GPU_context_discard(gpu_context); + GHOST_DisposeGPUContext(ghost_system, ghost_context); + GHOST_DisposeSystem(ghost_system); +} + +#endif diff --git a/source/blender/gpu/opengl/gl_compilation_subprocess.hh b/source/blender/gpu/opengl/gl_compilation_subprocess.hh new file mode 100644 index 00000000000..022e731f51c --- /dev/null +++ b/source/blender/gpu/opengl/gl_compilation_subprocess.hh @@ -0,0 +1,31 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include "GPU_compilation_subprocess.hh" + +#if BLI_SUBPROCESS_SUPPORT + +# include "BLI_sys_types.h" + +namespace blender::gpu { + +/* The size of the memory pools shared by Blender and the compilation subprocesses. */ +constexpr size_t compilation_subprocess_shared_memory_size = 1024 * 1024 * 5; /* 5mB */ + +struct ShaderBinaryHeader { + /* Size of the shader binary data. */ + int32_t size; + /* Magic number that identifies the format of this shader binary (Driver-defined). + * This (and size) is set to 0 when the shader has failed to compile. */ + uint32_t format; + /* When casting a shared memory pool into a ShaderBinaryHeader*, this is the first byte of the + * shader binary data. 
*/ + uint8_t data_start; +}; + +} // namespace blender::gpu + +#endif diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc index de95e643e80..cbf3ce9d885 100644 --- a/source/blender/gpu/opengl/gl_context.cc +++ b/source/blender/gpu/opengl/gl_context.cc @@ -84,6 +84,8 @@ GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list active_fb = back_left; static_cast(state_manager)->active_fb = static_cast( active_fb); + + compiler = GLBackend::get()->get_compiler(); } GLContext::~GLContext() diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 4e3a7cd426c..15d1718d200 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -8,22 +8,34 @@ #include +#include "BKE_appdir.hh" #include "BKE_global.hh" #include "BLI_string.h" +#include "BLI_time.h" #include "BLI_vector.hh" +#include "BLI_system.h" +#include BLI_SYSTEM_PID_H + #include "GPU_capabilities.hh" #include "GPU_platform.hh" +#include "gpu_capabilities_private.hh" #include "gpu_shader_dependency_private.hh" #include "gl_debug.hh" #include "gl_vertex_buffer.hh" +#include "gl_compilation_subprocess.hh" #include "gl_shader.hh" #include "gl_shader_interface.hh" #include +#include +#ifdef WIN32 +# define popen _popen +# define pclose _pclose +#endif using namespace blender; using namespace blender::gpu; @@ -51,8 +63,10 @@ GLShader::~GLShader() #endif } -void GLShader::init(const shader::ShaderCreateInfo &info) +void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) { + async_compilation_ = is_batch_compilation; + /* Extract the constants names from info and store them locally. 
*/ for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) { specialization_constant_names_.append(constant.name.c_str()); @@ -1093,14 +1107,8 @@ const char *GLShader::glsl_patch_get(GLenum gl_stage) GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan sources, - const GLSources &gl_sources) + GLSources &gl_sources) { - GLuint shader = glCreateShader(gl_stage); - if (shader == 0) { - fprintf(stderr, "GLShader: Error: Could not create shader object.\n"); - return 0; - } - /* Patch the shader sources to include specialization constants. */ std::string constants_source; Vector recreated_sources; @@ -1117,6 +1125,12 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, sources[SOURCES_INDEX_VERSION] = glsl_patch_get(gl_stage); sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS] = constants_source.c_str(); + if (async_compilation_) { + gl_sources[SOURCES_INDEX_VERSION].source = std::string(sources[SOURCES_INDEX_VERSION]); + gl_sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS].source = std::string( + sources[SOURCES_INDEX_SPECIALIZATION_CONSTANTS]); + } + if (DEBUG_LOG_SHADER_SRC_ON_ERROR) { /* Store the generated source for printing in case the link fails. */ StringRefNull source_type; @@ -1141,6 +1155,17 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, } } + if (async_compilation_) { + /* Only build the sources. 
*/ + return 0; + } + + GLuint shader = glCreateShader(gl_stage); + if (shader == 0) { + fprintf(stderr, "GLShader: Error: Could not create shader object.\n"); + return 0; + } + glShaderSource(shader, sources.size(), sources.data(), nullptr); glCompileShader(shader); @@ -1180,8 +1205,8 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, void GLShader::update_program_and_sources(GLSources &stage_sources, MutableSpan sources) { - const bool has_specialization_constants = !constants.types.is_empty(); - if (has_specialization_constants && stage_sources.is_empty()) { + const bool store_sources = !constants.types.is_empty() || async_compilation_; + if (store_sources && stage_sources.is_empty()) { stage_sources = sources; } @@ -1231,10 +1256,23 @@ bool GLShader::finalize(const shader::ShaderCreateInfo *info) geometry_shader_from_glsl(sources); } - if (!program_link()) { + if (async_compilation_) { + return true; + } + + program_link(); + return post_finalize(info); +} + +bool GLShader::post_finalize(const shader::ShaderCreateInfo *info) +{ + if (!check_link_status()) { return false; } + /* Reset for specialization constants variations. 
*/ + async_compilation_ = false; + GLuint program_id = program_get(); if (info != nullptr && info->legacy_resource_location_ == false) { interface = new GLShaderInterface(program_id, *info); @@ -1450,13 +1488,18 @@ GLShader::GLProgram::~GLProgram() glDeleteProgram(program_id); } -bool GLShader::program_link() +void GLShader::program_link() { BLI_assert(program_active_ != nullptr); if (program_active_->program_id == 0) { program_active_->program_id = glCreateProgram(); debug::object_label(GL_PROGRAM, program_active_->program_id, name); } + + if (async_compilation_) { + return; + } + GLuint program_id = program_active_->program_id; if (program_active_->vert_shader) { @@ -1472,7 +1515,11 @@ bool GLShader::program_link() glAttachShader(program_id, program_active_->compute_shader); } glLinkProgram(program_id); +} +bool GLShader::check_link_status() +{ + GLuint program_id = program_active_->program_id; GLint status; glGetProgramiv(program_id, GL_LINK_STATUS, &status); if (!status) { @@ -1542,3 +1589,256 @@ GLuint GLShader::program_get() } /** \} */ + +#if BLI_SUBPROCESS_SUPPORT + +/* -------------------------------------------------------------------- */ +/** \name Compiler workers + * \{ */ + +GLCompilerWorker::GLCompilerWorker() +{ + static size_t pipe_id = 0; + pipe_id++; + + std::string name = "BLENDER_SHADER_COMPILER_" + std::to_string(getpid()) + "_" + + std::to_string(pipe_id); + + shared_mem_ = std::make_unique( + name, compilation_subprocess_shared_memory_size, true); + start_semaphore_ = std::make_unique(name + "_START", false); + end_semaphore_ = std::make_unique(name + "_END", false); + close_semaphore_ = std::make_unique(name + "_CLOSE", false); + + subprocess_.create({"--compilation-subprocess", name.c_str()}); +} + +GLCompilerWorker::~GLCompilerWorker() +{ + close_semaphore_->increment(); + /* Flag start so the subprocess can reach the close semaphore. 
*/ + start_semaphore_->increment(); +} + +void GLCompilerWorker::compile(StringRefNull vert, StringRefNull frag) +{ + BLI_assert(state_ == AVAILABLE); + + strcpy((char *)shared_mem_->get_data(), vert.c_str()); + strcpy((char *)shared_mem_->get_data() + vert.size() + sizeof('\0'), frag.c_str()); + + start_semaphore_->increment(); + + state_ = COMPILATION_REQUESTED; + compilation_start = BLI_time_now_seconds(); +} + +bool GLCompilerWorker::is_ready() +{ + BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY)); + if (state_ == COMPILATION_READY) { + return true; + } + + if (end_semaphore_->try_decrement()) { + state_ = COMPILATION_READY; + } + + return state_ == COMPILATION_READY; +} + +bool GLCompilerWorker::is_lost() +{ + /* Use a timeout for hanged processes. */ + float max_timeout_seconds = 30.0f; + return !subprocess_.is_running() || + (BLI_time_now_seconds() - compilation_start) > max_timeout_seconds; +} + +bool GLCompilerWorker::load_program_binary(GLint program) +{ + BLI_assert(ELEM(state_, COMPILATION_REQUESTED, COMPILATION_READY)); + if (state_ == COMPILATION_REQUESTED) { + end_semaphore_->decrement(); + state_ = COMPILATION_READY; + } + + ShaderBinaryHeader *binary = (ShaderBinaryHeader *)shared_mem_->get_data(); + + state_ = COMPILATION_FINISHED; + + if (binary->size > 0) { + glProgramBinary(program, binary->format, &binary->data_start, binary->size); + return true; + } + + return false; +} + +void GLCompilerWorker::release() +{ + state_ = AVAILABLE; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name GLShaderCompiler + * \{ */ + +GLShaderCompiler::~GLShaderCompiler() +{ + BLI_assert(batches.is_empty()); + + for (GLCompilerWorker *worker : workers_) { + delete worker; + } +} + +GLCompilerWorker *GLShaderCompiler::get_compiler_worker(const char *vert, const char *frag) +{ + GLCompilerWorker *result = nullptr; + for (GLCompilerWorker *compiler : workers_) { + if (compiler->state_ == 
GLCompilerWorker::AVAILABLE) { + result = compiler; + break; + } + } + if (!result && workers_.size() < GCaps.max_parallel_compilations) { + result = new GLCompilerWorker(); + workers_.append(result); + } + if (result) { + result->compile(vert, frag); + } + return result; +} + +bool GLShaderCompiler::worker_is_lost(GLCompilerWorker *&worker) +{ + if (worker->is_lost()) { + std::cerr << "ERROR: Compilation subprocess lost\n"; + workers_.remove_first_occurrence_and_reorder(worker); + delete worker; + worker = nullptr; + } + + return worker == nullptr; +} + +BatchHandle GLShaderCompiler::batch_compile(Span &infos) +{ + BLI_assert(GPU_use_parallel_compilation()); + + std::scoped_lock lock(mutex_); + BatchHandle handle = next_batch_handle++; + batches.add(handle, {}); + Batch &batch = batches.lookup(handle); + batch.items.reserve(infos.size()); + batch.is_ready = false; + + for (const shader::ShaderCreateInfo *info : infos) { + const_cast(info)->finalize(); + CompilationWork item = {}; + item.info = info; + item.do_async_compilation = !info->vertex_source_.is_empty() && + !info->fragment_source_.is_empty() && + info->compute_source_.is_empty() && + info->geometry_source_.is_empty(); + if (item.do_async_compilation) { + item.shader = static_cast(compile(*info, true)); + for (const char *src : item.shader->vertex_sources_.sources_get()) { + item.vertex_src.append(src); + } + for (const char *src : item.shader->fragment_sources_.sources_get()) { + item.fragment_src.append(src); + } + /* Both sources are copied into shared memory with their null terminators. */ + size_t required_size = item.vertex_src.size() + item.fragment_src.size() + 2; + if (required_size <= compilation_subprocess_shared_memory_size) { + item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str()); + } + else { + delete item.shader; + item.do_async_compilation = false; + } + } + batch.items.append(item); + } + return handle; +} + +bool GLShaderCompiler::batch_is_ready(BatchHandle handle) +{ + std::scoped_lock lock(mutex_); + Batch &batch =
batches.lookup(handle); + if (batch.is_ready) { + return true; + } + + batch.is_ready = true; + for (CompilationWork &item : batch.items) { + if (item.is_ready) { + continue; + } + + if (!item.do_async_compilation) { + /* Compile it locally. */ + item.shader = static_cast(compile(*item.info, false)); + item.is_ready = true; + continue; + } + + if (!item.worker) { + /* Try to acquire an available worker. */ + item.worker = get_compiler_worker(item.vertex_src.c_str(), item.fragment_src.c_str()); + } + else if (item.worker->is_ready()) { + /* Retrieve the binary compiled by the worker. */ + if (!item.worker->load_program_binary(item.shader->program_active_->program_id) || + !item.shader->post_finalize(item.info)) + { + /* Compilation failed, try to compile it locally. */ + delete item.shader; + item.shader = nullptr; + item.do_async_compilation = false; + } + else { + item.is_ready = true; + } + item.worker->release(); + item.worker = nullptr; + } + else if (worker_is_lost(item.worker)) { + /* We lost the worker, try to compile it locally. 
*/ + delete item.shader; + item.shader = nullptr; + item.do_async_compilation = false; + } + + if (!item.is_ready) { + batch.is_ready = false; + } + } + + return batch.is_ready; +} + +Vector GLShaderCompiler::batch_finalize(BatchHandle &handle) +{ + while (!batch_is_ready(handle)) { + BLI_time_sleep_ms(1); + } + std::scoped_lock lock(mutex_); + Batch batch = batches.pop(handle); + Vector result; + for (CompilationWork &item : batch.items) { + result.append(item.shader); + } + handle = 0; + return result; +} + +/** \} */ + +#endif diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index d4b7c618bb3..6240896d7d8 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -13,10 +13,14 @@ #include #include "BLI_map.hh" +#include "BLI_subprocess.hh" +#include "BLI_utility_mixins.hh" #include "gpu_shader_create_info.hh" #include "gpu_shader_private.hh" +#include + namespace blender::gpu { /** @@ -48,6 +52,7 @@ class GLSources : public Vector { class GLShader : public Shader { friend shader::ShaderCreateInfo; friend shader::StageInterfaceInfo; + friend class GLShaderCompiler; private: struct GLProgram { @@ -85,6 +90,8 @@ class GLShader : public Shader { */ GLProgram *program_active_ = nullptr; + bool async_compilation_ = false; + /** * When the shader uses Specialization Constants these attribute contains the sources to * rebuild shader stages. When Specialization Constants aren't used they are empty to @@ -112,7 +119,8 @@ class GLShader : public Shader { /** * Link the active program. */ - bool program_link(); + void program_link(); + bool check_link_status(); /** * Return a GLProgram program id that reflects the current state of shader.constants.values. 
@@ -131,7 +139,7 @@ class GLShader : public Shader { GLShader(const char *name); ~GLShader(); - void init(const shader::ShaderCreateInfo &info) override; + void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override; /** Return true on success. */ void vertex_shader_from_glsl(MutableSpan sources) override; @@ -139,6 +147,7 @@ class GLShader : public Shader { void fragment_shader_from_glsl(MutableSpan sources) override; void compute_shader_from_glsl(MutableSpan sources) override; bool finalize(const shader::ShaderCreateInfo *info = nullptr) override; + bool post_finalize(const shader::ShaderCreateInfo *info = nullptr); void warm_cache(int /*limit*/) override{}; std::string resources_declare(const shader::ShaderCreateInfo &info) const override; @@ -191,7 +200,7 @@ class GLShader : public Shader { /** Create, compile and attach the shader stage to the shader program. */ GLuint create_shader_stage(GLenum gl_stage, MutableSpan sources, - const GLSources &gl_sources); + GLSources &gl_sources); /** * \brief features available on newer implementation such as native barycentric coordinates @@ -204,6 +213,84 @@ class GLShader : public Shader { MEM_CXX_CLASS_ALLOC_FUNCS("GLShader"); }; +#if BLI_SUBPROCESS_SUPPORT + +class GLCompilerWorker { + friend class GLShaderCompiler; + + private: + BlenderSubprocess subprocess_; + std::unique_ptr shared_mem_; + std::unique_ptr start_semaphore_; + std::unique_ptr end_semaphore_; + std::unique_ptr close_semaphore_; + enum eState { + /* The worker has been acquired and the compilation has been requested. */ + COMPILATION_REQUESTED, + /* The shader binary result is ready to be read. */ + COMPILATION_READY, + /* The binary result has been loaded into a program and the worker can be released. */ + COMPILATION_FINISHED, + /* The worker is not currently in use and can be acquired. 
*/ + AVAILABLE + }; + eState state_ = AVAILABLE; + double compilation_start = 0; + + GLCompilerWorker(); + ~GLCompilerWorker(); + + void compile(StringRefNull vert, StringRefNull frag); + bool is_ready(); + bool load_program_binary(GLint program); + void release(); + + /* Check if the process may have closed/crashed/hanged. */ + bool is_lost(); +}; + +class GLShaderCompiler : public ShaderCompiler { + private: + std::mutex mutex_; + Vector workers_; + + struct CompilationWork { + GLCompilerWorker *worker = nullptr; + GLShader *shader = nullptr; + const shader::ShaderCreateInfo *info = nullptr; + bool do_async_compilation = false; + + std::string vertex_src; + std::string fragment_src; + + bool is_ready = false; + }; + + struct Batch { + Vector items; + bool is_ready = false; + }; + + BatchHandle next_batch_handle = 1; + Map batches; + + GLCompilerWorker *get_compiler_worker(const char *vert, const char *frag); + bool worker_is_lost(GLCompilerWorker *&worker); + + public: + ~GLShaderCompiler(); + + virtual BatchHandle batch_compile(Span &infos) override; + virtual bool batch_is_ready(BatchHandle handle) override; + virtual Vector batch_finalize(BatchHandle &handle) override; +}; + +#else + +class GLShaderCompiler : public ShaderCompilerGeneric {}; + +#endif + class GLLogParser : public GPULogParser { public: const char *parse_line(const char *source_combined, diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 5ebf7ac404e..76ca1fccfc0 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -36,6 +36,8 @@ VKContext::VKContext(void *ghost_window, VKFrameBuffer *framebuffer = new VKFrameBuffer("back_left"); back_left = framebuffer; active_fb = framebuffer; + + compiler = new ShaderCompilerGeneric(); } VKContext::~VKContext() @@ -51,6 +53,8 @@ VKContext::~VKContext() delete imm; imm = nullptr; + + delete compiler; } void VKContext::sync_backbuffer() diff --git 
a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc index 71123336fdc..9c5f2737844 100644 --- a/source/blender/gpu/vulkan/vk_shader.cc +++ b/source/blender/gpu/vulkan/vk_shader.cc @@ -568,7 +568,7 @@ VKShader::VKShader(const char *name) : Shader(name) context_ = VKContext::get(); } -void VKShader::init(const shader::ShaderCreateInfo &info) +void VKShader::init(const shader::ShaderCreateInfo &info, bool /*is_batch_compilation*/) { VKShaderInterface *vk_interface = new VKShaderInterface(); vk_interface->init(info); diff --git a/source/blender/gpu/vulkan/vk_shader.hh b/source/blender/gpu/vulkan/vk_shader.hh index 286941cc993..f096f83e751 100644 --- a/source/blender/gpu/vulkan/vk_shader.hh +++ b/source/blender/gpu/vulkan/vk_shader.hh @@ -49,7 +49,7 @@ class VKShader : public Shader { VKShader(const char *name); virtual ~VKShader(); - void init(const shader::ShaderCreateInfo &info) override; + void init(const shader::ShaderCreateInfo &info, bool is_batch_compilation) override; void vertex_shader_from_glsl(MutableSpan sources) override; void geometry_shader_from_glsl(MutableSpan sources) override; diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index 8b670da63d3..e1ecea0a938 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -979,11 +979,12 @@ typedef struct UserDef { /** #eGPUBackendType */ short gpu_backend; + /** Max number of parallel shader compilation subprocesses. */ + short max_shader_compilation_subprocesses; + /** Number of samples for FPS display calculations. */ short playback_fps_samples; - char _pad7[2]; - /** Private, defaults to 20 for 72 DPI setting. 
*/ short widget_unit; short anisotropic_filter; diff --git a/source/blender/makesrna/intern/rna_userdef.cc b/source/blender/makesrna/intern/rna_userdef.cc index 7288f5ee8a6..8168ea5b47d 100644 --- a/source/blender/makesrna/intern/rna_userdef.cc +++ b/source/blender/makesrna/intern/rna_userdef.cc @@ -6196,6 +6196,15 @@ static void rna_def_userdef_system(BlenderRNA *brna) "GPU Backend", "GPU backend to use (requires restarting Blender for changes to take effect)"); + prop = RNA_def_property(srna, "max_shader_compilation_subprocesses", PROP_INT, PROP_NONE); + RNA_def_property_range(prop, 0, INT16_MAX); + RNA_def_property_ui_text(prop, + "Max Shader Compilation Subprocesses", + "Max number of parallel shader compilation subprocesses, " + "clamped at the max threads supported by the CPU " + "(requires restarting Blender for changes to take effect). " + "Setting it to 0 disables subprocess shader compilation "); + /* Network. */ prop = RNA_def_property(srna, "use_online_access", PROP_BOOLEAN, PROP_NONE); diff --git a/source/creator/creator.cc b/source/creator/creator.cc index 6dcdf98ca40..ff2a9c5031e 100644 --- a/source/creator/creator.cc +++ b/source/creator/creator.cc @@ -71,6 +71,8 @@ #include "RNA_define.hh" +#include "GPU_compilation_subprocess.hh" + #ifdef WITH_FREESTYLE # include "FRS_freestyle.h" #endif @@ -328,6 +330,15 @@ int main(int argc, # endif /* USE_WIN32_UNICODE_ARGS */ #endif /* WIN32 */ +/* `BLI_subprocess.hh` defines `BLI_SUBPROCESS_SUPPORT` to 0 or 1: test the value. */ +#if defined(WITH_OPENGL_BACKEND) && BLI_SUBPROCESS_SUPPORT + if (strcmp(argv[0], "--compilation-subprocess") == 0) { + BLI_assert(argc == 2); + GPU_compilation_subprocess_run(argv[1]); + return 0; + } +#endif + /* NOTE: Special exception for guarded allocator type switch: * we need to perform switch from lock-free to fully * guarded allocator before any allocation happened.