This is the first commit of the several required to support subprocess-based parallel compilation on OpenGL. This provides the base API and implementation, and exposes the max subprocesses setting on the UI, but it's not used by any code yet. More information and the rest of the code can be found in #121925. This one includes: - A new `GPU_shader_batch` API that allows requesting the compilation of multiple shaders at once, allowing GPU backends to compile them in parallel and asynchronously without blocking the Blender UI. - A virtual `ShaderCompiler` class that backends can use to add their own implementation. - A `ShaderCompilerGeneric` class that implements synchronous/blocking compilation of batches for backends that don't have their own implementation yet. - A `GLShaderCompiler` that supports parallel compilation using subprocesses. - A new `BLI_subprocess` API, including IPC (required for the `GLShaderCompiler` implementation). - The implementation of the subprocess program in `GPU_compilation_subprocess`. - A new `Max Shader Compilation Subprocesses` option in `Preferences > System > Memory & Limits` to enable parallel shader compilation and set the max number of subprocesses to allocate (each subprocess has a relatively high memory footprint). Implementation Overview: There's a single `GLShaderCompiler` shared by all OpenGL contexts. This class stores a pool of up to `GCaps.max_parallel_compilations` subprocesses that can be used for compilation. Each subprocess has a shared memory pool used for sending the shader source code from the main Blender process and for receiving the already compiled shader binary from the subprocess. This is synchronized using a series of shared semaphores. The subprocesses maintain a shader cache on disk inside a `BLENDER_SHADER_CACHE` folder in the OS temporary folder. Shaders that fail to compile in a subprocess are compiled again locally to produce proper error reports. Hung subprocesses are currently detected using a timeout of 30s. 
Pull Request: https://projects.blender.org/blender/blender/pulls/122232
121 lines
3.4 KiB
C++
121 lines
3.4 KiB
C++
/* SPDX-FileCopyrightText: 2016 by Mike Erwin. All rights reserved.
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup gpu
|
|
*
|
|
* This interface allows GPU to manage GL objects for multiple contexts and threads.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
#include "GPU_context.hh"
|
|
|
|
#include "gpu_debug_private.hh"
|
|
#include "gpu_framebuffer_private.hh"
|
|
#include "gpu_immediate_private.hh"
|
|
#include "gpu_shader_private.hh"
|
|
#include "gpu_state_private.hh"
|
|
|
|
#include <pthread.h>
|
|
|
|
struct GPUMatrixState;
|
|
|
|
namespace blender::gpu {
|
|
|
|
class Context {
|
|
public:
|
|
/** State management */
|
|
Shader *shader = nullptr;
|
|
FrameBuffer *active_fb = nullptr;
|
|
GPUMatrixState *matrix_state = nullptr;
|
|
StateManager *state_manager = nullptr;
|
|
Immediate *imm = nullptr;
|
|
|
|
ShaderCompiler *compiler = nullptr;
|
|
|
|
/**
|
|
* All 4 window frame-buffers.
|
|
* None of them are valid in an off-screen context.
|
|
* Right frame-buffers are only available if using stereo rendering.
|
|
* Front frame-buffers contains (in principle, but not always) the last frame color.
|
|
* Default frame-buffer is back_left.
|
|
*/
|
|
FrameBuffer *back_left = nullptr;
|
|
FrameBuffer *front_left = nullptr;
|
|
FrameBuffer *back_right = nullptr;
|
|
FrameBuffer *front_right = nullptr;
|
|
|
|
DebugStack debug_stack;
|
|
bool debug_is_capturing = false;
|
|
|
|
/* GPUContext counter used to assign a unique ID to each GPUContext.
|
|
* NOTE(Metal): This is required by the Metal Backend, as a bug exists in the global OS shader
|
|
* cache wherein compilation of identical source from two distinct threads can result in an
|
|
* invalid cache collision, result in a broken shader object. Appending the unique context ID
|
|
* onto compiled sources ensures the source hashes are different. */
|
|
static int context_counter;
|
|
int context_id = 0;
|
|
|
|
protected:
|
|
/** Thread on which this context is active. */
|
|
pthread_t thread_;
|
|
bool is_active_;
|
|
/** Avoid including GHOST headers. Can be nullptr for off-screen contexts. */
|
|
void *ghost_window_;
|
|
|
|
public:
|
|
Context();
|
|
virtual ~Context();
|
|
|
|
static Context *get();
|
|
|
|
virtual void activate() = 0;
|
|
virtual void deactivate() = 0;
|
|
virtual void begin_frame() = 0;
|
|
virtual void end_frame() = 0;
|
|
|
|
/* Will push all pending commands to the GPU. */
|
|
virtual void flush() = 0;
|
|
/* Will wait until the GPU has finished executing all command. */
|
|
virtual void finish() = 0;
|
|
|
|
virtual void memory_statistics_get(int *r_total_mem, int *r_free_mem) = 0;
|
|
|
|
virtual void debug_group_begin(const char *, int){};
|
|
virtual void debug_group_end(){};
|
|
|
|
/* Returns true if capture successfully started. */
|
|
virtual bool debug_capture_begin(const char *title) = 0;
|
|
virtual void debug_capture_end() = 0;
|
|
virtual void *debug_capture_scope_create(const char *name) = 0;
|
|
virtual bool debug_capture_scope_begin(void *scope) = 0;
|
|
virtual void debug_capture_scope_end(void *scope) = 0;
|
|
|
|
/* Consider all buffers slot empty after these call for error checking.
|
|
* But doesn't really free them. */
|
|
virtual void debug_unbind_all_ubo() = 0;
|
|
virtual void debug_unbind_all_ssbo() = 0;
|
|
|
|
bool is_active_on_thread();
|
|
};
|
|
|
|
/* Syntactic sugar. */
|
|
/**
 * Cast a `Context` pointer to the `GPUContext` handle type.
 * Only the pointer type changes (plain `reinterpret_cast`); a null pointer stays null.
 */
static inline GPUContext *wrap(Context *ctx)
{
  GPUContext *handle = reinterpret_cast<GPUContext *>(ctx);
  return handle;
}
|
|
/**
 * Cast a `GPUContext` handle back to the `Context` pointer type.
 * Only the pointer type changes (plain `reinterpret_cast`); a null pointer stays null.
 */
static inline Context *unwrap(GPUContext *ctx)
{
  Context *context = reinterpret_cast<Context *>(ctx);
  return context;
}
|
|
/**
 * Const overload: cast a `const GPUContext` handle to a `const Context` pointer.
 * Only the pointer type changes (plain `reinterpret_cast`); constness is preserved.
 */
static inline const Context *unwrap(const GPUContext *ctx)
{
  const Context *context = reinterpret_cast<const Context *>(ctx);
  return context;
}
|
|
|
|
} // namespace blender::gpu
|