Metal: Add support for parallel compilation and precompilation specialisation

This speeds up EEVEE startup and material compilation time.

Authored by Apple: James McCarthy
Pull Request: https://projects.blender.org/blender/blender/pulls/125657
Committed by: Clément Foucault
Parent: 13391c14d8
Commit: eb3fe75392
@@ -874,6 +874,7 @@ MultiTestApp *multitestapp_new(void)
  if (!app->sys) {
    fatal("Unable to create ghost system");
  }
+  GPU_backend_ghost_system_set(app->sys);

  if (!GHOST_AddEventConsumer(app->sys, consumer)) {
    fatal("Unable to add multitest event consumer ");
@@ -83,3 +83,7 @@ void GPU_render_end();
/* For operations which need to run exactly once per frame -- even if there are no render updates.
 */
void GPU_render_step();

+ /* For when we need access to a system context in order to create a GPU context. */
+ void GPU_backend_ghost_system_set(void *ghost_system_handle);
+ void *GPU_backend_ghost_system_get();
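/* Wiring sketch (illustrative, not part of the header): the window-manager owns
 * the GHOST system and registers it at startup, exactly as the wm_ghost_init and
 * playanim hunks at the end of this diff do. */
GHOST_SystemHandle ghost_system = GHOST_CreateSystem();
GPU_backend_ghost_system_set(ghost_system);
/* Compile threads later fetch it to create their own GPU contexts: */
void *fetched = GPU_backend_ghost_system_get();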
@@ -231,7 +231,10 @@ struct ShaderSpecialization {
 * Request the compilation of multiple specialization constant variations at once,
 * allowing the backend to use multithreaded compilation.
 * Returns a handle that can be used to poll if all variations have been compiled.
-  * NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal.
+  * A NULL handle indicates no compilation of any variant was possible (likely due to
+  * some state being currently unavailable) and so no batch was created. Compilation
+  * of the specialized variant will instead occur at draw/dispatch time.
+  * NOTE: This function is asynchronous on OpenGL and Metal, and a no-op on Vulkan.
 * Batches are processed one by one in FIFO order.
 * WARNING: Binding a specialization before the batch finishes will fail.
 */
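/* Polling sketch (illustrative, not part of the patch): `compiler` stands for any
 * backend ShaderCompiler implementing this interface; gathering the
 * specializations is assumed. */
Vector<ShaderSpecialization> specializations = gather_specializations(); /* Hypothetical helper. */
SpecializationBatchHandle handle = compiler->precompile_specializations(specializations);
/* A zero handle means no batch was created; variants compile at draw/dispatch time. */
while (handle && !compiler->specialization_batch_is_ready(handle)) {
  /* Do other work; worker threads bake the PSOs, and the query zeroes the
   * handle once the whole batch is ready. */
}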
@@ -13,6 +13,8 @@
 * - free can be called from any thread
 */

+ #include "GHOST_C-api.h"

#include "BKE_global.hh"

#include "BLI_assert.h"
@@ -241,6 +243,17 @@ static eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static std::optional<eGPUBackendType> g_backend_type_override = std::nullopt;
static std::optional<bool> g_backend_type_supported = std::nullopt;
static GPUBackend *g_backend = nullptr;
+ static GHOST_SystemHandle g_ghost_system = nullptr;

+ void GPU_backend_ghost_system_set(void *ghost_system_handle)
+ {
+   g_ghost_system = reinterpret_cast<GHOST_SystemHandle>(ghost_system_handle);
+ }

+ void *GPU_backend_ghost_system_get()
+ {
+   return g_ghost_system;
+ }

void GPU_backend_type_selection_set(const eGPUBackendType backend)
{
@@ -28,6 +28,7 @@
#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
+ #include <sys/sysctl.h>

namespace blender::gpu {
@@ -285,6 +286,64 @@ bool supports_barycentric_whitelist(id<MTLDevice> device)
  return supported_gpu && should_support_barycentrics;
}

bool is_apple_sillicon(id<MTLDevice> device)
{
  NSString *gpu_name = [device name];
  BLI_assert([gpu_name length]);

  const char *vendor = [gpu_name UTF8String];

  /* Known good configs. */
  return (strstr(vendor, "Apple") || strstr(vendor, "APPLE"));
}

static int get_num_performance_cpu_cores(id<MTLDevice> device)
{
  const int SYSCTL_BUF_LENGTH = 16;
  int num_performance_cores = -1;
  unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
  size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;

  if (is_apple_sillicon(device)) {
    /* On Apple Silicon query the number of performance cores */
    if (sysctlbyname("hw.perflevel0.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
        0)
    {
      num_performance_cores = sysctl_buffer[0];
    }
  }
  else {
    /* On Intel just return the logical core count */
    if (sysctlbyname("hw.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) == 0) {
      num_performance_cores = sysctl_buffer[0];
    }
  }
  BLI_assert(num_performance_cores != -1);
  return num_performance_cores;
}

static int get_num_efficiency_cpu_cores(id<MTLDevice> device)
{
  if (is_apple_sillicon(device)) {
    /* On Apple Silicon query the number of efficiency cores */
    const int SYSCTL_BUF_LENGTH = 16;
    int num_efficiency_cores = -1;
    unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
    size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;
    if (sysctlbyname("hw.perflevel1.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
        0)
    {
      num_efficiency_cores = sysctl_buffer[0];
    }

    BLI_assert(num_efficiency_cores != -1);
    return num_efficiency_cores;
  }
  else {
    return 0;
  }
}
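/* Side note (an assumption, not in the patch): both queries above could share
 * one helper with the same sysctlbyname contract; a minimal sketch reading the
 * value directly as an int: */
static int get_sysctl_int(const char *name)
{
  int value = -1;
  size_t length = sizeof(value);
  /* Returns non-zero (leaving `value` untouched) when the key is absent,
   * e.g. hw.perflevel1.logicalcpu on Intel machines. */
  if (sysctlbyname(name, &value, &length, NULL, 0) != 0) {
    return -1;
  }
  return value;
}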
bool MTLBackend::metal_is_supported()
{
  /* Device compatibility information using Metal Feature-set tables.
@@ -392,6 +451,10 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
  }
#endif

+  /* CPU Info */
+  MTLBackend::capabilities.num_performance_cores = get_num_performance_cpu_cores(ctx->device);
+  MTLBackend::capabilities.num_efficiency_cores = get_num_efficiency_cpu_cores(ctx->device);

  /* Common Global Capabilities. */
  GCaps.max_texture_size = ([device supportsFamily:MTLGPUFamilyApple3] ||
                            MTLBackend::capabilities.supports_family_mac1) ?
@@ -430,6 +493,9 @@ void MTLBackend::capabilities_init(MTLContext *ctx)

  GCaps.geometry_shader_support = false;

+  /* Compile shaders on performance cores but leave one free so UI is still responsive */
+  GCaps.max_parallel_compilations = MTLBackend::capabilities.num_performance_cores - 1;

  /* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
   * Can use argument buffers if a higher limit is required. */
  GCaps.max_shader_storage_buffer_bindings = 14;
@@ -57,6 +57,10 @@ struct MTLCapabilities {
  bool supports_family_mac_catalyst1 = false;
  bool supports_family_mac_catalyst2 = false;
  AppleGPUType gpu = APPLE_GPU_UNKNOWN;

+  /* CPU Info */
+  int num_performance_cores = -1;
+  int num_efficiency_cores = -1;
};

}  // namespace gpu
@@ -268,7 +268,12 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context)
  /* Initialize samplers. */
  this->sampler_state_cache_init();

-  compiler = new ShaderCompilerGeneric();
+  if (GPU_use_parallel_compilation()) {
+    compiler = new MTLShaderCompiler();
+  }
+  else {
+    compiler = new ShaderCompilerGeneric();
+  }
}

MTLContext::~MTLContext()
@@ -2217,8 +2222,15 @@ const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state
    return nullptr;
  }

+  MTLShader *active_shader = this->pipeline_state.active_shader;

+  /* Set descriptor to default shader constants. */
+  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(active_shader->constants.values);

  const MTLComputePipelineStateInstance *compute_pso_inst =
-      this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
+      this->pipeline_state.active_shader->bake_compute_pipeline_state(this,
+                                                                      compute_pipeline_descriptor);

  if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) {
    MTL_LOG_WARNING("No valid compute PSO for compute dispatch!");
    return nullptr;
@@ -347,6 +347,12 @@ struct MTLComputePipelineStateDescriptor {
  /* Specialization constants map. */
  SpecializationStateDescriptor specialization_state;

+  MTLComputePipelineStateDescriptor() {}
+  MTLComputePipelineStateDescriptor(Vector<Shader::Constants::Value> values)
+  {
+    specialization_state.values = values;
+  }

  /* Comparison Operator for caching. */
  bool operator==(const MTLComputePipelineStateDescriptor &other) const
  {
@@ -20,6 +20,7 @@
#include <functional>
#include <unordered_map>

+ #include <deque>
#include <mutex>
#include <thread>
@@ -264,9 +265,14 @@ class MTLShader : public Shader {
  void *push_constant_data_ = nullptr;
  bool push_constant_modified_ = false;

-  /** Special definition for Max TotalThreadsPerThreadgroup tuning. */
+  /* Special definition for Max TotalThreadsPerThreadgroup tuning. */
  uint maxTotalThreadsPerThreadgroup_Tuning_ = 0;

+  /* Set to true when batch compiling */
+  bool async_compilation_ = false;

  bool finalize_shader(const shader::ShaderCreateInfo *info = nullptr);

 public:
  MTLShader(MTLContext *ctx, const char *name);
  MTLShader(MTLContext *ctx,
@@ -278,7 +284,7 @@ class MTLShader : public Shader {
            NSString *fragment_function_name_);
  ~MTLShader();

-  void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {}
+  void init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation) override;

  /* Assign GLSL source. */
  void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
@@ -296,6 +302,14 @@ class MTLShader : public Shader {
  {
    return valid_;
  }
+  bool has_compute_shader_lib()
+  {
+    return (shader_library_compute_ != nil);
+  }
+  bool has_parent_shader()
+  {
+    return (parent_shader_ != nil);
+  }
  MTLRenderPipelineStateDescriptor &get_current_pipeline_state()
  {
    return current_pipeline_state_;
@@ -375,7 +389,9 @@ class MTLShader : public Shader {
                    MTLPrimitiveTopologyClass prim_type,
                    const MTLRenderPipelineStateDescriptor &pipeline_descriptor);

-  MTLComputePipelineStateInstance *bake_compute_pipeline_state(MTLContext *ctx);
+  MTLComputePipelineStateInstance *bake_compute_pipeline_state(
+      MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor);

  const MTLComputePipelineStateCommon &get_compute_common_state()
  {
    return compute_pso_common_state_;
@@ -392,6 +408,94 @@ class MTLShader : public Shader {
  MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader");
};

class MTLParallelShaderCompiler {
 private:
  enum ParallelWorkType {
    PARALLELWORKTYPE_UNSPECIFIED,
    PARALLELWORKTYPE_COMPILE_SHADER,
    PARALLELWORKTYPE_BAKE_PSO,
  };

  struct ParallelWork {
    const shader::ShaderCreateInfo *info = nullptr;
    class MTLShaderCompiler *shader_compiler = nullptr;
    MTLShader *shader = nullptr;
    Vector<Shader::Constants::Value> specialization_values;

    ParallelWorkType work_type = PARALLELWORKTYPE_UNSPECIFIED;
    bool is_ready = false;
  };

  struct Batch {
    Vector<ParallelWork *> items;
    bool is_ready = false;
  };

  std::mutex batch_mutex;
  BatchHandle next_batch_handle = 1;
  Map<BatchHandle, Batch> batches;

  std::vector<std::thread> compile_threads;

  volatile bool terminate_compile_threads;
  std::condition_variable cond_var;
  std::mutex queue_mutex;
  std::deque<ParallelWork *> parallel_work_queue;

  void parallel_compilation_thread_func(GPUContext *blender_gpu_context);
  BatchHandle create_batch(size_t batch_size);
  void add_item_to_batch(ParallelWork *work_item, BatchHandle batch_handle);
  void add_parallel_item_to_queue(ParallelWork *work_item, BatchHandle batch_handle);

  std::atomic<int> ref_count = 0;

 public:
  MTLParallelShaderCompiler();
  ~MTLParallelShaderCompiler();

  void create_compile_threads();
  BatchHandle batch_compile(MTLShaderCompiler *shader_compiler,
                            Span<const shader::ShaderCreateInfo *> &infos);
  bool batch_is_ready(BatchHandle handle);
  Vector<Shader *> batch_finalize(BatchHandle &handle);

  SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
  bool specialization_batch_is_ready(SpecializationBatchHandle &handle);

  void increment_ref_count()
  {
    ref_count++;
  }
  void decrement_ref_count()
  {
    ref_count--;
  }
  int get_ref_count()
  {
    return ref_count;
  }
};

class MTLShaderCompiler : public ShaderCompiler {
 private:
  MTLParallelShaderCompiler *parallel_shader_compiler;

 public:
  MTLShaderCompiler();
  virtual ~MTLShaderCompiler() override;

  virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
  virtual bool batch_is_ready(BatchHandle handle) override;
  virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;

  virtual SpecializationBatchHandle precompile_specializations(
      Span<ShaderSpecialization> specializations) override;
  virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override;

  void release_parallel_shader_compiler();
};
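/* Usage sketch (illustrative, not part of the patch): batched compilation via
 * the interface above; `compiler` and the collected `infos` are assumptions. */
Span<const shader::ShaderCreateInfo *> infos = collected_infos; /* Hypothetical. */
BatchHandle batch = compiler->batch_compile(infos);
while (!compiler->batch_is_ready(batch)) {
  /* Poll once per frame; worker threads compile and warm PSO caches meanwhile. */
}
/* Blocks until ready, returns the shaders, and zeroes the handle. */
Vector<Shader *> shaders = compiler->batch_finalize(batch);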
/* Vertex format conversion.
 * Determines whether it is possible to resize a vertex attribute type
 * during input assembly. A conversion is implied by the difference
@@ -8,9 +8,11 @@

#include "BKE_global.hh"

- #include "BLI_time.h"
+ #include "DNA_userdef_types.h"

#include "BLI_string.h"
+ #include "BLI_time.h"

#include <algorithm>
#include <fstream>
#include <iostream>
@@ -37,7 +39,9 @@
#include "mtl_texture.hh"
#include "mtl_vertex_buffer.hh"

- extern char datatoc_mtl_shader_common_msl[];
+ #include "GHOST_C-api.h"

+ extern const char datatoc_mtl_shader_common_msl[];

using namespace blender;
using namespace blender::gpu;
@@ -168,6 +172,11 @@ MTLShader::~MTLShader()
  }
}

+ void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
+ {
+   async_compilation_ = is_batch_compilation;
+ }

/** \} */

/* -------------------------------------------------------------------- */
@@ -462,7 +471,10 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
  /* If this is a compute shader, bake base PSO for compute straight-away.
   * NOTE: This will compile the base unspecialized variant. */
  if (is_compute) {
-    this->bake_compute_pipeline_state(context_);
+    /* Set descriptor to default shader constants. */
+    MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);

+    this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
  }
}
@@ -708,6 +720,8 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
  push_constant_modified_ = is_dirty;
}

+ /* Attempts to pre-generate a PSO based on the parent shader's PSO
+  * (render shaders only). */
void MTLShader::warm_cache(int limit)
{
  if (parent_shader_ != nullptr) {
@@ -1450,7 +1464,8 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
    }
  }

- MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
+ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(
+     MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
{
  /* NOTE(Metal): Bakes and caches a PSO for compute. */
  BLI_assert(this);
@@ -1459,13 +1474,6 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLConte
  BLI_assert(this->is_valid());
  BLI_assert(shader_library_compute_ != nil);

-  /* Evaluate descriptor for specialization constants. */
-  MTLComputePipelineStateDescriptor compute_pipeline_descriptor;

-  /* Specialization configuration.
-   * NOTE: If allow_specialized is disabled, we will build the base un-specialized variant. */
-  compute_pipeline_descriptor.specialization_state = {this->constants.values};

  /* Check if current PSO exists in the cache. */
  pso_cache_lock_.lock();
  MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
@@ -1806,4 +1814,421 @@ bool MTLShader::has_transform_feedback_varying(std::string str)
                     tf_output_name_list_.end());
}

/** \} */

/* Since this is going to be compiling shaders in a multi-threaded fashion we
 * don't want to create an instance per context, as we want to restrict the
 * number of simultaneous compilation threads to ensure system responsiveness.
 * Hence the global shared instance. */
MTLParallelShaderCompiler *g_shared_parallel_shader_compiler = nullptr;
std::mutex g_shared_parallel_shader_compiler_mutex;

MTLParallelShaderCompiler *get_shared_parallel_shader_compiler()
{
  std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);

  if (!g_shared_parallel_shader_compiler) {
    g_shared_parallel_shader_compiler = new MTLParallelShaderCompiler();
  }
  /* Count every acquisition, including the creating one, so that each
   * release pairs up with exactly one increment. */
  g_shared_parallel_shader_compiler->increment_ref_count();
  return g_shared_parallel_shader_compiler;
}

void release_shared_parallel_shader_compiler()
{
  std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);

  if (!g_shared_parallel_shader_compiler) {
    return;
  }

  g_shared_parallel_shader_compiler->decrement_ref_count();
  if (g_shared_parallel_shader_compiler->get_ref_count() == 0) {
    delete g_shared_parallel_shader_compiler;
    g_shared_parallel_shader_compiler = nullptr;
  }
}

/* -------------------------------------------------------------------- */
/** \name MTLParallelShaderCompiler
 * \{ */

MTLParallelShaderCompiler::MTLParallelShaderCompiler()
{
  BLI_assert(GPU_use_parallel_compilation());

  terminate_compile_threads = false;
}

MTLParallelShaderCompiler::~MTLParallelShaderCompiler()
{
  BLI_assert(batches.is_empty());
  terminate_compile_threads = true;
  cond_var.notify_all();

  for (auto &thread : compile_threads) {
    thread.join();
  }
}

void MTLParallelShaderCompiler::create_compile_threads()
{
  std::unique_lock<std::mutex> lock(queue_mutex);

  /* Return if the compilation threads already exist. */
  if (!compile_threads.empty()) {
    return;
  }

  /* Limit the number of compiler threads to (performance cores - 1) to
   * leave one thread free for main thread/UI responsiveness. */
  const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
  int max_mtlcompiler_threads = capabilities.num_performance_cores - 1;

  /* Save the main thread context. */
  GPUContext *main_thread_context = GPU_context_active_get();
  MTLContext *metal_context = static_cast<MTLContext *>(unwrap(main_thread_context));
  id<MTLDevice> metal_device = metal_context->device;

#if defined(MAC_OS_VERSION_13_3)
  /* Clamp the number of threads if necessary. */
  if (@available(macOS 13.3, *)) {
    /* Check we've set the flag to allow more than 2 compile threads
     * (see the sketch after this function). */
    BLI_assert(metal_device.shouldMaximizeConcurrentCompilation);
    max_mtlcompiler_threads = MIN(int([metal_device maximumConcurrentCompilationTaskCount]),
                                  max_mtlcompiler_threads);
  }
#endif

  /* GPU settings for context creation. */
  GHOST_GPUSettings gpuSettings = {0};
  gpuSettings.context_type = GHOST_kDrawingContextTypeMetal;
  if (G.debug & G_DEBUG_GPU) {
    gpuSettings.flags |= GHOST_gpuDebugContext;
  }
  gpuSettings.preferred_device.index = U.gpu_preferred_index;
  gpuSettings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
  gpuSettings.preferred_device.device_id = U.gpu_preferred_device_id;

  /* Spawn the compiler threads. */
  for (int i = 0; i < max_mtlcompiler_threads; i++) {

    /* Grab the system handle. */
    GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
        GPU_backend_ghost_system_get());
    BLI_assert(ghost_system);

    /* Create a Ghost GPU Context using the system handle. */
    GHOST_ContextHandle ghost_gpu_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);

    /* Create a GPU context for the compile thread to use. */
    GPUContext *per_thread_context = GPU_context_create(nullptr, ghost_gpu_context);

    /* Restore the main thread context
     * (required as the above context creation also makes it active). */
    GPU_context_active_set(main_thread_context);

    /* Create a new thread. */
    compile_threads.push_back(std::thread([this, per_thread_context] {
      this->parallel_compilation_thread_func(per_thread_context);
    }));
  }
}
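/* Context sketch (an assumption, not shown in this hunk): the flag asserted
 * above is expected to be enabled once at device initialization, e.g.: */
#if defined(MAC_OS_VERSION_13_3)
if (@available(macOS 13.3, *)) {
  metal_device.shouldMaximizeConcurrentCompilation = YES;
}
#endif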
void MTLParallelShaderCompiler::parallel_compilation_thread_func(GPUContext *blender_gpu_context)
{
  /* Contexts can only be created on the main thread, so we have to
   * pass one in and make it active here. */
  GPU_context_active_set(blender_gpu_context);

  MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
  MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);

  /* This context is only for compilation; it does not need its own instance of the compiler. */
  shader_compiler->release_parallel_shader_compiler();

  /* Loop until we get the terminate signal. */
  while (!terminate_compile_threads) {
    /* Grab the next shader off the queue or wait... */
    ParallelWork *work_item = nullptr;
    {
      std::unique_lock<std::mutex> lock(queue_mutex);
      cond_var.wait(lock,
                    [&] { return terminate_compile_threads || !parallel_work_queue.empty(); });
      if (terminate_compile_threads || parallel_work_queue.empty()) {
        continue;
      }
      work_item = parallel_work_queue.front();
      parallel_work_queue.pop_front();
    }

    /* Compile a shader. */
    if (work_item->work_type == PARALLELWORKTYPE_COMPILE_SHADER) {
      BLI_assert(work_item->info);

      const shader::ShaderCreateInfo *shader_info = work_item->info;
      work_item->shader = static_cast<MTLShader *>(
          work_item->shader_compiler->compile(*shader_info, true));

      if (work_item->shader) {
        /* Generate and cache any render PSOs if possible (typically materials only).
         * (Finalize() will already bake a compute PSO if possible.) */
        work_item->shader->warm_cache(-1);
      }
    }
    /* Bake PSO. */
    else if (work_item->work_type == PARALLELWORKTYPE_BAKE_PSO) {
      MTLShader *shader = work_item->shader;
      /* Currently only compute is supported. */
      BLI_assert(shader && shader->has_compute_shader_lib());

      /* Create descriptor using these specialization constants. */
      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(
          work_item->specialization_values);

      shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
    }
    else {
      BLI_assert(false);
    }
    work_item->is_ready = true;
  }

  GPU_context_discard(blender_gpu_context);
}
BatchHandle MTLParallelShaderCompiler::create_batch(size_t batch_size)
{
  std::scoped_lock lock(batch_mutex);
  BatchHandle batch_handle = next_batch_handle++;
  batches.add(batch_handle, {});
  Batch &batch = batches.lookup(batch_handle);
  if (batch_size) {
    batch.items.reserve(batch_size);
  }
  batch.is_ready = false;
  shader_debug_printf("Created batch %llu\n", batch_handle);
  return batch_handle;
}

void MTLParallelShaderCompiler::add_item_to_batch(ParallelWork *work_item,
                                                  BatchHandle batch_handle)
{
  std::scoped_lock lock(batch_mutex);
  Batch &batch = batches.lookup(batch_handle);
  batch.items.append(work_item);
}

void MTLParallelShaderCompiler::add_parallel_item_to_queue(ParallelWork *work_item,
                                                           BatchHandle batch_handle)
{
  shader_debug_printf("Request add shader work\n");
  if (!terminate_compile_threads) {

    /* Defer creation of compilation threads until required. */
    if (compile_threads.empty()) {
      create_compile_threads();
    }

    add_item_to_batch(work_item, batch_handle);
    std::lock_guard<std::mutex> lock(queue_mutex);
    parallel_work_queue.push_back(work_item);
    cond_var.notify_one();
  }
}

BatchHandle MTLParallelShaderCompiler::batch_compile(MTLShaderCompiler *shader_compiler,
                                                     Span<const shader::ShaderCreateInfo *> &infos)
{
  BLI_assert(GPU_use_parallel_compilation());

  BatchHandle batch_handle = create_batch(infos.size());

  shader_debug_printf("Batch compile %llu shaders (Batch = %llu)\n", infos.size(), batch_handle);

  /* Have to finalize all shader infos *before* any parallel compilation, as
   * ShaderCreateInfo::finalize() is not thread-safe. */
  for (const shader::ShaderCreateInfo *info : infos) {
    const_cast<ShaderCreateInfo *>(info)->finalize();
  }

  for (const shader::ShaderCreateInfo *info : infos) {
    ParallelWork *work_item = new ParallelWork;
    work_item->info = info;
    work_item->shader_compiler = shader_compiler;
    work_item->is_ready = false;
    work_item->shader = nullptr;
    work_item->work_type = PARALLELWORKTYPE_COMPILE_SHADER;
    add_parallel_item_to_queue(work_item, batch_handle);
  }

  return batch_handle;
}
bool MTLParallelShaderCompiler::batch_is_ready(BatchHandle handle)
{
  std::scoped_lock lock(batch_mutex);
  Batch &batch = batches.lookup(handle);
  if (batch.is_ready) {
    return true;
  }

  for (ParallelWork *item : batch.items) {
    if (!item->is_ready) {
      return false;
    }
  }

  batch.is_ready = true;
  shader_debug_printf("Batch %llu is now ready\n", handle);
  return batch.is_ready;
}

Vector<Shader *> MTLParallelShaderCompiler::batch_finalize(BatchHandle &handle)
{
  while (!batch_is_ready(handle)) {
    BLI_time_sleep_ms(1);
  }
  std::scoped_lock lock(batch_mutex);

  Batch batch = batches.pop(handle);
  Vector<Shader *> result;
  for (ParallelWork *item : batch.items) {
    result.append(item->shader);
    delete item;
  }
  handle = 0;
  return result;
}
SpecializationBatchHandle MTLParallelShaderCompiler::precompile_specializations(
    Span<ShaderSpecialization> specializations)
{
  BLI_assert(GPU_use_parallel_compilation());
  /* Zero indicates no batch was created. */
  SpecializationBatchHandle batch_handle = 0;

  for (auto &specialization : specializations) {
    MTLShader *sh = static_cast<MTLShader *>(unwrap(specialization.shader));

    /* Specialization constants only take effect when we create the PSO.
     * We don't have the relevant info to create a render PSO descriptor unless
     * the shader has a parent shader, but in that case it would (currently) be
     * invalid to apply specialization constants. For those reasons we currently only
     * support precompilation of compute shaders.
     * (Technically we could call makeFunction, but the benefit would likely be minimal.) */
    if (!sh->has_compute_shader_lib()) {
      continue;
    }

    BLI_assert_msg(sh->is_valid(), "Shader must be finalized before precompiling specializations");

    /* Defer batch creation until we have some work to do. */
    if (!batch_handle) {
      batch_handle = create_batch(1);
    }

    ParallelWork *work_item = new ParallelWork;
    work_item->info = nullptr;
    work_item->is_ready = false;
    work_item->shader = sh;
    work_item->work_type = PARALLELWORKTYPE_BAKE_PSO;

    /* Add the specialization constants to the work-item, starting from the
     * shader's current values so every constant has an entry. */
    work_item->specialization_values = sh->constants.values;
    for (const SpecializationConstant &constant : specialization.constants) {
      const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
      BLI_assert_msg(input != nullptr, "The specialization constant doesn't exist");
      work_item->specialization_values[input->location].u = constant.value.u;
    }
    sh->constants.is_dirty = true;

    add_parallel_item_to_queue(work_item, batch_handle);
  }
  return batch_handle;
}
bool MTLParallelShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
  /* Check the empty-batch case, where we have no handle. */
  if (!handle) {
    return true;
  }

  std::scoped_lock lock(batch_mutex);
  Batch &batch = batches.lookup(handle);
  if (batch.is_ready) {
    return true;
  }

  for (ParallelWork *item : batch.items) {
    if (!item->is_ready) {
      return false;
    }
  }

  batch.is_ready = true;
  shader_debug_printf("Specialization Batch %llu is now ready\n", handle);
  /* The handle is zeroed once the batch is ready. */
  handle = 0;
  return true;
}

/** \} */
/* -------------------------------------------------------------------- */
/** \name MTLShaderCompiler
 * \{ */

MTLShaderCompiler::MTLShaderCompiler()
{
  parallel_shader_compiler = get_shared_parallel_shader_compiler();
}

MTLShaderCompiler::~MTLShaderCompiler()
{
  release_parallel_shader_compiler();
}

void MTLShaderCompiler::release_parallel_shader_compiler()
{
  if (parallel_shader_compiler) {
    release_shared_parallel_shader_compiler();
    parallel_shader_compiler = nullptr;
  }
}

BatchHandle MTLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
{
  BLI_assert(parallel_shader_compiler);
  return parallel_shader_compiler->batch_compile(this, infos);
}

bool MTLShaderCompiler::batch_is_ready(BatchHandle handle)
{
  return parallel_shader_compiler->batch_is_ready(handle);
}

Vector<Shader *> MTLShaderCompiler::batch_finalize(BatchHandle &handle)
{
  return parallel_shader_compiler->batch_finalize(handle);
}

SpecializationBatchHandle MTLShaderCompiler::precompile_specializations(
    Span<ShaderSpecialization> specializations)
{
  return parallel_shader_compiler->precompile_specializations(specializations);
}

bool MTLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
  return parallel_shader_compiler->specialization_batch_is_ready(handle);
}

/** \} */

}  // namespace blender::gpu
@@ -28,6 +28,7 @@ void GPUTest::SetUp()
  gpuSettings.context_type = draw_context_type;
  gpuSettings.flags = GHOST_gpuDebugContext;
  ghost_system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(ghost_system);
  ghost_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
  GHOST_ActivateGPUContext(ghost_context);
  context = GPU_context_create(nullptr, ghost_context);
@@ -1844,6 +1844,7 @@ static bool wm_main_playanim_intern(int argc, const char **argv, PlayArgs *args_
  GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);

  ps.ghost_data.system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(ps.ghost_data.system);

  if (UNLIKELY(ps.ghost_data.system == nullptr)) {
    /* GHOST will have reported the back-ends that failed to load. */
@@ -1889,6 +1889,7 @@ void wm_ghost_init(bContext *C)
  GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);

  g_system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(g_system);

  if (UNLIKELY(g_system == nullptr)) {
    /* GHOST will have reported the back-ends that failed to load. */