Metal: Add support for parallel compilation and precompilation specialisation

This speeds up EEVEE startup and material compilation time.

Authored by Apple: James McCarthy
Pull Request: https://projects.blender.org/blender/blender/pulls/125657
This commit is contained in:
Jason Fielder
2024-09-30 11:21:28 +02:00
committed by Clément Foucault
parent 13391c14d8
commit eb3fe75392
13 changed files with 658 additions and 17 deletions

View File

@@ -874,6 +874,7 @@ MultiTestApp *multitestapp_new(void)
if (!app->sys) {
fatal("Unable to create ghost system");
}
GPU_backend_ghost_system_set(app->sys);
if (!GHOST_AddEventConsumer(app->sys, consumer)) {
fatal("Unable to add multitest event consumer ");

View File

@@ -83,3 +83,7 @@ void GPU_render_end();
/* For operations which need to run exactly once per frame -- even if there are no render updates.
*/
void GPU_render_step();
/* For when we need access to a system context in order to create a GPU context. */
void GPU_backend_ghost_system_set(void *ghost_system_handle);
void *GPU_backend_ghost_system_get();
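For context, the front-end changes later in this commit register the GHOST system handle immediately after creating it. A minimal sketch of that call order (error handling omitted; names as used in this commit):

GHOST_SystemHandle ghost_system = GHOST_CreateSystem();
/* Hand the system handle to the GPU module so the backend can later create
 * additional GHOST GPU contexts, e.g. for shader compile threads. */
GPU_backend_ghost_system_set(ghost_system);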

View File

@@ -231,7 +231,10 @@ struct ShaderSpecialization {
* Request the compilation of multiple specialization constant variations at once,
* allowing the backend to use multithreaded compilation.
* Returns a handle that can be used to poll if all variations have been compiled.
* NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal.
* A NULL handle indicates that no compilation of any variant was possible (likely due to
* some required state not being currently available) and so no batch was created. Compilation
* of the specialized variant will instead occur at draw/dispatch time.
* NOTE: This function is asynchronous on OpenGL and Metal and a no-op on Vulkan.
* Batches are processed one by one in FIFO order.
* WARNING: Binding a specialization before the batch finishes will fail.
*/
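A caller-side sketch of the polling contract described above, using the backend ShaderCompiler interface added further down in this commit (how `compiler` and `specializations` are obtained is outside this excerpt):

SpecializationBatchHandle handle = compiler->precompile_specializations(specializations);
if (handle == 0) {
  /* No batch was created; the specialized variants compile at draw/dispatch time. */
}
else {
  /* Poll until the whole batch is baked; the handle is zeroed once ready. */
  while (!compiler->specialization_batch_is_ready(handle)) {
    /* Do other work or yield here. */
  }
}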

View File

@@ -13,6 +13,8 @@
* - free can be called from any thread
*/
#include "GHOST_C-api.h"
#include "BKE_global.hh"
#include "BLI_assert.h"
@@ -241,6 +243,17 @@ static eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static std::optional<eGPUBackendType> g_backend_type_override = std::nullopt;
static std::optional<bool> g_backend_type_supported = std::nullopt;
static GPUBackend *g_backend = nullptr;
static GHOST_SystemHandle g_ghost_system = nullptr;
void GPU_backend_ghost_system_set(void *ghost_system_handle)
{
g_ghost_system = reinterpret_cast<GHOST_SystemHandle>(ghost_system_handle);
}
void *GPU_backend_ghost_system_get()
{
return g_ghost_system;
}
void GPU_backend_type_selection_set(const eGPUBackendType backend)
{

View File

@@ -28,6 +28,7 @@
#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include <sys/sysctl.h>
namespace blender::gpu {
@@ -285,6 +286,64 @@ bool supports_barycentric_whitelist(id<MTLDevice> device)
return supported_gpu && should_support_barycentrics;
}
bool is_apple_sillicon(id<MTLDevice> device)
{
NSString *gpu_name = [device name];
BLI_assert([gpu_name length]);
const char *vendor = [gpu_name UTF8String];
/* Known good configs. */
return (strstr(vendor, "Apple") || strstr(vendor, "APPLE"));
}
static int get_num_performance_cpu_cores(id<MTLDevice> device)
{
const int SYSCTL_BUF_LENGTH = 16;
int num_performance_cores = -1;
unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;
if (is_apple_sillicon(device)) {
/* On Apple Silicon query the number of performance cores */
if (sysctlbyname("hw.perflevel0.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
0)
{
num_performance_cores = sysctl_buffer[0];
}
}
else {
/* On Intel just return the logical core count */
if (sysctlbyname("hw.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) == 0) {
num_performance_cores = sysctl_buffer[0];
}
}
BLI_assert(num_performance_cores != -1);
return num_performance_cores;
}
static int get_num_efficiency_cpu_cores(id<MTLDevice> device)
{
if (is_apple_sillicon(device)) {
/* On Apple Silicon query the number of efficiency cores */
const int SYSCTL_BUF_LENGTH = 16;
int num_efficiency_cores = -1;
unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;
if (sysctlbyname("hw.perflevel1.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
0)
{
num_efficiency_cores = sysctl_buffer[0];
}
BLI_assert(num_efficiency_cores != -1);
return num_efficiency_cores;
}
else {
return 0;
}
}
bool MTLBackend::metal_is_supported()
{
/* Device compatibility information using Metal Feature-set tables.
@@ -392,6 +451,10 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
}
#endif
/* CPU Info */
MTLBackend::capabilities.num_performance_cores = get_num_performance_cpu_cores(ctx->device);
MTLBackend::capabilities.num_efficiency_cores = get_num_efficiency_cpu_cores(ctx->device);
/* Common Global Capabilities. */
GCaps.max_texture_size = ([device supportsFamily:MTLGPUFamilyApple3] ||
MTLBackend::capabilities.supports_family_mac1) ?
@@ -430,6 +493,9 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.geometry_shader_support = false;
/* Compile shaders on performance cores but leave one free so UI is still responsive */
GCaps.max_parallel_compilations = MTLBackend::capabilities.num_performance_cores - 1;
/* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
* Can use argument buffers if a higher limit is required. */
GCaps.max_shader_storage_buffer_bindings = 14;

View File

@@ -57,6 +57,10 @@ struct MTLCapabilities {
bool supports_family_mac_catalyst1 = false;
bool supports_family_mac_catalyst2 = false;
AppleGPUType gpu = APPLE_GPU_UNKNOWN;
/* CPU Info */
int num_performance_cores = -1;
int num_efficiency_cores = -1;
};
} // namespace gpu

View File

@@ -268,7 +268,12 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context)
/* Initialize samplers. */
this->sampler_state_cache_init();
compiler = new ShaderCompilerGeneric();
if (GPU_use_parallel_compilation()) {
compiler = new MTLShaderCompiler();
}
else {
compiler = new ShaderCompilerGeneric();
}
}
MTLContext::~MTLContext()
@@ -2217,8 +2222,15 @@ const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state
return nullptr;
}
MTLShader *active_shader = this->pipeline_state.active_shader;
/* Set descriptor to default shader constants. */
MTLComputePipelineStateDescriptor compute_pipeline_descriptor(active_shader->constants.values);
const MTLComputePipelineStateInstance *compute_pso_inst =
this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
this->pipeline_state.active_shader->bake_compute_pipeline_state(this,
compute_pipeline_descriptor);
if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) {
MTL_LOG_WARNING("No valid compute PSO for compute dispatch!", );
return nullptr;

View File

@@ -347,6 +347,12 @@ struct MTLComputePipelineStateDescriptor {
/* Specialization constants map. */
SpecializationStateDescriptor specialization_state;
MTLComputePipelineStateDescriptor() {}
MTLComputePipelineStateDescriptor(Vector<Shader::Constants::Value> values)
{
specialization_state.values = values;
}
/* Comparison Operator for caching. */
bool operator==(const MTLComputePipelineStateDescriptor &other) const
{

View File

@@ -20,6 +20,7 @@
#include <functional>
#include <unordered_map>
#include <deque>
#include <mutex>
#include <thread>
@@ -264,9 +265,14 @@ class MTLShader : public Shader {
void *push_constant_data_ = nullptr;
bool push_constant_modified_ = false;
/** Special definition for Max TotalThreadsPerThreadgroup tuning. */
/* Special definition for Max TotalThreadsPerThreadgroup tuning. */
uint maxTotalThreadsPerThreadgroup_Tuning_ = 0;
/* Set to true when batch compiling */
bool async_compilation_ = false;
bool finalize_shader(const shader::ShaderCreateInfo *info = nullptr);
public:
MTLShader(MTLContext *ctx, const char *name);
MTLShader(MTLContext *ctx,
@@ -278,7 +284,7 @@ class MTLShader : public Shader {
NSString *fragment_function_name_);
~MTLShader();
void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {}
void init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation) override;
/* Assign GLSL source. */
void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
@@ -296,6 +302,14 @@ class MTLShader : public Shader {
{
return valid_;
}
bool has_compute_shader_lib()
{
return (shader_library_compute_ != nil);
}
bool has_parent_shader()
{
return (parent_shader_ != nil);
}
MTLRenderPipelineStateDescriptor &get_current_pipeline_state()
{
return current_pipeline_state_;
@@ -375,7 +389,9 @@ class MTLShader : public Shader {
MTLPrimitiveTopologyClass prim_type,
const MTLRenderPipelineStateDescriptor &pipeline_descriptor);
MTLComputePipelineStateInstance *bake_compute_pipeline_state(MTLContext *ctx);
MTLComputePipelineStateInstance *bake_compute_pipeline_state(
MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor);
const MTLComputePipelineStateCommon &get_compute_common_state()
{
return compute_pso_common_state_;
@@ -392,6 +408,94 @@ class MTLShader : public Shader {
MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader");
};
class MTLParallelShaderCompiler {
private:
enum ParallelWorkType {
PARALLELWORKTYPE_UNSPECIFIED,
PARALLELWORKTYPE_COMPILE_SHADER,
PARALLELWORKTYPE_BAKE_PSO,
};
struct ParallelWork {
const shader::ShaderCreateInfo *info = nullptr;
class MTLShaderCompiler *shader_compiler = nullptr;
MTLShader *shader = nullptr;
Vector<Shader::Constants::Value> specialization_values;
ParallelWorkType work_type = PARALLELWORKTYPE_UNSPECIFIED;
bool is_ready = false;
};
struct Batch {
Vector<ParallelWork *> items;
bool is_ready = false;
};
std::mutex batch_mutex;
BatchHandle next_batch_handle = 1;
Map<BatchHandle, Batch> batches;
std::vector<std::thread> compile_threads;
volatile bool terminate_compile_threads;
std::condition_variable cond_var;
std::mutex queue_mutex;
std::deque<ParallelWork *> parallel_work_queue;
void parallel_compilation_thread_func(GPUContext *blender_gpu_context);
BatchHandle create_batch(size_t batch_size);
void add_item_to_batch(ParallelWork *work_item, BatchHandle batch_handle);
void add_parallel_item_to_queue(ParallelWork *work_item, BatchHandle batch_handle);
std::atomic<int> ref_count;
public:
MTLParallelShaderCompiler();
~MTLParallelShaderCompiler();
void create_compile_threads();
BatchHandle batch_compile(MTLShaderCompiler *shader_compiler,
Span<const shader::ShaderCreateInfo *> &infos);
bool batch_is_ready(BatchHandle handle);
Vector<Shader *> batch_finalize(BatchHandle &handle);
SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
bool specialization_batch_is_ready(SpecializationBatchHandle &handle);
void increment_ref_count()
{
ref_count++;
}
void decrement_ref_count()
{
ref_count--;
}
int get_ref_count()
{
return ref_count;
}
};
class MTLShaderCompiler : public ShaderCompiler {
private:
MTLParallelShaderCompiler *parallel_shader_compiler;
public:
MTLShaderCompiler();
virtual ~MTLShaderCompiler() override;
virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
virtual bool batch_is_ready(BatchHandle handle) override;
virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
virtual SpecializationBatchHandle precompile_specializations(
Span<ShaderSpecialization> specializations) override;
virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override;
void release_parallel_shader_compiler();
};
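For illustration, the intended flow through the batch interface declared above (a sketch only; `compiler` and `infos` are assumed to exist in the calling code):

BatchHandle batch = compiler->batch_compile(infos);
while (!compiler->batch_is_ready(batch)) {
  /* The main thread stays free; work runs on up to (performance cores - 1) compile threads. */
}
/* Blocks until ready, returns the compiled shaders and zeroes the handle. */
Vector<Shader *> shaders = compiler->batch_finalize(batch);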
/* Vertex format conversion.
* Determines whether it is possible to resize a vertex attribute type
* during input assembly. A conversion is implied by the difference

View File

@@ -8,9 +8,11 @@
#include "BKE_global.hh"
#include "BLI_time.h"
#include "DNA_userdef_types.h"
#include "BLI_string.h"
#include "BLI_time.h"
#include <algorithm>
#include <fstream>
#include <iostream>
@@ -37,7 +39,9 @@
#include "mtl_texture.hh"
#include "mtl_vertex_buffer.hh"
extern char datatoc_mtl_shader_common_msl[];
#include "GHOST_C-api.h"
extern const char datatoc_mtl_shader_common_msl[];
using namespace blender;
using namespace blender::gpu;
@@ -168,6 +172,11 @@ MTLShader::~MTLShader()
}
}
void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
{
async_compilation_ = is_batch_compilation;
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -462,7 +471,10 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* If this is a compute shader, bake base PSO for compute straight-away.
* NOTE: This will compile the base unspecialized variant. */
if (is_compute) {
this->bake_compute_pipeline_state(context_);
/* Set descriptor to default shader constants */
MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);
this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
}
}
@@ -708,6 +720,8 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
push_constant_modified_ = is_dirty;
}
/* Attempts to pre-generate a PSO based on the parent shader's PSO
* (Render shaders only) */
void MTLShader::warm_cache(int limit)
{
if (parent_shader_ != nullptr) {
@@ -1450,7 +1464,8 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
}
}
MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(
MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
@@ -1459,13 +1474,6 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLConte
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);
/* Evaluate descriptor for specialization constants. */
MTLComputePipelineStateDescriptor compute_pipeline_descriptor;
/* Specialization configuration.
* NOTE: If allow_specialized is disabled, we will build the base un-specialized variant. */
compute_pipeline_descriptor.specialization_state = {this->constants.values};
/* Check if current PSO exists in the cache. */
pso_cache_lock_.lock();
MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
@@ -1806,4 +1814,421 @@ bool MTLShader::has_transform_feedback_varying(std::string str)
tf_output_name_list_.end());
}
/** \} */
/* Since this is going to be compiling shaders in a multi-threaded fashion we
* don't want to create an instance per context, as we want to restrict the
* number of simultaneous compilation threads to ensure system responsiveness.
* Hence the global shared instance. */
MTLParallelShaderCompiler *g_shared_parallel_shader_compiler = nullptr;
std::mutex g_shared_parallel_shader_compiler_mutex;
MTLParallelShaderCompiler *get_shared_parallel_shader_compiler()
{
std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
if (!g_shared_parallel_shader_compiler) {
g_shared_parallel_shader_compiler = new MTLParallelShaderCompiler();
}
else {
g_shared_parallel_shader_compiler->increment_ref_count();
}
return g_shared_parallel_shader_compiler;
}
void release_shared_parallel_shader_compiler()
{
std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
if (!g_shared_parallel_shader_compiler) {
return;
}
g_shared_parallel_shader_compiler->decrement_ref_count();
if (g_shared_parallel_shader_compiler->get_ref_count() == 0) {
delete g_shared_parallel_shader_compiler;
g_shared_parallel_shader_compiler = nullptr;
}
}
/* -------------------------------------------------------------------- */
/** \name MTLParallelShaderCompiler
* \{ */
MTLParallelShaderCompiler::MTLParallelShaderCompiler()
{
BLI_assert(GPU_use_parallel_compilation());
terminate_compile_threads = false;
}
MTLParallelShaderCompiler::~MTLParallelShaderCompiler()
{
BLI_assert(batches.is_empty());
terminate_compile_threads = true;
cond_var.notify_all();
for (auto &thread : compile_threads) {
thread.join();
}
}
void MTLParallelShaderCompiler::create_compile_threads()
{
std::unique_lock<std::mutex> lock(queue_mutex);
/* Return if the compilation threads already exist */
if (!compile_threads.empty()) {
return;
}
/* Limit the number of compiler threads to (performance cores - 1) to
* leave one thread free for main thread/UI responsiveness */
const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
int max_mtlcompiler_threads = capabilities.num_performance_cores - 1;
/* Save the main thread context */
GPUContext *main_thread_context = GPU_context_active_get();
MTLContext *metal_context = static_cast<MTLContext *>(unwrap(main_thread_context));
id<MTLDevice> metal_device = metal_context->device;
#if defined(MAC_OS_VERSION_13_3)
/* Clamp the number of threads if necessary. */
if (@available(macOS 13.3, *)) {
/* Check we've set the flag to allow more than 2 compile threads. */
BLI_assert(metal_device.shouldMaximizeConcurrentCompilation);
max_mtlcompiler_threads = MIN(int([metal_device maximumConcurrentCompilationTaskCount]),
max_mtlcompiler_threads);
}
#endif
/* GPU settings for context creation. */
GHOST_GPUSettings gpuSettings = {0};
gpuSettings.context_type = GHOST_kDrawingContextTypeMetal;
if (G.debug & G_DEBUG_GPU) {
gpuSettings.flags |= GHOST_gpuDebugContext;
}
gpuSettings.preferred_device.index = U.gpu_preferred_index;
gpuSettings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
gpuSettings.preferred_device.device_id = U.gpu_preferred_device_id;
/* Spawn the compiler threads. */
for (int i = 0; i < max_mtlcompiler_threads; i++) {
/* Grab the system handle. */
GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
GPU_backend_ghost_system_get());
BLI_assert(ghost_system);
/* Create a Ghost GPU Context using the system handle. */
GHOST_ContextHandle ghost_gpu_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
/* Create a GPU context for the compile thread to use. */
GPUContext *per_thread_context = GPU_context_create(nullptr, ghost_gpu_context);
/* Restore the main thread context.
* (required as the above context creation also makes it active). */
GPU_context_active_set(main_thread_context);
/* Create a new thread */
compile_threads.push_back(std::thread([this, per_thread_context] {
this->parallel_compilation_thread_func(per_thread_context);
}));
}
}
void MTLParallelShaderCompiler::parallel_compilation_thread_func(GPUContext *blender_gpu_context)
{
/* Contexts can only be created on the main thread so we have to
* pass one in and make it active here */
GPU_context_active_set(blender_gpu_context);
MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);
/* This context is only for compilation; it does not need its own instance of the compiler */
shader_compiler->release_parallel_shader_compiler();
/* Loop until we get the terminate signal */
while (!terminate_compile_threads) {
/* Grab the next work item off the queue, or wait... */
ParallelWork *work_item = nullptr;
{
std::unique_lock<std::mutex> lock(queue_mutex);
cond_var.wait(lock,
[&] { return terminate_compile_threads || !parallel_work_queue.empty(); });
if (terminate_compile_threads || parallel_work_queue.empty()) {
continue;
}
work_item = parallel_work_queue.front();
parallel_work_queue.pop_front();
}
/* Compile a shader */
if (work_item->work_type == PARALLELWORKTYPE_COMPILE_SHADER) {
BLI_assert(work_item->info);
const shader::ShaderCreateInfo *shader_info = work_item->info;
work_item->shader = static_cast<MTLShader *>(
work_item->shader_compiler->compile(*shader_info, true));
if (work_item->shader) {
/* Generate and cache any render PSOs if possible (typically materials only)
* (Finalize() will already bake a Compute PSO if possible) */
work_item->shader->warm_cache(-1);
}
}
/* Bake PSO */
else if (work_item->work_type == PARALLELWORKTYPE_BAKE_PSO) {
MTLShader *shader = work_item->shader;
/* Currently only support Compute */
BLI_assert(shader && shader->has_compute_shader_lib());
/* Create descriptor using these specialization constants. */
MTLComputePipelineStateDescriptor compute_pipeline_descriptor(
work_item->specialization_values);
shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
}
else {
BLI_assert(false);
}
work_item->is_ready = true;
}
GPU_context_discard(blender_gpu_context);
}
BatchHandle MTLParallelShaderCompiler::create_batch(size_t batch_size)
{
std::scoped_lock lock(batch_mutex);
BatchHandle batch_handle = next_batch_handle++;
batches.add(batch_handle, {});
Batch &batch = batches.lookup(batch_handle);
if (batch_size) {
batch.items.reserve(batch_size);
}
batch.is_ready = false;
shader_debug_printf("Created batch %llu\n", batch_handle);
return batch_handle;
}
void MTLParallelShaderCompiler::add_item_to_batch(ParallelWork *work_item,
BatchHandle batch_handle)
{
std::scoped_lock lock(batch_mutex);
Batch &batch = batches.lookup(batch_handle);
batch.items.append(work_item);
}
void MTLParallelShaderCompiler::add_parallel_item_to_queue(ParallelWork *work_item,
BatchHandle batch_handle)
{
shader_debug_printf("Request add shader work\n");
if (!terminate_compile_threads) {
/* Defer creation of compilation threads until required */
if (compile_threads.empty()) {
create_compile_threads();
}
add_item_to_batch(work_item, batch_handle);
std::lock_guard<std::mutex> lock(queue_mutex);
parallel_work_queue.push_back(work_item);
cond_var.notify_one();
}
}
BatchHandle MTLParallelShaderCompiler::batch_compile(MTLShaderCompiler *shader_compiler,
Span<const shader::ShaderCreateInfo *> &infos)
{
BLI_assert(GPU_use_parallel_compilation());
BatchHandle batch_handle = create_batch(infos.size());
shader_debug_printf("Batch compile %llu shaders (Batch = %llu)\n", infos.size(), batch_handle);
/* Have to finalize all ShaderCreateInfos *before* any parallel compilation as
* ShaderCreateInfo::finalize() is not thread-safe */
for (const shader::ShaderCreateInfo *info : infos) {
const_cast<ShaderCreateInfo *>(info)->finalize();
}
for (const shader::ShaderCreateInfo *info : infos) {
ParallelWork *work_item = new ParallelWork;
work_item->info = info;
work_item->shader_compiler = shader_compiler;
work_item->is_ready = false;
work_item->shader = nullptr;
work_item->work_type = PARALLELWORKTYPE_COMPILE_SHADER;
add_parallel_item_to_queue(work_item, batch_handle);
}
return batch_handle;
}
bool MTLParallelShaderCompiler::batch_is_ready(BatchHandle handle)
{
std::scoped_lock lock(batch_mutex);
Batch &batch = batches.lookup(handle);
if (batch.is_ready) {
return true;
}
for (ParallelWork *item : batch.items) {
if (item->is_ready) {
continue;
}
else {
return false;
}
}
batch.is_ready = true;
shader_debug_printf("Batch %llu is now ready\n", handle);
return batch.is_ready;
}
Vector<Shader *> MTLParallelShaderCompiler::batch_finalize(BatchHandle &handle)
{
while (!batch_is_ready(handle)) {
BLI_time_sleep_ms(1);
}
std::scoped_lock lock(batch_mutex);
Batch batch = batches.pop(handle);
Vector<Shader *> result;
for (ParallelWork *item : batch.items) {
result.append(item->shader);
delete item;
}
handle = 0;
return result;
}
SpecializationBatchHandle MTLParallelShaderCompiler::precompile_specializations(
Span<ShaderSpecialization> specializations)
{
BLI_assert(GPU_use_parallel_compilation());
/* Zero indicates no batch was created */
SpecializationBatchHandle batch_handle = 0;
for (auto &specialization : specializations) {
MTLShader *sh = static_cast<MTLShader *>(unwrap(specialization.shader));
/* Specialization constants only take effect when we create the PSO.
* We don't have the relevant info to create a Render PSO Descriptor unless
* the shader has a parent shader (has_parent_shader()), but in that case it would (currently) be
* invalid to apply specialization constants. For those reasons we currently only
* support precompilation of Compute shaders.
* (technically we could call makeFunction but the benefit would likely be minimal) */
if (!sh->has_compute_shader_lib()) {
continue;
}
BLI_assert_msg(sh->is_valid(), "Shader must be finalized before precompiling specializations");
/* Defer batch creation until we have some work to do */
if (!batch_handle) {
batch_handle = create_batch(1);
}
ParallelWork *work_item = new ParallelWork;
work_item->info = nullptr;
work_item->is_ready = false;
work_item->shader = sh;
work_item->work_type = PARALLELWORKTYPE_BAKE_PSO;
/* Add the specialization constants to the work-item */
for (const SpecializationConstant &constant : specialization.constants) {
const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
BLI_assert_msg(input != nullptr, "The specialization constant doesn't exist");
work_item->specialization_values[input->location].u = constant.value.u;
}
sh->constants.is_dirty = true;
add_parallel_item_to_queue(work_item, batch_handle);
}
return batch_handle;
}
bool MTLParallelShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
/* Check empty batch case where we have no handle */
if (!handle) {
return true;
}
std::scoped_lock lock(batch_mutex);
Batch &batch = batches.lookup(handle);
if (batch.is_ready) {
return true;
}
for (ParallelWork *item : batch.items) {
if (item->is_ready) {
continue;
}
else {
return false;
}
}
batch.is_ready = true;
shader_debug_printf("Specialization Batch %llu is now ready\n", handle);
/* Handle is zeroed once the batch is ready */
handle = 0;
return batch.is_ready;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name MTLShaderCompiler
* \{ */
MTLShaderCompiler::MTLShaderCompiler()
{
parallel_shader_compiler = get_shared_parallel_shader_compiler();
}
MTLShaderCompiler::~MTLShaderCompiler()
{
release_parallel_shader_compiler();
}
void MTLShaderCompiler::release_parallel_shader_compiler()
{
if (parallel_shader_compiler) {
release_shared_parallel_shader_compiler();
parallel_shader_compiler = nullptr;
}
}
BatchHandle MTLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
{
BLI_assert(parallel_shader_compiler);
return parallel_shader_compiler->batch_compile(this, infos);
}
bool MTLShaderCompiler::batch_is_ready(BatchHandle handle)
{
return parallel_shader_compiler->batch_is_ready(handle);
}
Vector<Shader *> MTLShaderCompiler::batch_finalize(BatchHandle &handle)
{
return parallel_shader_compiler->batch_finalize(handle);
}
SpecializationBatchHandle MTLShaderCompiler::precompile_specializations(
Span<ShaderSpecialization> specializations)
{
return parallel_shader_compiler->precompile_specializations(specializations);
}
bool MTLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
{
return parallel_shader_compiler->specialization_batch_is_ready(handle);
}
/** \} */
} // namespace blender::gpu

View File

@@ -28,6 +28,7 @@ void GPUTest::SetUp()
gpuSettings.context_type = draw_context_type;
gpuSettings.flags = GHOST_gpuDebugContext;
ghost_system = GHOST_CreateSystem();
GPU_backend_ghost_system_set(ghost_system);
ghost_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
GHOST_ActivateGPUContext(ghost_context);
context = GPU_context_create(nullptr, ghost_context);

View File

@@ -1844,6 +1844,7 @@ static bool wm_main_playanim_intern(int argc, const char **argv, PlayArgs *args_
GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);
ps.ghost_data.system = GHOST_CreateSystem();
GPU_backend_ghost_system_set(ps.ghost_data.system);
if (UNLIKELY(ps.ghost_data.system == nullptr)) {
/* GHOST will have reported the back-ends that failed to load. */

View File

@@ -1889,6 +1889,7 @@ void wm_ghost_init(bContext *C)
GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);
g_system = GHOST_CreateSystem();
GPU_backend_ghost_system_set(g_system);
if (UNLIKELY(g_system == nullptr)) {
/* GHOST will have reported the back-ends that failed to load. */