From eb3fe753925bcc888f352e135b4251b3e657ea34 Mon Sep 17 00:00:00 2001
From: Jason Fielder
Date: Mon, 30 Sep 2024 11:21:28 +0200
Subject: [PATCH] Metal: Add support for parallel compilation and
 precompilation specialisation

This speeds up EEVEE startup and material compilation time.

Authored by Apple: James McCarthy

Pull Request: https://projects.blender.org/blender/blender/pulls/125657
---
 intern/ghost/test/multitest/MultiTest.c       |   1 +
 source/blender/gpu/GPU_context.hh             |   4 +
 source/blender/gpu/GPU_shader.hh              |   5 +-
 source/blender/gpu/intern/gpu_context.cc      |  13 +
 source/blender/gpu/metal/mtl_backend.mm       |  66 +++
 source/blender/gpu/metal/mtl_capabilities.hh  |   4 +
 source/blender/gpu/metal/mtl_context.mm       |  16 +-
 .../gpu/metal/mtl_pso_descriptor_state.hh     |   6 +
 source/blender/gpu/metal/mtl_shader.hh        | 110 ++++-
 source/blender/gpu/metal/mtl_shader.mm        | 447 +++++++++++++++++-
 source/blender/gpu/tests/gpu_testing.cc       |   1 +
 .../windowmanager/intern/wm_playanim.cc       |   1 +
 .../blender/windowmanager/intern/wm_window.cc |   1 +
 13 files changed, 658 insertions(+), 17 deletions(-)

diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c
index 9dd89fc4255..af47501bfb7 100644
--- a/intern/ghost/test/multitest/MultiTest.c
+++ b/intern/ghost/test/multitest/MultiTest.c
@@ -874,6 +874,7 @@ MultiTestApp *multitestapp_new(void)
   if (!app->sys) {
     fatal("Unable to create ghost system");
   }
+  GPU_backend_ghost_system_set(app->sys);
 
   if (!GHOST_AddEventConsumer(app->sys, consumer)) {
     fatal("Unable to add multitest event consumer ");
diff --git a/source/blender/gpu/GPU_context.hh b/source/blender/gpu/GPU_context.hh
index ab8040d7406..7c0e69c2e84 100644
--- a/source/blender/gpu/GPU_context.hh
+++ b/source/blender/gpu/GPU_context.hh
@@ -83,3 +83,7 @@ void GPU_render_end();
 /* For operations which need to run exactly once per frame -- even if there are no render
  * updates. */
 void GPU_render_step();
+
+/* For when we need access to a system context in order to create a GPU context. */
+void GPU_backend_ghost_system_set(void *ghost_system_handle);
+void *GPU_backend_ghost_system_get();
diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh
index 7f6dc0eb8d2..990cc41df16 100644
--- a/source/blender/gpu/GPU_shader.hh
+++ b/source/blender/gpu/GPU_shader.hh
@@ -231,7 +231,10 @@ struct ShaderSpecialization {
  * Request the compilation of multiple specialization constant variations at once,
  * allowing the backend to use multithreaded compilation.
  * Returns a handle that can be used to poll if all variations have been compiled.
- * NOTE: This function is asynchronous on OpenGL, and a no-op on Vulkan and Metal.
+ * A NULL handle indicates that no compilation of any variant was possible (likely because
+ * some required state is not currently available) and so no batch was created. Compilation
+ * of the specialized variant will instead occur at draw/dispatch time.
+ * NOTE: This function is asynchronous on OpenGL and Metal, and a no-op on Vulkan.
  * Batches are processed one by one in FIFO order.
  * WARNING: Binding a specialization before the batch finishes will fail.
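+ *
+ * A minimal polling sketch (assuming the `GPU_shader_batch_specializations` and
+ * `GPU_shader_batch_specializations_is_ready` entry points declared alongside this
+ * comment; `"use_lighting"` is a hypothetical constant name):
+ *
+ *   ShaderSpecialization spec = {shader, {{"use_lighting", true}}};
+ *   SpecializationBatchHandle handle = GPU_shader_batch_specializations({spec});
+ *   while (handle && !GPU_shader_batch_specializations_is_ready(handle)) {
+ *     // Do other work; the handle is reset to zero once the batch is ready.
+ *   }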
 */
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index 4c544276a4d..4bfd8e43ea1 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -13,6 +13,8 @@
  * - free can be called from any thread
  */
 
+#include "GHOST_C-api.h"
+
 #include "BKE_global.hh"
 
 #include "BLI_assert.h"
@@ -241,6 +243,17 @@ static eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
 static std::optional<eGPUBackendType> g_backend_type_override = std::nullopt;
 static std::optional<bool> g_backend_type_supported = std::nullopt;
 static GPUBackend *g_backend = nullptr;
+static GHOST_SystemHandle g_ghost_system = nullptr;
+
+void GPU_backend_ghost_system_set(void *ghost_system_handle)
+{
+  g_ghost_system = reinterpret_cast<GHOST_SystemHandle>(ghost_system_handle);
+}
+
+void *GPU_backend_ghost_system_get()
+{
+  return g_ghost_system;
+}
 
 void GPU_backend_type_selection_set(const eGPUBackendType backend)
 {
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index d856e9aefca..925678d92e8 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -28,6 +28,7 @@
 #include <Cocoa/Cocoa.h>
 #include <Metal/Metal.h>
 #include <QuartzCore/QuartzCore.h>
+#include <sys/sysctl.h>
 
 namespace blender::gpu {
 
@@ -285,6 +286,64 @@ bool supports_barycentric_whitelist(id<MTLDevice> device)
   return supported_gpu && should_support_barycentrics;
 }
 
+bool is_apple_silicon(id<MTLDevice> device)
+{
+  NSString *gpu_name = [device name];
+  BLI_assert([gpu_name length]);
+
+  const char *vendor = [gpu_name UTF8String];
+
+  /* Known good configs. */
+  return (strstr(vendor, "Apple") || strstr(vendor, "APPLE"));
+}
+
+static int get_num_performance_cpu_cores(id<MTLDevice> device)
+{
+  const int SYSCTL_BUF_LENGTH = 16;
+  int num_performance_cores = -1;
+  unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
+  size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;
+
+  if (is_apple_silicon(device)) {
+    /* On Apple Silicon, query the number of performance cores. */
+    if (sysctlbyname("hw.perflevel0.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
+        0)
+    {
+      num_performance_cores = sysctl_buffer[0];
+    }
+  }
+  else {
+    /* On Intel, just return the logical core count. */
+    if (sysctlbyname("hw.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) == 0) {
+      num_performance_cores = sysctl_buffer[0];
+    }
+  }
+  BLI_assert(num_performance_cores != -1);
+  return num_performance_cores;
+}
+
+static int get_num_efficiency_cpu_cores(id<MTLDevice> device)
+{
+  if (is_apple_silicon(device)) {
+    /* On Apple Silicon, query the number of efficiency cores. */
+    const int SYSCTL_BUF_LENGTH = 16;
+    int num_efficiency_cores = -1;
+    unsigned char sysctl_buffer[SYSCTL_BUF_LENGTH];
+    size_t sysctl_buffer_length = SYSCTL_BUF_LENGTH;
+    if (sysctlbyname("hw.perflevel1.logicalcpu", &sysctl_buffer, &sysctl_buffer_length, NULL, 0) ==
+        0)
+    {
+      num_efficiency_cores = sysctl_buffer[0];
+    }
+
+    BLI_assert(num_efficiency_cores != -1);
+    return num_efficiency_cores;
+  }
+  else {
+    return 0;
+  }
+}
+
 bool MTLBackend::metal_is_supported()
 {
   /* Device compatibility information using Metal Feature-set tables.
@@ -392,6 +451,10 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
   }
 #endif
 
+  /* CPU Info. */
+  MTLBackend::capabilities.num_performance_cores = get_num_performance_cpu_cores(ctx->device);
+  MTLBackend::capabilities.num_efficiency_cores = get_num_efficiency_cpu_cores(ctx->device);
+
   /* Common Global Capabilities. */
   GCaps.max_texture_size = ([device supportsFamily:MTLGPUFamilyApple3] ||
                             MTLBackend::capabilities.supports_family_mac1) ?
@@ -430,6 +493,9 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
 
   GCaps.geometry_shader_support = false;
 
+  /* Compile shaders on performance cores, but leave one free so the UI stays responsive. */
+  GCaps.max_parallel_compilations = MTLBackend::capabilities.num_performance_cores - 1;
+
   /* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
    * Can use argument buffers if a higher limit is required. */
   GCaps.max_shader_storage_buffer_bindings = 14;
diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh
index bc523423bbb..63393ebb255 100644
--- a/source/blender/gpu/metal/mtl_capabilities.hh
+++ b/source/blender/gpu/metal/mtl_capabilities.hh
@@ -57,6 +57,10 @@ struct MTLCapabilities {
   bool supports_family_mac_catalyst1 = false;
   bool supports_family_mac_catalyst2 = false;
   AppleGPUType gpu = APPLE_GPU_UNKNOWN;
+
+  /* CPU Info. */
+  int num_performance_cores = -1;
+  int num_efficiency_cores = -1;
 };
 
 } // namespace gpu
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 46fa98fd221..325ee7a252c 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -268,7 +268,12 @@ MTLContext::MTLContext(void *ghost_window, void *ghost_context)
   /* Initialize samplers. */
   this->sampler_state_cache_init();
 
-  compiler = new ShaderCompilerGeneric();
+  if (GPU_use_parallel_compilation()) {
+    compiler = new MTLShaderCompiler();
+  }
+  else {
+    compiler = new ShaderCompilerGeneric();
+  }
 }
 
 MTLContext::~MTLContext()
@@ -2217,8 +2222,15 @@ const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state
     return nullptr;
   }
 
+  MTLShader *active_shader = this->pipeline_state.active_shader;
+
+  /* Set descriptor to default shader constants. */
+  MTLComputePipelineStateDescriptor compute_pipeline_descriptor(active_shader->constants.values);
+
   const MTLComputePipelineStateInstance *compute_pso_inst =
-      this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
+      this->pipeline_state.active_shader->bake_compute_pipeline_state(this,
+                                                                      compute_pipeline_descriptor);
+
   if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) {
     MTL_LOG_WARNING("No valid compute PSO for compute dispatch!");
     return nullptr;
diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
index 066e6252f0e..40828498490 100644
--- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
+++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
@@ -347,6 +347,12 @@ struct MTLComputePipelineStateDescriptor {
   /* Specialization constants map. */
   SpecializationStateDescriptor specialization_state;
 
+  MTLComputePipelineStateDescriptor() {}
+  MTLComputePipelineStateDescriptor(Vector<shader::SpecializationConstant::Value> values)
+  {
+    specialization_state.values = values;
+  }
+
   /* Comparison Operator for caching. */
   bool operator==(const MTLComputePipelineStateDescriptor &other) const
   {
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index a23226a55a2..5abdb095f9f 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -20,6 +20,7 @@
 #include <Metal/Metal.h>
 #include <QuartzCore/QuartzCore.h>
+#include <condition_variable>
 #include <functional>
 #include <unordered_map>
 
@@ -264,9 +265,14 @@ class MTLShader : public Shader {
   void *push_constant_data_ = nullptr;
   bool push_constant_modified_ = false;
 
-  /** Special definition for Max TotalThreadsPerThreadgroup tuning. */
+  /* Special definition for Max TotalThreadsPerThreadgroup tuning.
+   */
   uint maxTotalThreadsPerThreadgroup_Tuning_ = 0;
 
+  /* Set to true when batch compiling. */
+  bool async_compilation_ = false;
+
+  bool finalize_shader(const shader::ShaderCreateInfo *info = nullptr);
+
  public:
   MTLShader(MTLContext *ctx, const char *name);
   MTLShader(MTLContext *ctx,
@@ -278,7 +284,7 @@ class MTLShader : public Shader {
             NSString *fragment_function_name_);
   ~MTLShader();
 
-  void init(const shader::ShaderCreateInfo & /*info*/, bool /*is_batch_compilation*/) override {}
+  void init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation) override;
 
   /* Assign GLSL source. */
   void vertex_shader_from_glsl(MutableSpan<StringRefNull> sources) override;
@@ -296,6 +302,14 @@ class MTLShader : public Shader {
   {
     return valid_;
   }
+  bool has_compute_shader_lib()
+  {
+    return (shader_library_compute_ != nil);
+  }
+  bool has_parent_shader()
+  {
+    return (parent_shader_ != nil);
+  }
   MTLRenderPipelineStateDescriptor &get_current_pipeline_state()
   {
     return current_pipeline_state_;
@@ -375,7 +389,9 @@ class MTLShader : public Shader {
                                   MTLPrimitiveTopologyClass prim_type,
                                   const MTLRenderPipelineStateDescriptor &pipeline_descriptor);
 
-  MTLComputePipelineStateInstance *bake_compute_pipeline_state(MTLContext *ctx);
+  MTLComputePipelineStateInstance *bake_compute_pipeline_state(
+      MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor);
+
   const MTLComputePipelineStateCommon &get_compute_common_state()
   {
     return compute_pso_common_state_;
@@ -392,6 +408,94 @@ class MTLShader : public Shader {
   MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader");
 };
 
+class MTLParallelShaderCompiler {
+ private:
+  enum ParallelWorkType {
+    PARALLELWORKTYPE_UNSPECIFIED,
+    PARALLELWORKTYPE_COMPILE_SHADER,
+    PARALLELWORKTYPE_BAKE_PSO,
+  };
+
+  struct ParallelWork {
+    const shader::ShaderCreateInfo *info = nullptr;
+    class MTLShaderCompiler *shader_compiler = nullptr;
+    MTLShader *shader = nullptr;
+    Vector<shader::SpecializationConstant::Value> specialization_values;
+
+    ParallelWorkType work_type = PARALLELWORKTYPE_UNSPECIFIED;
+    bool is_ready = false;
+  };
+
+  struct Batch {
+    Vector<ParallelWork *> items;
+    bool is_ready = false;
+  };
+
+  std::mutex batch_mutex;
+  BatchHandle next_batch_handle = 1;
+  Map<BatchHandle, Batch> batches;
+
+  std::vector<std::thread> compile_threads;
+
+  volatile bool terminate_compile_threads;
+  std::condition_variable cond_var;
+  std::mutex queue_mutex;
+  std::deque<ParallelWork *> parallel_work_queue;
+
+  void parallel_compilation_thread_func(GPUContext *blender_gpu_context);
+  BatchHandle create_batch(size_t batch_size);
+  void add_item_to_batch(ParallelWork *work_item, BatchHandle batch_handle);
+  void add_parallel_item_to_queue(ParallelWork *work_item, BatchHandle batch_handle);
+
+  std::atomic<int> ref_count = 1;
+
+ public:
+  MTLParallelShaderCompiler();
+  ~MTLParallelShaderCompiler();
+
+  void create_compile_threads();
+  BatchHandle batch_compile(MTLShaderCompiler *shader_compiler,
+                            Span<const shader::ShaderCreateInfo *> &infos);
+  bool batch_is_ready(BatchHandle handle);
+  Vector<Shader *> batch_finalize(BatchHandle &handle);
+
+  SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
+  bool specialization_batch_is_ready(SpecializationBatchHandle &handle);
+
+  void increment_ref_count()
+  {
+    ref_count++;
+  }
+  void decrement_ref_count()
+  {
+    ref_count--;
+  }
+  int get_ref_count()
+  {
+    return ref_count;
+  }
+};
+
+class MTLShaderCompiler : public ShaderCompiler {
+ private:
+  MTLParallelShaderCompiler *parallel_shader_compiler;
+
+ public:
+  MTLShaderCompiler();
+  virtual ~MTLShaderCompiler() override;
+
+  virtual BatchHandle batch_compile(Span<const shader::ShaderCreateInfo *> &infos) override;
+  virtual bool batch_is_ready(BatchHandle handle) override;
+  virtual Vector<Shader *> batch_finalize(BatchHandle &handle) override;
+
+  virtual SpecializationBatchHandle precompile_specializations(
+      Span<ShaderSpecialization> specializations) override;
+  virtual bool specialization_batch_is_ready(SpecializationBatchHandle &handle) override;
+
+  void release_parallel_shader_compiler();
+};
+
 /* Vertex format conversion.
  * Determines whether it is possible to resize a vertex attribute type
  * during input assembly. A conversion is implied by the difference
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
index fb97e137bb2..376a85ecf11 100644
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -8,9 +8,11 @@
 
 #include "BKE_global.hh"
 
-#include "BLI_time.h"
+#include "DNA_userdef_types.h"
 
 #include "BLI_string.h"
+#include "BLI_time.h"
+
 #include <algorithm>
 #include <fstream>
 #include <iostream>
@@ -37,7 +39,9 @@
 #include "mtl_texture.hh"
 #include "mtl_vertex_buffer.hh"
 
-extern char datatoc_mtl_shader_common_msl[];
+#include "GHOST_C-api.h"
+
+extern const char datatoc_mtl_shader_common_msl[];
 
 using namespace blender;
 using namespace blender::gpu;
@@ -168,6 +172,11 @@ MTLShader::~MTLShader()
   }
 }
 
+void MTLShader::init(const shader::ShaderCreateInfo & /*info*/, bool is_batch_compilation)
+{
+  async_compilation_ = is_batch_compilation;
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -462,7 +471,10 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
     /* If this is a compute shader, bake base PSO for compute straight-away.
      * NOTE: This will compile the base unspecialized variant. */
     if (is_compute) {
-      this->bake_compute_pipeline_state(context_);
+      /* Set descriptor to default shader constants. */
+      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values);
+
+      this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor);
     }
   }
 
@@ -708,6 +720,8 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
   push_constant_modified_ = is_dirty;
 }
 
+/* Attempts to pre-generate a PSO based on the parent shader's PSO.
+ * (Render shaders only.) */
 void MTLShader::warm_cache(int limit)
 {
   if (parent_shader_ != nullptr) {
@@ -1450,7 +1464,8 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
   }
 }
 
-MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
+MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(
+    MTLContext *ctx, MTLComputePipelineStateDescriptor &compute_pipeline_descriptor)
 {
   /* NOTE(Metal): Bakes and caches a PSO for compute. */
   BLI_assert(this);
@@ -1459,13 +1474,6 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLConte
   BLI_assert(this->is_valid());
   BLI_assert(shader_library_compute_ != nil);
 
-  /* Evaluate descriptor for specialization constants. */
-  MTLComputePipelineStateDescriptor compute_pipeline_descriptor;
-
-  /* Specialization configuration.
-   * NOTE: If allow_specialized is disabled, we will build the base un-specialized variant. */
-  compute_pipeline_descriptor.specialization_state = {this->constants.values};
-
   /* Check if current PSO exists in the cache. */
   pso_cache_lock_.lock();
   MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
       compute_pipeline_descriptor);
@@ -1806,4 +1814,421 @@ bool MTLShader::has_transform_feedback_varying(std::string str)
          tf_output_name_list_.end());
 }
 
+/** \} */
+
+/* Since this is going to be compiling shaders in a multi-threaded fashion, we
+ * don't want to create an instance per context, as we want to restrict the
+ * number of simultaneous compilation threads to ensure system responsiveness.
+ * Hence the global shared instance. */
+MTLParallelShaderCompiler *g_shared_parallel_shader_compiler = nullptr;
+std::mutex g_shared_parallel_shader_compiler_mutex;
+
+MTLParallelShaderCompiler *get_shared_parallel_shader_compiler()
+{
+  std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
+
+  if (!g_shared_parallel_shader_compiler) {
+    g_shared_parallel_shader_compiler = new MTLParallelShaderCompiler();
+  }
+  else {
+    g_shared_parallel_shader_compiler->increment_ref_count();
+  }
+  return g_shared_parallel_shader_compiler;
+}
+
+void release_shared_parallel_shader_compiler()
+{
+  std::scoped_lock lock(g_shared_parallel_shader_compiler_mutex);
+
+  if (!g_shared_parallel_shader_compiler) {
+    return;
+  }
+
+  g_shared_parallel_shader_compiler->decrement_ref_count();
+  if (g_shared_parallel_shader_compiler->get_ref_count() == 0) {
+    delete g_shared_parallel_shader_compiler;
+    g_shared_parallel_shader_compiler = nullptr;
+  }
+}
+
+/* -------------------------------------------------------------------- */
+/** \name MTLParallelShaderCompiler
+ * \{ */
+
+MTLParallelShaderCompiler::MTLParallelShaderCompiler()
+{
+  BLI_assert(GPU_use_parallel_compilation());
+
+  terminate_compile_threads = false;
+}
+
+MTLParallelShaderCompiler::~MTLParallelShaderCompiler()
+{
+  BLI_assert(batches.is_empty());
+  terminate_compile_threads = true;
+  cond_var.notify_all();
+
+  for (auto &thread : compile_threads) {
+    thread.join();
+  }
+}
+
+void MTLParallelShaderCompiler::create_compile_threads()
+{
+  std::unique_lock lock(queue_mutex);
+
+  /* Return if the compilation threads already exist. */
+  if (!compile_threads.empty()) {
+    return;
+  }
+
+  /* Limit the number of compiler threads to (performance cores - 1) to
+   * leave one thread free for main-thread/UI responsiveness. */
+  const MTLCapabilities &capabilities = MTLBackend::get_capabilities();
+  int max_mtlcompiler_threads = capabilities.num_performance_cores - 1;
+
+  /* Save the main thread context. */
+  GPUContext *main_thread_context = GPU_context_active_get();
+  MTLContext *metal_context = static_cast<MTLContext *>(unwrap(main_thread_context));
+  id<MTLDevice> metal_device = metal_context->device;
+
+#if defined(MAC_OS_VERSION_13_3)
+  /* Clamp the number of threads if necessary. */
+  if (@available(macOS 13.3, *)) {
+    /* Check we've set the flag to allow more than 2 compile threads. */
+    BLI_assert(metal_device.shouldMaximizeConcurrentCompilation);
+    max_mtlcompiler_threads = MIN(int([metal_device maximumConcurrentCompilationTaskCount]),
+                                  max_mtlcompiler_threads);
+  }
+#endif
+
+  /* GPU settings for context creation. */
+  GHOST_GPUSettings gpuSettings = {0};
+  gpuSettings.context_type = GHOST_kDrawingContextTypeMetal;
+  if (G.debug & G_DEBUG_GPU) {
+    gpuSettings.flags |= GHOST_gpuDebugContext;
+  }
+  gpuSettings.preferred_device.index = U.gpu_preferred_index;
+  gpuSettings.preferred_device.vendor_id = U.gpu_preferred_vendor_id;
+  gpuSettings.preferred_device.device_id = U.gpu_preferred_device_id;
+
+  /* Spawn the compiler threads. */
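+  /* Each compile thread gets a dedicated GPU context (created below): a GPU context
+   * can only be active on one thread at a time, so the workers cannot share the
+   * main thread's context. */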
+  for (int i = 0; i < max_mtlcompiler_threads; i++) {
+
+    /* Grab the system handle. */
+    GHOST_SystemHandle ghost_system = reinterpret_cast<GHOST_SystemHandle>(
+        GPU_backend_ghost_system_get());
+    BLI_assert(ghost_system);
+
+    /* Create a Ghost GPU Context using the system handle. */
+    GHOST_ContextHandle ghost_gpu_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
+
+    /* Create a GPU context for the compile thread to use. */
+    GPUContext *per_thread_context = GPU_context_create(nullptr, ghost_gpu_context);
+
+    /* Restore the main thread context.
+     * (Required as the above context creation also makes it active.) */
+    GPU_context_active_set(main_thread_context);
+
+    /* Create a new thread. */
+    compile_threads.push_back(std::thread([this, per_thread_context] {
+      this->parallel_compilation_thread_func(per_thread_context);
+    }));
+  }
+}
+
+void MTLParallelShaderCompiler::parallel_compilation_thread_func(GPUContext *blender_gpu_context)
+{
+  /* Contexts can only be created on the main thread, so we have to
+   * pass one in and make it active here. */
+  GPU_context_active_set(blender_gpu_context);
+
+  MTLContext *metal_context = static_cast<MTLContext *>(unwrap(blender_gpu_context));
+  MTLShaderCompiler *shader_compiler = static_cast<MTLShaderCompiler *>(metal_context->compiler);
+
+  /* This context is only for compilation; it does not need its own instance of the compiler. */
+  shader_compiler->release_parallel_shader_compiler();
+
+  /* Loop until we get the terminate signal. */
+  while (!terminate_compile_threads) {
+    /* Grab the next shader off of the queue or wait... */
+    ParallelWork *work_item = nullptr;
+    {
+      std::unique_lock lock(queue_mutex);
+      cond_var.wait(lock,
+                    [&] { return terminate_compile_threads || !parallel_work_queue.empty(); });
+      if (terminate_compile_threads || parallel_work_queue.empty()) {
+        continue;
+      }
+      work_item = parallel_work_queue.front();
+      parallel_work_queue.pop_front();
+    }
+
+    /* Compile a shader. */
+    if (work_item->work_type == PARALLELWORKTYPE_COMPILE_SHADER) {
+      BLI_assert(work_item->info);
+
+      const shader::ShaderCreateInfo *shader_info = work_item->info;
+      work_item->shader = static_cast<MTLShader *>(
+          work_item->shader_compiler->compile(*shader_info, true));
+
+      if (work_item->shader) {
+        /* Generate and cache any render PSOs if possible (typically materials only).
+         * (finalize() will already have baked a compute PSO if possible.) */
+        work_item->shader->warm_cache(-1);
+      }
+    }
+    /* Bake PSO. */
+    else if (work_item->work_type == PARALLELWORKTYPE_BAKE_PSO) {
+      MTLShader *shader = work_item->shader;
+      /* Currently only compute shaders are supported. */
+      BLI_assert(shader && shader->has_compute_shader_lib());
+
+      /* Create descriptor using these specialization constants. */
+      MTLComputePipelineStateDescriptor compute_pipeline_descriptor(
+          work_item->specialization_values);
+
+      shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor);
+    }
+    else {
+      BLI_assert(false);
+    }
+    work_item->is_ready = true;
+  }
+
+  GPU_context_discard(blender_gpu_context);
+}
+
+BatchHandle MTLParallelShaderCompiler::create_batch(size_t batch_size)
+{
+  std::scoped_lock lock(batch_mutex);
+  BatchHandle batch_handle = next_batch_handle++;
+  batches.add(batch_handle, {});
+  Batch &batch = batches.lookup(batch_handle);
+  if (batch_size) {
+    batch.items.reserve(batch_size);
+  }
+  batch.is_ready = false;
+  shader_debug_printf("Created batch %llu\n", batch_handle);
+  return batch_handle;
+}
+
+void MTLParallelShaderCompiler::add_item_to_batch(ParallelWork *work_item,
+                                                  BatchHandle batch_handle)
+{
+  std::scoped_lock lock(batch_mutex);
+  Batch &batch = batches.lookup(batch_handle);
+  batch.items.append(work_item);
+}
+
+void MTLParallelShaderCompiler::add_parallel_item_to_queue(ParallelWork *work_item,
+                                                           BatchHandle batch_handle)
+{
+  shader_debug_printf("Request add shader work\n");
+  if (!terminate_compile_threads) {
+
+    /* Defer creation of compilation threads until required. */
+    if (compile_threads.empty()) {
+      create_compile_threads();
+    }
+
+    add_item_to_batch(work_item, batch_handle);
+    std::lock_guard lock(queue_mutex);
+    parallel_work_queue.push_back(work_item);
+    cond_var.notify_one();
+  }
+}
+
+BatchHandle MTLParallelShaderCompiler::batch_compile(MTLShaderCompiler *shader_compiler,
+                                                     Span<const shader::ShaderCreateInfo *> &infos)
+{
+  BLI_assert(GPU_use_parallel_compilation());
+
+  BatchHandle batch_handle = create_batch(infos.size());
+
+  shader_debug_printf("Batch compile %llu shaders (Batch = %llu)\n", infos.size(), batch_handle);
+
+  /* Have to finalize all shader infos *before* any parallel compilation, as
+   * ShaderCreateInfo::finalize() is not thread-safe. */
+  for (const shader::ShaderCreateInfo *info : infos) {
+    const_cast<shader::ShaderCreateInfo *>(info)->finalize();
+  }
+
+  for (const shader::ShaderCreateInfo *info : infos) {
+    ParallelWork *work_item = new ParallelWork;
+    work_item->info = info;
+    work_item->shader_compiler = shader_compiler;
+    work_item->is_ready = false;
+    work_item->shader = nullptr;
+    work_item->work_type = PARALLELWORKTYPE_COMPILE_SHADER;
+    add_parallel_item_to_queue(work_item, batch_handle);
+  }
+
+  return batch_handle;
+}
+
+bool MTLParallelShaderCompiler::batch_is_ready(BatchHandle handle)
+{
+  std::scoped_lock lock(batch_mutex);
+  Batch &batch = batches.lookup(handle);
+  if (batch.is_ready) {
+    return true;
+  }
+
+  for (ParallelWork *item : batch.items) {
+    if (!item->is_ready) {
+      return false;
+    }
+  }
+
+  batch.is_ready = true;
+  shader_debug_printf("Batch %llu is now ready\n", handle);
+  return batch.is_ready;
+}
+
+Vector<Shader *> MTLParallelShaderCompiler::batch_finalize(BatchHandle &handle)
+{
+  while (!batch_is_ready(handle)) {
+    BLI_time_sleep_ms(1);
+  }
+  std::scoped_lock lock(batch_mutex);
+
+  Batch batch = batches.pop(handle);
+  Vector<Shader *> result;
+  for (ParallelWork *item : batch.items) {
+    result.append(item->shader);
+    delete item;
+  }
+  handle = 0;
+  return result;
+}
+
+SpecializationBatchHandle MTLParallelShaderCompiler::precompile_specializations(
+    Span<ShaderSpecialization> specializations)
+{
+  BLI_assert(GPU_use_parallel_compilation());
+  /* Zero indicates no batch was created. */
+  SpecializationBatchHandle batch_handle = 0;
+
+  for (auto &specialization : specializations) {
+    MTLShader *sh = static_cast<MTLShader *>(unwrap(specialization.shader));
+
+    /* Specialization constants only take effect when we create the PSO.
+     * We don't have the relevant info to create a render PSO descriptor unless
+     * the shader has a parent shader, but in that case it would (currently) be
+     * invalid to apply specialization constants. For those reasons we currently only
+     * support precompilation of compute shaders.
+     * (Technically we could call makeFunction, but the benefit would likely be minimal.) */
+    if (!sh->has_compute_shader_lib()) {
+      continue;
+    }
+
+    BLI_assert_msg(sh->is_valid(), "Shader must be finalized before precompiling specializations");
+
+    /* Defer batch creation until we have some work to do. */
+    if (!batch_handle) {
+      batch_handle = create_batch(1);
+    }
+
+    ParallelWork *work_item = new ParallelWork;
+    work_item->info = nullptr;
+    work_item->is_ready = false;
+    work_item->shader = sh;
+    work_item->work_type = PARALLELWORKTYPE_BAKE_PSO;
+
+    /* Start from the shader's default constant values, then apply the requested
+     * specialization constants on top. */
+    work_item->specialization_values = sh->constants.values;
+    for (const SpecializationConstant &constant : specialization.constants) {
+      const ShaderInput *input = sh->interface->constant_get(constant.name.c_str());
+      BLI_assert_msg(input != nullptr, "The specialization constant doesn't exist");
+      work_item->specialization_values[input->location].u = constant.value.u;
+    }
+    sh->constants.is_dirty = true;
+
+    add_parallel_item_to_queue(work_item, batch_handle);
+  }
+  return batch_handle;
+}
+
+bool MTLParallelShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
+{
+  /* Check the empty-batch case, where we have no handle. */
+  if (!handle) {
+    return true;
+  }
+
+  std::scoped_lock lock(batch_mutex);
+  Batch &batch = batches.lookup(handle);
+
+  for (ParallelWork *item : batch.items) {
+    if (!item->is_ready) {
+      return false;
+    }
+  }
+
+  shader_debug_printf("Specialization Batch %llu is now ready\n", handle);
+  /* The handle is zeroed once the batch is ready. Specialization batches are never
+   * finalized, so drop the batch and its work items here to avoid leaking them. */
+  for (ParallelWork *item : batch.items) {
+    delete item;
+  }
+  batches.remove(handle);
+  handle = 0;
+  return true;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name MTLShaderCompiler
+ * \{ */
+
+MTLShaderCompiler::MTLShaderCompiler()
+{
+  parallel_shader_compiler = get_shared_parallel_shader_compiler();
+}
+
+MTLShaderCompiler::~MTLShaderCompiler()
+{
+  release_parallel_shader_compiler();
+}
+
+void MTLShaderCompiler::release_parallel_shader_compiler()
+{
+  if (parallel_shader_compiler) {
+    release_shared_parallel_shader_compiler();
+    parallel_shader_compiler = nullptr;
+  }
+}
+
+BatchHandle MTLShaderCompiler::batch_compile(Span<const shader::ShaderCreateInfo *> &infos)
+{
+  BLI_assert(parallel_shader_compiler);
+  return parallel_shader_compiler->batch_compile(this, infos);
+}
+
+bool MTLShaderCompiler::batch_is_ready(BatchHandle handle)
+{
+  return parallel_shader_compiler->batch_is_ready(handle);
+}
+
+Vector<Shader *> MTLShaderCompiler::batch_finalize(BatchHandle &handle)
+{
+  return parallel_shader_compiler->batch_finalize(handle);
+}
+
+SpecializationBatchHandle MTLShaderCompiler::precompile_specializations(
+    Span<ShaderSpecialization> specializations)
+{
+  return parallel_shader_compiler->precompile_specializations(specializations);
+}
+
+bool MTLShaderCompiler::specialization_batch_is_ready(SpecializationBatchHandle &handle)
+{
+  return parallel_shader_compiler->specialization_batch_is_ready(handle);
+}
+
+/** \} */
+
 } // namespace blender::gpu
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index fbe2e74c86d..47a86b50b82 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -28,6 +28,7 @@ void GPUTest::SetUp()
   gpuSettings.context_type = draw_context_type;
   gpuSettings.flags = GHOST_gpuDebugContext;
   ghost_system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(ghost_system);
   ghost_context = GHOST_CreateGPUContext(ghost_system, gpuSettings);
   GHOST_ActivateGPUContext(ghost_context);
   context = GPU_context_create(nullptr, ghost_context);
diff --git a/source/blender/windowmanager/intern/wm_playanim.cc b/source/blender/windowmanager/intern/wm_playanim.cc
index 302f9667328..c49456b20cd 100644
--- a/source/blender/windowmanager/intern/wm_playanim.cc
+++ b/source/blender/windowmanager/intern/wm_playanim.cc
@@ -1844,6 +1844,7 @@ static bool wm_main_playanim_intern(int argc, const char **argv, PlayArgs *args_
   GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);
 
   ps.ghost_data.system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(ps.ghost_data.system);
 
   if (UNLIKELY(ps.ghost_data.system == nullptr)) {
     /* GHOST will have reported the back-ends that failed to load. */
diff --git a/source/blender/windowmanager/intern/wm_window.cc b/source/blender/windowmanager/intern/wm_window.cc
index 7bd6525ba91..6aacb7f7072 100644
--- a/source/blender/windowmanager/intern/wm_window.cc
+++ b/source/blender/windowmanager/intern/wm_window.cc
@@ -1889,6 +1889,7 @@ void wm_ghost_init(bContext *C)
   GHOST_SetBacktraceHandler((GHOST_TBacktraceFn)BLI_system_backtrace);
 
   g_system = GHOST_CreateSystem();
+  GPU_backend_ghost_system_set(g_system);
 
   if (UNLIKELY(g_system == nullptr)) {
     /* GHOST will have reported the back-ends that failed to load. */