BLI code for enums that are meant to be used as "bit flags" defined
an ENUM_OPERATORS macro in BLI_utildefines.h. This cleans up things
related to said macro:
- Move it out into a separate BLI_enum_flags.hh header, instead of the
"random bag of things" header where it currently lives,
- Update it to no longer need manual indication of highest individual
bit value. This originally was added in a31a87f89 (2020 Oct), in
order to silence some UBSan warnings that were coming
from GPU related structures (looking at current GPU code, I don't
think this is happening anymore). However, that caused actual
user-visible bugs due to incorrectly specified max. enum bit value,
and today 14% of all usages have incorrect highest individual
bit value spelled out.
- I have reviewed all usages of operator ~ and none of them are
used for directly producing a DNA-serialized value; all the
usages are for masking out other bits for which the new ~
behavior that just flips all bits is fine.
- Make the macro define a flag_is_set() function to ease checking which bits
are set in C++ enum class cases; update existing cases to use
that instead of the three other ways that were used.
Pull Request: https://projects.blender.org/blender/blender/pulls/148230
1021 lines
37 KiB
C++
1021 lines
37 KiB
C++
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup gpu
|
|
*/
|
|
#pragma once
|
|
|
|
#include "gpu_context_private.hh"
|
|
|
|
#include "GPU_common_types.hh"
|
|
|
|
/* Don't generate OpenGL deprecation warning. This is a known thing, and is not something easily
|
|
* solvable in a short term. */
|
|
#ifdef __clang__
|
|
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
#endif
|
|
|
|
#include "intern/GHOST_Context.hh"
|
|
#include "intern/GHOST_ContextMTL.hh"
|
|
#include "intern/GHOST_Window.hh"
|
|
|
|
#include "mtl_backend.hh"
|
|
#include "mtl_capabilities.hh"
|
|
#include "mtl_common.hh"
|
|
#include "mtl_framebuffer.hh"
|
|
#include "mtl_memory.hh"
|
|
#include "mtl_shader.hh"
|
|
#include "mtl_shader_interface.hh"
|
|
#include "mtl_texture.hh"
|
|
|
|
#include <Cocoa/Cocoa.h>
|
|
#include <Metal/Metal.h>
|
|
#include <QuartzCore/QuartzCore.h>
|
|
#include <chrono>
|
|
#include <mutex>
|
|
|
|
@class CAMetalLayer;
|
|
@class MTLCommandQueue;
|
|
@class MTLRenderPipelineState;
|
|
|
|
namespace blender::gpu {
|
|
|
|
/* Forward Declarations */
|
|
class MTLContext;
|
|
class MTLCommandBufferManager;
|
|
class MTLUniformBuf;
|
|
class MTLStorageBuf;
|
|
|
|
/* Structs containing information on current binding state for textures and samplers. */
|
|
struct MTLTextureBinding {
|
|
bool used;
|
|
gpu::MTLTexture *texture_resource;
|
|
};
|
|
|
|
struct MTLSamplerBinding {
|
|
bool used;
|
|
MTLSamplerState state;
|
|
|
|
bool operator==(MTLSamplerBinding const &other) const
|
|
{
|
|
return (used == other.used && state == other.state);
|
|
}
|
|
};
|
|
|
|
/* Caching of resource bindings for active MTLRenderCommandEncoder.
|
|
* In Metal, resource bindings are local to the MTLCommandEncoder,
|
|
* not globally to the whole pipeline/cmd buffer. */
|
|
struct MTLBoundShaderState {
|
|
MTLShader *shader_ = nullptr;
|
|
uint pso_index_;
|
|
void set(MTLShader *shader, uint pso_index)
|
|
{
|
|
shader_ = shader;
|
|
pso_index_ = pso_index;
|
|
}
|
|
};
|
|
|
|
/* Caching of CommandEncoder Vertex/Fragment buffer bindings. */
|
|
struct BufferBindingCached {
  /* True when the slot holds raw byte data (the Push Constant equivalent),
   * false when it holds an MTLBuffer. */
  bool is_bytes;
  /* Buffer object stored for the slot. */
  id<MTLBuffer> metal_buffer;
  /* Offset stored alongside `metal_buffer`. */
  uint64_t offset;
};
|
|
|
|
/* Caching of CommandEncoder textures bindings. */
|
|
struct TextureBindingCached {
  /* Texture object stored for the slot. */
  id<MTLTexture> metal_texture;
};
|
|
|
|
/* Caching of CommandEncoder sampler states. */
|
|
struct SamplerStateBindingCached {
  /* Sampler configuration stored for the slot. */
  MTLSamplerState binding_state;
  /* Metal sampler object stored for the slot. */
  id<MTLSamplerState> sampler_state;
  /* NOTE(review): presumably set when the sampler is passed through an argument buffer
   * instead of a direct bind -- confirm against the bind_*_sampler implementations. */
  bool is_arg_buffer_binding;
};
|
|
|
|
/* Metal Context Render Pass State -- Used to track active RenderCommandEncoder state based on
 * bound MTLFrameBuffer's. Owned by MTLContext. */
|
|
class MTLRenderPassState {
  friend class MTLContext;

 public:
  /* Store references to the owning context and the command buffer manager. */
  MTLRenderPassState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager)
      : ctx(context), cmd(command_buffer_manager)
  {
  }

  /* A RenderPassState belongs to a live RenderCommandEncoder, so it is stored inside the
   * MTLCommandBufferManager; both back-references are kept here. */
  MTLContext &ctx;
  MTLCommandBufferManager &cmd;

  /* Last state recorded for the render encoder. */
  MTLBoundShaderState last_bound_shader_state;
  id<MTLRenderPipelineState> bound_pso = nil;
  id<MTLDepthStencilState> bound_ds_state = nil;
  uint last_used_stencil_ref_value = 0;
  MTLScissorRect last_scissor_rect;

  /* Per-stage binding caches, one entry per buffer / texture / sampler slot. */
  BufferBindingCached cached_vertex_buffer_bindings[MTL_MAX_BUFFER_BINDINGS];
  BufferBindingCached cached_fragment_buffer_bindings[MTL_MAX_BUFFER_BINDINGS];
  TextureBindingCached cached_vertex_texture_bindings[MTL_MAX_TEXTURE_SLOTS];
  TextureBindingCached cached_fragment_texture_bindings[MTL_MAX_TEXTURE_SLOTS];
  SamplerStateBindingCached cached_vertex_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS];
  SamplerStateBindingCached cached_fragment_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS];

  /* Reset the RenderCommandEncoder binding state. */
  void reset_state();

  /* Texture binding on the RenderCommandEncoder, per shader stage. */
  void bind_vertex_texture(id<MTLTexture> tex, uint slot);
  void bind_fragment_texture(id<MTLTexture> tex, uint slot);

  /* Sampler binding on the RenderCommandEncoder, per shader stage. */
  void bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
                           bool use_argument_buffer_for_samplers,
                           uint slot);
  void bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
                             bool use_argument_buffer_for_samplers,
                             uint slot);

  /* Buffer and inline-bytes binding on the RenderCommandEncoder, per shader stage. */
  void bind_vertex_buffer(id<MTLBuffer> buffer, uint64_t buffer_offset, uint index);
  void bind_fragment_buffer(id<MTLBuffer> buffer, uint64_t buffer_offset, uint index);
  void bind_vertex_bytes(const void *bytes, uint64_t length, uint index);
  void bind_fragment_bytes(const void *bytes, uint64_t length, uint index);
};
|
|
|
|
/* Metal Context Compute Pass State -- Used to track active ComputeCommandEncoder state. */
|
|
class MTLComputeState {
  friend class MTLContext;

 public:
  /* Store references to the owning context and the command buffer manager. */
  MTLComputeState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager)
      : ctx(context), cmd(command_buffer_manager)
  {
  }

  /* A ComputePassState belongs to a live ComputeCommandEncoder, so it is stored inside the
   * MTLCommandBufferManager; both back-references are kept here. */
  MTLContext &ctx;
  MTLCommandBufferManager &cmd;

  /* Pipeline state and binding caches for the compute encoder, one entry per slot. */
  id<MTLComputePipelineState> bound_pso = nil;
  BufferBindingCached cached_compute_buffer_bindings[MTL_MAX_BUFFER_BINDINGS];
  TextureBindingCached cached_compute_texture_bindings[MTL_MAX_TEXTURE_SLOTS];
  SamplerStateBindingCached cached_compute_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS];

  /* Reset the ComputeCommandEncoder binding state. */
  void reset_state();

  /* Pipeline state object binding. */
  void bind_pso(id<MTLComputePipelineState> pso);

  /* Texture binding on the ComputeCommandEncoder. */
  void bind_compute_texture(id<MTLTexture> tex, uint slot);

  /* Sampler binding on the ComputeCommandEncoder. */
  void bind_compute_sampler(MTLSamplerBinding &sampler_binding,
                            bool use_argument_buffer_for_samplers,
                            uint slot);

  /* Buffer and inline-bytes binding on the ComputeCommandEncoder. */
  void bind_compute_buffer(id<MTLBuffer> buffer, uint64_t buffer_offset, uint index);
  void bind_compute_bytes(const void *bytes, uint64_t length, uint index);
};
|
|
|
|
/* Depth Stencil State */
|
|
struct MTLContextDepthStencilState {
|
|
|
|
/* Depth State. */
|
|
bool depth_write_enable;
|
|
bool depth_test_enabled;
|
|
float depth_range_near;
|
|
float depth_range_far;
|
|
MTLCompareFunction depth_function;
|
|
float depth_bias;
|
|
float depth_slope_scale;
|
|
bool depth_bias_enabled_for_points;
|
|
bool depth_bias_enabled_for_lines;
|
|
bool depth_bias_enabled_for_tris;
|
|
|
|
/* Stencil State. */
|
|
bool stencil_test_enabled;
|
|
uint stencil_read_mask;
|
|
uint stencil_write_mask;
|
|
uint stencil_ref;
|
|
MTLCompareFunction stencil_func;
|
|
|
|
MTLStencilOperation stencil_op_front_stencil_fail;
|
|
MTLStencilOperation stencil_op_front_depth_fail;
|
|
MTLStencilOperation stencil_op_front_depthstencil_pass;
|
|
|
|
MTLStencilOperation stencil_op_back_stencil_fail;
|
|
MTLStencilOperation stencil_op_back_depth_fail;
|
|
MTLStencilOperation stencil_op_back_depthstencil_pass;
|
|
|
|
/* Frame-buffer State -- We need to mark this, in case stencil state remains unchanged,
|
|
* but attachment state has changed. */
|
|
bool has_depth_target;
|
|
bool has_stencil_target;
|
|
|
|
/* TODO(Metal): Consider optimizing this function using `memcmp`.
|
|
* Un-used, but differing, stencil state leads to over-generation
|
|
* of state objects when doing trivial compare. */
|
|
bool operator==(const MTLContextDepthStencilState &other) const
|
|
{
|
|
bool depth_state_equality = (has_depth_target == other.has_depth_target &&
|
|
depth_write_enable == other.depth_write_enable &&
|
|
depth_test_enabled == other.depth_test_enabled &&
|
|
depth_function == other.depth_function);
|
|
|
|
bool stencil_state_equality = true;
|
|
if (has_stencil_target) {
|
|
stencil_state_equality =
|
|
(has_stencil_target == other.has_stencil_target &&
|
|
stencil_test_enabled == other.stencil_test_enabled &&
|
|
stencil_op_front_stencil_fail == other.stencil_op_front_stencil_fail &&
|
|
stencil_op_front_depth_fail == other.stencil_op_front_depth_fail &&
|
|
stencil_op_front_depthstencil_pass == other.stencil_op_front_depthstencil_pass &&
|
|
stencil_op_back_stencil_fail == other.stencil_op_back_stencil_fail &&
|
|
stencil_op_back_depth_fail == other.stencil_op_back_depth_fail &&
|
|
stencil_op_back_depthstencil_pass == other.stencil_op_back_depthstencil_pass &&
|
|
stencil_func == other.stencil_func && stencil_read_mask == other.stencil_read_mask &&
|
|
stencil_write_mask == other.stencil_write_mask);
|
|
}
|
|
|
|
return depth_state_equality && stencil_state_equality;
|
|
}
|
|
|
|
/* Depth stencil state will get hashed in order to prepare
|
|
* MTLDepthStencilState objects. The hash should comprise of
|
|
* all elements which fill the MTLDepthStencilDescriptor.
|
|
* These are bound when [rec setDepthStencilState:...] is called.
|
|
* Depth bias and stencil reference value are set dynamically on the RenderCommandEncoder:
|
|
* - setStencilReferenceValue:
|
|
* - setDepthBias:slopeScale:clamp:
|
|
*/
|
|
std::size_t hash() const
|
|
{
|
|
std::size_t boolean_bitmask = (this->depth_write_enable ? 1 : 0) |
|
|
((this->depth_test_enabled ? 1 : 0) << 1) |
|
|
((this->depth_bias_enabled_for_points ? 1 : 0) << 2) |
|
|
((this->depth_bias_enabled_for_lines ? 1 : 0) << 3) |
|
|
((this->depth_bias_enabled_for_tris ? 1 : 0) << 4) |
|
|
((this->stencil_test_enabled ? 1 : 0) << 5) |
|
|
((this->has_depth_target ? 1 : 0) << 6) |
|
|
((this->has_stencil_target ? 1 : 0) << 7);
|
|
|
|
std::size_t stencilop_bitmask = ((std::size_t)this->stencil_op_front_stencil_fail) |
|
|
((std::size_t)this->stencil_op_front_depth_fail << 3) |
|
|
((std::size_t)this->stencil_op_front_depthstencil_pass << 6) |
|
|
((std::size_t)this->stencil_op_back_stencil_fail << 9) |
|
|
((std::size_t)this->stencil_op_back_depth_fail << 12) |
|
|
((std::size_t)this->stencil_op_back_depthstencil_pass << 15);
|
|
|
|
std::size_t main_hash = (std::size_t)this->depth_function;
|
|
if (this->has_stencil_target) {
|
|
main_hash += (std::size_t)(this->stencil_read_mask & 0xFF) << 8;
|
|
main_hash += (std::size_t)(this->stencil_write_mask & 0xFF) << 16;
|
|
}
|
|
main_hash ^= (std::size_t)this->stencil_func << 16;
|
|
main_hash ^= stencilop_bitmask;
|
|
|
|
std::size_t final_hash = (main_hash << 8) | boolean_bitmask;
|
|
return final_hash;
|
|
}
|
|
};
|
|
|
|
struct MTLContextTextureUtils {

  /* Depth Update Utilities */
  /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
   * use a compute shader to write to depth, so we must instead render to a depth target.
   * These processes use vertex/fragment shaders to render texture data from an intermediate
   * source, in order to prime the depth buffer. */
  blender::Map<DepthTextureUpdateRoutineSpecialisation, gpu::Shader *> depth_2d_update_shaders;
  gpu::Shader *fullscreen_blit_shader = nullptr;

  /* Texture Read/Update routines: one compute PSO cache per texture type, keyed by the
   * routine specialization. */
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_1d_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_1d_array_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_2d_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_2d_array_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_3d_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_cube_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_cube_array_read_compute_psos;
  blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_buffer_read_compute_psos;

  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_1d_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_1d_array_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_2d_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_2d_array_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_3d_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_cube_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_cube_array_update_compute_psos;
  blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
      texture_buffer_update_compute_psos;

  /* Release every pipeline state stored in `map`, then empty the map. */
  template<typename T> void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map)
  {
    for (auto item : map.items()) {
      [item.value release];
    }
    map.clear();
  }

  /* Reset the blit shader pointer. */
  void init()
  {
    fullscreen_blit_shader = nullptr;
  }

  /* Free all cached shaders and pipeline states. Safe to call more than once: every
   * container is emptied and the blit shader pointer is cleared after being freed. */
  void cleanup()
  {
    if (fullscreen_blit_shader) {
      GPU_shader_free(fullscreen_blit_shader);
      /* Clear the pointer so a repeated cleanup() does not free the shader twice. */
      fullscreen_blit_shader = nullptr;
    }

    /* Free depth 2D Update shaders */
    for (auto item : depth_2d_update_shaders.items()) {
      GPU_shader_free(item.value);
    }
    depth_2d_update_shaders.clear();

    /* Free Read shader maps */
    free_cached_pso_map(texture_1d_read_compute_psos);
    free_cached_pso_map(texture_1d_array_read_compute_psos);
    free_cached_pso_map(texture_2d_read_compute_psos);
    free_cached_pso_map(texture_2d_array_read_compute_psos);
    free_cached_pso_map(texture_3d_read_compute_psos);
    free_cached_pso_map(texture_cube_read_compute_psos);
    free_cached_pso_map(texture_cube_array_read_compute_psos);
    free_cached_pso_map(texture_buffer_read_compute_psos);

    /* Free Update shader maps */
    free_cached_pso_map(texture_1d_update_compute_psos);
    free_cached_pso_map(texture_1d_array_update_compute_psos);
    free_cached_pso_map(texture_2d_update_compute_psos);
    free_cached_pso_map(texture_2d_array_update_compute_psos);
    free_cached_pso_map(texture_3d_update_compute_psos);
    free_cached_pso_map(texture_cube_update_compute_psos);
    free_cached_pso_map(texture_cube_array_update_compute_psos);
    free_cached_pso_map(texture_buffer_update_compute_psos);
  }
};
|
|
|
|
class MTLContextComputeUtils {
 private:
  /* Pipeline state handed out by #get_buffer_clear_pso(); nil when none is held. */
  id<MTLComputePipelineState> buffer_clear_pso_ = nil;

 public:
  /* Accessor for the buffer-clear pipeline state (defined outside this header). */
  id<MTLComputePipelineState> get_buffer_clear_pso();

  /* Release the held pipeline state, if any, and reset the handle to nil. */
  void cleanup()
  {
    if (!buffer_clear_pso_) {
      return;
    }
    [buffer_clear_pso_ release];
    buffer_clear_pso_ = nil;
  }
};
|
|
|
|
/* Combined sampler state configuration for Argument Buffer caching. */
|
|
struct MTLSamplerArray {
  /* Number of leading entries of the arrays below that take part in comparison and hashing. */
  uint num_samplers;
  /* MTLSamplerState permutations between 0..256 - slightly more than a byte. */
  MTLSamplerState mtl_sampler_flags[MTL_MAX_TEXTURE_SLOTS];
  id<MTLSamplerState> mtl_sampler[MTL_MAX_TEXTURE_SLOTS];

  /* Arrays are equal when they hold the same number of samplers and the first
   * `num_samplers` entries of `mtl_sampler_flags` are byte-wise identical.
   * The `mtl_sampler` object handles are not compared. */
  bool operator==(const MTLSamplerArray &other) const
  {
    if (this->num_samplers != other.num_samplers) {
      return false;
    }
    return (memcmp(this->mtl_sampler_flags,
                   other.mtl_sampler_flags,
                   sizeof(MTLSamplerState) * this->num_samplers) == 0);
  }

  /* Hash built from the sampler count and the first `num_samplers` sampler flags. */
  uint32_t hash() const
  {
    uint32_t hash = this->num_samplers;
    /* Unsigned index, matching the type of `num_samplers` (avoids a signed/unsigned
     * comparison). */
    for (uint i = 0; i < this->num_samplers; i++) {
      hash ^= uint32_t(this->mtl_sampler_flags[i]) << (i % 3);
    }
    return hash;
  }
};
|
|
|
|
/* Bit flags naming the pieces of pipeline state that need to be (re-)applied. */
enum MTLPipelineStateDirtyFlag {
  /* Nothing to apply. */
  MTL_PIPELINE_STATE_NULL_FLAG = 0,
  /* setViewport needs to be called. */
  MTL_PIPELINE_STATE_VIEWPORT_FLAG = 1 << 0,
  /* setScissor needs to be called. */
  MTL_PIPELINE_STATE_SCISSOR_FLAG = 1 << 1,
  /* The active depth stencil state needs updating/rebinding. */
  MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG = 1 << 2,
  /* The active PSO needs updating/rebinding. */
  MTL_PIPELINE_STATE_PSO_FLAG = 1 << 3,
  /* The frontFacingWinding state needs updating. */
  MTL_PIPELINE_STATE_FRONT_FACING_FLAG = 1 << 4,
  /* The culling state needs updating. */
  MTL_PIPELINE_STATE_CULLMODE_FLAG = 1 << 5,
  /* Every flag above: the full pipeline state needs applying. Occurs when beginning a new
   * render pass. */
  MTL_PIPELINE_STATE_ALL_FLAG = MTL_PIPELINE_STATE_VIEWPORT_FLAG |
                                MTL_PIPELINE_STATE_SCISSOR_FLAG |
                                MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG |
                                MTL_PIPELINE_STATE_PSO_FLAG |
                                MTL_PIPELINE_STATE_FRONT_FACING_FLAG |
                                MTL_PIPELINE_STATE_CULLMODE_FLAG
};
|
|
|
|
/* Apply the ENUM_OPERATORS macro so that #MTLPipelineStateDirtyFlag can be used as a bit-flag
 * enum. NOTE(review): the macro is defined outside this header; see its definition for the
 * exact set of operators/helpers it generates. */
ENUM_OPERATORS(MTLPipelineStateDirtyFlag);
|
|
|
|
struct MTLUniformBufferBinding {
|
|
bool bound;
|
|
MTLUniformBuf *ubo;
|
|
};
|
|
|
|
struct MTLStorageBufferBinding {
|
|
bool bound;
|
|
MTLStorageBuf *ssbo;
|
|
};
|
|
|
|
struct MTLContextGlobalShaderPipelineState {
|
|
bool initialised = false;
|
|
|
|
/* Whether the pipeline state has been modified since application.
|
|
* `dirty_flags` is a bitmask of the types of state which have been updated.
|
|
* This is in order to optimize calls and only re-apply state as needed.
|
|
* Some state parameters are dynamically applied on the RenderCommandEncoder,
|
|
* others may be encapsulated in GPU-resident state objects such as
|
|
* MTLDepthStencilState or MTLRenderPipelineState. */
|
|
bool dirty = true;
|
|
MTLPipelineStateDirtyFlag dirty_flags = MTL_PIPELINE_STATE_NULL_FLAG;
|
|
|
|
/* Shader resources. */
|
|
MTLShader *null_shader = nullptr;
|
|
|
|
/* Active Shader State. */
|
|
MTLShader *active_shader = nullptr;
|
|
|
|
/* Global Uniform Buffers. */
|
|
MTLUniformBufferBinding ubo_bindings[MTL_MAX_BUFFER_BINDINGS];
|
|
|
|
/* Storage buffer. */
|
|
MTLStorageBufferBinding ssbo_bindings[MTL_MAX_BUFFER_BINDINGS];
|
|
|
|
/* Context Texture bindings. */
|
|
MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS];
|
|
MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS];
|
|
|
|
/* Image bindings. */
|
|
MTLTextureBinding image_bindings[MTL_MAX_TEXTURE_SLOTS];
|
|
|
|
/*** --- Render Pipeline State --- ***/
|
|
/* Track global render pipeline state for the current context. The functions in GPU_state.hh
|
|
* modify these parameters. Certain values, tagged [PSO], are parameters which are required to be
|
|
* passed into PSO creation, rather than dynamic state functions on the RenderCommandEncoder.
|
|
*/
|
|
|
|
/* Blending State. */
|
|
MTLColorWriteMask color_write_mask; /* [PSO] */
|
|
bool blending_enabled; /* [PSO] */
|
|
MTLBlendOperation alpha_blend_op; /* [PSO] */
|
|
MTLBlendOperation rgb_blend_op; /* [PSO] */
|
|
MTLBlendFactor dest_alpha_blend_factor; /* [PSO] */
|
|
MTLBlendFactor dest_rgb_blend_factor; /* [PSO] */
|
|
MTLBlendFactor src_alpha_blend_factor; /* [PSO] */
|
|
MTLBlendFactor src_rgb_blend_factor; /* [PSO] */
|
|
|
|
/* Culling State. */
|
|
bool culling_enabled;
|
|
GPUFaceCullTest cull_mode;
|
|
GPUFrontFace front_face;
|
|
|
|
/* Depth State. */
|
|
MTLContextDepthStencilState depth_stencil_state;
|
|
|
|
/* Viewport/Scissor Region. */
|
|
int num_active_viewports = 1;
|
|
int viewport_offset_x[GPU_MAX_VIEWPORTS];
|
|
int viewport_offset_y[GPU_MAX_VIEWPORTS];
|
|
int viewport_width[GPU_MAX_VIEWPORTS];
|
|
int viewport_height[GPU_MAX_VIEWPORTS];
|
|
bool scissor_enabled;
|
|
int scissor_x;
|
|
int scissor_y;
|
|
int scissor_width;
|
|
int scissor_height;
|
|
|
|
/* Image data access state. */
|
|
uint unpack_row_length;
|
|
|
|
/* Render parameters. */
|
|
float point_size = 1.0f;
|
|
float line_width = 1.0f;
|
|
|
|
/* Clipping plane enablement. */
|
|
bool clip_distance_enabled[6] = {false};
|
|
};
|
|
|
|
/* Command Buffer Manager - Owned by MTLContext.
|
|
* The MTLCommandBufferManager represents all work associated with
|
|
* a command buffer of a given identity. This manager is a fixed-state
|
|
* on the context, which coordinates the lifetime of command buffers
|
|
* for particular categories of work.
|
|
*
|
|
* This ensures operations on command buffers, and the state associated,
|
|
* is correctly tracked and managed. Workload submission and MTLCommandEncoder
|
|
* coordination is managed from here.
|
|
*
|
|
* There is currently only one MTLCommandBufferManager for managing submission
|
|
* of the "main" rendering commands. A secondary upload command buffer track,
|
|
* or asynchronous compute command buffer track may be added in the future. */
|
|
class MTLCommandBufferManager {
  friend class MTLContext;

 public:
  /* Number of active command buffers across all managers. */
  static volatile std::atomic<int> num_active_cmd_bufs_in_system;

 private:
  /* Owning context and its properties. */
  MTLContext &context_;
  bool supports_render_ = false;

  /* Command buffer tracking; `num_active_cmd_bufs` is the count for this manager only. */
  id<MTLCommandBuffer> active_command_buffer_ = nil;
  id<MTLCommandBuffer> last_submitted_command_buffer_ = nil;
  volatile std::atomic<int> num_active_cmd_bufs = 0;

  /* Kind of MTLCommandEncoder that is currently active. */
  enum {
    MTL_NO_COMMAND_ENCODER = 0,
    MTL_RENDER_COMMAND_ENCODER = 1,
    MTL_BLIT_COMMAND_ENCODER = 2,
    MTL_COMPUTE_COMMAND_ENCODER = 3
  } active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;

  /* Active encoder handles, one per encoder kind. */
  id<MTLRenderCommandEncoder> active_render_command_encoder_ = nil;
  id<MTLBlitCommandEncoder> active_blit_command_encoder_ = nil;
  id<MTLComputeCommandEncoder> active_compute_command_encoder_ = nil;

  /* State belonging to the active RenderCommandEncoder. */
  MTLRenderPassState render_pass_state_;
  MTLFrameBuffer *active_frame_buffer_ = nullptr;
  MTLRenderPassDescriptor *active_pass_descriptor_ = nullptr;

  /* State belonging to the active ComputeCommandEncoder. */
  MTLComputeState compute_state_;

  /* Workload heuristics - command buffers may need to be split to optimize workload and
   * balancing. */
  int current_draw_call_count_ = 0;
  int encoder_count_ = 0;
  int vertex_submitted_count_ = 0;
  bool empty_ = true;

  /** Debug groups. */
  /* Stack of every call made to #push_debug_group. */
  std::vector<std::string> debug_group_stack;
  /* Stack of the calls that resulted in an actual pushDebugGroup API call on the current
   * command buffer. */
  std::vector<std::string> debug_group_pushed_stack;

 public:
  /* Bind the manager to its context; the render-pass and compute state objects receive
   * references to the same context and to this manager. */
  MTLCommandBufferManager(MTLContext &context)
      : context_(context), render_pass_state_(context, *this), compute_state_(context, *this)
  {
  }
  void prepare(bool supports_render = true);

  /* When `wait` is true, the CPU stalls until the GPU work has completed. */
  bool submit(bool wait);

  /* Query / fetch the current encoder. */
  bool is_inside_render_pass();
  bool is_inside_blit();
  bool is_inside_compute();
  id<MTLRenderCommandEncoder> get_active_render_command_encoder();
  id<MTLBlitCommandEncoder> get_active_blit_command_encoder();
  id<MTLComputeCommandEncoder> get_active_compute_command_encoder();
  MTLFrameBuffer *get_active_framebuffer();

  /* RenderPassState for the RenderCommandEncoder. */
  MTLRenderPassState &get_render_pass_state()
  {
    /* The render pass state is only expected to be accessed from inside a render pass. */
    BLI_assert(this->is_inside_render_pass());
    return render_pass_state_;
  }

  /* ComputeState for the ComputeCommandEncoder. */
  MTLComputeState &get_compute_state()
  {
    /* The compute state is only expected to be accessed from inside a compute encoder. */
    BLI_assert(this->is_inside_compute());
    return compute_state_;
  }

  /* Rendering heuristics. */
  void register_draw_counters(int vertex_submission);
  void reset_counters();
  bool do_break_submission();

  /* Encoder and pass management. */
  /* End the currently active MTLCommandEncoder. */
  bool end_active_command_encoder(bool retain_framebuffers = false);
  id<MTLRenderCommandEncoder> ensure_begin_render_command_encoder(MTLFrameBuffer *ctx_framebuffer,
                                                                  bool force_begin,
                                                                  bool *r_new_pass);
  id<MTLBlitCommandEncoder> ensure_begin_blit_encoder();
  id<MTLComputeCommandEncoder> ensure_begin_compute_encoder();

  /* Workload synchronization. */
  bool insert_memory_barrier(GPUBarrier barrier_bits,
                             GPUStageBarrierBits before_stages,
                             GPUStageBarrierBits after_stages);
  void encode_signal_event(id<MTLEvent> event, uint64_t value);
  void encode_wait_for_event(id<MTLEvent> event, uint64_t value);
  /* TODO(Metal): Support fences in command buffer class. */

  /* Debug. */
  void push_debug_group(const char *name, int index);
  void pop_debug_group();

  /* Increment both the system-wide and the per-manager active command buffer counters. */
  void inc_active_command_buffer_count()
  {
    ++num_active_cmd_bufs_in_system;
    ++num_active_cmd_bufs;
  }

  /* Decrement both counters; asserts that both are still positive beforehand. */
  void dec_active_command_buffer_count()
  {
    BLI_assert(num_active_cmd_bufs_in_system > 0 && num_active_cmd_bufs > 0);
    --num_active_cmd_bufs_in_system;
    --num_active_cmd_bufs;
  }

  /* Current value of the per-manager active command buffer counter. */
  int get_active_command_buffer_count()
  {
    return num_active_cmd_bufs.load();
  }

  /* Yield the calling thread repeatedly until the per-manager counter reads zero. */
  void wait_until_active_command_buffers_complete()
  {
    while (get_active_command_buffer_count() != 0) {
      std::this_thread::yield();
    }
  }

 private:
  /* Begin a new command buffer. */
  id<MTLCommandBuffer> ensure_begin();

  void register_encoder_counters();

  /* Debug group management. */
  void unfold_pending_debug_groups();
};
|
|
|
|
/**
|
|
* MTLContext -- Core render loop and state management.
|
|
*
|
|
* NOTE(Metal): Partial #MTLContext stub to provide wrapper functionality
|
|
* for work-in-progress `MTL*` classes.
|
|
*/
|
|
class MTLContext : public Context {
|
|
friend class MTLBackend;
|
|
friend class MTLRenderPassState;
|
|
friend class MTLComputeState;
|
|
|
|
public:
|
|
/* Swap-chain and latency management. */
|
|
static std::atomic<int> max_drawables_in_flight;
|
|
static std::atomic<int64_t> avg_drawable_latency_us;
|
|
static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
|
|
|
|
public:
|
|
/* Shaders and Pipeline state. */
|
|
MTLContextGlobalShaderPipelineState pipeline_state;
|
|
|
|
/* Metal API Resource Handles. */
|
|
id<MTLCommandQueue> queue = nil;
|
|
id<MTLDevice> device = nil;
|
|
|
|
#ifndef NDEBUG
|
|
/* Label for Context debug name assignment. */
|
|
NSString *label = nil;
|
|
#endif
|
|
|
|
/* Memory Management. */
|
|
MTLScratchBufferManager memory_manager;
|
|
static std::mutex global_memory_manager_reflock;
|
|
static int global_memory_manager_refcount;
|
|
static MTLBufferPool *global_memory_manager;
|
|
|
|
/* CommandBuffer managers. */
|
|
MTLCommandBufferManager main_command_buffer;
|
|
|
|
private:
|
|
/* Parent Context. */
|
|
GHOST_ContextMTL *ghost_context_;
|
|
|
|
/* Render Passes and Frame-buffers. */
|
|
id<MTLTexture> default_fbo_mtltexture_ = nil;
|
|
gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
|
|
|
|
/* Depth-stencil state cache. */
|
|
blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
|
|
|
|
/* Compute and specialization caches. */
|
|
MTLContextTextureUtils texture_utils_;
|
|
MTLContextComputeUtils compute_utils_;
|
|
|
|
/* Texture Samplers. */
|
|
/* Cache of generated #MTLSamplerState objects based on permutations of the members of
|
|
* `GPUSamplerState`. */
|
|
id<MTLSamplerState> sampler_state_cache_[GPU_SAMPLER_EXTEND_MODES_COUNT]
|
|
[GPU_SAMPLER_EXTEND_MODES_COUNT]
|
|
[GPU_SAMPLER_FILTERING_TYPES_COUNT];
|
|
id<MTLSamplerState> custom_sampler_state_cache_[GPU_SAMPLER_CUSTOM_TYPES_COUNT];
|
|
id<MTLSamplerState> default_sampler_state_ = nil;
|
|
|
|
/* When texture sampler count exceeds the resource bind limit, an
|
|
* argument buffer is used to pass samplers to the shader.
|
|
* Each unique configurations of multiple samplers can be cached, so as to not require
|
|
* re-generation. `samplers_` stores the current list of bound sampler objects.
|
|
* `cached_sampler_buffers_` is a cache of encoded argument buffers which can be re-used. */
|
|
MTLSamplerArray samplers_;
|
|
blender::Map<MTLSamplerArray, gpu::MTLBuffer *> cached_sampler_buffers_;
|
|
|
|
/* Frame. */
|
|
bool is_inside_frame_ = false;
|
|
uint current_frame_index_;
|
|
|
|
/* Visibility buffer for MTLQuery results. */
|
|
gpu::MTLBuffer *visibility_buffer_ = nullptr;
|
|
bool visibility_is_dirty_ = false;
|
|
|
|
/* Null buffers for empty/uninitialized bindings.
|
|
* Null attribute buffer follows default attribute format of OpenGL Backend. */
|
|
id<MTLBuffer> null_buffer_; /* All zero's. */
|
|
id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
|
|
|
|
/** Dummy Resources */
|
|
/* Maximum of 32 texture types. Though most combinations invalid. */
|
|
gpu::MTLTexture *dummy_textures_[GPU_SAMPLER_TYPE_MAX][GPU_TEXTURE_BUFFER] = {{nullptr}};
|
|
GPUVertFormat dummy_vertformat_[GPU_SAMPLER_TYPE_MAX];
|
|
VertBuf *dummy_verts_[GPU_SAMPLER_TYPE_MAX] = {nullptr};
|
|
|
|
/* Debug scope timings. Adapted form GLContext::TimeQuery.
|
|
* Only supports CPU timings for now. */
|
|
struct ScopeTimings {
|
|
using Clock = std::chrono::steady_clock;
|
|
using TimePoint = Clock::time_point;
|
|
using Nanoseconds = std::chrono::nanoseconds;
|
|
|
|
static TimePoint epoch;
|
|
|
|
std::string name;
|
|
bool finished;
|
|
TimePoint cpu_start, cpu_end;
|
|
};
|
|
Vector<ScopeTimings> scope_timings;
|
|
|
|
void process_frame_timings();
|
|
|
|
public:
|
|
/* GPUContext interface. */
|
|
MTLContext(void *ghost_window, void *ghost_context);
|
|
~MTLContext();
|
|
|
|
static void check_error(const char *info);
|
|
|
|
void activate() override;
|
|
void deactivate() override;
|
|
void begin_frame() override;
|
|
void end_frame() override;
|
|
|
|
void flush() override;
|
|
void finish() override;
|
|
|
|
void memory_statistics_get(int *r_total_mem, int *r_free_mem) override;
|
|
|
|
static MTLContext *get()
|
|
{
|
|
return static_cast<MTLContext *>(Context::get());
|
|
}
|
|
|
|
void debug_group_begin(const char *name, int index) override;
|
|
void debug_group_end() override;
|
|
bool debug_capture_begin(const char *title) override;
|
|
void debug_capture_end() override;
|
|
void *debug_capture_scope_create(const char *name) override;
|
|
bool debug_capture_scope_begin(void *scope) override;
|
|
void debug_capture_scope_end(void *scope) override;
|
|
|
|
/* Intentionally empty: this override does nothing. */
void debug_unbind_all_ubo() override {}
|
|
/* Intentionally empty: this override does nothing. */
void debug_unbind_all_ssbo() override {}
|
|
|
|
/*** MTLContext Utility functions. */
|
|
/*
|
|
* All below functions modify the global state for the context, controlling the flow of
|
|
* rendering, binding resources, setting global state, resource management etc;
|
|
*/
|
|
|
|
/** Metal Context Core functions. **/
|
|
|
|
/* Bind frame-buffer to context. */
|
|
void framebuffer_bind(MTLFrameBuffer *framebuffer);
|
|
|
|
/* Restore frame-buffer used by active context to default back-buffer. */
|
|
void framebuffer_restore();
|
|
|
|
/* Ensure a render-pass using the Context frame-buffer (active_fb_) is in progress. */
|
|
id<MTLRenderCommandEncoder> ensure_begin_render_pass();
|
|
|
|
MTLFrameBuffer *get_current_framebuffer();
|
|
MTLFrameBuffer *get_default_framebuffer();
|
|
|
|
/* Context Global-State Texture Binding. */
|
|
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image);
|
|
void sampler_bind(MTLSamplerState, uint sampler_unit);
|
|
void texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image, StateManager *state_manager);
|
|
void texture_unbind_all(bool is_image);
|
|
void sampler_state_cache_init();
|
|
id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state);
|
|
id<MTLSamplerState> get_default_sampler_state();
|
|
|
|
/* Active shader specialization constants state. */
|
|
shader::SpecializationConstants constants_state;
|
|
|
|
void specialization_constants_set(const shader::SpecializationConstants *constants_state);
|
|
|
|
/* Metal Context pipeline state. */
|
|
void pipeline_state_init();
|
|
MTLShader *get_active_shader();
|
|
|
|
/* These functions ensure that the current RenderCommandEncoder has
|
|
* the correct global state assigned. This should be called prior
|
|
* to every draw call, to ensure that all state is applied and up
|
|
* to date. We handle:
|
|
*
|
|
* - Buffer bindings (Vertex buffers, Uniforms, UBOs)
|
|
* - Texture bindings
|
|
* - Sampler bindings (+ argument buffer bindings)
|
|
* - Dynamic Render pipeline state (on encoder)
|
|
* - Baking Pipeline State Objects (PSOs) for current shader, based
|
|
* on final pipeline state.
|
|
*
|
|
* `ensure_render_pipeline_state` will return false if the state is
|
|
* invalid and cannot be applied. This should cancel a draw call. */
|
|
bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
|
|
bool ensure_buffer_bindings(id<MTLRenderCommandEncoder> rec,
|
|
const MTLShaderInterface *shader_interface,
|
|
const MTLRenderPipelineStateInstance *pipeline_state_instance);
|
|
bool ensure_buffer_bindings(id<MTLComputeCommandEncoder> rec,
|
|
const MTLShaderInterface *shader_interface,
|
|
const MTLComputePipelineStateInstance *pipeline_state_instance);
|
|
void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
|
|
MTLShaderInterface *shader_interface,
|
|
const MTLRenderPipelineStateInstance *pipeline_state_instance);
|
|
void ensure_texture_bindings(id<MTLComputeCommandEncoder> rec,
|
|
MTLShaderInterface *shader_interface,
|
|
const MTLComputePipelineStateInstance *pipeline_state_instance);
|
|
void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
|
|
|
|
id<MTLBuffer> get_null_buffer();
|
|
id<MTLBuffer> get_null_attribute_buffer();
|
|
gpu::MTLTexture *get_dummy_texture(GPUTextureType type, GPUSamplerFormat sampler_format);
|
|
void free_dummy_resources();
|
|
|
|
/* Compute. */
|
|
/* Ensure compute pipeline state for current config is compiled and return PSO instance. */
|
|
const MTLComputePipelineStateInstance *ensure_compute_pipeline_state();
|
|
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len);
|
|
void compute_dispatch_indirect(StorageBuf *indirect_buf);
|
|
|
|
/* State assignment. */
|
|
void set_viewport(int origin_x, int origin_y, int width, int height);
|
|
void set_viewports(int count, const int (&viewports)[GPU_MAX_VIEWPORTS][4]);
|
|
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
|
|
void set_scissor_enabled(bool scissor_enabled);
|
|
|
|
/* Visibility buffer control. */
|
|
void set_visibility_buffer(gpu::MTLBuffer *buffer);
|
|
gpu::MTLBuffer *get_visibility_buffer() const;
|
|
|
|
/* Flag whether the visibility buffer for query results
|
|
* has changed. This requires a new RenderPass in order
|
|
* to update. */
|
|
bool is_visibility_dirty() const;
|
|
|
|
/* Reset dirty flag state for visibility buffer. */
|
|
void clear_visibility_dirty();
|
|
|
|
/* Texture utilities. */
|
|
/** Mutable reference to `texture_utils_`. */
MTLContextTextureUtils &get_texture_utils()
{
  return this->texture_utils_;
}
|
|
|
|
/* Compute utilities. */
|
|
/** Mutable reference to `compute_utils_`. */
MTLContextComputeUtils &get_compute_utils()
{
  return this->compute_utils_;
}
|
|
|
|
bool get_active()
|
|
{
|
|
return is_active_;
|
|
}
|
|
|
|
bool get_inside_frame()
|
|
{
|
|
return is_inside_frame_;
|
|
}
|
|
|
|
/**
 * Current value of `current_frame_index_`.
 * Const-qualified so it can be called through a const context, matching the other
 * read-only accessors of this class (e.g. `get_visibility_buffer() const`).
 */
uint get_current_frame_index() const
{
  return current_frame_index_;
}
|
|
|
|
/** Mutable reference to the `memory_manager` member. */
MTLScratchBufferManager &get_scratchbuffer_manager()
{
  MTLScratchBufferManager &scratch_manager = memory_manager;
  return scratch_manager;
}
|
|
|
|
static void global_memory_manager_acquire_ref()
|
|
{
|
|
MTLContext::global_memory_manager_reflock.lock();
|
|
if (MTLContext::global_memory_manager == nullptr) {
|
|
BLI_assert(MTLContext::global_memory_manager_refcount == 0);
|
|
MTLContext::global_memory_manager = new MTLBufferPool();
|
|
}
|
|
MTLContext::global_memory_manager_refcount++;
|
|
MTLContext::global_memory_manager_reflock.unlock();
|
|
}
|
|
|
|
static void global_memory_manager_release_ref()
|
|
{
|
|
MTLContext::global_memory_manager_reflock.lock();
|
|
MTLContext::global_memory_manager_refcount--;
|
|
BLI_assert(MTLContext::global_memory_manager_refcount >= 0);
|
|
BLI_assert(MTLContext::global_memory_manager != nullptr);
|
|
|
|
if (MTLContext::global_memory_manager_refcount <= 0) {
|
|
delete MTLContext::global_memory_manager;
|
|
MTLContext::global_memory_manager = nullptr;
|
|
}
|
|
MTLContext::global_memory_manager_reflock.unlock();
|
|
}
|
|
|
|
/**
 * Shared `MTLBufferPool` pointer. Asserts that the pool exists.
 * The reference lock is not taken here.
 */
static MTLBufferPool *get_global_memory_manager()
{
  MTLBufferPool *pool = global_memory_manager;
  BLI_assert(pool != nullptr);
  return pool;
}
|
|
|
|
/* Swap-chain and latency management. */
|
|
static void latency_resolve_average(int64_t frame_latency_us)
|
|
{
|
|
int64_t avg = 0;
|
|
int64_t frame_c = 0;
|
|
for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
|
|
MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
|
|
avg += MTLContext::frame_latency[i];
|
|
frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
|
|
}
|
|
MTLContext::frame_latency[0] = frame_latency_us;
|
|
avg += MTLContext::frame_latency[0];
|
|
if (frame_c > 0) {
|
|
avg /= frame_c;
|
|
}
|
|
else {
|
|
avg = 0;
|
|
}
|
|
MTLContext::avg_drawable_latency_us = avg;
|
|
}
|
|
|
|
private:
|
|
void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
|
|
void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
|
|
};
|
|
|
|
/* GHOST Context callback and present. */
|
|
void present(MTLRenderPassDescriptor *blit_descriptor,
|
|
id<MTLRenderPipelineState> blit_pso,
|
|
id<MTLTexture> swapchain_texture,
|
|
id<CAMetalDrawable> drawable);
|
|
|
|
} // namespace blender::gpu
|