test2/source/blender/gpu/metal/mtl_context.mm
Jason Fielder c18351f670 Metal: Increase concurrent shader compilation threads
Leverage a new API call in Metal to increase the number of threads
dedicated to concurrent shader compilation. This is a first step toward
improving parallel compilation times when multiple engines are active.

This would also enable an increase in worker threads for shader
compilation jobs within the DRWManager.

Note that this API is only available on macOS Ventura 13.3 and newer.

Authored by Apple: Michael Parkin-White

Pull Request: https://projects.blender.org/blender/blender/pulls/106625
2023-04-21 07:52:17 +02:00
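
The change amounts to a single availability-guarded call on the MTLDevice, as it
appears in the MTLContext constructor below:

    if (@available(macOS 13.3, *)) {
      [this->device setShouldMaximizeConcurrentCompilation:YES];
    }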


/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_framebuffer.hh"
#include "mtl_immediate.hh"
#include "mtl_memory.hh"
#include "mtl_primitive.hh"
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
#include "DNA_userdef_types.h"
#include "GPU_capabilities.h"
#include "GPU_matrix.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_buffer.h"
#include "intern/gpu_matrix_private.h"
#include "PIL_time.h"
#include <fstream>
#include <string>
using namespace blender;
using namespace blender::gpu;
namespace blender::gpu {
/* Global memory manager. */
std::mutex MTLContext::global_memory_manager_reflock;
int MTLContext::global_memory_manager_refcount = 0;
MTLBufferPool *MTLContext::global_memory_manager = nullptr;
/* Swap-chain and latency management. */
std::atomic<int> MTLContext::max_drawables_in_flight = 0;
std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
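/* NOTE: frame_latency presumably acts as a rolling window of recent present latencies (in
* microseconds) from which avg_drawable_latency_us is derived; the swap-chain pacing logic
* that consumes these values lives later in this file. */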
/* -------------------------------------------------------------------- */
/** \name GHOST Context interaction.
* \{ */
void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
{
GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
BLI_assert(ghost_ctx != nullptr);
/* Release old MTLTexture handle */
if (default_fbo_mtltexture_) {
[default_fbo_mtltexture_ release];
default_fbo_mtltexture_ = nil;
}
/* Release Framebuffer attachments */
MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
mtl_front_left->remove_all_attachments();
mtl_back_left->remove_all_attachments();
GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
if (ghost_cgl_ctx != NULL) {
default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
MTL_LOG_INFO(
"Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
ghost_cgl_ctx,
this,
this->device,
this->queue,
default_fbo_gputexture_);
/* Check if the GHOST Context provides a default framebuffer: */
if (default_fbo_mtltexture_) {
/* Release old GPUTexture handle */
if (default_fbo_gputexture_) {
GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
default_fbo_gputexture_ = nullptr;
}
/* Retain handle */
[default_fbo_mtltexture_ retain];
/*** Create front and back-buffers ***/
/* Create gpu::MTLTexture objects */
default_fbo_gputexture_ = new gpu::MTLTexture(
"MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
/* Update frame-buffers with new texture attachments. */
mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
#ifndef NDEBUG
this->label = default_fbo_mtltexture_.label;
#endif
}
else {
/* Add default texture for cases where no other framebuffer is bound */
if (!default_fbo_gputexture_) {
default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(unwrap(GPU_texture_create_2d(
__func__, 16, 16, 1, GPU_RGBA16F, GPU_TEXTURE_USAGE_GENERAL, nullptr)));
}
mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
MTL_LOG_INFO(
"-- Bound context %p for GPU context: %p is offscreen and does not have a default "
"framebuffer\n",
ghost_cgl_ctx,
this);
#ifndef NDEBUG
this->label = @"Offscreen Metal Context";
#endif
}
}
else {
MTL_LOG_INFO(
"[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
"(GhostContext: %p, GhostContext_CGL: %p)\n",
ghost_ctx,
ghost_cgl_ctx);
BLI_assert(false);
}
}
void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
{
GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL));
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
MTLContext::MTLContext(void *ghost_window, void *ghost_context)
: memory_manager(*this), main_command_buffer(*this)
{
/* Init debug. */
debug::mtl_debug_init();
/* Initialize Render-pass and Frame-buffer State. */
this->back_left = nullptr;
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
/* Initialize IMM and pipeline state */
this->pipeline_state.initialised = false;
/* Frame management. */
is_inside_frame_ = false;
current_frame_index_ = 0;
/* Prepare null data buffer. */
null_buffer_ = nil;
null_attribute_buffer_ = nil;
/* Zero-initialize MTL textures. */
default_fbo_mtltexture_ = nil;
default_fbo_gputexture_ = nullptr;
/** Fetch GHOST context and the Metal device/queue. */
ghost_window_ = ghost_window;
if (ghost_window_ && ghost_context == NULL) {
/* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
* Regardless of whether windowed or not, we need access to the GhostContext
* for presentation, and device/queue access. */
GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
}
BLI_assert(ghost_context);
this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
BLI_assert(this->queue);
BLI_assert(this->device);
[this->queue retain];
[this->device retain];
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wobjc-method-access"
/* Enable increased concurrent shader compiler limit.
* Note: Disable warning for missing method when building on older OS's, as compiled code will
* still work correctly when run on a system with the API available. */
if (@available(macOS 13.3, *)) {
[this->device setShouldMaximizeConcurrentCompilation:YES];
}
#pragma clang diagnostic pop
/* Register present callback. */
this->ghost_context_->metalRegisterPresentCallback(&present);
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
this->front_left = mtl_front_left;
this->back_left = mtl_back_left;
this->active_fb = this->back_left;
/* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX
* initialization). */
MTLBackend::platform_init(this);
MTLBackend::capabilities_init(this);
/* Initialize Metal modules. */
this->memory_manager.init();
this->state_manager = new MTLStateManager(this);
this->imm = new MTLImmediate(this);
/* Ensure global memory manager is initialized. */
MTLContext::global_memory_manager_acquire_ref();
MTLContext::get_global_memory_manager()->init(this->device);
/* Initialize texture read/update structures. */
this->get_texture_utils().init();
/* Bound Samplers struct. */
for (int i = 0; i < MTL_MAX_TEXTURE_SLOTS; i++) {
samplers_.mtl_sampler[i] = nil;
samplers_.mtl_sampler_flags[i] = DEFAULT_SAMPLER_STATE;
}
/* Initialize samplers. */
this->sampler_state_cache_init();
}
MTLContext::~MTLContext()
{
BLI_assert(this == reinterpret_cast<MTLContext *>(GPU_context_active_get()));
/* Ensure rendering is complete and that command encoders/command buffers are freed. */
if (MTLBackend::get()->is_inside_render_boundary()) {
this->finish();
/* End frame. */
if (this->get_inside_frame()) {
this->end_frame();
}
}
/* Release context textures. */
if (default_fbo_gputexture_) {
GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
default_fbo_gputexture_ = nullptr;
}
if (default_fbo_mtltexture_) {
[default_fbo_mtltexture_ release];
default_fbo_mtltexture_ = nil;
}
/* Release Memory Manager */
this->get_scratchbuffer_manager().free();
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
/* Detach resource references */
GPU_texture_unbind_all();
/* Unbind UBOs */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
GPUUniformBuf *ubo = wrap(
static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
GPU_uniformbuf_unbind(ubo);
}
}
/* Release Dummy resources */
this->free_dummy_resources();
/* Release Sampler States. */
for (int extend_yz_i = 0; extend_yz_i < GPU_SAMPLER_EXTEND_MODES_COUNT; extend_yz_i++) {
for (int extend_x_i = 0; extend_x_i < GPU_SAMPLER_EXTEND_MODES_COUNT; extend_x_i++) {
for (int filtering_i = 0; filtering_i < GPU_SAMPLER_FILTERING_TYPES_COUNT; filtering_i++) {
if (sampler_state_cache_[extend_yz_i][extend_x_i][filtering_i] != nil) {
[sampler_state_cache_[extend_yz_i][extend_x_i][filtering_i] release];
sampler_state_cache_[extend_yz_i][extend_x_i][filtering_i] = nil;
}
}
}
}
/* Release Custom Sampler States. */
for (int i = 0; i < GPU_SAMPLER_CUSTOM_TYPES_COUNT; i++) {
if (custom_sampler_state_cache_[i] != nil) {
[custom_sampler_state_cache_[i] release];
custom_sampler_state_cache_[i] = nil;
}
}
/* Empty cached sampler argument buffers. */
for (auto entry : cached_sampler_buffers_.values()) {
entry->free();
}
cached_sampler_buffers_.clear();
/* Free null buffers. */
if (null_buffer_) {
[null_buffer_ release];
}
if (null_attribute_buffer_) {
[null_attribute_buffer_ release];
}
/* Release memory manager reference. */
MTLContext::global_memory_manager_release_ref();
/* Free Metal objects. */
if (this->queue) {
[this->queue release];
}
if (this->device) {
[this->device release];
}
}
void MTLContext::begin_frame()
{
BLI_assert(MTLBackend::get()->is_inside_render_boundary());
if (this->get_inside_frame()) {
return;
}
/* Begin Command buffer for next frame. */
is_inside_frame_ = true;
}
void MTLContext::end_frame()
{
BLI_assert(this->get_inside_frame());
/* Ensure pre-present work is committed. */
this->flush();
/* Increment frame counter. */
is_inside_frame_ = false;
}
void MTLContext::check_error(const char *info)
{
/* TODO(Metal): Implement. */
}
void MTLContext::activate()
{
/* Make sure no other context is already bound to this thread. */
BLI_assert(is_active_ == false);
is_active_ = true;
thread_ = pthread_self();
/* Re-apply ghost window/context for resizing */
if (ghost_window_) {
this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
}
else if (ghost_context_) {
this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
}
/* Reset UBO bind state. */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
this->pipeline_state.ubo_bindings[i].bound = false;
this->pipeline_state.ubo_bindings[i].ubo = nullptr;
}
}
/* Ensure imm active. */
immActivate();
}
void MTLContext::deactivate()
{
BLI_assert(this->is_active_on_thread());
/* Flush context on deactivate. */
this->flush();
is_active_ = false;
immDeactivate();
}
void MTLContext::flush()
{
this->main_command_buffer.submit(false);
}
void MTLContext::finish()
{
this->main_command_buffer.submit(true);
}
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
{
/* TODO(Metal): Implement. */
*total_mem = 0;
*free_mem = 0;
}
void MTLContext::framebuffer_bind(MTLFrameBuffer *framebuffer)
{
/* We do not yet begin the pass -- We defer beginning the pass until a draw is requested. */
BLI_assert(framebuffer);
this->active_fb = framebuffer;
}
void MTLContext::framebuffer_restore()
{
/* Bind default framebuffer from context --
* We defer beginning the pass until a draw is requested. */
this->active_fb = this->back_left;
}
id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
{
BLI_assert(this);
/* Ensure the rendering frame has started. */
if (!this->get_inside_frame()) {
this->begin_frame();
}
/* Check whether a framebuffer is bound. */
if (!this->active_fb) {
BLI_assert(false && "No framebuffer is bound!");
return this->main_command_buffer.get_active_render_command_encoder();
}
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
if (this->main_command_buffer.do_break_submission() &&
!((MTLImmediate *)(this->imm))->imm_is_recording()) {
this->flush();
}
/* Begin pass or perform a pass switch if the active framebuffer has been changed, or if the
* framebuffer state has been modified (is_dirty). */
if (!this->main_command_buffer.is_inside_render_pass() ||
this->active_fb != this->main_command_buffer.get_active_framebuffer() ||
this->main_command_buffer.get_active_framebuffer()->get_dirty() ||
this->is_visibility_dirty()) {
/* Validate bound framebuffer before beginning render pass. */
if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) {
MTL_LOG_WARNING("Framebuffer validation failed, falling back to default framebuffer\n");
this->framebuffer_restore();
if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) {
MTL_LOG_ERROR("CRITICAL: DEFAULT FRAMEBUFFER FAIL VALIDATION!!\n");
}
}
/* Begin RenderCommandEncoder on main CommandBuffer. */
bool new_render_pass = false;
id<MTLRenderCommandEncoder> new_enc =
this->main_command_buffer.ensure_begin_render_command_encoder(
static_cast<MTLFrameBuffer *>(this->active_fb), true, &new_render_pass);
if (new_render_pass) {
/* Flag context pipeline state as dirty - dynamic pipeline state need re-applying. */
this->pipeline_state.dirty_flags = MTL_PIPELINE_STATE_ALL_FLAG;
}
return new_enc;
}
BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
return this->main_command_buffer.get_active_render_command_encoder();
}
MTLFrameBuffer *MTLContext::get_current_framebuffer()
{
MTLFrameBuffer *last_bound = static_cast<MTLFrameBuffer *>(this->active_fb);
return last_bound ? last_bound : this->get_default_framebuffer();
}
MTLFrameBuffer *MTLContext::get_default_framebuffer()
{
return static_cast<MTLFrameBuffer *>(this->back_left);
}
MTLShader *MTLContext::get_active_shader()
{
return this->pipeline_state.active_shader;
}
id<MTLBuffer> MTLContext::get_null_buffer()
{
if (null_buffer_ != nil) {
return null_buffer_;
}
/* TODO(mpw_apple_gpusw): Null buffer size temporarily increased to cover
* maximum possible UBO size. There are a number of cases which need to be
* resolved in the high level where an expected UBO does not have a bound
* buffer. The null buffer needs to at least cover the size of these
* UBOs to avoid any GPU memory issues. */
static const int null_buffer_size = 20480;
null_buffer_ = [this->device newBufferWithLength:null_buffer_size
options:MTLResourceStorageModeManaged];
[null_buffer_ retain];
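/* Zero-fill the managed buffer CPU-side, then flush the written range through to the GPU. */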
uint32_t *null_data = (uint32_t *)calloc(1, null_buffer_size);
memcpy([null_buffer_ contents], null_data, null_buffer_size);
[null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)];
free(null_data);
BLI_assert(null_buffer_ != nil);
return null_buffer_;
}
id<MTLBuffer> MTLContext::get_null_attribute_buffer()
{
if (null_attribute_buffer_ != nil) {
return null_attribute_buffer_;
}
/* Allocate Null buffer if it has not yet been created.
* Min buffer size is 256 bytes -- though we only need 64 bytes of data. */
static const int null_buffer_size = 256;
null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size
options:MTLResourceStorageModeManaged];
BLI_assert(null_attribute_buffer_ != nil);
[null_attribute_buffer_ retain];
float data[4] = {0.0f, 0.0f, 0.0f, 1.0f};
memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4);
[null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)];
return null_attribute_buffer_;
}
gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type,
eGPUSamplerFormat sampler_format)
{
/* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive). Remap to 0..31 */
gpu::MTLTexture *dummy_tex = dummy_textures_[sampler_format][type - 1];
if (dummy_tex != nullptr) {
return dummy_tex;
}
else {
/* Determine format for dummy texture. */
eGPUTextureFormat format = GPU_RGBA8;
switch (sampler_format) {
case GPU_SAMPLER_TYPE_FLOAT:
format = GPU_RGBA8;
break;
case GPU_SAMPLER_TYPE_INT:
format = GPU_RGBA8I;
break;
case GPU_SAMPLER_TYPE_UINT:
format = GPU_RGBA8UI;
break;
case GPU_SAMPLER_TYPE_DEPTH:
format = GPU_DEPTH32F_STENCIL8;
break;
default:
BLI_assert_unreachable();
}
/* Create dummy texture based on desired type. */
GPUTexture *tex = nullptr;
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_GENERAL;
switch (type) {
case GPU_TEXTURE_1D:
tex = GPU_texture_create_1d("Dummy 1D", 128, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_1D_ARRAY:
tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_2D:
tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_2D_ARRAY:
tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_3D:
tex = GPU_texture_create_3d("Dummy 3D", 128, 128, 1, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_CUBE:
tex = GPU_texture_create_cube("Dummy Cube", 128, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_CUBE_ARRAY:
tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, format, usage, nullptr);
break;
case GPU_TEXTURE_BUFFER:
if (!dummy_verts_[sampler_format]) {
GPU_vertformat_clear(&dummy_vertformat_[sampler_format]);
GPUVertCompType comp_type = GPU_COMP_F32;
GPUVertFetchMode fetch_mode = GPU_FETCH_FLOAT;
switch (sampler_format) {
case GPU_SAMPLER_TYPE_FLOAT:
case GPU_SAMPLER_TYPE_DEPTH:
comp_type = GPU_COMP_F32;
fetch_mode = GPU_FETCH_FLOAT;
break;
case GPU_SAMPLER_TYPE_INT:
comp_type = GPU_COMP_I32;
fetch_mode = GPU_FETCH_INT;
break;
case GPU_SAMPLER_TYPE_UINT:
comp_type = GPU_COMP_U32;
fetch_mode = GPU_FETCH_INT;
break;
default:
BLI_assert_unreachable();
}
GPU_vertformat_attr_add(
&dummy_vertformat_[sampler_format], "dummy", comp_type, 4, fetch_mode);
dummy_verts_[sampler_format] = GPU_vertbuf_create_with_format_ex(
&dummy_vertformat_[sampler_format],
GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(dummy_verts_[sampler_format], 64);
}
tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_[sampler_format]);
break;
default:
BLI_assert_msg(false, "Unrecognised texture type");
return nullptr;
}
gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
dummy_textures_[sampler_format][type - 1] = metal_tex;
return metal_tex;
}
return nullptr;
}
void MTLContext::free_dummy_resources()
{
for (int format = 0; format < GPU_SAMPLER_TYPE_MAX; format++) {
for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
if (dummy_textures_[format][tex]) {
GPU_texture_free(
reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[format][tex])));
dummy_textures_[format][tex] = nullptr;
}
}
if (dummy_verts_[format]) {
GPU_vertbuf_discard(dummy_verts_[format]);
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Global Context State
* \{ */
/* Metal Context Pipeline State. */
void MTLContext::pipeline_state_init()
{
/*** Initialize state only once. ***/
if (!this->pipeline_state.initialised) {
this->pipeline_state.initialised = true;
this->pipeline_state.active_shader = nullptr;
/* Clear bindings state. */
for (int t = 0; t < GPU_max_textures(); t++) {
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].slot_index = -1;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
}
for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) {
this->pipeline_state.sampler_bindings[s].used = false;
}
for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) {
this->pipeline_state.ubo_bindings[u].bound = false;
this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
}
/*** State defaults -- restored by GPU_state_init. ***/
/* Clear blending State. */
this->pipeline_state.color_write_mask = MTLColorWriteMaskRed | MTLColorWriteMaskGreen |
MTLColorWriteMaskBlue | MTLColorWriteMaskAlpha;
this->pipeline_state.blending_enabled = false;
this->pipeline_state.alpha_blend_op = MTLBlendOperationAdd;
this->pipeline_state.rgb_blend_op = MTLBlendOperationAdd;
this->pipeline_state.dest_alpha_blend_factor = MTLBlendFactorZero;
this->pipeline_state.dest_rgb_blend_factor = MTLBlendFactorZero;
this->pipeline_state.src_alpha_blend_factor = MTLBlendFactorOne;
this->pipeline_state.src_rgb_blend_factor = MTLBlendFactorOne;
/* Viewport and scissor. */
this->pipeline_state.viewport_offset_x = 0;
this->pipeline_state.viewport_offset_y = 0;
this->pipeline_state.viewport_width = 0;
this->pipeline_state.viewport_height = 0;
this->pipeline_state.scissor_x = 0;
this->pipeline_state.scissor_y = 0;
this->pipeline_state.scissor_width = 0;
this->pipeline_state.scissor_height = 0;
this->pipeline_state.scissor_enabled = false;
/* Culling State. */
this->pipeline_state.culling_enabled = false;
this->pipeline_state.cull_mode = GPU_CULL_NONE;
this->pipeline_state.front_face = GPU_COUNTERCLOCKWISE;
/* DATA and IMAGE access state. */
this->pipeline_state.unpack_row_length = 0;
/* Depth State. */
this->pipeline_state.depth_stencil_state.depth_write_enable = false;
this->pipeline_state.depth_stencil_state.depth_test_enabled = false;
this->pipeline_state.depth_stencil_state.depth_range_near = 0.0;
this->pipeline_state.depth_stencil_state.depth_range_far = 1.0;
this->pipeline_state.depth_stencil_state.depth_function = MTLCompareFunctionAlways;
this->pipeline_state.depth_stencil_state.depth_bias = 0.0;
this->pipeline_state.depth_stencil_state.depth_slope_scale = 0.0;
this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points = false;
this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines = false;
this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris = false;
/* Stencil State. */
this->pipeline_state.depth_stencil_state.stencil_test_enabled = false;
this->pipeline_state.depth_stencil_state.stencil_read_mask = 0xFF;
this->pipeline_state.depth_stencil_state.stencil_write_mask = 0xFF;
this->pipeline_state.depth_stencil_state.stencil_ref = 0;
this->pipeline_state.depth_stencil_state.stencil_func = MTLCompareFunctionAlways;
this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail = MTLStencilOperationKeep;
this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail = MTLStencilOperationKeep;
this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass =
MTLStencilOperationKeep;
this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail = MTLStencilOperationKeep;
this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail = MTLStencilOperationKeep;
this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass =
MTLStencilOperationKeep;
}
void MTLContext::set_viewport(int origin_x, int origin_y, int width, int height)
{
BLI_assert(this);
BLI_assert(width > 0);
BLI_assert(height > 0);
BLI_assert(origin_x >= 0);
BLI_assert(origin_y >= 0);
bool changed = (this->pipeline_state.viewport_offset_x != origin_x) ||
(this->pipeline_state.viewport_offset_y != origin_y) ||
(this->pipeline_state.viewport_width != width) ||
(this->pipeline_state.viewport_height != height);
this->pipeline_state.viewport_offset_x = origin_x;
this->pipeline_state.viewport_offset_y = origin_y;
this->pipeline_state.viewport_width = width;
this->pipeline_state.viewport_height = height;
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_VIEWPORT_FLAG);
}
}
void MTLContext::set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height)
{
BLI_assert(this);
bool changed = (this->pipeline_state.scissor_x != scissor_x) ||
(this->pipeline_state.scissor_y != scissor_y) ||
(this->pipeline_state.scissor_width != scissor_width) ||
(this->pipeline_state.scissor_height != scissor_height) ||
(this->pipeline_state.scissor_enabled != true);
this->pipeline_state.scissor_x = scissor_x;
this->pipeline_state.scissor_y = scissor_y;
this->pipeline_state.scissor_width = scissor_width;
this->pipeline_state.scissor_height = scissor_height;
this->pipeline_state.scissor_enabled = (scissor_width > 0 && scissor_height > 0);
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_SCISSOR_FLAG);
}
}
void MTLContext::set_scissor_enabled(bool scissor_enabled)
{
/* Only turn on Scissor if requested scissor region is valid */
scissor_enabled = scissor_enabled && (this->pipeline_state.scissor_width > 0 &&
this->pipeline_state.scissor_height > 0);
bool changed = (this->pipeline_state.scissor_enabled != scissor_enabled);
this->pipeline_state.scissor_enabled = scissor_enabled;
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_SCISSOR_FLAG);
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Command Encoder and pipeline state
* These utilities ensure that all of the globally bound resources and state have been
* correctly encoded within the current RenderCommandEncoder. This involves managing
* buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
*
* We will also trigger compilation of new PSOs where the input state has changed
* and is required.
* All of this setup is required in order to perform a valid draw call.
* \{ */
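/* A typical draw submission presumably follows this sequence (a sketch -- the actual callers
* live in the batch/immediate drawing code outside this file):
*
*   id<MTLRenderCommandEncoder> enc = ctx->ensure_begin_render_pass();
*   if (ctx->ensure_render_pipeline_state(MTLPrimitiveTypeTriangle)) {
*     // All state encoded -- safe to issue the draw on `enc`.
*   }
*/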
bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
{
BLI_assert(this->pipeline_state.initialised);
/* Check if an active shader is bound. */
if (!this->pipeline_state.active_shader) {
MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
return false;
}
/* Also ensure active shader is valid. */
if (!this->pipeline_state.active_shader->is_valid()) {
MTL_LOG_WARNING(
"Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
return false;
}
/* Apply global state. */
this->state_manager->apply_state();
/* Main command buffer tracks the current state of the render pass, based on bound
* MTLFrameBuffer. */
MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
/* Debug Check: Ensure Framebuffer instance is not dirty. */
BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
/* Fetch shader interface. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
if (shader_interface == nullptr) {
MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
return false;
}
/* Fetch shader and bake valid PipelineStateObject (PSO) based on current
* shader and state combination. This PSO represents the final GPU-executable
* permutation of the shader. */
MTLRenderPipelineStateInstance *pipeline_state_instance =
this->pipeline_state.active_shader->bake_current_pipeline_state(
this, mtl_prim_type_to_topology_class(mtl_prim_type));
if (!pipeline_state_instance) {
MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
shader_interface->get_name());
return false;
}
bool result = false;
if (pipeline_state_instance->pso) {
/* Fetch render command encoder. A render pass should already be active.
* This will be NULL if invalid. */
id<MTLRenderCommandEncoder> rec =
this->main_command_buffer.get_active_render_command_encoder();
BLI_assert(rec);
if (rec == nil) {
MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
return false;
}
/* Bind Render Pipeline State. */
BLI_assert(pipeline_state_instance->pso);
if (rps.bound_pso != pipeline_state_instance->pso) {
[rec setRenderPipelineState:pipeline_state_instance->pso];
rps.bound_pso = pipeline_state_instance->pso;
}
/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
}
/* Transform feedback buffer binding. */
GPUVertBuf *tf_vbo =
this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
/* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
mtl_prim_type == MTLPrimitiveTypeTriangle ||
mtl_prim_type == MTLPrimitiveTypePoint);
/* Fetch active transform feedback buffer from vertbuf */
MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
/* Ensure TF buffer is ready. */
tf_vbo_mtl->bind();
id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer();
BLI_assert(tf_buffer_mtl != nil);
if (tf_buffer_mtl != nil) {
[rec setVertexBuffer:tf_buffer_mtl
offset:0
atIndex:pipeline_state_instance->transform_feedback_buffer_index];
MTL_LOG_INFO("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
tf_vbo_mtl,
tf_buffer_mtl);
}
}
/* Matrix Bindings. */
/* This is now called upon shader bind. We may need to re-evaluate this though,
* as was done here to ensure uniform changes between draws were tracked.
* NOTE(Metal): We may be able to remove this. */
GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
static_cast<Shader *>(this->pipeline_state.active_shader)));
/* Bind Uniforms */
this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
/* Bind Null attribute buffer, if needed. */
if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
if (G.debug & G_DEBUG_GPU) {
MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
pipeline_state_instance->null_attribute_buffer_index);
}
rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
0,
pipeline_state_instance->null_attribute_buffer_index);
}
/** Dynamic Per-draw Render State on RenderCommandEncoder. */
/* State: Viewport. */
if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
MTLViewport viewport;
viewport.originX = (double)this->pipeline_state.viewport_offset_x;
viewport.originY = (double)this->pipeline_state.viewport_offset_y;
viewport.width = (double)this->pipeline_state.viewport_width;
viewport.height = (double)this->pipeline_state.viewport_height;
viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
[rec setViewport:viewport];
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
}
/* State: Scissor. */
if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
/* Get FrameBuffer associated with active RenderCommandEncoder. */
MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
MTLScissorRect scissor;
if (this->pipeline_state.scissor_enabled) {
scissor.x = this->pipeline_state.scissor_x;
scissor.y = this->pipeline_state.scissor_y;
scissor.width = this->pipeline_state.scissor_width;
scissor.height = this->pipeline_state.scissor_height;
/* Some scissor assignments exceed the bounds of the viewport due to implicitly added
* padding to the width/height - Clamp width/height. */
BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_default_width());
BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_default_height());
scissor.width = (uint)min_ii(scissor.width,
max_ii(render_fb->get_default_width() - (int)(scissor.x), 0));
scissor.height = (uint)min_ii(
scissor.height, max_ii(render_fb->get_default_height() - (int)(scissor.y), 0));
BLI_assert(scissor.width > 0 &&
(scissor.x + scissor.width <= render_fb->get_default_width()));
BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_default_height()));
}
else {
/* Scissor is disabled, reset to default size as scissor state may have been previously
* assigned on this encoder. */
scissor.x = 0;
scissor.y = 0;
scissor.width = render_fb->get_default_width();
scissor.height = render_fb->get_default_height();
}
/* Scissor state can still be flagged as changed if it is toggled on and off, without
* parameters changing between draws. */
if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
[rec setScissorRect:scissor];
rps.last_scissor_rect = scissor;
}
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
~MTL_PIPELINE_STATE_SCISSOR_FLAG);
}
/* State: Face winding. */
if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
/* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
* system. */
MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
MTLWindingClockwise :
MTLWindingCounterClockwise;
[rec setFrontFacingWinding:winding];
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
}
/* State: cull-mode. */
if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
MTLCullMode mode = MTLCullModeNone;
if (this->pipeline_state.culling_enabled) {
switch (this->pipeline_state.cull_mode) {
case GPU_CULL_NONE:
mode = MTLCullModeNone;
break;
case GPU_CULL_FRONT:
mode = MTLCullModeFront;
break;
case GPU_CULL_BACK:
mode = MTLCullModeBack;
break;
default:
BLI_assert_unreachable();
break;
}
}
[rec setCullMode:mode];
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
~MTL_PIPELINE_STATE_CULLMODE_FLAG);
}
/* Pipeline state is now good. */
result = true;
}
return result;
}
/* Bind uniform buffers to an active render command encoder using the rendering state of the
* current context (active shader, bound UBOs). */
bool MTLContext::ensure_uniform_buffer_bindings(
id<MTLRenderCommandEncoder> rec,
const MTLShaderInterface *shader_interface,
const MTLRenderPipelineStateInstance *pipeline_state_instance)
{
/* Fetch Render Pass state. */
MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
/* Detect whether the active shader or PSO has changed since the last bind, as the
* shader-owned push-constant block must then be re-bound. */
bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
this->pipeline_state.active_shader ||
rps.last_bound_shader_state.shader_ == nullptr ||
rps.last_bound_shader_state.pso_index_ !=
pipeline_state_instance->shader_pso_index);
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - the buffer index
* will be offset by the number of bound VBOs. */
uint32_t block_size = push_constant_block.size;
uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
push_constant_block.buffer_index;
BLI_assert(buffer_index >= 0 && buffer_index < MTL_MAX_BUFFER_BINDINGS);
/* Only need to rebind block if push constants have been modified -- or if no data is bound for
* the current RenderCommandEncoder. */
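/* NOTE: The trailing `|| true` in the condition below forces the push-constant block to be
* re-bound for every draw; the preceding checks appear to be retained so that finer-grained
* rebinding can be restored later. */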
if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
!rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
/* Bind push constant data. */
BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
rps.bind_vertex_bytes(
this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
rps.bind_fragment_bytes(
this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
/* Only need to rebind block if it has been modified. */
this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
}
}
rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
pipeline_state_instance->shader_pso_index);
/* Bind Global GPUUniformBuffers */
/* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
* allocating zero buffers. */
if (ubo_buffer == nil) {
bind_dummy_buffer = true;
}
else {
BLI_assert(ubo_buffer != nil);
BLI_assert(ubo_size > 0);
if (pipeline_state_instance->reflection_data_available) {
/* NOTE: While the vertex and fragment stages have different UBOs, the indices in each
* case will be the same for the same UBO.
* We also determine expected size and then ensure buffer of the correct size
* exists in one of the vertex/fragment shader binding tables. This path is used
* to verify that the size of the bound UBO matches what is expected in the shader. */
uint32_t expected_size =
(buffer_index <
pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
.size :
0;
if (expected_size == 0) {
expected_size =
(buffer_index <
pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
.size :
0;
}
BLI_assert_msg(
expected_size > 0,
"Shader interface expects UBO, but shader reflection data reports that it "
"is not present");
/* If ubo size is smaller than the size expected by the shader, we need to bind the
* dummy buffer, which will be big enough, to avoid an OOB error. */
if (ubo_size < expected_size) {
MTL_LOG_INFO(
"[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
"%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
"over-binding, which is not used, but we need this to avoid validation "
"issues\n",
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index,
ubo_size,
expected_size,
shader_interface->get_name());
bind_dummy_buffer = true;
}
}
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
if (ubo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
}
/* Bind Fragment UBOs. */
if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
"[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
}
}
}
return true;
}
/* Variant for compute. Bind uniform buffers to an active compute command encoder using the
* rendering state of the current context (active shader, bound UBOs). */
bool MTLContext::ensure_uniform_buffer_bindings(
id<MTLComputeCommandEncoder> rec,
const MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance)
{
/* Fetch Compute Pass state. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
/* Fetch push constant block and bind. */
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - the buffer index
* will be offset by the number of bound VBOs. */
uint32_t block_size = push_constant_block.size;
uint32_t buffer_index = pipeline_state_instance.base_uniform_buffer_index +
push_constant_block.buffer_index;
BLI_assert(buffer_index >= 0 && buffer_index < MTL_MAX_BUFFER_BINDINGS);
/* For compute, we must always re-bind the push constant block as other compute
* operations may have assigned resources over the top, outside of the compiled
* compute shader path. */
/* Bind push constant data. */
BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
cs.bind_compute_bytes(
this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
/* Only need to rebind block if it has been modified. */
this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
}
/* Bind Global GPUUniformBuffers */
/* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
UNUSED_VARS_NDEBUG(ubo_size);
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
* allocating zero buffers. */
if (ubo_buffer == nil) {
bind_dummy_buffer = true;
}
else {
BLI_assert(ubo_buffer != nil);
BLI_assert(ubo_size > 0);
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
if (ubo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
"[UBO] Compute Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
}
}
}
return true;
}
/* Ensure texture bindings are correct and up to date for current draw call. */
void MTLContext::ensure_texture_bindings(
id<MTLRenderCommandEncoder> rec,
MTLShaderInterface *shader_interface,
const MTLRenderPipelineStateInstance *pipeline_state_instance)
{
BLI_assert(shader_interface != nil);
BLI_assert(rec != nil);
/* Fetch Render Pass state. */
MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
@autoreleasepool {
int vertex_arg_buffer_bind_index = -1;
int fragment_arg_buffer_bind_index = -1;
/* Argument buffers are used for samplers when the limit of 16 is exceeded. */
bool use_argument_buffer_for_samplers = shader_interface->uses_argument_buffer_for_samplers();
vertex_arg_buffer_bind_index = shader_interface->get_argument_buffer_bind_index(
ShaderStage::VERTEX);
fragment_arg_buffer_bind_index = shader_interface->get_argument_buffer_bind_index(
ShaderStage::FRAGMENT);
/* Loop through expected textures in shader interface and resolve bindings with currently
* bound textures. */
for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
/* Ensure the bound texture is compatible with the shader interface. If the
* shader does not expect a texture to be bound for the current slot, we skip
* binding.
* NOTE: Global texture bindings may be left over from prior draw calls. */
const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
if (!shader_texture_info.used) {
/* Skip unused binding points if explicit indices are specified. */
continue;
}
int slot = shader_texture_info.slot_index;
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
if (shader_texture_info.type == bound_texture->type_) {
/* Bind texture and sampler if the bound texture matches the type expected by the
* shader. */
id<MTLTexture> tex = bound_texture->get_metal_handle();
if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
rps.bind_vertex_texture(tex, slot);
rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
}
if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
rps.bind_fragment_texture(tex, slot);
rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
}
/* Texture state resolved, no need to bind dummy texture */
bind_dummy_texture = false;
}
else {
/* Texture type for bound texture (e.g. Texture2DArray) does not match what was
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
bound_texture,
slot,
shader_texture_info.type,
bound_texture->type_,
shader_interface->get_name_at_offset(shader_texture_info.name_offset),
bound_texture->get_name());
}
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
/* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats
* are provided -- as certain configurations may not need any binding. These issues should
* be fixed in the high-level, if problems crop up. */
if (bind_dummy_texture) {
if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
rps.bind_vertex_texture(
get_dummy_texture(shader_texture_info.type, shader_texture_info.sampler_format)
->get_metal_handle(),
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
rps.bind_fragment_texture(
get_dummy_texture(shader_texture_info.type, shader_texture_info.sampler_format)
->get_metal_handle(),
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
}
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
}
/* Construct and Bind argument buffer.
* NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
if (use_argument_buffer_for_samplers) {
#ifndef NDEBUG
/* Debug check to validate each expected texture in the shader interface has a valid
* sampler object bound to the context. We will need all of these to be valid
* when constructing the sampler argument buffer. */
for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
const MTLShaderTexture &texture = shader_interface->get_texture(i);
if (texture.used) {
BLI_assert(this->samplers_.mtl_sampler[i] != nil);
}
}
#endif
/* Check to ensure the buffer binding index for the argument buffer has been assigned.
* This PSO property will be set if we expect to use argument buffers, and the shader
* uses any amount of textures. */
BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
/* Offset binding index to be relative to the start of static uniform buffer binding slots.
* The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
* used by vertex and index buffer bindings, and the number of buffers present will vary
* between PSOs. */
int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
vertex_arg_buffer_bind_index);
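/* The computed index must remain within Metal's fixed per-stage buffer binding table. */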
assert(arg_buffer_idx < 32);
id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
arg_buffer_idx);
if (argument_encoder == nil) {
argument_encoder = [pipeline_state_instance->vert
newArgumentEncoderWithBufferIndex:arg_buffer_idx];
shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
}
/* Generate or Fetch argument buffer sampler configuration.
* NOTE(Metal): we need to base sampler counts off of the maximal texture
* index. This is not the most optimal, but in practice, not a use-case
* when argument buffers are required.
* This is because with explicit texture indices, the binding indices
* should match across draws, to allow the high-level to optimize bind-points. */
gpu::MTLBuffer *encoder_buffer = nullptr;
this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
this->samplers_);
if (cached_smp_buffer_search != nullptr) {
encoder_buffer = *cached_smp_buffer_search;
}
else {
/* Populate argument buffer with current global sampler bindings. */
int size = [argument_encoder encodedLength];
int alignment = max_uu([argument_encoder alignment], 256);
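/* Round the allocation up to the next multiple of the encoder's alignment, e.g. an encoded
* length of 72 with an alignment of 256 yields a 256 byte allocation. */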
int size_align_delta = (size % alignment);
int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
size + (alignment - (size % alignment)) :
size;
/* Allocate buffer to store encoded sampler arguments. */
encoder_buffer = MTLContext::get_global_memory_manager()->allocate(aligned_alloc_size,
true);
BLI_assert(encoder_buffer);
BLI_assert(encoder_buffer->get_metal_buffer());
[argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
[argument_encoder
setSamplerStates:this->samplers_.mtl_sampler
withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
encoder_buffer->flush();
/* Insert into cache. */
this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
}
BLI_assert(encoder_buffer != nullptr);
int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
vertex_arg_buffer_bind_index);
rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
/* Fragment shader shares its argument buffer binding with the vertex shader, so no need to
* re-encode. We can use the same argument buffer. */
if (fragment_arg_buffer_bind_index >= 0) {
BLI_assert(fragment_arg_buffer_bind_index >= 0);
int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
fragment_arg_buffer_bind_index);
rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
}
}
}
}
}
/* Texture binding variant for compute command encoder.
* Ensure bound texture resources are bound to the active MTLComputeCommandEncoder. */
void MTLContext::ensure_texture_bindings(
id<MTLComputeCommandEncoder> rec,
MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance)
{
BLI_assert(shader_interface != nil);
BLI_assert(rec != nil);
/* Fetch Compute Pass state. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
@autoreleasepool {
int compute_arg_buffer_bind_index = -1;
/* Argument buffers are used for samplers when the limit of 16 is exceeded.
* NOTE: Compute uses the vertex argument-buffer slot for its bind index. */
bool use_argument_buffer_for_samplers = shader_interface->uses_argument_buffer_for_samplers();
compute_arg_buffer_bind_index = shader_interface->get_argument_buffer_bind_index(
ShaderStage::COMPUTE);
/* Loop through expected textures in shader interface and resolve bindings with currently
* bound textures. */
for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
/* Ensure the bound texture is compatible with the shader interface. If the
* shader does not expect a texture to be bound for the current slot, we skip
* binding.
* NOTE: Global texture bindings may be left over from prior draw calls. */
const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
if (!shader_texture_info.used) {
/* Skip unused binding points if explicit indices are specified. */
continue;
}
int slot = shader_texture_info.slot_index;
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
if (shader_texture_info.type == bound_texture->type_) {
/* Bind texture and sampler if the bound texture matches the type expected by the
* shader. */
id<MTLTexture> tex = bound_texture->get_metal_handle();
if (bool(shader_texture_info.stage_mask & ShaderStage::COMPUTE)) {
cs.bind_compute_texture(tex, slot);
cs.bind_compute_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
}
/* Texture state resolved, no need to bind dummy texture */
bind_dummy_texture = false;
}
else {
/* Texture type for bound texture (e.g. Texture2DArray) does not match what was
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
bound_texture,
slot,
shader_texture_info.type,
bound_texture->type_,
shader_interface->get_name_at_offset(shader_texture_info.name_offset),
bound_texture->get_name());
}
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
/* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats
* are provided -- as certain configurations may not need any binding. These issues should
* be fixed in the high-level, if problems crop up. */
if (bind_dummy_texture) {
if (bool(shader_texture_info.stage_mask & ShaderStage::COMPUTE)) {
cs.bind_compute_texture(
get_dummy_texture(shader_texture_info.type, shader_texture_info.sampler_format)
->get_metal_handle(),
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
cs.bind_compute_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
}
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
}
/* Construct and bind argument buffer.
* NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
if (use_argument_buffer_for_samplers) {
#ifndef NDEBUG
/* Debug check to validate each expected texture in the shader interface has a valid
* sampler object bound to the context. We will need all of these to be valid
* when constructing the sampler argument buffer. */
for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
const MTLShaderTexture &texture = shader_interface->get_texture(i);
if (texture.used) {
BLI_assert(this->samplers_.mtl_sampler[i] != nil);
}
}
#endif
/* Check to ensure the buffer binding index for the argument buffer has been assigned.
* This PSO property will be set if we expect to use argument buffers, and the shader
* uses any amount of textures. */
BLI_assert(compute_arg_buffer_bind_index >= 0);
if (compute_arg_buffer_bind_index >= 0) {
/* Offset binding index to be relative to the start of static uniform buffer binding slots.
* The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
* used by vertex and index buffer bindings, and the number of buffers present will vary
* between PSOs. */
int arg_buffer_idx = (pipeline_state_instance.base_uniform_buffer_index +
compute_arg_buffer_bind_index);
BLI_assert(arg_buffer_idx < 32);
id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
arg_buffer_idx);
if (argument_encoder == nil) {
argument_encoder = [pipeline_state_instance.compute
newArgumentEncoderWithBufferIndex:arg_buffer_idx];
shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
}
/* Generate or fetch argument buffer sampler configuration.
* NOTE(Metal): Sampler counts are based on the maximal texture index. This is not
* optimal, but in practice it is not a concern for the cases where argument buffers
* are required: with explicit texture indices, binding indices should match across
* draws, allowing the high-level code to optimize bind-points. */
gpu::MTLBuffer *encoder_buffer = nullptr;
this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
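/* The sampler binding state (including the sampler count) acts as the cache key:
* identical sampler configurations re-use a previously encoded argument buffer. */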
gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
this->samplers_);
if (cached_smp_buffer_search != nullptr) {
encoder_buffer = *cached_smp_buffer_search;
}
else {
/* Populate argument buffer with current global sampler bindings. */
int size = [argument_encoder encodedLength];
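/* Round the encoded length up to the next multiple of the encoder's required
* alignment (at least 256 bytes) so the allocation is valid for binding as an
* argument buffer. */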
int alignment = max_uu([argument_encoder alignment], 256);
int size_align_delta = (size % alignment);
int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
size + (alignment - (size % alignment)) :
size;
/* Allocate buffer to store encoded sampler arguments. */
encoder_buffer = MTLContext::get_global_memory_manager()->allocate(aligned_alloc_size,
true);
BLI_assert(encoder_buffer);
BLI_assert(encoder_buffer->get_metal_buffer());
[argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
[argument_encoder
setSamplerStates:this->samplers_.mtl_sampler
withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
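/* Flush CPU-side writes so the encoded sampler handles are visible to the GPU
* (needed when the buffer uses managed storage). */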
encoder_buffer->flush();
/* Insert into cache. */
this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
}
BLI_assert(encoder_buffer != nullptr);
cs.bind_compute_buffer(encoder_buffer->get_metal_buffer(), 0, arg_buffer_idx);
}
}
}
}
/* Encode latest depth-stencil state. */
void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
{
/* Check if we need to update state. */
if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
return;
}
/* Fetch render command encoder. */
id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
BLI_assert(rec);
/* Fetch Render Pass state. */
MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
/** Prepare Depth-stencil state based on current global pipeline state. */
MTLFrameBuffer *fb = this->get_current_framebuffer();
bool hasDepthTarget = fb->has_depth_attachment();
bool hasStencilTarget = fb->has_stencil_attachment();
if (hasDepthTarget || hasStencilTarget) {
/* Update FrameBuffer State. */
this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
/* Check if current MTLContextDepthStencilState maps to an existing state object in
* the Depth-stencil state cache. */
id<MTLDepthStencilState> ds_state = nil;
id<MTLDepthStencilState> *depth_stencil_state_lookup =
this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
/* If not, populate DepthStencil state descriptor. */
if (depth_stencil_state_lookup == nullptr) {
MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
autorelease];
if (hasDepthTarget) {
ds_state_desc.depthWriteEnabled =
this->pipeline_state.depth_stencil_state.depth_write_enable;
ds_state_desc.depthCompareFunction =
this->pipeline_state.depth_stencil_state.depth_test_enabled ?
this->pipeline_state.depth_stencil_state.depth_function :
MTLCompareFunctionAlways;
}
if (hasStencilTarget) {
ds_state_desc.backFaceStencil.readMask =
this->pipeline_state.depth_stencil_state.stencil_read_mask;
ds_state_desc.backFaceStencil.writeMask =
this->pipeline_state.depth_stencil_state.stencil_write_mask;
ds_state_desc.backFaceStencil.stencilFailureOperation =
this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail;
ds_state_desc.backFaceStencil.depthFailureOperation =
this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail;
ds_state_desc.backFaceStencil.depthStencilPassOperation =
this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass;
ds_state_desc.backFaceStencil.stencilCompareFunction =
(this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
this->pipeline_state.depth_stencil_state.stencil_func :
MTLCompareFunctionAlways;
ds_state_desc.frontFaceStencil.readMask =
this->pipeline_state.depth_stencil_state.stencil_read_mask;
ds_state_desc.frontFaceStencil.writeMask =
this->pipeline_state.depth_stencil_state.stencil_write_mask;
ds_state_desc.frontFaceStencil.stencilFailureOperation =
this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail;
ds_state_desc.frontFaceStencil.depthFailureOperation =
this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail;
ds_state_desc.frontFaceStencil.depthStencilPassOperation =
this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass;
ds_state_desc.frontFaceStencil.stencilCompareFunction =
(this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
this->pipeline_state.depth_stencil_state.stencil_func :
MTLCompareFunctionAlways;
}
/* Bake new DS state. */
ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc];
/* Store state in cache. */
BLI_assert(ds_state != nil);
this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state);
}
else {
ds_state = *depth_stencil_state_lookup;
BLI_assert(ds_state != nil);
}
/* Bind Depth Stencil State to render command encoder. */
BLI_assert(ds_state != nil);
if (ds_state != nil) {
if (rps.bound_ds_state != ds_state) {
[rec setDepthStencilState:ds_state];
rps.bound_ds_state = ds_state;
}
}
/* Apply dynamic depth-stencil state on encoder. */
if (hasStencilTarget) {
uint32_t stencil_ref_value =
(this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
this->pipeline_state.depth_stencil_state.stencil_ref :
0;
if (stencil_ref_value != rps.last_used_stencil_ref_value) {
[rec setStencilReferenceValue:stencil_ref_value];
rps.last_used_stencil_ref_value = stencil_ref_value;
}
}
if (hasDepthTarget) {
bool doBias = false;
switch (prim_type) {
case MTLPrimitiveTypeTriangle:
case MTLPrimitiveTypeTriangleStrip:
doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
break;
case MTLPrimitiveTypeLine:
case MTLPrimitiveTypeLineStrip:
doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
break;
case MTLPrimitiveTypePoint:
doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
break;
}
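/* Apply the depth bias for the active primitive type when enabled, otherwise
* reset the encoder's bias state to zero. */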
[rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
clamp:0];
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compute dispatch.
* \{ */
bool MTLContext::ensure_compute_pipeline_state()
{
/* Verify that the bound shader is valid and fetch the MTLComputePipelineStateInstance. */
/* Check if an active shader is bound. */
if (!this->pipeline_state.active_shader) {
MTL_LOG_WARNING("No Metal shader bound!\n");
return false;
}
/* Also ensure active shader is valid. */
if (!this->pipeline_state.active_shader->is_valid()) {
MTL_LOG_WARNING(
"Bound active shader is not valid (missing/invalid implementation for Metal).\n");
return false;
}
/* Verify this is a compute shader. */
/* Fetch shader interface. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
if (shader_interface == nullptr) {
MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
return false;
}
bool success = this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
if (!success || compute_pso_inst.pso == nil) {
MTL_LOG_WARNING("No valid compute PSO for compute dispatch!\n", );
return false;
}
return true;
}
void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
{
/* Ensure all resources required by upcoming compute submission are correctly bound. */
if (this->ensure_compute_pipeline_state()) {
/* Shader instance. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
/* Begin compute encoder. */
id<MTLComputeCommandEncoder> compute_encoder =
this->main_command_buffer.ensure_begin_compute_encoder();
BLI_assert(compute_encoder != nil);
/* Bind PSO. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
cs.bind_pso(compute_pso_inst.pso);
/* Bind buffers. */
this->ensure_uniform_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);
/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}
/* Dispatch compute. */
[compute_encoder dispatchThreadgroups:MTLSizeMake(groups_x_len, groups_y_len, groups_z_len)
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
}
}
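/* Illustrative usage (values are hypothetical): a high-level call such as
* `GPU_compute_dispatch(shader, 64, 64, 1)` arrives here with the group counts
* above, while the per-threadgroup sizes were baked into the compute PSO. */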
/** \} */
/* -------------------------------------------------------------------- */
/** \name Visibility buffer control for MTLQueryPool.
* \{ */
void MTLContext::set_visibility_buffer(gpu::MTLBuffer *buffer)
{
/* Flag visibility buffer as dirty if the buffer used for visibility has changed.
* This is required by the render pass: if the results destination buffer is modified,
* the active pass must be broken. */
if (buffer) {
visibility_is_dirty_ = (buffer != visibility_buffer_) || visibility_is_dirty_;
visibility_buffer_ = buffer;
visibility_buffer_->debug_ensure_used();
}
else {
/* If buffer is null, reset visibility state and mark dirty to break the render pass,
* as results are no longer needed. */
visibility_is_dirty_ = (visibility_buffer_ != nullptr) || visibility_is_dirty_;
visibility_buffer_ = nullptr;
}
}
gpu::MTLBuffer *MTLContext::get_visibility_buffer() const
{
return visibility_buffer_;
}
void MTLContext::clear_visibility_dirty()
{
visibility_is_dirty_ = false;
}
bool MTLContext::is_visibility_dirty() const
{
return visibility_is_dirty_;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Texture State Management
* \{ */
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
{
BLI_assert(this);
BLI_assert(mtl_texture);
/* NOTE: `texture_unit` is unsigned, so no negative check is required. */
if (texture_unit >= GPU_max_textures() || texture_unit >= MTL_MAX_TEXTURE_SLOTS) {
MTL_LOG_WARNING("Attempting to bind texture '%s' to invalid texture unit %d\n",
mtl_texture->get_name(),
texture_unit);
BLI_assert(false);
return;
}
/* Bind new texture. */
this->pipeline_state.texture_bindings[texture_unit].texture_resource = mtl_texture;
this->pipeline_state.texture_bindings[texture_unit].used = true;
mtl_texture->is_bound_ = true;
}
void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit)
{
BLI_assert(this);
/* NOTE: `sampler_unit` is unsigned, so no negative check is required. */
if (sampler_unit >= GPU_max_textures() || sampler_unit >= MTL_MAX_SAMPLER_SLOTS) {
MTL_LOG_WARNING("Attempting to bind sampler to invalid sampler unit %d\n", sampler_unit);
BLI_assert(false);
return;
}
/* Apply binding. */
this->pipeline_state.sampler_bindings[sampler_unit] = {true, sampler_state};
}
void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture)
{
BLI_assert(mtl_texture);
/* Iterate through textures in state and unbind. */
for (int i = 0; i < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); i++) {
if (this->pipeline_state.texture_bindings[i].texture_resource == mtl_texture) {
this->pipeline_state.texture_bindings[i].texture_resource = nullptr;
this->pipeline_state.texture_bindings[i].used = false;
}
}
/* Locally unbind texture. */
mtl_texture->is_bound_ = false;
}
void MTLContext::texture_unbind_all()
{
/* Iterate through context's bound textures. */
for (int t = 0; t < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); t++) {
if (this->pipeline_state.texture_bindings[t].used &&
this->pipeline_state.texture_bindings[t].texture_resource) {
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
}
}
}
id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_state)
{
/* Internal sampler states are signal values and do not correspond to actual samplers. */
BLI_assert(sampler_state.state.type != GPU_SAMPLER_STATE_TYPE_INTERNAL);
if (sampler_state.state.type == GPU_SAMPLER_STATE_TYPE_CUSTOM) {
return custom_sampler_state_cache_[sampler_state.state.custom_type];
}
return sampler_state_cache_[sampler_state.state.extend_yz][sampler_state.state.extend_x]
[sampler_state.state.filtering];
}
/** Maps GPUSamplerExtendMode values to their Metal enum counterparts. */
static inline MTLSamplerAddressMode to_mtl_type(GPUSamplerExtendMode wrap_mode)
{
switch (wrap_mode) {
case GPU_SAMPLER_EXTEND_MODE_EXTEND:
return MTLSamplerAddressModeClampToEdge;
case GPU_SAMPLER_EXTEND_MODE_REPEAT:
return MTLSamplerAddressModeRepeat;
case GPU_SAMPLER_EXTEND_MODE_MIRRORED_REPEAT:
return MTLSamplerAddressModeMirrorRepeat;
case GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER:
return MTLSamplerAddressModeClampToBorderColor;
default:
BLI_assert_unreachable();
return MTLSamplerAddressModeClampToEdge;
}
}
void MTLContext::sampler_state_cache_init()
{
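/* Pre-bake a sampler state object for every combination of extend modes (YZ and X
* axes) and filtering options, so sampler lookup at bind time is a simple array
* index (see get_sampler_from_state). Custom samplers are created separately below. */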
for (int extend_yz_i = 0; extend_yz_i < GPU_SAMPLER_EXTEND_MODES_COUNT; extend_yz_i++) {
const GPUSamplerExtendMode extend_yz = static_cast<GPUSamplerExtendMode>(extend_yz_i);
const MTLSamplerAddressMode extend_t = to_mtl_type(extend_yz);
for (int extend_x_i = 0; extend_x_i < GPU_SAMPLER_EXTEND_MODES_COUNT; extend_x_i++) {
const GPUSamplerExtendMode extend_x = static_cast<GPUSamplerExtendMode>(extend_x_i);
const MTLSamplerAddressMode extend_s = to_mtl_type(extend_x);
for (int filtering_i = 0; filtering_i < GPU_SAMPLER_FILTERING_TYPES_COUNT; filtering_i++) {
const GPUSamplerFiltering filtering = GPUSamplerFiltering(filtering_i);
MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init];
descriptor.normalizedCoordinates = true;
descriptor.sAddressMode = extend_s;
descriptor.tAddressMode = extend_t;
descriptor.rAddressMode = extend_t;
descriptor.borderColor = MTLSamplerBorderColorTransparentBlack;
descriptor.minFilter = (filtering & GPU_SAMPLER_FILTERING_LINEAR) ?
MTLSamplerMinMagFilterLinear :
MTLSamplerMinMagFilterNearest;
descriptor.magFilter = (filtering & GPU_SAMPLER_FILTERING_LINEAR) ?
MTLSamplerMinMagFilterLinear :
MTLSamplerMinMagFilterNearest;
descriptor.mipFilter = (filtering & GPU_SAMPLER_FILTERING_MIPMAP) ?
MTLSamplerMipFilterLinear :
MTLSamplerMipFilterNotMipmapped;
descriptor.lodMinClamp = -1000;
descriptor.lodMaxClamp = 1000;
float aniso_filter = max_ff(16, U.anisotropic_filter);
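/* Anisotropic filtering is only applied when mip-map filtering is active. */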
descriptor.maxAnisotropy = (filtering & GPU_SAMPLER_FILTERING_MIPMAP) ? aniso_filter : 1;
descriptor.compareFunction = MTLCompareFunctionAlways;
descriptor.supportArgumentBuffers = true;
id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor];
sampler_state_cache_[extend_yz_i][extend_x_i][filtering_i] = state;
BLI_assert(state != nil);
[descriptor autorelease];
}
}
}
/* Compare sampler for depth textures. */
{
MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init];
descriptor.minFilter = MTLSamplerMinMagFilterLinear;
descriptor.magFilter = MTLSamplerMinMagFilterLinear;
descriptor.compareFunction = MTLCompareFunctionLessEqual;
descriptor.lodMinClamp = -1000;
descriptor.lodMaxClamp = 1000;
descriptor.supportArgumentBuffers = true;
id<MTLSamplerState> compare_state = [this->device newSamplerStateWithDescriptor:descriptor];
custom_sampler_state_cache_[GPU_SAMPLER_CUSTOM_COMPARE] = compare_state;
BLI_assert(compare_state != nil);
[descriptor autorelease];
}
/* Custom sampler for icons. The icon texture is sampled within the shader using a -0.5f LOD
* bias. */
{
MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init];
descriptor.minFilter = MTLSamplerMinMagFilterLinear;
descriptor.magFilter = MTLSamplerMinMagFilterLinear;
descriptor.mipFilter = MTLSamplerMipFilterNearest;
descriptor.lodMinClamp = 0;
descriptor.lodMaxClamp = 1;
id<MTLSamplerState> icon_state = [this->device newSamplerStateWithDescriptor:descriptor];
custom_sampler_state_cache_[GPU_SAMPLER_CUSTOM_ICON] = icon_state;
BLI_assert(icon_state != nil);
[descriptor autorelease];
}
}
id<MTLSamplerState> MTLContext::get_default_sampler_state()
{
if (default_sampler_state_ == nil) {
default_sampler_state_ = this->get_sampler_from_state({GPUSamplerState::default_sampler()});
}
return default_sampler_state_;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Swap-chain management and Metal presentation.
* \{ */
void present(MTLRenderPassDescriptor *blit_descriptor,
id<MTLRenderPipelineState> blit_pso,
id<MTLTexture> swapchain_texture,
id<CAMetalDrawable> drawable)
{
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/* Flush any outstanding work. */
ctx->flush();
/* Always pace CPU to a maximum of 3 drawables in flight.
* nextDrawable may have more in flight if backing swap-chain
* textures are re-allocated, such as during resize events.
*
* Determine frames in flight based on current latency. If
* we are in a high-latency situation, limit frames in flight
* to increase app responsiveness and keep GPU execution under control.
* If latency improves, increase frames in flight to improve overall
* performance. */
int perf_max_drawables = MTL_MAX_DRAWABLES;
if (MTLContext::avg_drawable_latency_us > 185000) {
perf_max_drawables = 1;
}
else if (MTLContext::avg_drawable_latency_us > 85000) {
perf_max_drawables = 2;
}
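/* Wait (sleeping in 2 ms increments) until the number of drawables in flight drops
* below the latency-based limit. */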
while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
PIL_sleep_ms(2);
}
/* Present is submitted in its own command buffer to ensure the drawable reference is
* released as early as possible. This command buffer is separate as it does not
* utilize the global state for rendering as the main context does. */
id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
MTLCommandBufferManager::num_active_cmd_bufs++;
/* Do Present Call and final Blit to MTLDrawable. */
id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
[enc setRenderPipelineState:blit_pso];
[enc setFragmentTexture:swapchain_texture atIndex:0];
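/* Draw a single triangle covering the full viewport, sampling the swap-chain
* texture; this assumes the blit PSO implements the standard fullscreen-triangle
* pattern. */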
[enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
[enc endEncoding];
/* Present drawable. */
BLI_assert(drawable);
[cmdbuf presentDrawable:drawable];
/* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
MTLSafeFreeList *cmd_free_buffer_list =
MTLContext::get_global_memory_manager()->get_current_safe_list();
BLI_assert(cmd_free_buffer_list);
id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
[cmd_buffer_ref retain];
/* Increment drawables in flight limiter. */
MTLContext::max_drawables_in_flight++;
std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
/* Increment free pool reference and decrement upon command buffer completion. */
cmd_free_buffer_list->increment_reference();
[cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
/* Flag freed buffers associated with this CMD buffer as ready to be freed. */
cmd_free_buffer_list->decrement_reference();
[cmd_buffer_ref release];
/* Decrement count */
MTLCommandBufferManager::num_active_cmd_bufs--;
MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
MTLCommandBufferManager::num_active_cmd_bufs);
/* Drawable count and latency management. */
MTLContext::max_drawables_in_flight--;
std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
completion_time - submission_time)
.count();
MTLContext::latency_resolve_average(microseconds_per_frame);
MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
((float)microseconds_per_frame) / 1000.0f,
((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
perf_max_drawables);
}];
[cmdbuf commit];
/* When debugging, fetch advanced command buffer errors. */
if (G.debug & G_DEBUG_GPU) {
[cmdbuf waitUntilCompleted];
NSError *error = [cmdbuf error];
if (error != nil) {
NSLog(@"%@", error);
BLI_assert(false);
@autoreleasepool {
const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
std::ofstream outfile;
outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
outfile << stringAsChar;
outfile.close();
}
}
else {
@autoreleasepool {
NSString *str = @"Command buffer completed successfully!\n";
const char *stringAsChar = [str UTF8String];
std::ofstream outfile;
outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
outfile << stringAsChar;
outfile.close();
}
}
}
}
/** \} */
} // namespace blender::gpu