test2/source/blender/gpu/metal/mtl_batch.mm
Commit 62219f8da9 (Jason Fielder): Metal: Re-enable workbench NEXT shadows
With the shift to a GPU-driven rendering pipeline, the SSBO vertex fetch
paradigm used to implement workbench shadows on Metal (in place of the
geometry shader path) no longer worked correctly.

This is because draw submission required vertex amplification up-front,
based on the expected output geometry for a given input geometry.

This patch resolves the issue by adding API to enable these features
within the GPU-driven pipeline.

Co-authored-by: Michael Parkin-White <mparkinwhite@apple.com>
Pull Request: https://projects.blender.org/blender/blender/pulls/113498
2023-10-19 08:01:17 +02:00

/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Metal implementation of GPUBatch.
*/
#include "BLI_assert.h"
#include "BLI_span.hh"
#include "BKE_global.h"
#include "GPU_common.h"
#include "gpu_batch_private.hh"
#include "gpu_shader_private.hh"
#include "mtl_batch.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_vertex_buffer.hh"
#include <string>
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
* \{ */
void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
{
if (this->flag & GPU_BATCH_INVALID) {
this->shader_in_use_ = false;
}
this->draw_advanced(v_first, v_count, i_first, i_count);
}
void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
if (this->flag & GPU_BATCH_INVALID) {
this->shader_in_use_ = false;
}
this->draw_advanced_indirect(indirect_buf, offset);
}
void MTLBatch::shader_bind()
{
if (active_shader_ && active_shader_->is_valid()) {
active_shader_->bind();
shader_in_use_ = true;
}
}
void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx)
{
BLI_assert(ctx != nullptr);
this->vertex_descriptor_cache_clear();
cache_context_ = ctx;
}
void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear()
{
cache_life_index_++;
cache_context_ = nullptr;
}
void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure()
{
if (this->cache_context_ != nullptr) {
/* Invalidate vertex descriptor bindings cache if batch has changed. */
if (batch_->flag & GPU_BATCH_DIRTY) {
batch_->flag &= ~GPU_BATCH_DIRTY;
this->vertex_descriptor_cache_clear();
}
}
/* Initialize cache if not ready. */
if (cache_context_ == nullptr) {
this->vertex_descriptor_cache_init(MTLContext::get());
}
}
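/* NOTE: Cache invalidation is lazy. Incrementing `cache_life_index_` orphans all
* existing entries at once: `find` and `insert` treat any entry whose stored life
* index differs from the current one as an empty slot, avoiding an explicit clear
* of the cache array on every batch change. */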
MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find(
const ShaderInterface *interface)
{
this->vertex_descriptor_cache_ensure();
for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) {
return &cache_[i];
}
}
return nullptr;
}
bool MTLBatch::MTLVertexDescriptorCache::insert(
MTLBatch::VertexDescriptorShaderInterfacePair &data)
{
vertex_descriptor_cache_ensure();
for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) {
cache_[i] = data;
cache_[i].cache_life_index = cache_life_index_;
return true;
}
}
return false;
}
int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
MTLRenderPipelineStateDescriptor &desc,
const MTLShaderInterface *interface,
uint16_t &attr_mask,
bool instanced)
{
const GPUVertFormat *format = &verts->format;
/* Whether the current vertex buffer has been added to the buffer layout descriptor. */
bool buffer_added = false;
/* Per-vertex stride of current vertex buffer. */
int buffer_stride = format->stride;
/* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */
int buffer_index = -1;
int attribute_offset = 0;
if (!active_shader_->get_uses_ssbo_vertex_fetch()) {
BLI_assert(
buffer_stride >= 4 &&
"In Metal, vertex buffer stride should be at least 4 bytes. SSBO vertex fetch is not "
"affected by this constraint");
}
/* Iterate over GPUVertBuf vertex format and find attributes matching those in the active
* shader's interface. */
for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) {
const GPUVertAttr *a = &format->attrs[a_idx];
if (format->deinterleaved) {
attribute_offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].size) * verts->vertex_len;
buffer_stride = a->size;
}
else {
attribute_offset = a->offset;
}
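/* Illustrative layout: in a deinterleaved buffer holding attributes A then B, all A
* elements are stored contiguously, followed by all B elements. The offset for B is
* therefore size(A) * vertex_len and the per-attribute stride is the attribute's own
* size, whereas interleaved attributes share the format stride and use their declared
* offsets. */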
/* Find attribute with the matching name. Attributes may have multiple compatible
* name aliases. */
for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) {
const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
const ShaderInput *input = interface->attr_get(name);
if (input == nullptr || input->location == -1) {
/* The provided vertex/instance buffers contain data for attributes which are not needed
* by this particular shader. Binding information is only required for the attributes
* present in the shader interface. */
MTL_LOG_WARNING(
"MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) "
"in the shader interface for shader '%s'",
name,
interface->get_name());
continue;
}
/* Fetch Metal attribute information (ShaderInput->binding is used to fetch the corresponding
* slot). */
const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->binding);
BLI_assert(mtl_attr.location >= 0);
/* Verify that the attribute location from the shader interface
* matches the attribute location returned in the input table. These should always be the
* same. */
BLI_assert(mtl_attr.location == input->location);
/* Check if attribute is already present in the given slot. */
if ((~attr_mask) & (1 << mtl_attr.location)) {
MTL_LOG_INFO(
" -- [Batch] Skipping attribute with input location %d (As one is already bound)",
mtl_attr.location);
}
else {
/* Update attribute used-slot mask. */
attr_mask &= ~(1 << mtl_attr.location);
/* Add buffer layout entry in descriptor if it has not yet been added
* for current vertex buffer. */
if (!buffer_added) {
buffer_index = desc.vertex_descriptor.num_vert_buffers;
desc.vertex_descriptor.buffer_layouts[buffer_index].step_function =
(instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1;
desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
desc.vertex_descriptor.num_vert_buffers++;
buffer_added = true;
MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)",
(instanced) ? "instance" : "vertex",
buffer_index,
buffer_stride);
}
else {
/* Ensure stride is correct for de-interleaved attributes. */
desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
}
/* Handle Matrix/Array vertex attribute types.
* Metal does not natively support these as attribute types, so we handle these cases
* by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining
* the data in the shader.
* The generated Metal shader will contain a generated input binding, which reads
* in individual attributes and merges them into the desired type after vertex
* assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */
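/* Illustrative example: a mat4 attribute with root location 4, sourced at byte
* offset 32 of its buffer, expands to four Float4 attributes (see the per-element
* loop below):
* location 4: Float4, offset 32
* location 5: Float4, offset 48
* location 6: Float4, offset 64
* location 7: Float4, offset 80 */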
if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
BLI_assert_msg(
a->comp_len == 16,
"only mat4 attributes currently supported -- Not ready to handle other long "
"component length attributes yet");
/* SSBO Vertex Fetch Attribute safety checks. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
/* When using SSBO vertex fetch, we do not need to expose split attributes,
* A matrix can be read directly as a whole block of contiguous data. */
MTLSSBOAttribute ssbo_attr(mtl_attr.index,
buffer_index,
attribute_offset,
buffer_stride,
GPU_SHADER_ATTR_TYPE_MAT4,
instanced);
active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
ssbo_attr;
desc.vertex_descriptor.num_ssbo_attributes++;
}
else {
/* Handle Mat4 attributes. */
if (a->comp_len == 16) {
/* Debug safety checks. */
BLI_assert_msg(mtl_attr.matrix_element_count == 4,
"mat4 type expected but there are fewer components");
BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes");
BLI_assert_msg(
mtl_attr.format == MTLVertexFormatFloat4,
"Per-attribute vertex format MUST be float4 for an input type of 'mat4'");
/* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute
* locations we must map to. */
for (int i = 0; i < a->comp_len / 4; i++) {
desc.vertex_descriptor.attributes[mtl_attr.location + i].format =
MTLVertexFormatFloat4;
/* Data is consecutive in the buffer for the whole matrix, each float4 will shift
* the offset by 16 bytes. */
desc.vertex_descriptor.attributes[mtl_attr.location + i].offset =
attribute_offset + i * 16;
/* All source data for a matrix is in the same singular buffer. */
desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index =
buffer_index;
/* Update total attribute count. */
desc.vertex_descriptor.total_attributes++;
desc.vertex_descriptor.max_attribute_value = max_ii(
mtl_attr.location + i, desc.vertex_descriptor.max_attribute_value);
MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d",
mtl_attr.location + i,
attribute_offset + i * 16,
buffer_index);
/* Update attribute used-slot mask for array elements. */
attr_mask &= ~(1 << (mtl_attr.location + i));
}
MTL_LOG_INFO(
"Float4x4 attribute type added for '%s' at attribute locations: %d to %d",
name,
mtl_attr.location,
mtl_attr.location + 3);
}
/* Ensure we are not exceeding the attribute limit. */
BLI_assert(desc.vertex_descriptor.max_attribute_value <
MTL_MAX_VERTEX_INPUT_ATTRIBUTES);
}
}
else {
/* Handle any required format conversions.
* NOTE(Metal): If there is a mismatch between the format of an attribute
* in the shader interface and the specified format in the VertexBuffer VertexFormat,
* we need to perform a format conversion.
*
* The Metal API can perform certain conversions internally during vertex assembly:
* - Type normalization, e.g. Short2 to Float2 in the range 0.0 to 1.0.
* - Type truncation, e.g. Float4 to Float2.
* - Type expansion, e.g. Float3 to Float4 (with (0, 0, 0, 1) assigned to empty
* elements).
*
* Certain conversions cannot be performed, however, and in these cases we need to
* instruct the shader to generate a specialized version with a conversion routine upon
* attribute read.
* - This handles cases such as conversion between types, e.g. integer to float without
* normalization.
*
* For more information on the supported and unsupported conversions, see:
* https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
*/
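/* Illustrative examples of the two paths:
* - GPU_COMP_I16 x2 with GPU_FETCH_INT_TO_FLOAT_UNIT maps to
* MTLVertexFormatShort2Normalized and is converted during vertex assembly.
* - GPU_COMP_I32 with GPU_FETCH_INT_TO_FLOAT has no native vertex-format
* equivalent, so the data is read as int in the specialized shader routine and
* cast to float there. */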
MTLVertexFormat converted_format;
bool can_use_internal_conversion = mtl_convert_vertex_format(
mtl_attr.format,
(GPUVertCompType)a->comp_type,
a->comp_len,
(GPUVertFetchMode)a->fetch_mode,
&converted_format);
bool is_floating_point_format = (a->comp_type == GPU_COMP_F32);
if (can_use_internal_conversion) {
desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format;
desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
(GPUVertFetchMode)GPU_FETCH_INT;
BLI_assert(converted_format != MTLVertexFormatInvalid);
}
else {
/* The internal implicit conversion is not supported.
* In this case, we need to handle conversion inside the shader.
* This is handled using `format_conversion_mode`.
* `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*).
* This then controls how a given attribute is interpreted. The data will be read
* as specified and then converted appropriately to the correct form.
*
* e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine
* in the shader will read the data as an int, and cast this to floating point
* representation. (Rather than reading the source data as float).
*
* NOTE: Even if full conversion is not supported, we may still partially perform an
* implicit conversion where possible, such as vector truncation or expansion. */
MTLVertexFormat converted_format;
bool can_convert = mtl_vertex_format_resize(
mtl_attr.format, a->comp_len, &converted_format);
desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ?
converted_format :
mtl_attr.format;
desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
(GPUVertFetchMode)a->fetch_mode;
BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format !=
MTLVertexFormatInvalid);
}
desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset;
desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index;
desc.vertex_descriptor.max_attribute_value =
((mtl_attr.location) > desc.vertex_descriptor.max_attribute_value) ?
(mtl_attr.location) :
desc.vertex_descriptor.max_attribute_value;
desc.vertex_descriptor.total_attributes++;
/* SSBO Vertex Fetch attribute bind. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format ==
mtl_attr.format,
"SSBO Vertex Fetch does not support attribute conversion.");
MTLSSBOAttribute ssbo_attr(
mtl_attr.index,
buffer_index,
attribute_offset,
buffer_stride,
MTLShader::ssbo_vertex_type_to_attr_type(
desc.vertex_descriptor.attributes[mtl_attr.location].format),
instanced);
active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
ssbo_attr;
desc.vertex_descriptor.num_ssbo_attributes++;
}
/* NOTE: We set max_attribute_value to the maximum index found; because of this, it
* is possible that we may skip over certain attributes if they were not present in
* the source GPUVertFormat. */
MTL_LOG_INFO(
" -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert "
"components: %d, Fetch Mode %d --> FINAL FORMAT: %d",
mtl_attr.location,
(int)mtl_attr.format,
(int)a->comp_type,
(int)a->comp_len,
(int)a->fetch_mode,
(int)desc.vertex_descriptor.attributes[mtl_attr.location].format);
MTL_LOG_INFO(
" -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, "
"offset: %d)",
(instanced) ? "instance" : "vertex",
name,
mtl_attr.location,
buffer_index,
attribute_offset);
}
}
}
}
if (buffer_added) {
return buffer_index;
}
return -1;
}
id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_count)
{
/* Setup draw call and render pipeline state here. Called by every draw, but setup here so that
* MTLDrawList only needs to perform setup a single time. */
BLI_assert(this);
/* Fetch Metal device. */
MTLContext *ctx = MTLContext::get();
if (!ctx) {
BLI_assert_msg(false, "No context available for rendering.");
return nil;
}
/* Fetch bound shader from context. */
active_shader_ = static_cast<MTLShader *>(ctx->shader);
if (active_shader_ == nullptr || !active_shader_->is_valid()) {
/* Skip drawing if there is no valid Metal shader.
* This will occur if the path through which the shader is prepared
* is invalid (e.g. Python without create-info), or the source shader uses a geometry pass. */
BLI_assert_msg(false, "No valid Metal shader!");
return nil;
}
/* Check if using SSBO Fetch Mode.
* This is an alternative drawing mode to geometry shaders, wherein vertex buffers
* are bound as readable (random-access) GPU buffers and certain descriptor properties
* are passed using Shader uniforms. */
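/* Conceptual sketch (not the actual generated code): instead of a [[stage_in]]
* struct, the generated vertex function receives the vertex buffers as plain device
* pointers and indexes them by vertex/instance ID, e.g.:
* device const uchar *vbo = vertex_buffers[attr.buffer_index];
* float4 value = *(device const float4 *)(vbo + attr.offset + v_id * attr.stride);
* This is why buffer strides and offsets must be passed through as uniforms. */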
bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch();
/* Prepare Vertex Descriptor and extract VertexBuffers to bind. */
MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr};
int num_buffers = 0;
/* Ensure Index Buffer is ready. */
MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
if (mtl_elem != NULL) {
mtl_elem->upload_data();
}
/* Populate vertex descriptor with attribute binding information.
* The vertex descriptor and buffer layout descriptors describe
* how vertex data from bound vertex buffers maps to the
* shader's input.
* A unique vertex descriptor will result in a new PipelineStateObject
* being generated for the currently bound shader. */
prepare_vertex_descriptor_and_bindings(buffers, num_buffers);
/* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer
* data operations are required. */
for (int i = 0; i < num_buffers; i++) {
MTLVertBuf *buf_at_index = buffers[i];
if (buf_at_index == NULL) {
BLI_assert_msg(
false,
"Total buffer count does not match highest buffer index, could be gaps in bindings");
continue;
}
MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
mtlvbo->bind();
}
/* Ensure render pass is active and fetch active RenderCommandEncoder. */
id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();
/* Fetch RenderPassState to enable resource binding for active pass. */
MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();
/* Debug Check: Ensure Frame-buffer instance is not dirty. */
BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty());
/* Bind Shader. */
this->shader_bind();
/* GPU debug markers. */
if (G.debug & G_DEBUG_GPU) {
[rec pushDebugGroup:[NSString stringWithFormat:@"Draw Commands%@ (GPUShader: %s)",
this->elem ? @"(indexed)" : @"",
active_shader_->get_interface()->get_name()]];
[rec insertDebugSignpost:[NSString
stringWithFormat:@"Draw Commands %@ (GPUShader: %s)",
this->elem ? @"(indexed)" : @"",
active_shader_->get_interface()->get_name()]];
}
/* Bind Vertex Buffers and Index Buffers. */
/* SSBO Vertex Fetch Buffer bindings. */
if (uses_ssbo_fetch) {
/* SSBO Vertex Fetch - Bind Index Buffer to appropriate slot -- if used. */
id<MTLBuffer> idx_buffer = nil;
GPUPrimType final_prim_type = this->prim_type;
if (mtl_elem != nullptr) {
/* Fetch index buffer. This function can situationally return an optimized
* index buffer of a different primitive type. If this is the case, `final_prim_type`
* and `v_count` will be updated with the new format.
* NOTE: For indexed rendering, v_count represents the number of indices. */
idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count);
BLI_assert(idx_buffer != nil);
/* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */
int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16;
BLI_assert(uniform_ssbo_index_mode_u16 != -1);
int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 1 : 0;
active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16);
}
else {
idx_buffer = ctx->get_null_buffer();
}
rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
/* Ensure all attributes are set. */
active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec);
/* Bind NULL Buffers for unused vertex data slots. */
id<MTLBuffer> null_buffer = ctx->get_null_buffer();
BLI_assert(null_buffer != nil);
for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
rps.bind_vertex_buffer(null_buffer, 0, i);
}
}
/* Flag whether Indexed rendering is used or not. */
int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering;
BLI_assert(uniform_ssbo_use_indexed != -1);
int uses_indexed_rendering = (mtl_elem != nullptr) ? 1 : 0;
active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
/* Set SSBO-fetch-mode status uniforms. */
BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1);
BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1);
GPU_shader_uniform_int_ex(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
active_shader_->uni_ssbo_input_prim_type_loc,
1,
1,
(const int *)(&final_prim_type));
GPU_shader_uniform_int_ex(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
active_shader_->uni_ssbo_input_vert_count_loc,
1,
1,
(const int *)(&v_count));
}
/* Ensure Context Render Pipeline State is fully setup and ready to execute the draw.
* This should happen after all other final rendering setup is complete. */
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) {
MTL_LOG_ERROR("Failed to prepare and apply render pipeline state.");
BLI_assert(false);
return nil;
}
/* Bind Vertex Buffers. */
for (int i = 0; i < num_buffers; i++) {
MTLVertBuf *buf_at_index = buffers[i];
if (buf_at_index == NULL) {
BLI_assert_msg(
false,
"Total buffer count does not match highest buffer index, could be gaps in bindings");
continue;
}
/* Buffer handle. */
MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
mtlvbo->flag_used();
/* Fetch buffer from MTLVertexBuffer and bind. */
id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer();
BLI_assert(mtl_buffer != nil);
rps.bind_vertex_buffer(mtl_buffer, 0, i);
}
/* Return Render Command Encoder used with setup. */
return rec;
}
void MTLBatch::unbind(id<MTLRenderCommandEncoder> rec)
{
/* Pop bind debug group. */
if (G.debug & G_DEBUG_GPU) {
[rec popDebugGroup];
}
}
void MTLBatch::prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers)
{
/* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound.
*/
MTLStateManager *state_manager = static_cast<MTLStateManager *>(
MTLContext::get()->state_manager);
MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
const MTLShaderInterface *interface = active_shader_->get_interface();
uint16_t attr_mask = interface->get_enabled_attribute_mask();
/* Reset vertex descriptor to default state. */
desc.reset_vertex_descriptor();
/* Fetch Vertex and Instance Buffers. */
Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts),
GPU_BATCH_VBO_MAX_LEN);
Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst),
GPU_BATCH_INST_VBO_MAX_LEN);
/* SSBO Vertex fetch also passes vertex descriptor information into the shader. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
active_shader_->ssbo_vertex_fetch_bind_attributes_begin();
}
/* Resolve Metal vertex buffer bindings. */
/* Vertex Descriptors
* ------------------
* Vertex Descriptors are required to generate a pipeline state, based on the current Batch's
* buffer bindings. These bindings are a unique matching, depending on what input attributes a
* batch has in its buffers, and those which are supported by the shader interface.
*
* We iterate through the buffers and resolve which attributes satisfy the requirements of the
* currently bound shader. We cache this data for a given Batch<->ShaderInterface pairing in a
* VAO cache to avoid the need to recalculate this data. */
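/* E.g. a batch drawn first with a flat-color shader and later with a lighting
* shader stores one VertexDescriptorShaderInterfacePair per interface (up to
* GPU_VAO_STATIC_LEN entries); subsequent draws with either shader reuse the cached
* descriptor and buffer mapping until the batch is flagged GPU_BATCH_DIRTY. */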
bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false};
VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface);
if (descriptor) {
desc.vertex_descriptor = descriptor->vertex_descriptor;
attr_mask = descriptor->attr_mask;
num_buffers = descriptor->num_buffers;
for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) {
if (descriptor->bufferIds[bid].used) {
if (descriptor->bufferIds[bid].is_instance) {
buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id];
buffer_is_instanced[bid] = true;
}
else {
buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id];
buffer_is_instanced[bid] = false;
}
}
}
/* Use cached ssbo attribute binding data. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch);
for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) {
active_shader_->ssbo_vertex_fetch_bind_attribute(
desc.vertex_descriptor.ssbo_attributes[attr_id]);
}
}
}
else {
VertexDescriptorShaderInterfacePair pair{};
pair.interface = interface;
for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) {
pair.bufferIds[i].id = -1;
pair.bufferIds[i].is_instance = 0;
pair.bufferIds[i].used = 0;
}
/* NOTE: Attribute extraction order from the buffer is the reverse of OpenGL's, as we flag an
* attribute once it is found, rather than pre-setting the mask. */
/* Extract Instance attributes (These take highest priority). */
for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
if (mtl_inst[v]) {
MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p", mtl_inst[v]);
int buffer_ind = this->prepare_vertex_binding(
mtl_inst[v], desc, interface, attr_mask, true);
if (buffer_ind >= 0) {
buffers[buffer_ind] = mtl_inst[v];
buffer_is_instanced[buffer_ind] = true;
pair.bufferIds[buffer_ind].id = v;
pair.bufferIds[buffer_ind].used = 1;
pair.bufferIds[buffer_ind].is_instance = 1;
num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
}
}
}
/* Extract Vertex attributes (First-bound vertex buffer takes priority). */
for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
if (mtl_verts[v] != NULL) {
MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p", mtl_verts[v]);
int buffer_ind = this->prepare_vertex_binding(
mtl_verts[v], desc, interface, attr_mask, false);
if (buffer_ind >= 0) {
buffers[buffer_ind] = mtl_verts[v];
buffer_is_instanced[buffer_ind] = false;
pair.bufferIds[buffer_ind].id = v;
pair.bufferIds[buffer_ind].used = 1;
pair.bufferIds[buffer_ind].is_instance = 0;
num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
}
}
}
/* Add to VertexDescriptor cache */
desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch();
pair.attr_mask = attr_mask;
pair.vertex_descriptor = desc.vertex_descriptor;
pair.num_buffers = num_buffers;
if (!this->vao_cache.insert(pair)) {
printf(
"[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n",
GPU_VAO_STATIC_LEN);
}
}
/* DEBUG: verify if our attribute bindings have been fully provided as expected. */
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
if (attr_mask != 0) {
/* Attributes are not necessarily contiguous. */
for (int i = 0; i < active_shader_->get_interface()->get_total_attributes(); i++) {
const MTLShaderInputAttribute &attr = active_shader_->get_interface()->get_attribute(i);
if (attr_mask & (1 << attr.location)) {
MTL_LOG_WARNING(
"Warning: Missing expected attribute '%s' with location: %u in shader %s (attr "
"number: %u)",
active_shader_->get_interface()->get_name_at_offset(attr.name_offset),
attr.location,
active_shader_->name_get(),
i);
/* If an attribute is not included, then format in vertex descriptor should be invalid due
* to nil assignment. */
BLI_assert(desc.vertex_descriptor.attributes[attr.location].format ==
MTLVertexFormatInvalid);
}
}
}
#endif
}
void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
{
#if TRUST_NO_ONE
BLI_assert(v_count > 0 && i_count > 0);
#endif
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLRenderCommandEncoder> rec = this->bind(v_count);
if (rec == nil) {
/* End of draw. */
this->unbind(rec);
return;
}
/* Fetch IndexBuffer and resolve primitive type. */
MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
/* Render using SSBO Vertex Fetch. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
/* Submit draw call with modified vertex count, which reflects vertices per primitive defined
* in the USE_SSBO_VERTEX_FETCH pragma. */
int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type);
int output_num_verts = num_input_primitives *
active_shader_->get_ssbo_vertex_fetch_output_num_verts();
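/* Worked example (illustrative numbers): v_count = 12 with GPU_PRIM_TRIS gives
* num_input_primitives = 4; a shader declaring 6 output vertices per input
* primitive yields output_num_verts = 4 * 6 = 24. */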
BLI_assert_msg(
mtl_vertex_count_fits_primitive_type(
output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()),
"Output Vertex count is not compatible with the requested output vertex primitive type");
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(active_shader_->get_ssbo_vertex_fetch_output_prim_type());
[rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type()
vertexStart:0
vertexCount:output_num_verts
instanceCount:i_count
baseInstance:i_first];
ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count);
}
/* Perform regular draw. */
else if (mtl_elem == NULL) {
/* Primitive type topology emulation. */
if (mtl_needs_topology_emulation(this->prim_type)) {
/* Generate index buffer for primitive types requiring emulation. */
GPUPrimType emulated_prim_type = this->prim_type;
uint32_t emulated_v_count = v_count;
id<MTLBuffer> generated_index_buffer = this->get_emulated_topology_buffer(emulated_prim_type,
emulated_v_count);
BLI_assert(generated_index_buffer != nil);
MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type);
/* Temp: Disable culling for emulated primitive types.
* TODO(Metal): Support face winding in topology buffer. */
[rec setCullMode:MTLCullModeNone];
if (generated_index_buffer != nil) {
BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle ||
emulated_mtl_prim_type == MTLPrimitiveTypeLine);
if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) {
BLI_assert(emulated_v_count % 3 == 0);
}
if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) {
BLI_assert(emulated_v_count % 2 == 0);
}
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(emulated_mtl_prim_type);
[rec drawIndexedPrimitives:emulated_mtl_prim_type
indexCount:emulated_v_count
indexType:MTLIndexTypeUInt32
indexBuffer:generated_index_buffer
indexBufferOffset:0
instanceCount:i_count
baseVertex:v_first
baseInstance:i_first];
}
else {
printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n",
this->prim_type);
}
}
else {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawPrimitives:mtl_prim_type
vertexStart:v_first
vertexCount:v_count
instanceCount:i_count
baseInstance:i_first];
}
ctx->main_command_buffer.register_draw_counters(v_count * i_count);
}
/* Perform indexed draw. */
else {
MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
uint32_t base_index = mtl_elem->index_base_;
uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size);
BLI_assert_msg((v_first_ofs % index_size) == 0,
"Index offset is not 2/4-byte aligned as per Metal spec");
/* Fetch index buffer. May return an index buffer of a differing format,
* if index buffer optimization is used. In these cases, final_prim_type and
* index_count get updated with the new properties. */
GPUPrimType final_prim_type = this->prim_type;
uint index_count = v_count;
id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
BLI_assert(index_buffer != nil);
if (index_buffer != nil) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawIndexedPrimitives:mtl_prim_type
indexCount:index_count
indexType:index_type
indexBuffer:index_buffer
indexBufferOffset:v_first_ofs
instanceCount:i_count
baseVertex:base_index
baseInstance:i_first];
ctx->main_command_buffer.register_draw_counters(index_count * i_count);
}
else {
BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
}
}
/* End of draw. */
this->unbind(rec);
}
void MTLBatch::draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
id<MTLRenderCommandEncoder> rec = this->bind(0);
if (rec == nil) {
printf("Failed to open Render Command encoder for DRAW INDIRECT\n");
/* End of draw. */
this->unbind(rec);
return;
}
/* Fetch indirect buffer Metal handle. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(unwrap(indirect_buf));
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Draw Storage Buffer is nil.");
/* End of draw. */
this->unbind(rec);
return;
}
/* Indirect SSBO vertex fetch calls require the draw command in the buffer to be mutated at
* command encoding time. This takes place within the draw manager when a shader supporting
* SSBO Vertex-Fetch is used. */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(active_shader_->get_ssbo_vertex_fetch_output_prim_type());
/* Issue draw call. */
[rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type()
indirectBuffer:mtl_indirect_buf
indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
/* End of draw. */
this->unbind(rec);
return;
}
/* Unsupported primitive type check. */
BLI_assert_msg(this->prim_type != GPU_PRIM_TRI_FAN,
"TriangleFan is not supported in Metal for Indirect draws.");
/* Fetch IndexBuffer and resolve primitive type. */
MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
if (mtl_needs_topology_emulation(this->prim_type)) {
BLI_assert_msg(false, "Metal Topology emulation unsupported for draw indirect.\n");
/* End of draw. */
this->unbind(rec);
return;
}
if (mtl_elem == NULL) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawPrimitives:mtl_prim_type indirectBuffer:mtl_indirect_buf indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
/* Fetch index buffer. May return an index buffer of a differing format,
* if index buffer optimization is used. In these cases, final_prim_type and
* index_count get updated with the new properties. */
MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
GPUPrimType final_prim_type = this->prim_type;
uint index_count = 0;
/* Disable index optimization for indirect draws. */
mtl_elem->flag_can_optimize(false);
id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
BLI_assert(index_buffer != nil);
if (index_buffer != nil) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawIndexedPrimitives:mtl_prim_type
indexType:index_type
indexBuffer:index_buffer
indexBufferOffset:0
indirectBuffer:mtl_indirect_buf
indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
}
}
/* End of draw. */
this->unbind(rec);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Topology emulation and optimization
* \{ */
id<MTLBuffer> MTLBatch::get_emulated_topology_buffer(GPUPrimType &in_out_prim_type,
uint32_t &in_out_v_count)
{
BLI_assert(in_out_v_count > 0);
/* Determine emulated primitive types. */
GPUPrimType input_prim_type = in_out_prim_type;
uint32_t v_count = in_out_v_count;
GPUPrimType output_prim_type;
switch (input_prim_type) {
case GPU_PRIM_POINTS:
case GPU_PRIM_LINES:
case GPU_PRIM_TRIS:
BLI_assert_msg(false, "Optimal primitive types should not reach here.");
return nil;
break;
case GPU_PRIM_LINES_ADJ:
case GPU_PRIM_TRIS_ADJ:
BLI_assert_msg(false, "Adjacency primitive types should not reach here.");
return nil;
break;
case GPU_PRIM_LINE_STRIP:
case GPU_PRIM_LINE_LOOP:
case GPU_PRIM_LINE_STRIP_ADJ:
output_prim_type = GPU_PRIM_LINES;
break;
case GPU_PRIM_TRI_STRIP:
case GPU_PRIM_TRI_FAN:
output_prim_type = GPU_PRIM_TRIS;
break;
default:
BLI_assert_msg(false, "Invalid primitive type.");
return nil;
}
/* Check if topology buffer exists and is valid. */
if (this->emulated_topology_buffer_ != nullptr &&
(emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count))
{
/* Release existing topology buffer. */
emulated_topology_buffer_->free();
emulated_topology_buffer_ = nullptr;
}
/* Generate new topology index buffer. */
if (this->emulated_topology_buffer_ == nullptr) {
/* Calculate index buffer length. */
uint32_t output_prim_count = 0;
switch (input_prim_type) {
case GPU_PRIM_LINE_STRIP:
case GPU_PRIM_LINE_STRIP_ADJ:
output_prim_count = v_count - 1;
break;
case GPU_PRIM_LINE_LOOP:
output_prim_count = v_count;
break;
case GPU_PRIM_TRI_STRIP:
case GPU_PRIM_TRI_FAN:
output_prim_count = v_count - 2;
break;
default:
BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types.");
break;
}
uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2);
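/* E.g. a GPU_PRIM_LINE_STRIP with v_count = 5 emulates 4 lines (8 indices), while
* a GPU_PRIM_TRI_FAN with v_count = 6 emulates 4 triangles (12 indices). */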
/* Allocate buffer. */
uint32_t buffer_bytes = output_IB_elems * 4;
BLI_assert(buffer_bytes > 0);
this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager()->allocate(
buffer_bytes, true);
/* Populate. */
uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr();
BLI_assert(data != nullptr);
/* TODO(Metal): Support inverse winding modes. */
bool winding_clockwise = false;
UNUSED_VARS(winding_clockwise);
switch (input_prim_type) {
/* Line Loop. */
case GPU_PRIM_LINE_LOOP: {
int line = 0;
for (line = 0; line < output_prim_count - 1; line++) {
data[line * 2 + 0] = line + 0;
data[line * 2 + 1] = line + 1;
}
/* Closing line. */
data[line * 2 + 0] = line + 0;
data[line * 2 + 1] = 0;
} break;
/* Triangle Fan. */
case GPU_PRIM_TRI_FAN: {
for (int triangle = 0; triangle < output_prim_count; triangle++) {
data[triangle * 3 + 0] = 0; /* Always 0 */
data[triangle * 3 + 1] = triangle + 1;
data[triangle * 3 + 2] = triangle + 2;
}
} break;
default:
BLI_assert_msg(false, "Other primitive types do not require emulation.");
return nil;
}
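/* E.g. a line loop over 4 vertices produces (0,1) (1,2) (2,3) (3,0); a triangle
* fan over 5 vertices produces (0,1,2) (0,2,3) (0,3,4). */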
/* Flush. */
this->emulated_topology_buffer_->flush();
/* Assign members relating to current cached IB. */
topology_buffer_input_v_count_ = v_count;
topology_buffer_output_v_count_ = output_IB_elems;
emulated_topology_type_ = input_prim_type;
}
/* Return. */
in_out_v_count = topology_buffer_output_v_count_;
in_out_prim_type = output_prim_type;
return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil;
}
/** \} */
} // namespace blender::gpu