test/source/blender/draw/intern/draw_command.cc
Commit cae6c6d81a by Clément Foucault: GPU: Remove Shader Draw Parameter workaround
Starting with 5.0, we require this extension for both GL and VK.
All of our target hardware supports it with up-to-date drivers.

Some old drivers disabled this extension because of buggy
behavior; we simply drop support for those drivers in 5.0.

This allows us to remove a lot of code, including the last
shader create info override done at startup. It also unlocks
further refactoring of the shader create info into static
classes, reducing binary size among other benefits.

## TODO:
- [x] Remove checks for ARB_shader_draw_parameters
- [x] Remove checks for ARB_clip_control
- [x] Check for the extension on startup for OpenGL
- [x] Check for the extension on startup for Vulkan
- [x] ~~Add user-facing popup message about minimum
      requirements not being met.~~ Done using the same
      popup as for old hardware.
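
For illustration, here is a minimal sketch of the kind of startup check the extension items above describe. This is not the actual Blender implementation: the loader include, function names, and the error path are assumptions, and the real change reuses the existing old-hardware popup instead of aborting.

```cpp
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <epoxy/gl.h> /* Assumption: any GL function loader would do. */

/* True if the current GL context advertises `name` in its extension list. */
static bool has_gl_extension(const char *name)
{
  GLint count = 0;
  glGetIntegerv(GL_NUM_EXTENSIONS, &count);
  for (GLint i = 0; i < count; i++) {
    const char *ext = reinterpret_cast<const char *>(glGetStringi(GL_EXTENSIONS, GLuint(i)));
    if (ext && std::strcmp(ext, name) == 0) {
      return true;
    }
  }
  return false;
}

void check_minimum_requirements()
{
  if (!has_gl_extension("GL_ARB_shader_draw_parameters") ||
      !has_gl_extension("GL_ARB_clip_control"))
  {
    /* Hypothetical error path; the real code shows a popup instead. */
    std::fprintf(stderr, "Unsupported GPU driver: missing required GL extensions.\n");
    std::exit(EXIT_FAILURE);
  }
}
```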

Pull Request: https://projects.blender.org/blender/blender/pulls/142334
2025-08-12 14:04:41 +02:00


/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup draw
*/
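/* The command structs defined in `draw_command.hh` are recorded into passes
 * (see `draw_pass.hh`) and either executed against a `RecordingState`, which
 * deduplicates redundant shader binds and pipeline state changes, or
 * serialized to strings for debugging. */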
#include "GPU_batch.hh"
#include "GPU_capabilities.hh"
#include "GPU_compute.hh"
#include "GPU_debug.hh"
#include "draw_command.hh"
#include "draw_pass.hh"
#include "draw_shader.hh"
#include "draw_view.hh"
#include <bitset>
#include <sstream>
namespace blender::draw::command {
static gpu::Batch *procedural_batch_get(GPUPrimType primitive)
{
switch (primitive) {
case GPU_PRIM_POINTS:
return GPU_batch_procedural_points_get();
case GPU_PRIM_LINES:
return GPU_batch_procedural_lines_get();
case GPU_PRIM_TRIS:
return GPU_batch_procedural_triangles_get();
case GPU_PRIM_TRI_STRIP:
return GPU_batch_procedural_triangle_strips_get();
default:
/* Add new one as needed. */
BLI_assert_unreachable();
return nullptr;
}
}
/* -------------------------------------------------------------------- */
/** \name Commands Execution
* \{ */
void ShaderBind::execute(RecordingState &state) const
{
state.shader_use_specialization = !GPU_shader_get_default_constant_state(shader).is_empty();
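/* Rebind even if the shader did not change when it uses specialization
 * constants, as the constant state may differ from the previous bind. */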
if (assign_if_different(state.shader, shader) || state.shader_use_specialization) {
GPU_shader_bind(shader, state.specialization_constants_get());
}
/* Signal that we can reload the default for a different specialization later on.
* However, we keep the specialization_constants state around for compute shaders. */
state.specialization_constants_in_use = false;
}
void FramebufferBind::execute() const
{
GPU_framebuffer_bind(*framebuffer);
}
void SubPassTransition::execute() const
{
/* TODO(fclem): Require framebuffer bind to always be part of the pass so that we can track it
* inside RecordingState. */
GPUFrameBuffer *framebuffer = GPU_framebuffer_active_get();
/* Unpack to the real enum type. */
const GPUAttachmentState states[9] = {
GPUAttachmentState(depth_state),
GPUAttachmentState(color_states[0]),
GPUAttachmentState(color_states[1]),
GPUAttachmentState(color_states[2]),
GPUAttachmentState(color_states[3]),
GPUAttachmentState(color_states[4]),
GPUAttachmentState(color_states[5]),
GPUAttachmentState(color_states[6]),
GPUAttachmentState(color_states[7]),
};
GPU_framebuffer_subpass_transition_array(framebuffer, states, ARRAY_SIZE(states));
}
void ResourceBind::execute() const
{
if (slot == -1) {
return;
}
switch (type) {
case ResourceBind::Type::Sampler:
GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot);
break;
case ResourceBind::Type::BufferSampler:
GPU_vertbuf_bind_as_texture(is_reference ? *vertex_buf_ref : vertex_buf, slot);
break;
case ResourceBind::Type::Image:
GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot);
break;
case ResourceBind::Type::UniformBuf:
GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot);
break;
case ResourceBind::Type::StorageBuf:
GPU_storagebuf_bind(is_reference ? *storage_buf_ref : storage_buf, slot);
break;
case ResourceBind::Type::UniformAsStorageBuf:
GPU_uniformbuf_bind_as_ssbo(is_reference ? *uniform_buf_ref : uniform_buf, slot);
break;
case ResourceBind::Type::VertexAsStorageBuf:
GPU_vertbuf_bind_as_ssbo(is_reference ? *vertex_buf_ref : vertex_buf, slot);
break;
case ResourceBind::Type::IndexAsStorageBuf:
GPU_indexbuf_bind_as_ssbo(is_reference ? *index_buf_ref : index_buf, slot);
break;
}
}
void PushConstant::execute(RecordingState &state) const
{
if (location == -1) {
return;
}
switch (type) {
case PushConstant::Type::IntValue:
GPU_shader_uniform_int_ex(state.shader, location, comp_len, array_len, int4_value);
break;
case PushConstant::Type::IntReference:
GPU_shader_uniform_int_ex(state.shader, location, comp_len, array_len, int_ref);
break;
case PushConstant::Type::FloatValue:
GPU_shader_uniform_float_ex(state.shader, location, comp_len, array_len, float4_value);
break;
case PushConstant::Type::FloatReference:
GPU_shader_uniform_float_ex(state.shader, location, comp_len, array_len, float_ref);
break;
}
}
void SpecializeConstant::execute(command::RecordingState &state) const
{
/* All specialization constants should exist as they are not optimized out like uniforms. */
BLI_assert(location != -1);
if (state.specialization_constants_in_use == false) {
state.specialization_constants = GPU_shader_get_default_constant_state(this->shader);
state.specialization_constants_in_use = true;
}
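/* The updated constants only take effect at the next shader or batch bind,
 * or the next dispatch, through `state.specialization_constants_get()`. */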
switch (type) {
case SpecializeConstant::Type::IntValue:
state.specialization_constants.set_value(location, int_value);
break;
case SpecializeConstant::Type::IntReference:
state.specialization_constants.set_value(location, *int_ref);
break;
case SpecializeConstant::Type::UintValue:
state.specialization_constants.set_value(location, uint_value);
break;
case SpecializeConstant::Type::UintReference:
state.specialization_constants.set_value(location, *uint_ref);
break;
case SpecializeConstant::Type::FloatValue:
state.specialization_constants.set_value(location, float_value);
break;
case SpecializeConstant::Type::FloatReference:
state.specialization_constants.set_value(location, *float_ref);
break;
case SpecializeConstant::Type::BoolValue:
state.specialization_constants.set_value(location, bool_value);
break;
case SpecializeConstant::Type::BoolReference:
state.specialization_constants.set_value(location, *bool_ref);
break;
}
}
void Draw::execute(RecordingState &state) const
{
state.front_facing_set(res_index.has_inverted_handedness());
/* Use same logic as in `finalize_commands`. */
uint instance_first = 0;
if (res_index.raw > 0) {
instance_first = state.instance_offset;
state.instance_offset += instance_len;
}
if (is_primitive_expansion()) {
/* Expanded draw-call. */
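/* Each input primitive is expanded into `expand_prim_len` primitives of type
 * `expand_prim_type`; the original batch is only bound as a resource below
 * and the shader fetches its geometry manually. */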
IndexRange expanded_range = GPU_batch_draw_expanded_parameter_get(
batch->prim_type,
GPUPrimType(expand_prim_type),
vertex_len,
vertex_first,
expand_prim_len);
if (expanded_range.is_empty()) {
/* Nothing to draw; an empty range can also trigger asserts in GPU_batch_bind_as_resources. */
return;
}
GPU_batch_bind_as_resources(batch, state.shader, state.specialization_constants_get());
gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand_prim_type));
GPU_batch_set_shader(gpu_batch, state.shader, state.specialization_constants_get());
GPU_batch_draw_advanced(
gpu_batch, expanded_range.start(), expanded_range.size(), instance_first, instance_len);
}
else {
/* Regular draw-call. */
GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get());
GPU_batch_draw_advanced(batch, vertex_first, vertex_len, instance_first, instance_len);
}
}
void DrawMulti::execute(RecordingState &state) const
{
DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_;
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
uint group_index = this->group_first;
while (group_index != uint(-1)) {
const DrawGroup &group = groups[group_index];
if (group.vertex_len > 0) {
gpu::Batch *batch = group.desc.gpu_batch;
if (GPUPrimType(group.desc.expand_prim_type) != GPU_PRIM_NONE) {
/* Bind original batch as resource and use a procedural batch to issue the draw-call. */
GPU_batch_bind_as_resources(
group.desc.gpu_batch, state.shader, state.specialization_constants_get());
batch = procedural_batch_get(GPUPrimType(group.desc.expand_prim_type));
}
GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get());
constexpr intptr_t stride = sizeof(DrawCommand);
/* We have 2 indirect commands reserved per draw group. */
intptr_t offset = stride * group_index * 2;
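/* E.g. for group_index 2, the negatively scaled draw-call is read at byte
 * offset 4 * sizeof(DrawCommand) and the front-facing one at
 * 5 * sizeof(DrawCommand). */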
/* Draw negatively scaled geometry first. */
if (group.len - group.front_facing_len > 0) {
state.front_facing_set(true);
GPU_batch_draw_indirect(batch, indirect_buf, offset);
}
if (group.front_facing_len > 0) {
state.front_facing_set(false);
GPU_batch_draw_indirect(batch, indirect_buf, offset + stride);
}
}
group_index = group.next;
}
}
void DrawIndirect::execute(RecordingState &state) const
{
state.front_facing_set(res_index.has_inverted_handedness());
GPU_batch_draw_indirect(batch, *indirect_buf, 0);
}
void Dispatch::execute(RecordingState &state) const
{
if (is_reference) {
GPU_compute_dispatch(
state.shader, size_ref->x, size_ref->y, size_ref->z, state.specialization_constants_get());
}
else {
GPU_compute_dispatch(
state.shader, size.x, size.y, size.z, state.specialization_constants_get());
}
}
void DispatchIndirect::execute(RecordingState &state) const
{
GPU_compute_dispatch_indirect(state.shader, *indirect_buf, state.specialization_constants_get());
}
void Barrier::execute() const
{
GPU_memory_barrier(type);
}
void Clear::execute() const
{
GPUFrameBuffer *fb = GPU_framebuffer_active_get();
GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil);
}
void ClearMulti::execute() const
{
GPUFrameBuffer *fb = GPU_framebuffer_active_get();
GPU_framebuffer_multi_clear(fb, (const float(*)[4])colors);
}
void StateSet::execute(RecordingState &recording_state) const
{
bool state_changed = assign_if_different(recording_state.pipeline_state, new_state);
bool clip_changed = assign_if_different(recording_state.clip_plane_count, clip_plane_count);
if (!state_changed && !clip_changed) {
return;
}
GPU_state_set(to_write_mask(new_state),
to_blend(new_state),
to_face_cull_test(new_state),
to_depth_test(new_state),
to_stencil_test(new_state),
to_stencil_op(new_state),
to_provoking_vertex(new_state));
if (new_state & DRW_STATE_CLIP_CONTROL_UNIT_RANGE) {
GPU_clip_control_unit_range(true);
}
else {
GPU_clip_control_unit_range(false);
}
if (new_state & DRW_STATE_SHADOW_OFFSET) {
GPU_shadow_offset(true);
}
else {
GPU_shadow_offset(false);
}
/* TODO: this should be part of shader state. */
GPU_clip_distances(recording_state.clip_plane_count);
if (new_state & DRW_STATE_IN_FRONT_SELECT) {
/* XXX `GPU_depth_range` is not a perfect solution
* since very distant geometries can still be occluded.
* Also the depth test precision of these geometries is impaired.
* However, it solves the selection for the vast majority of cases. */
GPU_depth_range(0.0f, 0.01f);
}
else {
GPU_depth_range(0.0f, 1.0f);
}
if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) {
GPU_program_point_size(true);
}
else {
GPU_program_point_size(false);
}
}
void StateSet::set(DRWState state)
{
RecordingState recording_state;
StateSet{state, 0}.execute(recording_state);
/* This function is used for cleaning the state for the viewport drawing.
* Make sure to reset textures resources to avoid feedback loop when rendering (see #131652). */
GPU_texture_unbind_all();
GPU_texture_image_unbind_all();
GPU_uniformbuf_debug_unbind_all();
GPU_storagebuf_debug_unbind_all();
/* Remnant of the legacy draw manager. Kept to avoid regressions, but might become unneeded. */
GPU_point_size(5);
GPU_line_smooth(false);
GPU_line_width(0.0f);
}
void StencilSet::execute() const
{
GPU_stencil_write_mask_set(write_mask);
GPU_stencil_compare_mask_set(compare_mask);
GPU_stencil_reference_set(reference);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands Serialization for debugging
* \{ */
std::string ShaderBind::serialize() const
{
return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")";
}
std::string FramebufferBind::serialize() const
{
return std::string(".framebuffer_bind(") +
(*framebuffer == nullptr ? "nullptr" : GPU_framebuffer_get_name(*framebuffer)) + ")";
}
std::string SubPassTransition::serialize() const
{
auto to_str = [](GPUAttachmentState state) {
return (state != GPU_ATTACHMENT_IGNORE) ?
((state == GPU_ATTACHMENT_WRITE) ? "write" : "read") :
"ignore";
};
return std::string(".subpass_transition(\n") +
"depth=" + to_str(GPUAttachmentState(depth_state)) + ",\n" +
"color0=" + to_str(GPUAttachmentState(color_states[0])) + ",\n" +
"color1=" + to_str(GPUAttachmentState(color_states[1])) + ",\n" +
"color2=" + to_str(GPUAttachmentState(color_states[2])) + ",\n" +
"color3=" + to_str(GPUAttachmentState(color_states[3])) + ",\n" +
"color4=" + to_str(GPUAttachmentState(color_states[4])) + ",\n" +
"color5=" + to_str(GPUAttachmentState(color_states[5])) + ",\n" +
"color6=" + to_str(GPUAttachmentState(color_states[6])) + ",\n" +
"color7=" + to_str(GPUAttachmentState(color_states[7])) + "\n)";
}
std::string ResourceBind::serialize() const
{
switch (type) {
case Type::Sampler:
return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ", sampler=" + sampler.to_string() + ")";
case Type::BufferSampler:
return std::string(".bind_vertbuf_as_texture") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::Image:
return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::UniformBuf:
return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::StorageBuf:
return std::string(".bind_storage_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::UniformAsStorageBuf:
return std::string(".bind_uniform_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::VertexAsStorageBuf:
return std::string(".bind_vertbuf_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::IndexAsStorageBuf:
return std::string(".bind_indexbuf_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
default:
BLI_assert_unreachable();
return "";
}
}
std::string PushConstant::serialize() const
{
std::stringstream ss;
for (int i = 0; i < array_len; i++) {
switch (comp_len) {
case 1:
switch (type) {
case Type::IntValue:
ss << int1_value;
break;
case Type::IntReference:
ss << int_ref[i];
break;
case Type::FloatValue:
ss << float1_value;
break;
case Type::FloatReference:
ss << float_ref[i];
break;
}
break;
case 2:
switch (type) {
case Type::IntValue:
ss << int2_value;
break;
case Type::IntReference:
ss << int2_ref[i];
break;
case Type::FloatValue:
ss << float2_value;
break;
case Type::FloatReference:
ss << float2_ref[i];
break;
}
break;
case 3:
switch (type) {
case Type::IntValue:
ss << int3_value;
break;
case Type::IntReference:
ss << int3_ref[i];
break;
case Type::FloatValue:
ss << float3_value;
break;
case Type::FloatReference:
ss << float3_ref[i];
break;
}
break;
case 4:
switch (type) {
case Type::IntValue:
ss << int4_value;
break;
case Type::IntReference:
ss << int4_ref[i];
break;
case Type::FloatValue:
ss << float4_value;
break;
case Type::FloatReference:
ss << float4_ref[i];
break;
}
break;
case 16:
switch (type) {
case Type::IntValue:
case Type::IntReference:
BLI_assert_unreachable();
break;
case Type::FloatValue:
ss << float4x4(
(&float4_value)[0], (&float4_value)[1], (&float4_value)[2], (&float4_value)[3]);
break;
case Type::FloatReference:
ss << *float4x4_ref;
break;
}
break;
}
if (i < array_len - 1) {
ss << ", ";
}
}
return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")";
}
std::string SpecializeConstant::serialize() const
{
std::stringstream ss;
switch (type) {
case Type::IntValue:
ss << int_value;
break;
case Type::UintValue:
ss << uint_value;
break;
case Type::FloatValue:
ss << float_value;
break;
case Type::BoolValue:
ss << bool_value;
break;
case Type::IntReference:
ss << *int_ref;
break;
case Type::UintReference:
ss << *uint_ref;
break;
case Type::FloatReference:
ss << *float_ref;
break;
case Type::BoolReference:
ss << *bool_ref;
break;
}
return std::string(".specialize_constant(") + std::to_string(location) + ", data=" + ss.str() +
")";
}
std::string Draw::serialize() const
{
std::string inst_len = std::to_string(instance_len);
std::string vert_len = (vertex_len == uint(-1)) ? "from_batch" : std::to_string(vertex_len);
std::string vert_first = (vertex_first == uint(-1)) ? "from_batch" :
std::to_string(vertex_first);
return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len +
", vert_first=" + vert_first + ", res_id=" + std::to_string(res_index.resource_index()) +
")";
}
std::string DrawMulti::serialize(const std::string &line_prefix) const
{
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(),
multi_draw_buf->prototype_count_);
/* This emulates the GPU sorting but without the unstable draw order. */
std::sort(
prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) {
return (a.group_id < b.group_id) ||
(a.group_id == b.group_id && a.res_index > b.res_index);
});
/* Compute prefix sum to have correct offsets. */
uint prefix_sum = 0u;
for (DrawGroup &group : groups) {
group.start = prefix_sum;
prefix_sum += group.front_facing_counter + group.back_facing_counter;
}
std::stringstream ss;
uint group_len = 0;
uint group_index = this->group_first;
while (group_index != uint(-1)) {
const DrawGroup &grp = groups[group_index];
ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")";
intptr_t offset = grp.start;
if (grp.back_facing_counter > 0) {
for (DrawPrototype &proto : prototypes.slice_safe({offset, grp.back_facing_counter})) {
BLI_assert(proto.group_id == group_index);
ResourceIndex res_index(proto.res_index);
BLI_assert(res_index.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(res_index.resource_index()) << ", back_face)";
}
offset += grp.back_facing_counter;
}
if (grp.front_facing_counter > 0) {
for (DrawPrototype &proto : prototypes.slice_safe({offset, grp.front_facing_counter})) {
BLI_assert(proto.group_id == group_index);
ResourceIndex res_index(proto.res_index);
BLI_assert(!res_index.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(res_index.resource_index()) << ", front_face)";
}
}
group_index = grp.next;
group_len++;
}
ss << std::endl;
return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str();
}
std::string DrawIndirect::serialize() const
{
return std::string(".draw_indirect()");
}
std::string Dispatch::serialize() const
{
int3 sz = is_reference ? *size_ref : size;
return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) +
", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")";
}
std::string DispatchIndirect::serialize() const
{
return std::string(".dispatch_indirect()");
}
std::string Barrier::serialize() const
{
/* TODO(@fclem): Better serialization... */
return std::string(".barrier(") + std::to_string(type) + ")";
}
std::string Clear::serialize() const
{
std::stringstream ss;
if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) {
ss << "color=" << color;
if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) {
ss << "depth=" << depth;
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << "stencil=0b" << std::bitset<8>(stencil) << ")";
}
return std::string(".clear(") + ss.str() + ")";
}
std::string ClearMulti::serialize() const
{
std::stringstream ss;
for (float4 color : Span<float4>(colors, colors_len)) {
ss << color << ", ";
}
return std::string(".clear_multi(colors={") + ss.str() + "})";
}
std::string StateSet::serialize() const
{
/* TODO(@fclem): Better serialization... */
return std::string(".state_set(") + std::to_string(new_state) + ")";
}
std::string StencilSet::serialize() const
{
std::stringstream ss;
ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", reference=0b"
<< std::bitset<8>(reference) << ", compare_mask=0b" << std::bitset<8>(compare_mask) << ")";
return ss.str();
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands buffers binding / command / resource ID generation
* \{ */
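/* `finalize_commands` writes one resource index per instance into
 * `resource_id_buf`. Worked example with hypothetical numbers: two draws with
 * `instance_len` 3 and 2 and resource indices 7 and 9 produce
 * [0, 7, 7, 7, 9, 9], index 0 being the null handle written by
 * `generate_commands`. */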
void DrawCommandBuf::finalize_commands(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
SubPassVector &sub_passes,
uint &resource_id_count,
ResourceIdBuf &resource_id_buf)
{
for (const Header &header : headers) {
if (header.type == Type::SubPass) {
/** WARNING: Recursive. */
auto &sub = sub_passes[int64_t(header.index)];
finalize_commands(
sub.headers_, sub.commands_, sub_passes, resource_id_count, resource_id_buf);
}
if (header.type != Type::Draw) {
continue;
}
Draw &cmd = commands[header.index].draw;
int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(
cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len);
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
if (cmd.vertex_len == uint(-1)) {
cmd.vertex_len = batch_vert_len;
}
/* NOTE: Only do this if a handle is present. If a draw-call is using instancing with null
* handle, the shader should not rely on `resource_id` at ***all***. This allows procedural
* instanced draw-calls with lots of instances with no overhead. */
/* TODO(fclem): Think about either fixing this feature or removing support for instancing all
* together. */
if (cmd.res_index.raw > 0) {
/* Save correct offset to start of resource_id buffer region for this draw. */
uint instance_first = resource_id_count;
resource_id_count += cmd.instance_len;
/* Ensure the buffer is big enough. */
resource_id_buf.get_or_resize(resource_id_count - 1);
/* Copy the resource id for all instances. */
uint index = cmd.res_index.resource_index();
for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) {
resource_id_buf[i] = index;
}
}
}
}
void DrawCommandBuf::generate_commands(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
SubPassVector &sub_passes)
{
/* First instance ID contains the null handle with identity transform.
* This is referenced for draw-calls with no handle. */
resource_id_buf_.get_or_resize(0) = 0;
resource_id_count_ = 1;
finalize_commands(headers, commands, sub_passes, resource_id_count_, resource_id_buf_);
resource_id_buf_.push_update();
}
void DrawCommandBuf::bind(RecordingState & /*state*/)
{
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
}
void DrawMultiBuf::generate_commands(Vector<Header, 0> & /*headers*/,
Vector<Undetermined, 0> & /*commands*/,
VisibilityBuf &visibility_buf,
int visibility_word_per_draw,
int view_len,
bool use_custom_ids)
{
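/* Three phases: (1) on the CPU, prefix sum over groups and extraction of the
 * final batch parameters, (2) upload of the group and prototype buffers,
 * (3) a compute shader pass that converts visible prototypes into indirect
 * draw commands. */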
GPU_debug_group_begin("DrawMultiBuf.bind");
resource_id_count_ = 0u;
for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) {
/* Compute prefix sum of all instances of previous groups. */
group.start = resource_id_count_;
resource_id_count_ += group.len;
int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(group.desc.gpu_batch,
&batch_vert_len,
&batch_vert_first,
&batch_base_index,
&batch_inst_len);
group.vertex_len = group.desc.vertex_len == 0 ? batch_vert_len : group.desc.vertex_len;
group.vertex_first = group.desc.vertex_first == -1 ? batch_vert_first :
group.desc.vertex_first;
group.base_index = batch_base_index;
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
UNUSED_VARS_NDEBUG(batch_inst_len);
if (group.desc.expand_prim_type != GPU_PRIM_NONE) {
/* Expanded draw-call. */
IndexRange vert_range = GPU_batch_draw_expanded_parameter_get(
group.desc.gpu_batch->prim_type,
GPUPrimType(group.desc.expand_prim_type),
group.vertex_len,
group.vertex_first,
group.desc.expand_prim_len);
group.vertex_first = vert_range.start();
group.vertex_len = vert_range.size();
/* Override base index to -1, as the generated draw-call will not use an index buffer and
* will do the indirection manually inside the shader. */
group.base_index = -1;
}
/* Reset counters to 0 for the GPU. */
group.total_counter = group.front_facing_counter = group.back_facing_counter = 0;
}
group_buf_.push_update();
prototype_buf_.push_update();
/* Allocate enough for the expansion pass. */
resource_id_buf_.get_or_resize(resource_id_count_ * view_len * (use_custom_ids ? 2 : 1));
/* Two commands per group (inverted and non-inverted scale). */
command_buf_.get_or_resize(group_count_ * 2);
if (prototype_count_ > 0) {
gpu::Shader *shader = DRW_shader_draw_command_generate_get();
GPU_shader_bind(shader);
GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_);
GPU_shader_uniform_1i(shader, "visibility_word_per_draw", visibility_word_per_draw);
GPU_shader_uniform_1i(shader, "view_len", view_len);
GPU_shader_uniform_1i(shader, "view_shift", log2_ceil_u(view_len));
GPU_shader_uniform_1b(shader, "use_custom_ids", use_custom_ids);
GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo_binding(shader, "group_buf"));
GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo_binding(shader, "visibility_buf"));
GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo_binding(shader, "prototype_buf"));
GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo_binding(shader, "command_buf"));
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1);
/* TODO(@fclem): Investigate moving the barrier in the bind function. */
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
GPU_storagebuf_sync_as_indirect_buffer(command_buf_);
}
GPU_debug_group_end();
}
void DrawMultiBuf::bind(RecordingState & /*state*/)
{
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
}
/** \} */
}  // namespace blender::draw::command